Skip to main content

uor_addr/composition/
canonicalize.rs

1//! Shared byte-level canonicalize disciplines for the five categorical
2//! operations on the Atlas image inside E₈ per wiki [ADR-061] §(3).
3//!
4//! Each function operates on the **raw digest bytes** of operand
5//! κ-labels — the lowercase-hex digest body, decoded back to its 32- or
6//! 64-byte raw form — and emits canonical-form bytes that the
7//! composition shape's ψ-pipeline folds into the composed κ-label via
8//! the bound σ-axis.
9//!
10//! Per CA-5, these are uor-addr's realization commitments under
11//! ADR-061 §(3). The framework names each operation's algebraic
12//! structure (commutative binary product, 2-element equivalence
13//! relation, 2-class partition, 24-element equivalence relation,
14//! identity); the realization commits the specific byte-level relation
15//! that implements that algebraic structure.
16//!
17//! [ADR-061]: https://github.com/UOR-Foundation/UOR-Framework/wiki/ADR-061
18
19#![cfg(feature = "alloc")]
20
21use alloc::vec::Vec;
22
23use crate::composition::CompositionFailure;
24use crate::label::KappaLabel;
25
26extern crate alloc;
27
28// ─── Operand parsing ─────────────────────────────────────────────────
29
30/// Decode a κ-label's raw digest bytes (the lowercase-hex body after
31/// the σ-axis prefix). Returns the σ-axis name (the prefix without
32/// trailing `:`) and the raw digest bytes (32 or 64 bytes).
33pub fn decode_operand<const N: usize>(
34    operand: &KappaLabel<N>,
35) -> Result<(&str, Vec<u8>), CompositionFailure> {
36    let axis = operand
37        .sigma_axis()
38        .ok_or(CompositionFailure::MalformedOperand)?;
39    let hex_digest = operand
40        .sigma_axis_digest_hex()
41        .ok_or(CompositionFailure::MalformedOperand)?;
42    if hex_digest.len() % 2 != 0 {
43        return Err(CompositionFailure::MalformedOperand);
44    }
45    let mut raw = Vec::with_capacity(hex_digest.len() / 2);
46    let bytes = hex_digest.as_bytes();
47    for pair in bytes.chunks_exact(2) {
48        let hi = hex_nibble(pair[0]).ok_or(CompositionFailure::MalformedOperand)?;
49        let lo = hex_nibble(pair[1]).ok_or(CompositionFailure::MalformedOperand)?;
50        raw.push((hi << 4) | lo);
51    }
52    Ok((axis, raw))
53}
54
55/// Lowercase-hex nibble decoder. Returns `None` for any non-lowercase-hex
56/// ASCII byte. (Pipeline-emitted κ-labels are lowercase-hex per
57/// ADR-058's emission discipline.)
58fn hex_nibble(b: u8) -> Option<u8> {
59    match b {
60        b'0'..=b'9' => Some(b - b'0'),
61        b'a'..=b'f' => Some(10 + b - b'a'),
62        _ => None,
63    }
64}
65
66/// Validate that an operand's σ-axis matches the expected axis (per
67/// CA-3 σ-axis homogeneity).
68pub fn check_axis(
69    operand_axis: &str,
70    expected_axis: &'static str,
71) -> Result<(), CompositionFailure> {
72    if operand_axis == expected_axis {
73        Ok(())
74    } else {
75        // Convert the operand's borrowed axis name to a static str for
76        // the failure variant. We compare against the five admissible
77        // axes; anything else is rejected as MalformedOperand.
78        let static_axis = match operand_axis {
79            "sha256" => "sha256",
80            "blake3" => "blake3",
81            "sha3-256" => "sha3-256",
82            "keccak256" => "keccak256",
83            "sha512" => "sha512",
84            _ => return Err(CompositionFailure::MalformedOperand),
85        };
86        Err(CompositionFailure::OperandSigmaAxisMismatch {
87            expected_axis,
88            operand_axis: static_axis,
89        })
90    }
91}
92
93// ─── CS-G2 — commutative binary product (lex-min-first ordering) ──────
94
95/// CS-G2 canonicalize discipline: the commutative binary product of
96/// two operand κ-labels. Per wiki ADR-061 §(3) the framework names the
97/// algebraic structure (commutativity at the C-level per ADR-059); the
98/// realization commits lex-min-first ordering as the byte-level rule
99/// realizing that algebra.
100///
101/// Returns the concatenation `lo || hi` of the two operand byte
102/// representations, where `(lo, hi) = (min(a, b), max(a, b))` under
103/// bytewise lexicographic order. Commutativity is structural:
104/// `canonicalize_g2(a, b)` and `canonicalize_g2(b, a)` produce
105/// byte-identical canonical forms.
106pub fn canonicalize_g2<const N: usize>(left: &KappaLabel<N>, right: &KappaLabel<N>) -> Vec<u8> {
107    let l = left.as_bytes();
108    let r = right.as_bytes();
109    let mut out = Vec::with_capacity(N + N);
110    if l <= r {
111        out.extend_from_slice(l);
112        out.extend_from_slice(r);
113    } else {
114        out.extend_from_slice(r);
115        out.extend_from_slice(l);
116    }
117    out
118}
119
120// ─── CS-F4 — ± involution quotient (bitwise-complement mirror) ────────
121
122/// CS-F4 canonicalize discipline: the 2-element equivalence relation
123/// on operands under the ± mirror involution. Per wiki ADR-061 §(3)
124/// the framework names the algebraic structure (a 2-element
125/// equivalence relation per ADR-059); the realization commits
126/// bitwise-complement of the raw digest as the partition-inducing
127/// mirror.
128///
129/// The canonical representative is the lex-min of the 2-element class:
130/// compare the operand's raw digest with its bitwise-complement; the
131/// canonical form is the σ-axis prefix concatenated with the lex-min
132/// raw digest re-encoded as lowercase hex.
133pub fn canonicalize_f4<const N: usize>(
134    operand: &KappaLabel<N>,
135) -> Result<Vec<u8>, CompositionFailure> {
136    let (axis, raw) = decode_operand(operand)?;
137    let complement: Vec<u8> = raw.iter().map(|b| !b).collect();
138    let canon_raw: &[u8] = if raw[..] <= complement[..] {
139        &raw[..]
140    } else {
141        &complement[..]
142    };
143    Ok(emit_canonical(axis, canon_raw))
144}
145
146// ─── CS-E6 — degree-partition filtration (mod-9 partition) ────────────
147
148/// The two degree-partition tag values per ADR-059's 64:8 vertex
149/// partition of the Atlas. degree-5 vertices outnumber degree-6 by 8:1.
150const DEGREE_5_TAG: u8 = 0x05;
151const DEGREE_6_TAG: u8 = 0x06;
152
153/// CS-E6 canonicalize discipline: the 2-class partition with 8:1
154/// population ratio per ADR-059. Per wiki ADR-061 §(3) the framework
155/// names the algebraic structure (a 2-class partition with population
156/// ratio 8:1 per ADR-059); the realization commits the partition
157/// derived from `first_raw_digest_byte mod 9`.
158///
159/// The canonical form is `[degree_class_tag] || operand.as_bytes()` —
160/// a one-byte tag (`0x05` for degree-5, `0x06` for degree-6) prepended
161/// to the operand's full κ-label bytes. Total width = `N + 1` per
162/// wiki ADR-061 §(2).
163pub fn canonicalize_e6<const N: usize>(
164    operand: &KappaLabel<N>,
165) -> Result<Vec<u8>, CompositionFailure> {
166    let (_axis, raw) = decode_operand(operand)?;
167    if raw.is_empty() {
168        return Err(CompositionFailure::MalformedOperand);
169    }
170    let tag = match raw[0] % 9 {
171        0..=7 => DEGREE_5_TAG,
172        8 => DEGREE_6_TAG,
173        _ => unreachable!("u8 % 9 is in 0..=8"),
174    };
175    let mut out = Vec::with_capacity(1 + N);
176    out.push(tag);
177    out.extend_from_slice(operand.as_bytes());
178    Ok(out)
179}
180
181// ─── CS-E7 — S₄-orbit augmentation (quarter-permutation orbit) ────────
182
183/// The 24 permutations of S₄ as quarter-index arrays. Generated
184/// lexicographically — the canonical-form lex-min property below
185/// depends on enumerating every member of the orbit, not on the
186/// enumeration order.
187const S4_PERMUTATIONS: [[usize; 4]; 24] = [
188    [0, 1, 2, 3],
189    [0, 1, 3, 2],
190    [0, 2, 1, 3],
191    [0, 2, 3, 1],
192    [0, 3, 1, 2],
193    [0, 3, 2, 1],
194    [1, 0, 2, 3],
195    [1, 0, 3, 2],
196    [1, 2, 0, 3],
197    [1, 2, 3, 0],
198    [1, 3, 0, 2],
199    [1, 3, 2, 0],
200    [2, 0, 1, 3],
201    [2, 0, 3, 1],
202    [2, 1, 0, 3],
203    [2, 1, 3, 0],
204    [2, 3, 0, 1],
205    [2, 3, 1, 0],
206    [3, 0, 1, 2],
207    [3, 0, 2, 1],
208    [3, 1, 0, 2],
209    [3, 1, 2, 0],
210    [3, 2, 0, 1],
211    [3, 2, 1, 0],
212];
213
214/// CS-E7 canonicalize discipline: the 24-element equivalence relation
215/// on operands under the S₄ quarter-permutation orbit. Per wiki
216/// ADR-061 §(3) the framework names the algebraic structure (a
217/// 24-element equivalence relation per ADR-059); the realization
218/// commits the S₄ action by quarter-permutation of the raw digest.
219///
220/// The raw digest is partitioned into 4 equal-width quarters; the 24
221/// permutations of S₄ generate 24 candidate raw-digest byte sequences;
222/// the canonical representative is the lex-min of the 24-element
223/// orbit. The σ-axis prefix is preserved.
224pub fn canonicalize_e7<const N: usize>(
225    operand: &KappaLabel<N>,
226) -> Result<Vec<u8>, CompositionFailure> {
227    let (axis, raw) = decode_operand(operand)?;
228    if raw.len() % 4 != 0 || raw.is_empty() {
229        return Err(CompositionFailure::MalformedOperand);
230    }
231    let q = raw.len() / 4;
232    let quarters: [&[u8]; 4] = [
233        &raw[0..q],
234        &raw[q..2 * q],
235        &raw[2 * q..3 * q],
236        &raw[3 * q..],
237    ];
238
239    let mut canon: Option<Vec<u8>> = None;
240    for perm in S4_PERMUTATIONS.iter() {
241        let mut candidate = Vec::with_capacity(raw.len());
242        for &idx in perm.iter() {
243            candidate.extend_from_slice(quarters[idx]);
244        }
245        match &canon {
246            None => canon = Some(candidate),
247            Some(current) if candidate < *current => canon = Some(candidate),
248            _ => {}
249        }
250    }
251    let canon_raw = canon.expect("S4_PERMUTATIONS is non-empty");
252    Ok(emit_canonical(axis, &canon_raw))
253}
254
255// ─── CS-E8 — direct embedding (identity on canonical-form bytes) ──────
256
257/// CS-E8 canonicalize discipline: the identity. Per wiki ADR-061 §(3)
258/// the framework names the algebraic structure (the identity relation
259/// per ADR-059 — every operand is its own equivalence class); the
260/// realization commits identity on canonical-form bytes.
261///
262/// The composed κ-label is distinguished from the operand's κ-label
263/// by realization-IRI provenance, not by digest bytes.
264pub fn canonicalize_e8<const N: usize>(operand: &KappaLabel<N>) -> Vec<u8> {
265    operand.as_bytes().to_vec()
266}
267
268// ─── Helpers ─────────────────────────────────────────────────────────
269
270/// Re-emit `<axis>:<lowercase-hex-of-raw>` from the σ-axis name and
271/// the raw digest bytes. Used by CS-F4 and CS-E7 to produce
272/// canonical-form bytes for the composition's ψ-pipeline input.
273fn emit_canonical(axis: &str, raw: &[u8]) -> Vec<u8> {
274    let mut out = Vec::with_capacity(axis.len() + 1 + 2 * raw.len());
275    out.extend_from_slice(axis.as_bytes());
276    out.push(b':');
277    for &byte in raw {
278        out.push(hex_lo(byte >> 4));
279        out.push(hex_lo(byte & 0x0F));
280    }
281    out
282}
283
284/// Lowercase-hex digit for a nibble (`0..=15`). Matches the
285/// foundation's ψ_9 σ-projection emission discipline.
286fn hex_lo(nibble: u8) -> u8 {
287    match nibble {
288        0..=9 => b'0' + nibble,
289        10..=15 => b'a' + (nibble - 10),
290        _ => unreachable!("nibble is `& 0x0F`"),
291    }
292}
293
294#[cfg(test)]
295mod tests {
296    use super::*;
297
298    fn label<const N: usize>(s: &str) -> KappaLabel<N> {
299        KappaLabel::from_bytes(s.as_bytes()).expect("test label admits")
300    }
301
302    #[test]
303    fn g2_is_commutative() {
304        let a =
305            label::<71>("sha256:0000000000000000000000000000000000000000000000000000000000000000");
306        let b =
307            label::<71>("sha256:1111111111111111111111111111111111111111111111111111111111111111");
308        let ab = canonicalize_g2(&a, &b);
309        let ba = canonicalize_g2(&b, &a);
310        assert_eq!(ab, ba, "CS-G2 commutativity is structural");
311        assert_eq!(ab.len(), 142, "G2 canonical form is 2N bytes");
312    }
313
314    #[test]
315    fn f4_mirror_collapses() {
316        // An operand and its mirror produce byte-identical canonical
317        // forms (the 2-element class collapses to its lex-min member).
318        let a =
319            label::<71>("sha256:0000000000000000000000000000000000000000000000000000000000000000");
320        // The mirror (all-FF raw) is its bitwise complement.
321        let m =
322            label::<71>("sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");
323        let ca = canonicalize_f4(&a).expect("a canonicalizes");
324        let cm = canonicalize_f4(&m).expect("m canonicalizes");
325        assert_eq!(ca, cm, "CS-F4 mirror collapses to lex-min representative");
326    }
327
328    #[test]
329    fn e6_prepends_degree_tag() {
330        let a =
331            label::<71>("sha256:0000000000000000000000000000000000000000000000000000000000000000");
332        let canon = canonicalize_e6(&a).expect("canonicalizes");
333        assert_eq!(canon.len(), 72, "CS-E6 canonical form is N + 1 bytes");
334        assert!(canon[0] == DEGREE_5_TAG || canon[0] == DEGREE_6_TAG);
335        assert_eq!(&canon[1..], a.as_bytes());
336    }
337
338    #[test]
339    fn e6_partition_distinguishes_classes() {
340        // first_raw_digest_byte = 0x00 → 0x00 % 9 = 0 → degree-5
341        let a =
342            label::<71>("sha256:0000000000000000000000000000000000000000000000000000000000000000");
343        let ca = canonicalize_e6(&a).expect("canonicalizes");
344        assert_eq!(ca[0], DEGREE_5_TAG, "first byte 0x00 → degree-5");
345        // first_raw_digest_byte = 0x08 → 0x08 % 9 = 8 → degree-6
346        let b =
347            label::<71>("sha256:0800000000000000000000000000000000000000000000000000000000000000");
348        let cb = canonicalize_e6(&b).expect("canonicalizes");
349        assert_eq!(cb[0], DEGREE_6_TAG, "first byte 0x08 → degree-6");
350    }
351
352    #[test]
353    fn e7_preserves_width_and_collapses_orbit() {
354        // CS-E7 canonical form is the lex-min over the 24-orbit; it
355        // preserves the σ-axis prefix + digest width, and any
356        // quarter-permutation of an operand lands on the same canonical
357        // representative.
358        let a =
359            label::<71>("sha256:0102030405060708090a0b0c0d0e0f1011121314151617181920212223242526");
360        let ca = canonicalize_e7(&a).expect("canonicalizes");
361        assert_eq!(ca.len(), a.as_bytes().len(), "CS-E7 preserves width");
362        // Re-canonicalizing the canonical form is a fixed point (the
363        // lex-min member's orbit lex-min is itself).
364        let ca_label = KappaLabel::<71>::from_bytes(&ca).expect("canonical is a label");
365        let ca2 = canonicalize_e7(&ca_label).expect("canonicalizes");
366        assert_eq!(ca, ca2, "CS-E7 lex-min is an orbit fixed point");
367    }
368
369    #[test]
370    fn e8_is_identity() {
371        let a =
372            label::<71>("sha256:0000000000000000000000000000000000000000000000000000000000000000");
373        let canon = canonicalize_e8(&a);
374        assert_eq!(
375            canon,
376            a.as_bytes(),
377            "CS-E8 is identity on canonical-form bytes"
378        );
379    }
380
381    #[test]
382    fn axis_check_admits_match() {
383        assert!(check_axis("sha256", "sha256").is_ok());
384    }
385
386    #[test]
387    fn axis_check_rejects_mismatch() {
388        match check_axis("blake3", "sha256") {
389            Err(CompositionFailure::OperandSigmaAxisMismatch {
390                expected_axis,
391                operand_axis,
392            }) => {
393                assert_eq!(expected_axis, "sha256");
394                assert_eq!(operand_axis, "blake3");
395            }
396            _ => panic!("σ-axis mismatch must be reported"),
397        }
398    }
399}