Skip to main content

uor_addr/gguf/
value.rs

1//! GGUF v3 typed input (ADR-023 amended by ADR-060).
2//!
3//! The GGUF spec defines no canonical form; this realization defines one
4//! — a **flat Merkle skeleton**. Two GGUF files that decode to the same
5//! logical content canonicalize to byte-identical skeletons. Every
6//! variable-length leaf (a string, an array payload, a tensor's data
7//! region) is replaced by its streamed SHA-256 digest, so the skeleton's
8//! size grows only with the KV / tensor **counts** (never with model
9//! size), while still binding every weight byte into the κ-label.
10//!
11//! ```text
12//! LE_u32(GGUF_MAGIC)
13//! LE_u32(GGUF_VERSION_REQUIRED)
14//! LE_u64(tensor_count)
15//! LE_u64(kv_count)
16//! LE_u64(canonical_alignment)
17//! ── metadata KVs, sorted by key bytes ──
18//!   for kv: sha256(key) || LE_u32(type_tag) || canonical_value(kv)
19//!     scalar  → the value's natural little-endian bytes
20//!     string  → LE_u64(len) || sha256(utf8 bytes)
21//!     array   → LE_u32(elem_type) || LE_u64(len) || sha256(wire payload)
22//! ── tensor info, sorted by name bytes ──
23//!   for t: sha256(name) || LE_u32(n_dims) || (LE_u64(dim) × n_dims)
24//!       || LE_u32(ggml_type_id) || LE_u64(recomputed_offset)
25//!       || sha256(tensor data bytes)        ← streamed; binds the weights
26//! ```
27//!
28//! `recomputed_offset` is the cumulative aligned byte position in
29//! sorted-tensor order (NOT the input's stored offset), so two inputs
30//! whose tensor-data sections are laid out in different orders
31//! canonicalize identically.
32//!
33//! Under ADR-060 the **full skeleton** flows through the pipeline as a
34//! [`TermValue::Borrowed`] carrier and ψ₉ folds it through the σ-axis —
35//! there is no two-level commitment, no carrier ceiling, and no count /
36//! width cap. Tensor data and large string / array payloads are streamed
37//! through [`prism::crypto::Sha256Hasher`] with bounded resident memory,
38//! so arbitrarily large weights bind into the κ-label.
39//!
40//! [`GgufValue`] (the owned parsed value, `alloc`-gated) holds the
41//! skeleton; [`GgufCarrier`] is the borrowed model-input handle the
42//! pipeline binds.
43
44use prism::crypto::Sha256Hasher;
45use prism::operation::TermValue;
46use prism::pipeline::{
47    ConstrainedTypeShape, ConstraintRef, IntoBindingValue, PartitionProductFields, ShapeViolation,
48    ViolationKind,
49};
50use prism::vocabulary::Hasher;
51
52use crate::gguf::dtype::GgmlType;
53use crate::gguf::shapes::bounds::{
54    GGUF_DEFAULT_ALIGNMENT, GGUF_HEADER_BYTES, GGUF_MAGIC, GGUF_MAX_DIMS,
55    GGUF_METADATA_ARRAY_DEPTH_MAX, GGUF_VERSION_REQUIRED,
56};
57
58// ─── ShapeViolation IRIs ────────────────────────────────────────────────
59
/// Declare a module-private `ShapeViolation` constant for the `GgufValue`
/// shape.
///
/// Arguments, in order: the constant's name, the constraint IRI leaf, the
/// property IRI leaf (both appended to
/// `https://uor.foundation/addr/GgufValue/`), and the `ViolationKind`.
/// Every constant shares the same shape IRI, expected range and
/// `0..=1` cardinality bounds.
macro_rules! violation {
    ($ident:ident, $constraint_leaf:literal, $property_leaf:literal, $violation_kind:expr) => {
        const $ident: ShapeViolation = ShapeViolation {
            // Identity of the violated invariant.
            shape_iri: "https://uor.foundation/addr/GgufValue",
            constraint_iri: concat!(
                "https://uor.foundation/addr/GgufValue/",
                $constraint_leaf
            ),
            property_iri: concat!("https://uor.foundation/addr/GgufValue/", $property_leaf),
            // Shared range / cardinality description.
            expected_range: "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
            min_count: 0,
            max_count: 1,
            kind: $violation_kind,
        };
    };
}
73
// Raised by `GgufValue::parse` when the leading u32 is not `GGUF_MAGIC`.
violation!(
    INVALID_MAGIC,
    "validMagic",
    "magic",
    ViolationKind::ValueCheck
);
// Raised by `GgufValue::parse` when the version u32 is not
// `GGUF_VERSION_REQUIRED`.
violation!(
    UNSUPPORTED_VERSION,
    "supportedVersion",
    "version",
    ViolationKind::ValueCheck
);
// Raised whenever a read would run past the end of the input (see
// `Cursor::take`), when a tensor's data region does not fit in the input,
// and — for lack of a dedicated violation — on an unknown metadata type tag.
violation!(
    TRUNCATED,
    "notTruncated",
    "byteSpan",
    ViolationKind::ValueCheck
);
// Raised when a tensor declares more than `GGUF_MAX_DIMS` dimensions.
violation!(
    DIMS_EXCEEDED,
    "tensorRankBound",
    "nDims",
    ViolationKind::CardinalityViolation
);
// Raised when ARRAY nesting reaches `GGUF_METADATA_ARRAY_DEPTH_MAX`.
violation!(
    ARRAY_DEPTH,
    "arrayDepthBound",
    "arrayDepth",
    ViolationKind::CardinalityViolation
);
// Raised when a UINT32 `general.alignment` value is below 8 or not a power
// of two.
violation!(
    INVALID_ALIGNMENT,
    "validAlignment",
    "alignment",
    ViolationKind::ValueCheck
);
// Raised when `GgmlType::from_u32` does not recognise a tensor's type id,
// or when `GgmlType::tensor_data_bytes` yields `None`.
violation!(
    UNKNOWN_TENSOR_TYPE,
    "knownTensorType",
    "tensorType",
    ViolationKind::ValueCheck
);
// Raised when the element-count product or the running canonical offset
// overflows `u64`.
violation!(
    OVERFLOW,
    "noOverflow",
    "byteCount",
    ViolationKind::ValueCheck
);
122
123// ─── GGUF metadata value type tags (gguf.md) ─────────────────────────────
124
// Fixed-width scalar tags.
const T_UINT8: u32 = 0;
const T_INT8: u32 = 1;
const T_UINT16: u32 = 2;
const T_INT16: u32 = 3;
const T_UINT32: u32 = 4;
const T_INT32: u32 = 5;
const T_FLOAT32: u32 = 6;
const T_BOOL: u32 = 7;
// Variable-length tags.
const T_STRING: u32 = 8;
const T_ARRAY: u32 = 9;
// 64-bit scalar tags.
const T_UINT64: u32 = 10;
const T_INT64: u32 = 11;
const T_FLOAT64: u32 = 12;

/// Number of wire bytes occupied by a scalar metadata value of the given
/// type tag.
///
/// Returns `None` for `STRING`, `ARRAY`, and any tag this module does not
/// know, since none of those has a fixed width.
const fn scalar_width(type_tag: u32) -> Option<usize> {
    match type_tag {
        T_UINT8 | T_INT8 | T_BOOL => Some(1),
        T_UINT16 | T_INT16 => Some(2),
        T_UINT32 | T_INT32 | T_FLOAT32 => Some(4),
        T_UINT64 | T_INT64 | T_FLOAT64 => Some(8),
        _ => None,
    }
}
150
151// ─── Little-endian readers over a borrowed cursor ────────────────────────
152
153struct Cursor<'a> {
154    buf: &'a [u8],
155    pos: usize,
156}
157
158impl<'a> Cursor<'a> {
159    fn new(buf: &'a [u8]) -> Self {
160        Self { buf, pos: 0 }
161    }
162    fn take(&mut self, n: usize) -> Result<&'a [u8], ShapeViolation> {
163        let end = self.pos.checked_add(n).ok_or(TRUNCATED)?;
164        if end > self.buf.len() {
165            return Err(TRUNCATED);
166        }
167        let s = &self.buf[self.pos..end];
168        self.pos = end;
169        Ok(s)
170    }
171    fn u32(&mut self) -> Result<u32, ShapeViolation> {
172        let b = self.take(4)?;
173        Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
174    }
175    fn u64(&mut self) -> Result<u64, ShapeViolation> {
176        let b = self.take(8)?;
177        Ok(u64::from_le_bytes([
178            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
179        ]))
180    }
181}
182
/// One-shot SHA-256 of `bytes`: starts from `Sha256Hasher::initial()`,
/// folds the whole slice in with a single `fold_bytes` call, and finalizes
/// to a 32-byte digest.
#[inline]
fn sha256(bytes: &[u8]) -> [u8; 32] {
    Sha256Hasher::initial().fold_bytes(bytes).finalize()
}
187
/// Round `offset` up to the nearest multiple of `alignment`; an offset that
/// is already a multiple is returned unchanged.
///
/// `alignment` must be non-zero (the remainder operation divides by it).
#[inline]
const fn align_up(offset: u64, alignment: u64) -> u64 {
    match offset % alignment {
        0 => offset,
        // `past` is how far `offset` sits beyond the previous boundary.
        past => offset + (alignment - past),
    }
}
197
198// ─── GgufCarrier — the borrowed model-input handle (no_alloc) ───────────
199
/// Model-input handle that borrows a canonical skeleton (ADR-060 borrowed
/// carrier). It is a `Copy` newtype around the skeleton bytes produced by
/// [`canonicalize`]; `as_binding_value` hands the same bytes to the
/// pipeline as a `Borrowed` carrier without copying them.
#[derive(Clone, Copy, Debug)]
pub struct GgufCarrier<'a>(&'a [u8]);

impl<'a> GgufCarrier<'a> {
    /// Build a handle over an existing canonical-skeleton byte slice.
    #[must_use]
    pub fn new(skeleton: &'a [u8]) -> Self {
        GgufCarrier(skeleton)
    }

    /// The canonical-skeleton bytes, borrowed for the carrier's full
    /// lifetime `'a` rather than for the duration of `&self`.
    #[must_use]
    pub fn canonical_bytes(&self) -> &'a [u8] {
        let GgufCarrier(skeleton) = *self;
        skeleton
    }
}
219
// Shape descriptor for the borrowed carrier. The IRI is the same
// `GgufValue` shape IRI the `ShapeViolation` constants above report; the
// shape declares one site and attaches no constraint references.
// NOTE(review): `CYCLE_SIZE = u64::MAX` presumably marks the carrier as
// unbounded — confirm against the `ConstrainedTypeShape` contract.
impl ConstrainedTypeShape for GgufCarrier<'_> {
    const IRI: &'static str = "https://uor.foundation/addr/GgufValue";
    const SITE_COUNT: usize = 1;
    const CONSTRAINTS: &'static [ConstraintRef] = &[];
    const CYCLE_SIZE: u64 = u64::MAX;
}
226
// Marker impl of the SDK seal trait for the carrier.
// NOTE(review): presumably required before `IntoBindingValue` can be
// implemented — confirm against the trait's bounds.
impl prism::uor_foundation::pipeline::__sdk_seal::Sealed for GgufCarrier<'_> {}

impl<'a> IntoBindingValue<'a> for GgufCarrier<'a> {
    /// Hand the wrapped skeleton slice to `TermValue::borrowed`. The
    /// result carries the slice's own lifetime `'a`, independent of
    /// `INLINE_BYTES`.
    fn as_binding_value<const INLINE_BYTES: usize>(&self) -> TermValue<'a, INLINE_BYTES> {
        TermValue::borrowed(self.0)
    }
}
234
// The carrier declares no partition-product fields: both tables are empty.
impl PartitionProductFields for GgufCarrier<'_> {
    const FIELDS: &'static [(u32, u32)] = &[];
    const FIELD_NAMES: &'static [&'static str] = &[];
}
239
240// ═════════════════════════════════════════════════════════════════════
241// alloc-gated parser + owned value
242// ═════════════════════════════════════════════════════════════════════
243
244#[cfg(feature = "alloc")]
245pub use alloc_impl::{canonicalize, GgufValue};
246
247#[cfg(feature = "alloc")]
248mod alloc_impl {
249    use super::*;
250    use alloc::vec::Vec;
251
252    /// A parsed, canonicalized GGUF v3 file. The stored bytes are the
253    /// flat canonical skeleton (see [module docs](super)). **`alloc`-gated**
254    /// — the pipeline binds the borrowed [`GgufCarrier`].
255    #[derive(Clone, PartialEq, Eq)]
256    pub struct GgufValue {
257        bytes: Vec<u8>,
258    }
259
260    impl core::fmt::Debug for GgufValue {
261        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
262            f.debug_struct("GgufValue")
263                .field("canonical_len", &self.bytes.len())
264                .finish_non_exhaustive()
265        }
266    }
267
    /// Location of one metadata key/value pair inside the raw input.
    /// Holds offsets and lengths only; the bytes are re-sliced from the
    /// input when the pair is sorted and emitted.
    struct KvEntry {
        /// Offset of the first key byte (just past the key's u64 length).
        key_off: usize,
        /// Key length in bytes.
        key_len: usize,
        /// GGUF metadata value type tag as read from the input.
        type_tag: u32,
        /// Offset of the first value byte (just past the type tag).
        val_off: usize,
        /// Wire length of the value in bytes, as measured by
        /// `measure_value`.
        val_span: usize,
    }
275
    /// One parsed tensor-info record.
    struct TensorEntry {
        /// Offset of the first name byte (just past the name's u64 length).
        name_off: usize,
        /// Name length in bytes.
        name_len: usize,
        /// Number of dimensions; at most `GGUF_MAX_DIMS`.
        n_dims: u32,
        /// Dimension sizes. Only the first `n_dims` slots are read from
        /// the input; the rest stay zero.
        dims: [u64; GGUF_MAX_DIMS],
        /// Element type decoded from the on-wire type id.
        ggml_type: GgmlType,
        /// Data offset as stored in the input, relative to the start of
        /// the tensor-data section.
        stored_offset: u64,
        /// Size of the tensor's data region in bytes, as computed by
        /// `GgmlType::tensor_data_bytes` from the element count.
        data_bytes: u64,
    }
285
286    impl GgufValue {
287        /// Borrow the canonical-skeleton bytes.
288        #[must_use]
289        pub fn canonical_bytes(&self) -> &[u8] {
290            &self.bytes
291        }
292
293        /// Parse a GGUF v3 input slice into a canonicalized skeleton.
294        ///
295        /// # Errors
296        ///
297        /// A [`ShapeViolation`] whose `constraint_iri` names the violated
298        /// invariant (bad magic, unsupported version, truncation, an
299        /// over-rank tensor, over-deep array nesting, invalid alignment,
300        /// an unknown tensor type, or arithmetic overflow).
301        pub fn parse(raw: &[u8]) -> Result<Self, ShapeViolation> {
302            let mut cur = Cursor::new(raw);
303
304            // ── Header ──
305            if cur.u32()? != GGUF_MAGIC {
306                return Err(INVALID_MAGIC);
307            }
308            if cur.u32()? != GGUF_VERSION_REQUIRED {
309                return Err(UNSUPPORTED_VERSION);
310            }
311            let tensor_count = cur.u64()?;
312            let kv_count = cur.u64()?;
313            debug_assert_eq!(cur.pos, GGUF_HEADER_BYTES);
314
315            // ── Metadata KV section ──
316            let mut kvs: Vec<KvEntry> = Vec::new();
317            let mut alignment = GGUF_DEFAULT_ALIGNMENT;
318            for _ in 0..kv_count {
319                let key_len = cur.u64()? as usize;
320                let key_off = cur.pos;
321                let key = cur.take(key_len)?;
322                let type_tag = cur.u32()?;
323                let val_off = cur.pos;
324                let val_span = measure_value(&mut cur, type_tag, 0)?;
325
326                if key == b"general.alignment" && type_tag == T_UINT32 {
327                    let a = u32::from_le_bytes([
328                        raw[val_off],
329                        raw[val_off + 1],
330                        raw[val_off + 2],
331                        raw[val_off + 3],
332                    ]) as u64;
333                    if a < 8 || !a.is_power_of_two() {
334                        return Err(INVALID_ALIGNMENT);
335                    }
336                    alignment = a;
337                }
338
339                kvs.push(KvEntry {
340                    key_off,
341                    key_len,
342                    type_tag,
343                    val_off,
344                    val_span,
345                });
346            }
347
348            // ── Tensor info section ──
349            let mut tensors: Vec<TensorEntry> = Vec::new();
350            for _ in 0..tensor_count {
351                let name_len = cur.u64()? as usize;
352                let name_off = cur.pos;
353                cur.take(name_len)?;
354                let n_dims = cur.u32()?;
355                if n_dims as usize > GGUF_MAX_DIMS {
356                    return Err(DIMS_EXCEEDED);
357                }
358                let mut dims = [0u64; GGUF_MAX_DIMS];
359                let mut n_elements: u64 = 1;
360                for d in dims.iter_mut().take(n_dims as usize) {
361                    *d = cur.u64()?;
362                    n_elements = n_elements.checked_mul(*d).ok_or(OVERFLOW)?;
363                }
364                let type_id = cur.u32()?;
365                let ggml_type = GgmlType::from_u32(type_id).ok_or(UNKNOWN_TENSOR_TYPE)?;
366                let stored_offset = cur.u64()?;
367                let data_bytes = ggml_type
368                    .tensor_data_bytes(n_elements)
369                    .ok_or(UNKNOWN_TENSOR_TYPE)?;
370                tensors.push(TensorEntry {
371                    name_off,
372                    name_len,
373                    n_dims,
374                    dims,
375                    ggml_type,
376                    stored_offset,
377                    data_bytes,
378                });
379            }
380
381            // Tensor-data section begins at the next alignment boundary
382            // past the end of the tensor-info section.
383            let data_section_start = align_up(cur.pos as u64, alignment);
384
385            // ── Sort orders (lexicographic on raw UTF-8 bytes) ──
386            let mut kv_order: Vec<usize> = (0..kvs.len()).collect();
387            kv_order.sort_by(|&a, &b| {
388                raw[kvs[a].key_off..kvs[a].key_off + kvs[a].key_len]
389                    .cmp(&raw[kvs[b].key_off..kvs[b].key_off + kvs[b].key_len])
390            });
391            let mut t_order: Vec<usize> = (0..tensors.len()).collect();
392            t_order.sort_by(|&a, &b| {
393                raw[tensors[a].name_off..tensors[a].name_off + tensors[a].name_len]
394                    .cmp(&raw[tensors[b].name_off..tensors[b].name_off + tensors[b].name_len])
395            });
396
397            // ── Emit the flat canonical skeleton ──
398            let mut out: Vec<u8> = Vec::new();
399            out.extend_from_slice(&GGUF_MAGIC.to_le_bytes());
400            out.extend_from_slice(&GGUF_VERSION_REQUIRED.to_le_bytes());
401            out.extend_from_slice(&tensor_count.to_le_bytes());
402            out.extend_from_slice(&kv_count.to_le_bytes());
403            out.extend_from_slice(&alignment.to_le_bytes());
404
405            for &idx in &kv_order {
406                let kv = &kvs[idx];
407                let key = &raw[kv.key_off..kv.key_off + kv.key_len];
408                out.extend_from_slice(&sha256(key));
409                out.extend_from_slice(&kv.type_tag.to_le_bytes());
410                emit_canonical_value(&mut out, raw, kv);
411            }
412
413            let mut canonical_offset: u64 = 0;
414            for &idx in &t_order {
415                let t = &tensors[idx];
416                let name = &raw[t.name_off..t.name_off + t.name_len];
417                out.extend_from_slice(&sha256(name));
418                out.extend_from_slice(&t.n_dims.to_le_bytes());
419                for d in t.dims.iter().take(t.n_dims as usize) {
420                    out.extend_from_slice(&d.to_le_bytes());
421                }
422                out.extend_from_slice(&t.ggml_type.id().to_le_bytes());
423                out.extend_from_slice(&canonical_offset.to_le_bytes());
424
425                // Stream the tensor's data region through SHA-256.
426                let start = data_section_start
427                    .checked_add(t.stored_offset)
428                    .ok_or(TRUNCATED)? as usize;
429                let end = start.checked_add(t.data_bytes as usize).ok_or(TRUNCATED)?;
430                if end > raw.len() {
431                    return Err(TRUNCATED);
432                }
433                out.extend_from_slice(&sha256(&raw[start..end]));
434
435                canonical_offset = align_up(
436                    canonical_offset.checked_add(t.data_bytes).ok_or(OVERFLOW)?,
437                    alignment,
438                );
439            }
440
441            Ok(Self { bytes: out })
442        }
443    }
444
445    /// Measure (and bounds-check) the wire span of a metadata value,
446    /// advancing the cursor past it. Recurses into ARRAY payloads,
447    /// guarding the native stack with [`GGUF_METADATA_ARRAY_DEPTH_MAX`].
448    fn measure_value(
449        cur: &mut Cursor<'_>,
450        type_tag: u32,
451        depth: usize,
452    ) -> Result<usize, ShapeViolation> {
453        let start = cur.pos;
454        if let Some(w) = scalar_width(type_tag) {
455            cur.take(w)?;
456        } else if type_tag == T_STRING {
457            let n = cur.u64()? as usize;
458            cur.take(n)?;
459        } else if type_tag == T_ARRAY {
460            if depth >= GGUF_METADATA_ARRAY_DEPTH_MAX {
461                return Err(ARRAY_DEPTH);
462            }
463            let elem_type = cur.u32()?;
464            let len = cur.u64()? as usize;
465            for _ in 0..len {
466                measure_value(cur, elem_type, depth + 1)?;
467            }
468        } else {
469            return Err(TRUNCATED); // unknown type tag
470        }
471        Ok(cur.pos - start)
472    }
473
474    /// Emit the canonical representation of a metadata value into `out`:
475    /// scalars inline (natural little-endian bytes); STRING / ARRAY as a
476    /// length-tagged header plus a streamed digest of the wire payload.
477    fn emit_canonical_value(out: &mut Vec<u8>, raw: &[u8], kv: &KvEntry) {
478        let payload = &raw[kv.val_off..kv.val_off + kv.val_span];
479        if scalar_width(kv.type_tag).is_some() {
480            out.extend_from_slice(payload);
481        } else if kv.type_tag == T_STRING {
482            let len = u64::from_le_bytes(payload[..8].try_into().unwrap_or([0; 8]));
483            out.extend_from_slice(&len.to_le_bytes());
484            out.extend_from_slice(&sha256(&payload[8..]));
485        } else if kv.type_tag == T_ARRAY {
486            let elem_type = u32::from_le_bytes(payload[..4].try_into().unwrap_or([0; 4]));
487            let len = u64::from_le_bytes(payload[4..12].try_into().unwrap_or([0; 8]));
488            out.extend_from_slice(&elem_type.to_le_bytes());
489            out.extend_from_slice(&len.to_le_bytes());
490            out.extend_from_slice(&sha256(&payload[12..]));
491        }
492    }
493
494    /// Canonical skeleton as an owned `Vec<u8>`.
495    ///
496    /// # Errors
497    ///
498    /// Surfaces the [`ShapeViolation`] [`GgufValue::parse`] would raise.
499    pub fn canonicalize(raw: &[u8]) -> Result<Vec<u8>, ShapeViolation> {
500        Ok(GgufValue::parse(raw)?.bytes)
501    }
502
    // Unit tests for header parsing: the happy path on an empty file, plus
    // the magic and version rejections.
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Smallest well-formed input: a header with zero tensors and zero
        /// metadata KVs.
        fn minimal_gguf() -> Vec<u8> {
            // magic, version, tensor_count=0, kv_count=0.
            let mut v = Vec::new();
            v.extend_from_slice(&GGUF_MAGIC.to_le_bytes());
            v.extend_from_slice(&GGUF_VERSION_REQUIRED.to_le_bytes());
            v.extend_from_slice(&0u64.to_le_bytes());
            v.extend_from_slice(&0u64.to_le_bytes());
            v
        }

        #[test]
        fn parses_minimal_header() {
            let canon = canonicalize(&minimal_gguf()).expect("valid");
            // header: magic(4)+version(4)+tcount(8)+kvcount(8)+align(8) = 32.
            assert_eq!(canon.len(), 32);
        }

        #[test]
        fn rejects_bad_magic() {
            let mut v = minimal_gguf();
            // Flip every bit of the first magic byte.
            v[0] ^= 0xFF;
            let err = GgufValue::parse(&v).expect_err("bad magic");
            assert_eq!(err.constraint_iri, INVALID_MAGIC.constraint_iri);
        }

        #[test]
        fn rejects_unsupported_version() {
            let mut v = minimal_gguf();
            // Byte 4 is the low byte of the little-endian version field.
            v[4] = 2;
            let err = GgufValue::parse(&v).expect_err("v2");
            assert_eq!(err.constraint_iri, UNSUPPORTED_VERSION.constraint_iri);
        }
    }
540}