Skip to main content

uor_addr/codemodule/
value.rs

1//! Code-module AST typed input under the Canonical Code-Module AST
2//! Serialization (CCMAS) form (ADR-023 amended by ADR-060).
3//!
4//! CCMAS is Rivest canonical S-expressions over the AST grammar cases:
5//! the canonical byte output is a Rivest `(s₁ s₂ … sₙ)` flat list
6//! (Sexp.txt §4.3) with `<length>:<bytes>` atoms (§4.2). The canonical
7//! form is therefore **identical** to the [`crate::sexp`] realization's,
8//! so the pipeline reuses sexp's no_alloc streaming canonicalizer
9//! ([`SExprCanon`]) — under the `CodeModuleValue` typed-input IRI. There
10//! is no size, name-width, item-count, or nesting-depth ceiling.
11//!
12//! [`CodeModuleValue`] (the owned AST **builder**, `alloc`-gated)
13//! constructs canonical CCMAS bytes programmatically (`module`,
14//! `function`, `atom`) for reference and testing; [`CodeModuleCarrier`]
15//! is the borrowed model-input handle the pipeline binds.
16
17use prism::operation::TermValue;
18use prism::pipeline::{
19    ConstrainedTypeShape, ConstraintRef, IntoBindingValue, PartitionProductFields,
20};
21// `ShapeViolation` is consumed only by the `alloc`-gated builder / parser.
22#[cfg(feature = "alloc")]
23use prism::pipeline::ShapeViolation;
24#[cfg(feature = "alloc")]
25use prism::uor_foundation::pipeline::ChunkSource;
26
27use crate::sexp::SExprCanon;
28
/// IRI naming the CCMAS typed input (`CodeModuleValue`); shared by every
/// shape declaration in this module that binds CCMAS bytes.
pub(crate) const CODEMODULE_IRI: &str = "https://uor.foundation/addr/CodeModuleValue";
31
32// ─── CodeModuleCarrier — the borrowed model-input handle (no_alloc) ─────
33
34/// Borrowed CCMAS input handle (ADR-060 stream carrier). A thin, `Copy`
35/// borrow of a [`SExprCanon`]; `as_binding_value` returns the `Stream`
36/// carrier zero-copy under the `CodeModuleValue` IRI.
37#[derive(Clone, Copy, Debug)]
38pub struct CodeModuleCarrier<'a>(&'a SExprCanon<'a>);
39
40impl<'a> CodeModuleCarrier<'a> {
41    /// Wrap a validated canonical-form carrier as a model input handle.
42    #[must_use]
43    pub fn new(canon: &'a SExprCanon<'a>) -> Self {
44        Self(canon)
45    }
46}
47
48impl ConstrainedTypeShape for CodeModuleCarrier<'_> {
49    const IRI: &'static str = CODEMODULE_IRI;
50    const SITE_COUNT: usize = 1;
51    const CONSTRAINTS: &'static [ConstraintRef] = &[];
52    const CYCLE_SIZE: u64 = u64::MAX;
53}
54
55impl prism::uor_foundation::pipeline::__sdk_seal::Sealed for CodeModuleCarrier<'_> {}
56
57impl<'a> IntoBindingValue<'a> for CodeModuleCarrier<'a> {
58    fn as_binding_value<const INLINE_BYTES: usize>(&self) -> TermValue<'a, INLINE_BYTES> {
59        TermValue::stream(self.0)
60    }
61}
62
63impl PartitionProductFields for CodeModuleCarrier<'_> {
64    const FIELDS: &'static [(u32, u32)] = &[];
65    const FIELD_NAMES: &'static [&'static str] = &[];
66}
67
68// ─── CodeModuleValue — the owned AST builder (alloc) ────────────────────
69
/// Owned CCMAS value that doubles as an AST builder.
///
/// Holds the canonical CCMAS bytes of one node and is meant for building
/// reference inputs and tests programmatically. It is **`alloc`-gated**;
/// the pipeline itself binds the borrowed [`CodeModuleCarrier`] handle,
/// which needs no allocator. No width or count ceiling is imposed.
#[cfg(feature = "alloc")]
#[derive(Clone, Eq, PartialEq)]
pub struct CodeModuleValue {
    // Canonical CCMAS encoding of this node.
    bytes: alloc::vec::Vec<u8>,
}
79
/// Compact `Debug`: reports only the encoded length, never the bytes.
#[cfg(feature = "alloc")]
impl core::fmt::Debug for CodeModuleValue {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let byte_len = self.bytes.len();
        let mut builder = f.debug_struct("CodeModuleValue");
        builder.field("len", &byte_len);
        // `finish_non_exhaustive` marks the output as omitting fields
        // (the byte payload itself).
        builder.finish_non_exhaustive()
    }
}
88
#[cfg(feature = "alloc")]
impl CodeModuleValue {
    /// Validate `raw` as an S-expression (Rivest canonical or token-list
    /// form) and keep its canonical byte encoding.
    ///
    /// # Errors
    ///
    /// A `validCcmas`/`validUtf8SExpr` [`ShapeViolation`] if `raw` is not
    /// a well-formed S-expression.
    pub fn parse(raw: &[u8]) -> Result<Self, ShapeViolation> {
        // Reject malformed input before streaming anything out.
        SExprCanon::validate(raw)?;
        let source = SExprCanon::new(raw);
        // Concatenate the streamed canonical chunks into one owned buffer.
        let mut canonical = alloc::vec::Vec::new();
        source.for_each_chunk(&mut |piece| canonical.extend_from_slice(piece));
        Ok(Self { bytes: canonical })
    }

    /// Build a Module AST node: `(3:mod <name> <item>…)`.
    ///
    /// `name` is encoded as an atom; each entry of `items` is appended
    /// verbatim, in order.
    #[must_use]
    pub fn module(name: &str, items: &[CodeModuleValue]) -> Self {
        Self::ast_call("mod", name, items)
    }

    /// Build a Function AST node: `(3:fun <name> (<param>…) <ret> <body>)`.
    ///
    /// `name` is encoded as an atom. `parameters`, `return_type` and `body`
    /// are already-encoded nodes and are spliced in unchanged.
    #[must_use]
    pub fn function(
        name: &str,
        parameters: &[CodeModuleValue],
        return_type: &CodeModuleValue,
        body: &CodeModuleValue,
    ) -> Self {
        let mut encoded = alloc::vec::Vec::new();

        // Head: `(3:fun <name>`.
        encoded.push(b'(');
        write_atom(&mut encoded, b"fun");
        encoded.push(b' ');
        write_atom(&mut encoded, name.as_bytes());

        // Parameter list: space-separated, no leading or trailing space.
        encoded.extend_from_slice(b" (");
        let mut params = parameters.iter();
        if let Some(first) = params.next() {
            encoded.extend_from_slice(&first.bytes);
            for param in params {
                encoded.push(b' ');
                encoded.extend_from_slice(&param.bytes);
            }
        }
        encoded.extend_from_slice(b") ");

        // Tail: `<ret> <body>)`.
        encoded.extend_from_slice(&return_type.bytes);
        encoded.push(b' ');
        encoded.extend_from_slice(&body.bytes);
        encoded.push(b')');

        Self { bytes: encoded }
    }

    /// Build an Atom AST node (Identifier, Literal, etc.): `<len>:<text>`.
    #[must_use]
    pub fn atom(text: &str) -> Self {
        let mut encoded = alloc::vec::Vec::new();
        write_atom(&mut encoded, text.as_bytes());
        Self { bytes: encoded }
    }

    /// Shared encoder for `(<tag> <name> <item>…)` nodes: `tag` and `name`
    /// become atoms, every item follows after a single space.
    fn ast_call(tag: &str, name: &str, items: &[CodeModuleValue]) -> Self {
        let mut encoded = alloc::vec::Vec::new();
        encoded.push(b'(');
        write_atom(&mut encoded, tag.as_bytes());
        encoded.push(b' ');
        write_atom(&mut encoded, name.as_bytes());
        items.iter().for_each(|child| {
            encoded.push(b' ');
            encoded.extend_from_slice(&child.bytes);
        });
        encoded.push(b')');
        Self { bytes: encoded }
    }

    /// Borrow the CCMAS canonical bytes.
    #[must_use]
    pub fn tagged_bytes(&self) -> &[u8] {
        self.bytes.as_slice()
    }
}
167
168/// Append a Rivest canonical atom `<len>:<bytes>` to `out`.
169#[cfg(feature = "alloc")]
fn write_atom(out: &mut alloc::vec::Vec<u8>, bytes: &[u8]) {
    // 20 decimal digits hold `u64::MAX`, so the length always fits.
    let mut digits = [0u8; 20];
    let mut start = digits.len();
    let mut remaining = bytes.len();

    // Fill the buffer right-to-left, least-significant digit first. The
    // loop body always runs once, so a zero length is rendered as "0".
    loop {
        start -= 1;
        digits[start] = b'0' + (remaining % 10) as u8;
        remaining /= 10;
        if remaining == 0 {
            break;
        }
    }

    // `<len>` `:` `<bytes>`
    out.extend_from_slice(&digits[start..]);
    out.push(b':');
    out.extend_from_slice(bytes);
}
189
/// Validate `raw` and return its canonical CCMAS bytes as an owned buffer.
///
/// Thin wrapper over [`CodeModuleValue::parse`] that yields the value's
/// byte buffer.
///
/// **Available only under the `alloc` feature.**
///
/// # Errors
///
/// Surfaces the [`ShapeViolation`] [`SExprCanon::validate`] would raise.
#[cfg(feature = "alloc")]
pub fn canonicalize(raw: &[u8]) -> Result<alloc::vec::Vec<u8>, ShapeViolation> {
    CodeModuleValue::parse(raw).map(|value| value.bytes)
}
201
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::*;

    /// A module node is a `mod`-tagged list of its name and items.
    #[test]
    fn module_canonical_bytes() {
        let item = CodeModuleValue::atom("value");
        let module = CodeModuleValue::module("demo", &[item]);
        assert_eq!(module.tagged_bytes(), b"(3:mod 4:demo 5:value)");
    }

    /// A function node carries name, parameter list, return type and body.
    #[test]
    fn function_canonical_bytes() {
        let param = CodeModuleValue::atom("x");
        let return_type = CodeModuleValue::atom("u32");
        let fn_body = CodeModuleValue::atom("body");
        let function = CodeModuleValue::function("add", &[param], &return_type, &fn_body);
        assert_eq!(
            function.tagged_bytes(),
            b"(3:fun 3:add (1:x) 3:u32 4:body)"
        );
    }

    /// Re-parsing builder output must reproduce the same bytes.
    #[test]
    fn parse_is_idempotent_on_canonical_bytes() {
        let built = CodeModuleValue::module("library", &[]);
        let reparsed = CodeModuleValue::parse(built.tagged_bytes()).expect("parse");
        assert_eq!(reparsed.tagged_bytes(), built.tagged_bytes());
    }

    /// CCMAS canonical bytes are Rivest canonical S-expressions, so the
    /// sexp realization's canonicalizer is the identity on them.
    #[test]
    fn canonical_form_matches_sexp_realization() {
        let built = CodeModuleValue::module("demo", &[]);
        let via_sexp =
            crate::sexp::canonicalize(built.tagged_bytes()).expect("sexp accepts CCMAS");
        assert_eq!(via_sexp, built.tagged_bytes());
    }
}