Skip to main content

uor_addr/gguf/
dtype.rs

//! `GgmlType` — the GGUF tensor element-type alphabet: a mapping from
//! every admitted GGML `ggml_type` integer ID to its
//! [`prism::tensor::dtype`] shape.
4//!
5//! The mapping is the single source of truth for GGUF tensor-type
6//! validation: every `ggml_type` ID admitted at the typed-input
7//! boundary resolves to a `prism::tensor::dtype` shape carrying that
8//! dtype's `BLOCK_BYTES` (block size in bytes) and `BLOCK_ELEMS`
9//! (elements per block). The per-tensor byte count derives mechanically
10//! as `(num_elements / BLOCK_ELEMS) * BLOCK_BYTES`.
11//!
12//! IDs `4` and `5` (deprecated `GGML_TYPE_Q4_2` / `GGML_TYPE_Q4_3`) are
13//! rejected at the typed-input boundary — they carry no
14//! `prism::tensor::dtype` counterpart.
15//!
16//! Authoritative source: the `ggml_type` enum in
17//! <https://github.com/ggml-org/ggml/blob/master/include/ggml.h>.
18
19use prism::tensor::dtype::{
20    Dtype, BF16, F16, F32, F64, I16, I32, I64, I8, IQ1_M, IQ1_S, IQ2_S, IQ2_XS, IQ2_XXS, IQ3_S,
21    IQ3_XXS, IQ4_NL, IQ4_XS, Q2_K, Q3_K, Q4_0, Q4_1, Q4_K, Q5_0, Q5_1, Q5_K, Q6_K, Q8_0, Q8_1,
22    Q8_K,
23};
24
/// A GGUF tensor element type, named after its GGML `ggml_type` entry.
/// Every variant corresponds to exactly one [`prism::tensor::dtype`]
/// shape.
///
/// Variant spellings intentionally follow the GGML `ggml_type` enum and
/// the `prism::tensor::dtype` type names (`Q4_0`, `IQ4_NL`, …) instead of
/// Rust camel case, so this list can be compared line by line against
/// the authoritative source; that is why `non_camel_case_types` is
/// allowed here.
///
/// The enum derives `Hash` alongside `Eq` so that a dtype can be used
/// directly as a `HashMap` / `HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
#[non_exhaustive]
pub enum GgmlType {
    /// `GGML_TYPE_F32` (ID 0) → [`F32`].
    F32,
    /// `GGML_TYPE_F16` (ID 1) → [`F16`].
    F16,
    /// `GGML_TYPE_Q4_0` (ID 2) → [`Q4_0`].
    Q4_0,
    /// `GGML_TYPE_Q4_1` (ID 3) → [`Q4_1`].
    Q4_1,
    // IDs 4 and 5 are deliberately absent: they have no variant here.
    /// `GGML_TYPE_Q5_0` (ID 6) → [`Q5_0`].
    Q5_0,
    /// `GGML_TYPE_Q5_1` (ID 7) → [`Q5_1`].
    Q5_1,
    /// `GGML_TYPE_Q8_0` (ID 8) → [`Q8_0`].
    Q8_0,
    /// `GGML_TYPE_Q8_1` (ID 9) → [`Q8_1`].
    Q8_1,
    /// `GGML_TYPE_Q2_K` (ID 10) → [`Q2_K`].
    Q2_K,
    /// `GGML_TYPE_Q3_K` (ID 11) → [`Q3_K`].
    Q3_K,
    /// `GGML_TYPE_Q4_K` (ID 12) → [`Q4_K`].
    Q4_K,
    /// `GGML_TYPE_Q5_K` (ID 13) → [`Q5_K`].
    Q5_K,
    /// `GGML_TYPE_Q6_K` (ID 14) → [`Q6_K`].
    Q6_K,
    /// `GGML_TYPE_Q8_K` (ID 15) → [`Q8_K`].
    Q8_K,
    /// `GGML_TYPE_IQ2_XXS` (ID 16) → [`IQ2_XXS`].
    IQ2_XXS,
    /// `GGML_TYPE_IQ2_XS` (ID 17) → [`IQ2_XS`].
    IQ2_XS,
    /// `GGML_TYPE_IQ3_XXS` (ID 18) → [`IQ3_XXS`].
    IQ3_XXS,
    /// `GGML_TYPE_IQ1_S` (ID 19) → [`IQ1_S`].
    IQ1_S,
    /// `GGML_TYPE_IQ4_NL` (ID 20) → [`IQ4_NL`].
    IQ4_NL,
    /// `GGML_TYPE_IQ3_S` (ID 21) → [`IQ3_S`].
    IQ3_S,
    /// `GGML_TYPE_IQ2_S` (ID 22) → [`IQ2_S`].
    IQ2_S,
    /// `GGML_TYPE_IQ4_XS` (ID 23) → [`IQ4_XS`].
    IQ4_XS,
    /// `GGML_TYPE_I8` (ID 24) → [`I8`].
    I8,
    /// `GGML_TYPE_I16` (ID 25) → [`I16`].
    I16,
    /// `GGML_TYPE_I32` (ID 26) → [`I32`].
    I32,
    /// `GGML_TYPE_I64` (ID 27) → [`I64`].
    I64,
    /// `GGML_TYPE_F64` (ID 28) → [`F64`].
    F64,
    /// `GGML_TYPE_IQ1_M` (ID 29) → [`IQ1_M`].
    IQ1_M,
    /// `GGML_TYPE_BF16` (ID 30) → [`BF16`].
    BF16,
}
95
96impl GgmlType {
97    /// Map a raw `ggml_type` integer ID to a [`GgmlType`].
98    ///
99    /// Returns `None` for IDs outside the GGUF v3 tensor-type set —
100    /// including the deprecated IDs `4` and `5`.
101    #[must_use]
102    pub const fn from_u32(id: u32) -> Option<Self> {
103        Some(match id {
104            0 => Self::F32,
105            1 => Self::F16,
106            2 => Self::Q4_0,
107            3 => Self::Q4_1,
108            6 => Self::Q5_0,
109            7 => Self::Q5_1,
110            8 => Self::Q8_0,
111            9 => Self::Q8_1,
112            10 => Self::Q2_K,
113            11 => Self::Q3_K,
114            12 => Self::Q4_K,
115            13 => Self::Q5_K,
116            14 => Self::Q6_K,
117            15 => Self::Q8_K,
118            16 => Self::IQ2_XXS,
119            17 => Self::IQ2_XS,
120            18 => Self::IQ3_XXS,
121            19 => Self::IQ1_S,
122            20 => Self::IQ4_NL,
123            21 => Self::IQ3_S,
124            22 => Self::IQ2_S,
125            23 => Self::IQ4_XS,
126            24 => Self::I8,
127            25 => Self::I16,
128            26 => Self::I32,
129            27 => Self::I64,
130            28 => Self::F64,
131            29 => Self::IQ1_M,
132            30 => Self::BF16,
133            _ => return None,
134        })
135    }
136
137    /// The canonical `ggml_type` integer ID for this dtype.
138    #[must_use]
139    pub const fn id(self) -> u32 {
140        match self {
141            Self::F32 => 0,
142            Self::F16 => 1,
143            Self::Q4_0 => 2,
144            Self::Q4_1 => 3,
145            Self::Q5_0 => 6,
146            Self::Q5_1 => 7,
147            Self::Q8_0 => 8,
148            Self::Q8_1 => 9,
149            Self::Q2_K => 10,
150            Self::Q3_K => 11,
151            Self::Q4_K => 12,
152            Self::Q5_K => 13,
153            Self::Q6_K => 14,
154            Self::Q8_K => 15,
155            Self::IQ2_XXS => 16,
156            Self::IQ2_XS => 17,
157            Self::IQ3_XXS => 18,
158            Self::IQ1_S => 19,
159            Self::IQ4_NL => 20,
160            Self::IQ3_S => 21,
161            Self::IQ2_S => 22,
162            Self::IQ4_XS => 23,
163            Self::I8 => 24,
164            Self::I16 => 25,
165            Self::I32 => 26,
166            Self::I64 => 27,
167            Self::F64 => 28,
168            Self::IQ1_M => 29,
169            Self::BF16 => 30,
170        }
171    }
172
173    /// Bytes per block, sourced from the [`prism::tensor::dtype`] shape.
174    #[must_use]
175    pub const fn block_bytes(self) -> usize {
176        match self {
177            Self::F32 => F32::BLOCK_BYTES,
178            Self::F16 => F16::BLOCK_BYTES,
179            Self::Q4_0 => Q4_0::BLOCK_BYTES,
180            Self::Q4_1 => Q4_1::BLOCK_BYTES,
181            Self::Q5_0 => Q5_0::BLOCK_BYTES,
182            Self::Q5_1 => Q5_1::BLOCK_BYTES,
183            Self::Q8_0 => Q8_0::BLOCK_BYTES,
184            Self::Q8_1 => Q8_1::BLOCK_BYTES,
185            Self::Q2_K => Q2_K::BLOCK_BYTES,
186            Self::Q3_K => Q3_K::BLOCK_BYTES,
187            Self::Q4_K => Q4_K::BLOCK_BYTES,
188            Self::Q5_K => Q5_K::BLOCK_BYTES,
189            Self::Q6_K => Q6_K::BLOCK_BYTES,
190            Self::Q8_K => Q8_K::BLOCK_BYTES,
191            Self::IQ2_XXS => IQ2_XXS::BLOCK_BYTES,
192            Self::IQ2_XS => IQ2_XS::BLOCK_BYTES,
193            Self::IQ3_XXS => IQ3_XXS::BLOCK_BYTES,
194            Self::IQ1_S => IQ1_S::BLOCK_BYTES,
195            Self::IQ4_NL => IQ4_NL::BLOCK_BYTES,
196            Self::IQ3_S => IQ3_S::BLOCK_BYTES,
197            Self::IQ2_S => IQ2_S::BLOCK_BYTES,
198            Self::IQ4_XS => IQ4_XS::BLOCK_BYTES,
199            Self::I8 => I8::BLOCK_BYTES,
200            Self::I16 => I16::BLOCK_BYTES,
201            Self::I32 => I32::BLOCK_BYTES,
202            Self::I64 => I64::BLOCK_BYTES,
203            Self::F64 => F64::BLOCK_BYTES,
204            Self::IQ1_M => IQ1_M::BLOCK_BYTES,
205            Self::BF16 => BF16::BLOCK_BYTES,
206        }
207    }
208
209    /// Elements per block, sourced from the [`prism::tensor::dtype`]
210    /// shape.
211    #[must_use]
212    pub const fn block_elems(self) -> usize {
213        match self {
214            Self::F32 => F32::BLOCK_ELEMS,
215            Self::F16 => F16::BLOCK_ELEMS,
216            Self::Q4_0 => Q4_0::BLOCK_ELEMS,
217            Self::Q4_1 => Q4_1::BLOCK_ELEMS,
218            Self::Q5_0 => Q5_0::BLOCK_ELEMS,
219            Self::Q5_1 => Q5_1::BLOCK_ELEMS,
220            Self::Q8_0 => Q8_0::BLOCK_ELEMS,
221            Self::Q8_1 => Q8_1::BLOCK_ELEMS,
222            Self::Q2_K => Q2_K::BLOCK_ELEMS,
223            Self::Q3_K => Q3_K::BLOCK_ELEMS,
224            Self::Q4_K => Q4_K::BLOCK_ELEMS,
225            Self::Q5_K => Q5_K::BLOCK_ELEMS,
226            Self::Q6_K => Q6_K::BLOCK_ELEMS,
227            Self::Q8_K => Q8_K::BLOCK_ELEMS,
228            Self::IQ2_XXS => IQ2_XXS::BLOCK_ELEMS,
229            Self::IQ2_XS => IQ2_XS::BLOCK_ELEMS,
230            Self::IQ3_XXS => IQ3_XXS::BLOCK_ELEMS,
231            Self::IQ1_S => IQ1_S::BLOCK_ELEMS,
232            Self::IQ4_NL => IQ4_NL::BLOCK_ELEMS,
233            Self::IQ3_S => IQ3_S::BLOCK_ELEMS,
234            Self::IQ2_S => IQ2_S::BLOCK_ELEMS,
235            Self::IQ4_XS => IQ4_XS::BLOCK_ELEMS,
236            Self::I8 => I8::BLOCK_ELEMS,
237            Self::I16 => I16::BLOCK_ELEMS,
238            Self::I32 => I32::BLOCK_ELEMS,
239            Self::I64 => I64::BLOCK_ELEMS,
240            Self::F64 => F64::BLOCK_ELEMS,
241            Self::IQ1_M => IQ1_M::BLOCK_ELEMS,
242            Self::BF16 => BF16::BLOCK_ELEMS,
243        }
244    }
245
246    /// The total byte count for a tensor of `num_elements` of this
247    /// dtype: `(num_elements / BLOCK_ELEMS) * BLOCK_BYTES`. Returns
248    /// `None` if `num_elements` is not a whole multiple of
249    /// `BLOCK_ELEMS` (a malformed quantized tensor) or on overflow.
250    #[must_use]
251    pub const fn tensor_data_bytes(self, num_elements: u64) -> Option<u64> {
252        let elems = self.block_elems() as u64;
253        if elems == 0 || num_elements % elems != 0 {
254            return None;
255        }
256        let blocks = num_elements / elems;
257        blocks.checked_mul(self.block_bytes() as u64)
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ggml_type` ID this module admits: `0..=30` without the
    /// deprecated `4` and `5`.
    fn admitted_ids() -> impl Iterator<Item = u32> {
        (0u32..=30).filter(|&id| id != 4 && id != 5)
    }

    #[test]
    fn deprecated_ids_rejected() {
        assert!(GgmlType::from_u32(4).is_none());
        assert!(GgmlType::from_u32(5).is_none());
        // IDs past the last known variant are rejected as well.
        assert!(GgmlType::from_u32(31).is_none());
        assert!(GgmlType::from_u32(u32::MAX).is_none());
    }

    #[test]
    fn id_round_trips() {
        // Sweep the whole admitted range rather than a sample, so a typo
        // in any single arm of `from_u32` or `id` is caught.
        let mut admitted = 0usize;
        for id in admitted_ids() {
            let t = GgmlType::from_u32(id).expect("known id");
            assert_eq!(t.id(), id);
            assert_eq!(GgmlType::from_u32(t.id()), Some(t));
            admitted += 1;
        }
        // 31 IDs in `0..=30`, minus the two deprecated ones.
        assert_eq!(admitted, 29);
    }

    #[test]
    fn block_geometry_matches_prism() {
        // Continuous types: 1 element per block.
        assert_eq!(GgmlType::F32.block_bytes(), 4);
        assert_eq!(GgmlType::F32.block_elems(), 1);
        assert_eq!(GgmlType::F16.block_bytes(), 2);
        assert_eq!(GgmlType::BF16.block_bytes(), 2);
        assert_eq!(GgmlType::F64.block_bytes(), 8);
        // Legacy block-32 quant.
        assert_eq!(GgmlType::Q4_0.block_elems(), 32);
        // K-series block-256 quant.
        assert_eq!(GgmlType::Q4_K.block_elems(), 256);
    }

    #[test]
    fn tensor_data_bytes_mechanical() {
        // 64 F32 elements = 64 * 4 = 256 bytes.
        assert_eq!(GgmlType::F32.tensor_data_bytes(64), Some(256));
        // 256 Q4_K elements = 1 block * BLOCK_BYTES.
        assert_eq!(
            GgmlType::Q4_K.tensor_data_bytes(256),
            Some(Q4_K::BLOCK_BYTES as u64)
        );
        // Non-multiple of block size for a quantized type is rejected.
        assert_eq!(GgmlType::Q4_K.tensor_data_bytes(100), None);
    }

    #[test]
    fn tensor_data_bytes_edge_cases() {
        // An empty tensor occupies no bytes.
        assert_eq!(GgmlType::F32.tensor_data_bytes(0), Some(0));
        // u64::MAX blocks of 4 bytes cannot be represented in a u64.
        assert_eq!(GgmlType::F32.tensor_data_bytes(u64::MAX), None);
    }
}