1use prism::tensor::dtype::{
20 Dtype, BF16, F16, F32, F64, I16, I32, I64, I8, IQ1_M, IQ1_S, IQ2_S, IQ2_XS, IQ2_XXS, IQ3_S,
21 IQ3_XXS, IQ4_NL, IQ4_XS, Q2_K, Q3_K, Q4_0, Q4_1, Q4_K, Q5_0, Q5_1, Q5_K, Q6_K, Q8_0, Q8_1,
22 Q8_K,
23};
24
/// Tag identifying the storage type of a tensor, named after the GGML type tags.
///
/// Each variant corresponds to one numeric type id; convert with
/// [`GgmlType::from_u32`] and [`GgmlType::id`]. The declaration order below is
/// NOT the numeric id (ids `4` and `5` have no variant), so never rely on
/// `as` casts to obtain the id.
// The variant names deliberately keep their underscores (`Q4_0`, `IQ2_XXS`, ...)
// so they match the names of the `prism` dtypes they are mapped to.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum GgmlType {
    /// Type id `0`.
    F32,
    /// Type id `1`.
    F16,
    /// Type id `2`.
    Q4_0,
    /// Type id `3`.
    Q4_1,
    /// Type id `6`.
    Q5_0,
    /// Type id `7`.
    Q5_1,
    /// Type id `8`.
    Q8_0,
    /// Type id `9`.
    Q8_1,
    /// Type id `10`.
    Q2_K,
    /// Type id `11`.
    Q3_K,
    /// Type id `12`.
    Q4_K,
    /// Type id `13`.
    Q5_K,
    /// Type id `14`.
    Q6_K,
    /// Type id `15`.
    Q8_K,
    /// Type id `16`.
    IQ2_XXS,
    /// Type id `17`.
    IQ2_XS,
    /// Type id `18`.
    IQ3_XXS,
    /// Type id `19`.
    IQ1_S,
    /// Type id `20`.
    IQ4_NL,
    /// Type id `21`.
    IQ3_S,
    /// Type id `22`.
    IQ2_S,
    /// Type id `23`.
    IQ4_XS,
    /// Type id `24`.
    I8,
    /// Type id `25`.
    I16,
    /// Type id `26`.
    I32,
    /// Type id `27`.
    I64,
    /// Type id `28`.
    F64,
    /// Type id `29`.
    IQ1_M,
    /// Type id `30`.
    BF16,
}
95
96impl GgmlType {
97 #[must_use]
102 pub const fn from_u32(id: u32) -> Option<Self> {
103 Some(match id {
104 0 => Self::F32,
105 1 => Self::F16,
106 2 => Self::Q4_0,
107 3 => Self::Q4_1,
108 6 => Self::Q5_0,
109 7 => Self::Q5_1,
110 8 => Self::Q8_0,
111 9 => Self::Q8_1,
112 10 => Self::Q2_K,
113 11 => Self::Q3_K,
114 12 => Self::Q4_K,
115 13 => Self::Q5_K,
116 14 => Self::Q6_K,
117 15 => Self::Q8_K,
118 16 => Self::IQ2_XXS,
119 17 => Self::IQ2_XS,
120 18 => Self::IQ3_XXS,
121 19 => Self::IQ1_S,
122 20 => Self::IQ4_NL,
123 21 => Self::IQ3_S,
124 22 => Self::IQ2_S,
125 23 => Self::IQ4_XS,
126 24 => Self::I8,
127 25 => Self::I16,
128 26 => Self::I32,
129 27 => Self::I64,
130 28 => Self::F64,
131 29 => Self::IQ1_M,
132 30 => Self::BF16,
133 _ => return None,
134 })
135 }
136
137 #[must_use]
139 pub const fn id(self) -> u32 {
140 match self {
141 Self::F32 => 0,
142 Self::F16 => 1,
143 Self::Q4_0 => 2,
144 Self::Q4_1 => 3,
145 Self::Q5_0 => 6,
146 Self::Q5_1 => 7,
147 Self::Q8_0 => 8,
148 Self::Q8_1 => 9,
149 Self::Q2_K => 10,
150 Self::Q3_K => 11,
151 Self::Q4_K => 12,
152 Self::Q5_K => 13,
153 Self::Q6_K => 14,
154 Self::Q8_K => 15,
155 Self::IQ2_XXS => 16,
156 Self::IQ2_XS => 17,
157 Self::IQ3_XXS => 18,
158 Self::IQ1_S => 19,
159 Self::IQ4_NL => 20,
160 Self::IQ3_S => 21,
161 Self::IQ2_S => 22,
162 Self::IQ4_XS => 23,
163 Self::I8 => 24,
164 Self::I16 => 25,
165 Self::I32 => 26,
166 Self::I64 => 27,
167 Self::F64 => 28,
168 Self::IQ1_M => 29,
169 Self::BF16 => 30,
170 }
171 }
172
173 #[must_use]
175 pub const fn block_bytes(self) -> usize {
176 match self {
177 Self::F32 => F32::BLOCK_BYTES,
178 Self::F16 => F16::BLOCK_BYTES,
179 Self::Q4_0 => Q4_0::BLOCK_BYTES,
180 Self::Q4_1 => Q4_1::BLOCK_BYTES,
181 Self::Q5_0 => Q5_0::BLOCK_BYTES,
182 Self::Q5_1 => Q5_1::BLOCK_BYTES,
183 Self::Q8_0 => Q8_0::BLOCK_BYTES,
184 Self::Q8_1 => Q8_1::BLOCK_BYTES,
185 Self::Q2_K => Q2_K::BLOCK_BYTES,
186 Self::Q3_K => Q3_K::BLOCK_BYTES,
187 Self::Q4_K => Q4_K::BLOCK_BYTES,
188 Self::Q5_K => Q5_K::BLOCK_BYTES,
189 Self::Q6_K => Q6_K::BLOCK_BYTES,
190 Self::Q8_K => Q8_K::BLOCK_BYTES,
191 Self::IQ2_XXS => IQ2_XXS::BLOCK_BYTES,
192 Self::IQ2_XS => IQ2_XS::BLOCK_BYTES,
193 Self::IQ3_XXS => IQ3_XXS::BLOCK_BYTES,
194 Self::IQ1_S => IQ1_S::BLOCK_BYTES,
195 Self::IQ4_NL => IQ4_NL::BLOCK_BYTES,
196 Self::IQ3_S => IQ3_S::BLOCK_BYTES,
197 Self::IQ2_S => IQ2_S::BLOCK_BYTES,
198 Self::IQ4_XS => IQ4_XS::BLOCK_BYTES,
199 Self::I8 => I8::BLOCK_BYTES,
200 Self::I16 => I16::BLOCK_BYTES,
201 Self::I32 => I32::BLOCK_BYTES,
202 Self::I64 => I64::BLOCK_BYTES,
203 Self::F64 => F64::BLOCK_BYTES,
204 Self::IQ1_M => IQ1_M::BLOCK_BYTES,
205 Self::BF16 => BF16::BLOCK_BYTES,
206 }
207 }
208
209 #[must_use]
212 pub const fn block_elems(self) -> usize {
213 match self {
214 Self::F32 => F32::BLOCK_ELEMS,
215 Self::F16 => F16::BLOCK_ELEMS,
216 Self::Q4_0 => Q4_0::BLOCK_ELEMS,
217 Self::Q4_1 => Q4_1::BLOCK_ELEMS,
218 Self::Q5_0 => Q5_0::BLOCK_ELEMS,
219 Self::Q5_1 => Q5_1::BLOCK_ELEMS,
220 Self::Q8_0 => Q8_0::BLOCK_ELEMS,
221 Self::Q8_1 => Q8_1::BLOCK_ELEMS,
222 Self::Q2_K => Q2_K::BLOCK_ELEMS,
223 Self::Q3_K => Q3_K::BLOCK_ELEMS,
224 Self::Q4_K => Q4_K::BLOCK_ELEMS,
225 Self::Q5_K => Q5_K::BLOCK_ELEMS,
226 Self::Q6_K => Q6_K::BLOCK_ELEMS,
227 Self::Q8_K => Q8_K::BLOCK_ELEMS,
228 Self::IQ2_XXS => IQ2_XXS::BLOCK_ELEMS,
229 Self::IQ2_XS => IQ2_XS::BLOCK_ELEMS,
230 Self::IQ3_XXS => IQ3_XXS::BLOCK_ELEMS,
231 Self::IQ1_S => IQ1_S::BLOCK_ELEMS,
232 Self::IQ4_NL => IQ4_NL::BLOCK_ELEMS,
233 Self::IQ3_S => IQ3_S::BLOCK_ELEMS,
234 Self::IQ2_S => IQ2_S::BLOCK_ELEMS,
235 Self::IQ4_XS => IQ4_XS::BLOCK_ELEMS,
236 Self::I8 => I8::BLOCK_ELEMS,
237 Self::I16 => I16::BLOCK_ELEMS,
238 Self::I32 => I32::BLOCK_ELEMS,
239 Self::I64 => I64::BLOCK_ELEMS,
240 Self::F64 => F64::BLOCK_ELEMS,
241 Self::IQ1_M => IQ1_M::BLOCK_ELEMS,
242 Self::BF16 => BF16::BLOCK_ELEMS,
243 }
244 }
245
246 #[must_use]
251 pub const fn tensor_data_bytes(self, num_elements: u64) -> Option<u64> {
252 let elems = self.block_elems() as u64;
253 if elems == 0 || num_elements % elems != 0 {
254 return None;
255 }
256 let blocks = num_elements / elems;
257 blocks.checked_mul(self.block_bytes() as u64)
258 }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Ids without a variant (the gap at 4 and 5, and the first id past the
    /// end of the table) must not decode.
    #[test]
    fn deprecated_ids_rejected() {
        for id in [4u32, 5, 31] {
            assert!(
                GgmlType::from_u32(id).is_none(),
                "id {} should be rejected",
                id
            );
        }
    }

    /// Sweeps a range that covers the whole id table plus a margin, so every
    /// arm of `from_u32` and `id` is exercised and the set of accepted ids is
    /// pinned exactly (0..=3 and 6..=30).
    #[test]
    fn id_round_trips() {
        for id in 0u32..64 {
            let expected_known = matches!(id, 0..=3 | 6..=30);
            match GgmlType::from_u32(id) {
                Some(ty) => {
                    assert!(expected_known, "id {} unexpectedly decoded to {:?}", id, ty);
                    assert_eq!(ty.id(), id);
                }
                None => assert!(!expected_known, "id {} should decode to a variant", id),
            }
        }
    }

    /// Spot-checks that the geometry accessors forward the prism constants.
    #[test]
    fn block_geometry_matches_prism() {
        // Scalar types: one element per block.
        assert_eq!(GgmlType::F32.block_bytes(), 4);
        assert_eq!(GgmlType::F32.block_elems(), 1);
        assert_eq!(GgmlType::F16.block_bytes(), 2);
        assert_eq!(GgmlType::BF16.block_bytes(), 2);
        assert_eq!(GgmlType::F64.block_bytes(), 8);
        // Block-quantized types: many elements per block.
        assert_eq!(GgmlType::Q4_0.block_elems(), 32);
        assert_eq!(GgmlType::Q4_K.block_elems(), 256);
    }

    #[test]
    fn tensor_data_bytes_mechanical() {
        // 64 elements * 4 bytes each.
        assert_eq!(GgmlType::F32.tensor_data_bytes(64), Some(256));
        // Exactly one Q4_K block.
        assert_eq!(
            GgmlType::Q4_K.tensor_data_bytes(256),
            Some(Q4_K::BLOCK_BYTES as u64)
        );
        // 100 is not a multiple of the 256-element Q4_K block.
        assert_eq!(GgmlType::Q4_K.tensor_data_bytes(100), None);
    }

    /// Boundary inputs: an empty tensor occupies zero bytes, and a byte count
    /// that does not fit in `u64` is reported as `None` instead of wrapping.
    #[test]
    fn tensor_data_bytes_boundaries() {
        assert_eq!(GgmlType::F32.tensor_data_bytes(0), Some(0));
        assert_eq!(GgmlType::Q4_K.tensor_data_bytes(0), Some(0));
        // u64::MAX one-element blocks * 4 bytes overflows u64.
        assert_eq!(GgmlType::F32.tensor_data_bytes(u64::MAX), None);
    }
}