@@ -97,6 +97,12 @@ class QuantizedType : public Type {
9797 return -getDefaultMaximumForF8E5M2 ();
9898 }
9999
/// Default maximum for the F4E2M1FN (4-bit float, 'f4') type: 6, which is the
/// largest finite magnitude that format can represent.
static constexpr int64_t getDefaultMaximumForF4E2M1FN() { return 6; }
101+
102+ static constexpr int64_t getDefaultMinimumForF4E2M1FN () {
103+ return -getDefaultMaximumForF4E2M1FN ();
104+ }
105+
100106 // / Gets the original expressed type that this quantized type approximates.
101107 // / Note that this presumes that the quantized type was always derived from
102108 // / a floating point type, which in the broadest definition, is not true (i.e.
@@ -267,7 +273,7 @@ class AnyQuantizedType
267273// / Per-layer, optional parameters omitted:
268274// / !quant<uniform[StorageType]{Scale}>
269275// /
270- // / StorageType: 'i'|'u' NumBits
276+ // / StorageType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8'
271277// / ExpressedType: 'f16', 'f32', 'bf16', 'f64'
272278// / Scale: A legal double value
273279// / ZeroPoint: An integer value
@@ -327,7 +333,7 @@ class UniformQuantizedType
327333// / Per-axis, optional parameters omitted:
328334// / !quant<uniform[StorageType]{Scale}>
329335// /
330- // / StorageType: 'i'|'u' NumBits
336+ // / StorageType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8'
331337// / ExpressedType: 'f16', 'f32', 'bf16', 'f64'
332338// / QuantizedDim: An integer value
333339// / QuantParams: (Scale ':' ZeroPoint)+
@@ -414,7 +420,7 @@ class UniformQuantizedPerAxisType
414420// / ScaleZeroList ::= ScaleZero (',' ScaleZero)*
415421// / ScaleZero ::= Scale (':' ZeroPoint)?
416422// /
417- // / StorageType: 'i'|'u' NumBits
423+ // / StorageType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8'
418424// / ExpressedType: 'f16', 'f32', 'bf16', 'f64'
419425// / AxisSpec: An integer value
420426// / BlockSizeSpec: An integer value
@@ -534,18 +540,17 @@ class UniformQuantizedSubChannelType
534540// / QuantileQuantizedType derives from UniformQuantizedType and adds to it a
535541// / look up table array of quantile values. The type of the data in the look up
536542// / table is determined by the quantileType member: supported quantileType types
537- // / are integer/unsigned/hf8/bf8/f16/bf16/f32/f64.
/// are integer/unsigned/f4/hf8/bf8/f16/bf16/f32/f64.
538544// /
539545// / Syntax synopsis:
540546// / Per-layer, all parameters expressed:
541547// / !quant<quantile[StorageType:QuantileType:ExpressedType]{Quantiles}:{Scale:ZeroPoint}>
542548// / Per-layer, optional parameters omitted:
543549// / !quant<quantile[StorageType:QuantileType]{Quantiles}:{Scale}>
544550// /
545- // / StorageType: 'i'|'u' NumBits
546- // / QuantileType: 'i'|'u' NumBits, 'hf8', 'bf8', 'f16', 'bf16', 'f32', 'f64'
547- // / ExpressedType: 'f16', 'f32', 'bf16', 'f64'
548- // / Quantiles: Quantile+
/// StorageType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8'
/// QuantileType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8', 'f16', 'bf16', 'f32',
///               'f64'
/// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
/// Quantiles: Quantile+
549554// / Quantile: A legal double value
550555// / Scale: A legal double value
551556// / ZeroPoint: An integer value
@@ -601,23 +606,20 @@ class QuantileQuantizedType
601606// / Represents per-axis QuantileQuantizedType (also known as per-channel
602607// / quantization). The type of the data in the look up table is determined by
603608// / the quantileType member: supported quantileType types are
604- // / integer/unsigned/hf8/bf8/f16/bf16/f32/f64.
/// integer/unsigned/f4/hf8/bf8/f16/bf16/f32/f64.
605610// /
606611// / Syntax synopsis:
607612// / Per-axis, all parameters expressed:
608613// / !quant<quantile[StorageType:QuantileType:ExpressedType:QuantizedDim]{Quantiles}:{QuantParams}>
609614// / Per-axis, optional parameters omitted:
610615// / !quant<quantile[StorageType:QuantileType]{Quantiles}:{Scale}>
611616// /
612- // / StorageType: 'i'|'u' NumBits
613- // / QuantileType: 'i'|'u' NumBits, 'hf8', 'bf8', 'f16', 'bf16', 'f32', 'f64'
614- // / ExpressedType: 'f16', 'f32', 'bf16', 'f64'
615- // / QuantizedDim: An integer value
616- // / Quantiles: Quantile+
617- // / Quantile: A legal double value
618- // / QuantParams: (Scale ':' ZeroPoint)+
619- // / Scale: A legal double value
620- // / ZeroPoint: An integer value
/// StorageType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8'
/// QuantileType: 'i'|'u' NumBits, 'f4', 'hf8', 'bf8', 'f16', 'bf16', 'f32',
///               'f64'
/// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
/// QuantizedDim: An integer value
/// Quantiles: Quantile+
/// Quantile: A legal double value
/// QuantParams: (Scale ':' ZeroPoint)+
/// Scale: A legal double value
/// ZeroPoint: An integer value
621623class QuantileQuantizedPerAxisType
622624 : public Type::TypeBase<QuantileQuantizedPerAxisType,
623625 UniformQuantizedPerAxisType,
0 commit comments