diff --git a/MaxText/configs/quantization/mp_scale_1.json b/MaxText/configs/quantization/mp_scale_1.json new file mode 100644 index 0000000000..bef718ba77 --- /dev/null +++ b/MaxText/configs/quantization/mp_scale_1.json @@ -0,0 +1,8 @@ +{ + ".*/query": {"bits": 4, "scale": 0.8}, + ".*/key": {"bits": 4, "scale": 0.9}, + ".*/value": {"bits": 8}, + ".*/out": {"bits": 4}, + ".*/wi_0": {"bits": 4}, + ".*/wo": {"bits": 8} +} diff --git a/MaxText/configs/quantization/mp_scale_2.json b/MaxText/configs/quantization/mp_scale_2.json new file mode 100644 index 0000000000..760e3d8015 --- /dev/null +++ b/MaxText/configs/quantization/mp_scale_2.json @@ -0,0 +1,8 @@ +{ + ".*/query": {"bits": 4, "scale": 0.8}, + ".*/key": {"bits": 4, "scale": 0.9}, + ".*/value": {"bits": 8}, + ".*/out": {"bits": 4}, + ".*/wi_0": {"bits": 4}, + ".*/wo": {"bits": 4} +}