doc: clarify that `--quantize` is not needed for pre-quantized models (#2536)

abd24dd3 · Daniël de Kok · GitHub · c1037601 · abd24dd3 · abd24dd3
Unverified commit abd24dd3, authored Sep 19, 2024 by Daniël de Kok; committed by GitHub on Sep 19, 2024.
Hide whitespace changes
Inline Side-by-side

Showing with 9 additions and 2 deletions

docs/source/reference/launcher.md (+3 -1)

flake.nix (+1 -0)

launcher/src/main.rs (+5 -1)

No files found.
--- a/docs/source/reference/launcher.md
+++ b/docs/source/reference/launcher.md
@@ -55,7 +55,9 @@ Options:
 ## QUANTIZE
 ```shell
      --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+          
+          Marlin kernels will be used automatically for GPTQ/AWQ models.
          
          [env: QUANTIZE=]


--- a/flake.nix
+++ b/flake.nix
@@ -157,6 +157,7 @@
                pyright
                pytest
                pytest-asyncio
+                redocly
                ruff
                syrupy
              ]);

--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -367,7 +367,11 @@ struct Args {
    #[clap(long, env)]
    num_shard: Option<usize>,

-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
    #[clap(long, env, value_enum)]
    quantize: Option<Quantization>,