[Misc] Fix arg names (#5524)

d74674bb · Allen.Dou · GitHub · 703475f6 · d74674bb · d74674bb
Unverified commit d74674bb, authored Jun 15, 2024 by Allen.Dou; committed by GitHub on Jun 14, 2024
3 changed files
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -165,7 +165,7 @@ if __name__ == '__main__':
                        choices=["v1", "v2"],
                        default="v2")
    parser.add_argument("--batch-size", type=int, default=8)
-    parser.add_argument("--seq_len", type=int, default=4096)
+    parser.add_argument("--seq-len", type=int, default=4096)
    parser.add_argument("--num-query-heads", type=int, default=64)
    parser.add_argument("--num-kv-heads", type=int, default=8)
    parser.add_argument("--head-size",

--- a/examples/aqlm_example.py
+++ b/examples/aqlm_example.py
@@ -17,7 +17,7 @@ def main():
                        type=int,
                        default=0,
                        help='known good models by index, [0-4]')
-    parser.add_argument('--tensor_parallel_size',
+    parser.add_argument('--tensor-parallel-size',
                        '-t',
                        type=int,
                        default=1,

--- a/examples/fp8/extract_scales.py
+++ b/examples/fp8/extract_scales.py
@@ -327,7 +327,7 @@ if __name__ == "__main__":
        "--quantization-param-path <filename>). This is only used "
        "if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
    parser.add_argument(
-        "--quantized_model",
+        "--quantized-model",
        help="Specify the directory containing a single quantized HF model. "
        "It is expected that the quantization format is FP8_E4M3, for use "
        "on ROCm (AMD GPU).",
@@ -339,18 +339,18 @@ if __name__ == "__main__":
        choices=["auto", "safetensors", "npz", "pt"],
        default="auto")
    parser.add_argument(
-        "--output_dir",
+        "--output-dir",
        help="Optionally specify the output directory. By default the "
        "KV cache scaling factors will be saved in the model directory, "
        "however you can override this behavior here.",
        default=None)
    parser.add_argument(
-        "--output_name",
+        "--output-name",
        help="Optionally specify the output filename.",
        # TODO: Change this once additional scaling factors are enabled
        default="kv_cache_scales.json")
    parser.add_argument(
-        "--tp_size",
+        "--tp-size",
        help="Optionally specify the tensor-parallel (TP) size that the "
        "quantized model should correspond to. If specified, during KV "
        "cache scaling factor extraction the observed TP size will be "