Commit 6e0bde15 authored by Casper Hansen's avatar Casper Hansen

Set AWQ_BATCH_SIZE environment variable

parent a2aa804c
@@ -1,3 +1,4 @@
+import os
 from transformers import AutoConfig
 from awq.models import *
 from awq.models.base import BaseAWQForCausalLM
@@ -35,7 +36,9 @@ class AutoAWQForCausalLM:
     @classmethod
     def from_quantized(self, quant_path, quant_filename, max_new_tokens=None,
-                       device='balanced', trust_remote_code=True, fuse_layers=True) -> BaseAWQForCausalLM:
+                       device='balanced', trust_remote_code=True, fuse_layers=True,
+                       batch_size=1) -> BaseAWQForCausalLM:
+        os.environ["AWQ_BATCH_SIZE"] = str(batch_size)
         model_type = check_and_get_model_type(quant_path, trust_remote_code)
         return AWQ_CAUSAL_LM_MODEL_MAP[model_type].from_quantized(
...
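
The new `batch_size` argument is exported as the `AWQ_BATCH_SIZE` environment variable before the quantized weights are loaded. A minimal usage sketch, assuming the package exposes `AutoAWQForCausalLM` from `awq`; the model path and weights filename below are hypothetical:

    from awq import AutoAWQForCausalLM

    # batch_size is written to os.environ["AWQ_BATCH_SIZE"] before loading,
    # so code that reads the variable during model setup picks it up.
    model = AutoAWQForCausalLM.from_quantized(
        "path/to/quantized-model",    # hypothetical quant_path
        "awq_model_w4_g128.pt",       # hypothetical quant_filename
        batch_size=8,                 # results in AWQ_BATCH_SIZE=8
    )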