Unverified commit 6b5dc29f, authored by Casper and committed by GitHub

Load on CPU to avoid OOM (#236)

parent 5eb1d2f0
@@ -115,26 +115,6 @@ class BaseAWQForCausalLM(nn.Module):
             self, model_path, '', safetensors, trust_remote_code=trust_remote_code
         )
-        if device_map is None:
-            with init_empty_weights():
-                model = AutoModelForCausalLM.from_config(config=config, torch_dtype=torch_dtype, trust_remote_code=trust_remote_code)
-            # Evenly distribute memory on GPUs
-            max_memory = get_balanced_memory(
-                model,
-                no_split_module_classes=[self.layer_type],
-                dtype=torch_dtype
-            )
-            # Get device map
-            device_map = infer_auto_device_map(
-                model,
-                max_memory=max_memory,
-                no_split_module_classes=[self.layer_type],
-                dtype=torch_dtype
-            )
-            del model
         # If not quantized, must load with AutoModelForCausalLM
         model = AutoModelForCausalLM.from_pretrained(
             model_weights_path,
...
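For context, the block removed above used Hugging Face accelerate to build a balanced GPU device map for the un-quantized FP16 model and load it straight onto the GPUs, which is what could run out of GPU memory before quantization even started. The sketch below contrasts that strategy with a plain CPU load of the kind this commit falls back to. It is a minimal illustration only: the model path and dtype are placeholders, and the exact loading arguments AutoAWQ uses after this commit may differ from what is shown here.

import torch
from transformers import AutoConfig, AutoModelForCausalLM
from accelerate import init_empty_weights, infer_auto_device_map
from accelerate.utils import get_balanced_memory

model_path = "some/causal-lm"  # placeholder path, not taken from the diff

# Strategy removed by this commit: create an empty (meta-device) copy of the
# model, compute a memory budget balanced across all visible GPUs, and derive
# a device_map so from_pretrained() loads weights directly onto the GPUs.
config = AutoConfig.from_pretrained(model_path)
with init_empty_weights():
    empty_model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.float16)
max_memory = get_balanced_memory(empty_model, dtype=torch.float16)
device_map = infer_auto_device_map(empty_model, max_memory=max_memory, dtype=torch.float16)
del empty_model

# Strategy this commit moves toward: keep the full-precision weights in system
# RAM (device_map left unset, so the model stays on CPU) and only move tensors
# to the GPU as they are needed during quantization, avoiding GPU OOM.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,  # stream shards instead of materializing weights twice
)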