Commit ffaaa259 authored by s4rduk4r

Rely on accelerate.dispatch_model() only

parent 91de3bfa
@@ -168,23 +168,16 @@ class BaseAWQForCausalLM(nn.Module):
         )
         # Dispatch to devices
-        if max_memory is None:
-            # VRAM only
-            model = simple_dispatch_model(model, device_map)
-            if fuse_layers:
-                self.fuse_layers(model, quant_config)
-        else:
-            if fuse_layers:
-                self.fuse_layers(model, quant_config)
-            # Offloading dispatch
-            from accelerate import dispatch_model
-            model = dispatch_model(
-                model,
-                device_map=device_map,
-                offload_dir=offload_folder
-            )
+        if fuse_layers:
+            self.fuse_layers(model, quant_config)
+        # Offloading dispatch
+        from accelerate import dispatch_model
+        model = dispatch_model(
+            model,
+            device_map=device_map,
+            offload_dir=offload_folder
+        )
         return self(model, model_type, is_quantized=is_quantized, quant_config=quant_config)
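
The rationale for the change: accelerate's dispatch_model() handles both a device map that fits entirely in VRAM and one that offloads modules to CPU or disk, so the separate simple_dispatch_model() branch keyed on max_memory was redundant. Below is a minimal sketch of that unified path, assuming torch and accelerate are installed and a CUDA device 0 is available; the toy model, memory limits, and "offload" directory are illustrative and not taken from this repository.

import torch.nn as nn
from accelerate import dispatch_model, infer_auto_device_map

# Toy stand-in for the quantized model (illustrative only).
model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))

# Depending on the limits given, the inferred map may place every
# module on GPU 0 or spill some onto "cpu" / "disk".
device_map = infer_auto_device_map(
    model, max_memory={0: "8GiB", "cpu": "16GiB"}
)

# dispatch_model() covers both outcomes: it places on-device modules
# and attaches offload hooks where needed, so callers do not need a
# separate VRAM-only code path.
model = dispatch_model(model, device_map=device_map, offload_dir="offload")

offload_dir is only consulted when the device map actually contains "disk" entries, so passing it unconditionally, as the new code does, is harmless.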