Unverified Commit 82f3937e authored by Divakar Verma's avatar Divakar Verma Committed by GitHub
Browse files

[Misc] add process_weights_after_loading for DummyLoader (#8969)

parent 7da24875
...@@ -441,6 +441,18 @@ class DummyModelLoader(BaseModelLoader): ...@@ -441,6 +441,18 @@ class DummyModelLoader(BaseModelLoader):
# NOTE(woosuk): For accurate performance evaluation, we assign # NOTE(woosuk): For accurate performance evaluation, we assign
# random values to the weights. # random values to the weights.
initialize_dummy_weights(model) initialize_dummy_weights(model)
for _, module in model.named_modules():
quant_method = getattr(module, "quant_method", None)
if quant_method is not None:
# When quant methods need to process weights after loading
# (for repacking, quantizing, etc), they expect parameters
# to be on the global target device. This scope is for the
# case where cpu offloading is used, where we will move the
# parameters onto device for processing and back off after.
with device_loading_context(
module, torch.device(device_config.device)):
quant_method.process_weights_after_loading(module)
return model.eval() return model.eval()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment