Unverified commit 433266c1, authored by fzyzcjy and committed by GitHub
Browse files

Reintroduce memory usage fix (#9535)

parent fda47926
...@@ -1212,11 +1212,13 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): ...@@ -1212,11 +1212,13 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
# Process w13 weights # Process w13 weights
w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale) w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale)
del layer.w13_weight_scale
layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled) layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled)
layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False) layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False)
# Process w2 weights # Process w2 weights
w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale) w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale)
del layer.w2_weight_scale
layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled) layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled)
layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False) layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment