[`RWKV`] Rwkv fix for 8bit inference (#23468)

* rwkv fix for 8bit inference
* add comment

[`RWKV`] Rwkv fix for 8bit inference (#23468)
* rwkv fix for 8bit inference
* add comment
21bd3be1 · Younes Belkada · GitHub · 1c460a52 · 21bd3be1
Unverified commit 21bd3be1, authored May 19, 2023 by Younes Belkada; committed by GitHub on May 19, 2023.
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

src/transformers/models/rwkv/modeling_rwkv.py src/transformers/models/rwkv/modeling_rwkv.py +7 -2

No files found.
--- a/src/transformers/models/rwkv/modeling_rwkv.py
+++ b/src/transformers/models/rwkv/modeling_rwkv.py
@@ -709,8 +709,13 @@ class RwkvModel(RwkvPreTrainedModel):
                        block.attention.output.weight.mul_(2 ** int(block_id // self.config.rescale_every))
                        block.feed_forward.value.weight.mul_(2 ** int(block_id // self.config.rescale_every))
                    else:
-                        block.attention.output.weight.div_(2 ** int(block_id // self.config.rescale_every))
+                        # Deal with quantization statistics
-                        block.feed_forward.value.weight.div_(2 ** int(block_id // self.config.rescale_every))
+                        if hasattr(block.attention.output.weight, "SCB"):
+                            block.attention.output.weight.SCB.div_(2 ** int(block_id // self.config.rescale_every))
+                            block.feed_forward.value.weight.SCB.div_(2 ** int(block_id // self.config.rescale_every))
+                        else:
+                            block.attention.output.weight.div_(2 ** int(block_id // self.config.rescale_every))
+                            block.feed_forward.value.weight.div_(2 ** int(block_id // self.config.rescale_every))
        self.layers_are_rescaled = not self.training