[`RWKV`] Rwkv fix for 8bit inference (#23468)

* rwkv fix for 8bit inference
* add comment

[`RWKV`] Rwkv fix for 8bit inference (#23468)
* rwkv fix for 8bit inference
* add comment
21bd3be1 · Younes Belkada · GitHub · 1c460a52 · 21bd3be1
Unverified Commit 21bd3be1 authored May 19, 2023 by Younes Belkada Committed by GitHub May 19, 2023
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

src/transformers/models/rwkv/modeling_rwkv.py src/transformers/models/rwkv/modeling_rwkv.py +7 -2

No files found.
--- a/src/transformers/models/rwkv/modeling_rwkv.py
+++ b/src/transformers/models/rwkv/modeling_rwkv.py
@@ -708,6 +708,11 @@ class RwkvModel(RwkvPreTrainedModel):
                    if self.training:
                        block.attention.output.weight.mul_(2 ** int(block_id // self.config.rescale_every))
                        block.feed_forward.value.weight.mul_(2 ** int(block_id // self.config.rescale_every))
+                    else:
+                        # Deal with quantization statistics
+                        if hasattr(block.attention.output.weight, "SCB"):
+                            block.attention.output.weight.SCB.div_(2 ** int(block_id // self.config.rescale_every))
+                            block.feed_forward.value.weight.SCB.div_(2 ** int(block_id // self.config.rescale_every))
                        else:
                            block.attention.output.weight.div_(2 ** int(block_id // self.config.rescale_every))
                            block.feed_forward.value.weight.div_(2 ** int(block_id // self.config.rescale_every))