Unverified Commit da1af21d authored by andreeahedes, committed by GitHub

PegasusX add _no_split_modules (#25933)

* no_split_modules

* no_split_modules

* inputs_embeds+pos same device

* update _no_split_modules

* update _no_split_modules
parent 70a98024
@@ -769,6 +769,7 @@ class PegasusXPreTrainedModel(PreTrainedModel):
     config_class = PegasusXConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
+    _no_split_modules = [r"PegasusXEncoderLayer", r"PegasusXDecoderLayer"]

     def _init_weights(self, module):
         std = self.config.init_std
@@ -1299,6 +1300,8 @@ class PegasusXDecoder(PegasusXPreTrainedModel):
         # embed positions
         positions = self.embed_positions(inputs_embeds, past_key_values_length)
+        positions = positions.to(inputs_embeds.device)
+
         hidden_states = inputs_embeds + positions
         hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
...
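
For context, a minimal usage sketch (not part of the diff) of what this change enables: with `_no_split_modules` defined, Accelerate's `device_map="auto"` can shard PegasusX across devices without splitting an encoder or decoder layer, and the `positions.to(inputs_embeds.device)` fix keeps the position-embedding addition valid when layers land on different devices. The checkpoint name below is an illustrative example, not taken from this commit.

from transformers import AutoTokenizer, PegasusXForConditionalGeneration

# Example checkpoint for illustration; any PegasusX checkpoint should work.
model = PegasusXForConditionalGeneration.from_pretrained(
    "google/pegasus-x-base",
    device_map="auto",  # dispatch layers across available GPUs/CPU (requires accelerate)
)
tokenizer = AutoTokenizer.from_pretrained("google/pegasus-x-base")

inputs = tokenizer("Summarize: the quick brown fox ...", return_tensors="pt")
# Inputs may sit on a different device than some sharded layers; the device
# fix in the decoder keeps inputs_embeds + positions on the same device.
summary_ids = model.generate(**inputs.to(model.device), max_new_tokens=32)
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))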