"tests/t5/test_modeling_tf_t5.py" did not exist on "fecb08c2b8d8003f19147c8c6f5ad6d4df23c710"
Unverified commit a28325e2, authored by Bowen Bao, committed by GitHub

Replace python random with torch.rand to enable dynamo.export (#24434)

* Replace python random with torch.rand to enable dynamo.export

* revert changes to flax model code

* Remove unused random import

* Fix torch template

* Move torch.manual_seed(0) to right location
parent c036c814
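For context, a minimal sketch of the pattern this commit changes (names here are illustrative, not from the diff): Python's `random` module is opaque to TorchDynamo's tracer and was breaking `torch._dynamo.export`, whereas `torch.rand([])` is a traceable ATen op that produces a 0-dim tensor inside the graph.

```python
import random

import torch

layerdrop = 0.1  # probability of skipping a layer during training
training = True

# Before: Python RNG, invisible to TorchDynamo's tracer.
dropout_probability = random.uniform(0, 1)
skip_the_layer = training and dropout_probability < layerdrop

# After: a 0-dim tensor from an ATen op the tracer can keep in the graph.
dropout_probability = torch.rand([])
skip_the_layer = training and bool(dropout_probability < layerdrop)

# Tests that relied on Python/NumPy seeding now seed torch instead
# (the "Move torch.manual_seed(0) to right location" bullet above).
torch.manual_seed(0)
```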
src/transformers/models/mvp/modeling_mvp.py
@@ -15,7 +15,6 @@
 """ PyTorch MVP model."""
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -941,7 +940,7 @@ class MvpEncoder(MvpPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1216,7 +1215,7 @@ class MvpDecoder(MvpPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/nllb_moe/modeling_nllb_moe.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -1143,7 +1142,7 @@ class NllbMoeEncoder(NllbMoePreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None, None)
             else:
@@ -1405,7 +1404,7 @@ class NllbMoeDecoder(NllbMoePreTrainedModel):
                 all_hidden_states += (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/opt/modeling_opt.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ PyTorch OPT model."""
-import random
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -685,7 +684,7 @@ class OPTDecoder(OPTPreTrainedModel):
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/pegasus/modeling_pegasus.py
@@ -16,7 +16,6 @@
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
@@ -793,7 +792,7 @@ class PegasusEncoder(PegasusPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1074,7 +1073,7 @@ class PegasusDecoder(PegasusPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/pegasus_x/modeling_pegasus_x.py
@@ -16,7 +16,6 @@
 import dataclasses
 import math
-import random
 from typing import Optional, Tuple, Union
 
 import numpy as np
@@ -1060,7 +1059,7 @@ class PegasusXEncoder(PegasusXPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1315,7 +1314,7 @@ class PegasusXDecoder(PegasusXPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/plbart/modeling_plbart.py
@@ -15,7 +15,6 @@
 """ PyTorch PLBART model."""
 import copy
 import math
-import random
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import torch
@@ -798,7 +797,7 @@ class PLBartEncoder(PLBartPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1052,7 +1051,7 @@ class PLBartDecoder(PLBartPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/sew/modeling_sew.py
@@ -667,7 +667,7 @@ class SEWEncoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import Optional, Tuple, Union
 
 import torch
@@ -808,7 +807,7 @@ class Speech2TextEncoder(Speech2TextPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1053,7 +1052,7 @@ class Speech2TextDecoder(Speech2TextPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py
@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import Optional, Tuple, Union
 
 import torch
@@ -662,7 +661,7 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/speecht5/modeling_speecht5.py
@@ -15,7 +15,6 @@
 """ PyTorch SpeechT5 model."""
 import math
-import random
 import warnings
 from typing import List, Optional, Tuple, Union
@@ -1381,7 +1380,7 @@ class SpeechT5Encoder(SpeechT5PreTrainedModel):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = self.training and (dropout_probability < self.layerdrop)
             if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -1706,7 +1705,7 @@ class SpeechT5Decoder(SpeechT5PreTrainedModel):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = self.training and (dropout_probability < self.layerdrop)
             if skip_the_layer and not deepspeed_zero3_is_enabled:
...
src/transformers/models/table_transformer/modeling_table_transformer.py
@@ -16,7 +16,6 @@
 import math
-import random
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
@@ -920,7 +919,7 @@ class TableTransformerEncoder(TableTransformerPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1062,7 +1061,7 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/time_series_transformer/modeling_time_series_transformer.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ PyTorch Time Series Transformer model."""
-import random
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
@@ -937,7 +936,7 @@ class TimeSeriesTransformerEncoder(TimeSeriesTransformerPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1151,7 +1150,7 @@ class TimeSeriesTransformerDecoder(TimeSeriesTransformerPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/trocr/modeling_trocr.py
@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import Optional, Tuple, Union
 
 import torch
@@ -694,7 +693,7 @@ class TrOCRDecoder(TrOCRPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/unispeech/modeling_unispeech.py
@@ -761,7 +761,7 @@ class UniSpeechEncoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -850,7 +850,7 @@ class UniSpeechEncoderStableLayerNorm(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
@@ -775,7 +775,7 @@ class UniSpeechSatEncoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -864,7 +864,7 @@ class UniSpeechSatEncoderStableLayerNorm(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -797,7 +797,7 @@ class Wav2Vec2Encoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -885,7 +885,7 @@ class Wav2Vec2EncoderStableLayerNorm(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
@@ -903,7 +903,7 @@ class Wav2Vec2ConformerEncoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/wavlm/modeling_wavlm.py
@@ -707,7 +707,7 @@ class WavLMEncoder(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop)
             if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -797,7 +797,7 @@ class WavLMEncoderStableLayerNorm(nn.Module):
             all_hidden_states = all_hidden_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop)
             if not skip_the_layer or deepspeed_zero3_is_enabled:
...
src/transformers/models/whisper/modeling_whisper.py
@@ -15,7 +15,6 @@
 """ PyTorch Whisper model."""
 import math
-import random
 from typing import Optional, Tuple, Union
 
 import numpy as np
@@ -916,7 +915,7 @@ class WhisperEncoder(WhisperPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:
@@ -1145,7 +1144,7 @@ class WhisperDecoder(WhisperPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
src/transformers/models/xglm/modeling_xglm.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -668,7 +667,7 @@ class XGLMModel(XGLMPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
...
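A hedged sketch of the export call this commit unblocks. The model choice and the PyTorch 2.0-era `torch._dynamo.export(fn, *args)` calling convention are assumptions, not part of the commit (later PyTorch releases moved to `torch._dynamo.export(fn)(*args)` and then `torch.export`):

```python
import torch
from transformers import OPTModel

torch.manual_seed(0)  # LayerDrop now draws from torch's RNG
model = OPTModel.from_pretrained("facebook/opt-125m").eval()
input_ids = torch.randint(0, model.config.vocab_size, (1, 16))


def forward(ids):
    # Return a plain tensor so the export entry point sees tensor outputs.
    return model(input_ids=ids).last_hidden_state


# PyTorch 2.0-era convention; returns a (graph_module, guards) pair.
graph_module, guards = torch._dynamo.export(forward, input_ids)
print(graph_module)  # traces past the former random.uniform call sites
```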