Unverified commit a28325e2 authored by Bowen Bao, committed by GitHub

Replace python random with torch.rand to enable dynamo.export (#24434)

* Replace python random with torch.rand to enable dynamo.export

* revert changes to flax model code

* Remove unused random import

* Fix torch template

* Move torch.manual_seed(0) to right location
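For context, a minimal sketch of why the swap matters; the module, sizes, and `layerdrop` value below are hypothetical stand-ins for the encoder/decoder loops touched in this diff. `torch._dynamo.export` must capture the whole forward pass as a single graph, and a call into Python's `random` module is opaque to the dynamo tracer, while `torch.rand([])` is an ordinary torch op it can record. The `torch._dynamo.export(f, *args)` call shape below matches PyTorch 2.0; later releases changed the calling convention.

```python
import torch
import torch._dynamo
import torch.nn as nn


class TinyLayerDropBlock(nn.Module):
    """Hypothetical stand-in for the LayerDrop pattern changed below."""

    def __init__(self, layerdrop: float = 0.1):
        super().__init__()
        self.layerdrop = layerdrop
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        # Before: dropout_probability = random.uniform(0, 1)  # python RNG, untraceable
        # After: a 0-dim tensor produced by a torch op the tracer can capture.
        dropout_probability = torch.rand([])
        # Export runs in eval mode, so `self.training` is False and the `and`
        # short-circuits before the tensor comparison would be branched on.
        if self.training and (dropout_probability < self.layerdrop):
            return x  # LayerDrop: skip the layer
        return self.linear(x)


model = TinyLayerDropBlock().eval()
graph_module, guards = torch._dynamo.export(model, torch.randn(2, 4))
```

Because the coin flips now come from torch's RNG, determinism is controlled by `torch.manual_seed(...)` rather than `random.seed(...)`, which is what the `torch.manual_seed(0)` bullet above is about.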
parent c036c814
src/transformers/models/mvp/modeling_mvp.py
@@ -15,7 +15,6 @@
 """ PyTorch MVP model."""
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch
@@ -941,7 +940,7 @@ class MvpEncoder(MvpPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1216,7 +1215,7 @@ class MvpDecoder(MvpPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
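The same substitution repeats in every file below, and in eager mode it is behavior-preserving at the comparison site: a 0-dim tensor compares against a float the way the old Python float did. A standalone illustration (not from the diff):

```python
import torch

p = torch.rand([])   # 0-dim float tensor, sampled uniformly from [0, 1)
flag = p < 0.5       # comparing with a float yields a 0-dim bool tensor
if flag:             # bool() on a 0-dim tensor is well-defined in eager mode
    print("would skip the layer")
```

One minor semantic difference: per the Python docs, `random.uniform(0, 1)` may return the endpoint 1.0, while `torch.rand` draws from the half-open interval [0, 1); for a LayerDrop threshold check this is immaterial.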
src/transformers/models/nllb_moe/modeling_nllb_moe.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch
@@ -1143,7 +1142,7 @@ class NllbMoeEncoder(NllbMoePreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None, None)
 else:
@@ -1405,7 +1404,7 @@ class NllbMoeDecoder(NllbMoePreTrainedModel):
 all_hidden_states += (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/opt/modeling_opt.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ PyTorch OPT model."""
-import random
 from typing import List, Optional, Tuple, Union

 import torch
@@ -685,7 +684,7 @@ class OPTDecoder(OPTPreTrainedModel):
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/pegasus/modeling_pegasus.py
@@ -16,7 +16,6 @@
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import numpy as np
@@ -793,7 +792,7 @@ class PegasusEncoder(PegasusPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1074,7 +1073,7 @@ class PegasusDecoder(PegasusPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/pegasus_x/modeling_pegasus_x.py
@@ -16,7 +16,6 @@
 import dataclasses
 import math
-import random
 from typing import Optional, Tuple, Union

 import numpy as np
@@ -1060,7 +1059,7 @@ class PegasusXEncoder(PegasusXPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1315,7 +1314,7 @@ class PegasusXDecoder(PegasusXPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/plbart/modeling_plbart.py
@@ -15,7 +15,6 @@
 """ PyTorch PLBART model."""
 import copy
 import math
-import random
 from typing import Any, Dict, List, Optional, Tuple, Union

 import torch
@@ -798,7 +797,7 @@ class PLBartEncoder(PLBartPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1052,7 +1051,7 @@ class PLBartDecoder(PLBartPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/sew/modeling_sew.py
@@ -667,7 +667,7 @@ class SEWEncoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import Optional, Tuple, Union

 import torch
@@ -808,7 +807,7 @@ class Speech2TextEncoder(Speech2TextPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1053,7 +1052,7 @@ class Speech2TextDecoder(Speech2TextPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py
@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import Optional, Tuple, Union

 import torch
@@ -662,7 +661,7 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/speecht5/modeling_speecht5.py
@@ -15,7 +15,6 @@
 """ PyTorch SpeechT5 model."""
 import math
-import random
 import warnings
 from typing import List, Optional, Tuple, Union
@@ -1381,7 +1380,7 @@ class SpeechT5Encoder(SpeechT5PreTrainedModel):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = self.training and (dropout_probability < self.layerdrop)
 if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -1706,7 +1705,7 @@ class SpeechT5Decoder(SpeechT5PreTrainedModel):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = self.training and (dropout_probability < self.layerdrop)
 if skip_the_layer and not deepspeed_zero3_is_enabled:
src/transformers/models/table_transformer/modeling_table_transformer.py
@@ -16,7 +16,6 @@
 import math
-import random
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
@@ -920,7 +919,7 @@ class TableTransformerEncoder(TableTransformerPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1062,7 +1061,7 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/time_series_transformer/modeling_time_series_transformer.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ PyTorch Time Series Transformer model."""
-import random
 from typing import List, Optional, Tuple, Union

 import numpy as np
@@ -937,7 +936,7 @@ class TimeSeriesTransformerEncoder(TimeSeriesTransformerPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1151,7 +1150,7 @@ class TimeSeriesTransformerDecoder(TimeSeriesTransformerPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/trocr/modeling_trocr.py
@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import Optional, Tuple, Union

 import torch
@@ -694,7 +693,7 @@ class TrOCRDecoder(TrOCRPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/unispeech/modeling_unispeech.py
@@ -761,7 +761,7 @@ class UniSpeechEncoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -850,7 +850,7 @@ class UniSpeechEncoderStableLayerNorm(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
@@ -775,7 +775,7 @@ class UniSpeechSatEncoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -864,7 +864,7 @@ class UniSpeechSatEncoderStableLayerNorm(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -797,7 +797,7 @@ class Wav2Vec2Encoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -885,7 +885,7 @@ class Wav2Vec2EncoderStableLayerNorm(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
@@ -903,7 +903,7 @@ class Wav2Vec2ConformerEncoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/wavlm/modeling_wavlm.py
@@ -707,7 +707,7 @@ class WavLMEncoder(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop)
 if not skip_the_layer or deepspeed_zero3_is_enabled:
@@ -797,7 +797,7 @@ class WavLMEncoderStableLayerNorm(nn.Module):
 all_hidden_states = all_hidden_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = np.random.uniform(0, 1)
+dropout_probability = torch.rand([])
 skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop)
 if not skip_the_layer or deepspeed_zero3_is_enabled:
src/transformers/models/whisper/modeling_whisper.py
@@ -15,7 +15,6 @@
 """ PyTorch Whisper model."""
 import math
-import random
 from typing import Optional, Tuple, Union

 import numpy as np
@@ -916,7 +915,7 @@ class WhisperEncoder(WhisperPreTrainedModel):
 if output_hidden_states:
     encoder_states = encoder_states + (hidden_states,)
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):  # skip the layer
     layer_outputs = (None, None)
 else:
@@ -1145,7 +1144,7 @@ class WhisperDecoder(WhisperPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
src/transformers/models/xglm/modeling_xglm.py
@@ -16,7 +16,6 @@
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch
@@ -668,7 +667,7 @@ class XGLMModel(XGLMPreTrainedModel):
 # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
 if output_hidden_states:
     all_hidden_states += (hidden_states,)
-dropout_probability = random.uniform(0, 1)
+dropout_probability = torch.rand([])
 if self.training and (dropout_probability < self.layerdrop):
     continue
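A practical consequence of the change above, sketched as a standalone check: reproducibility of the LayerDrop draws is now governed by torch's global RNG seed rather than the `random` module's state.

```python
import torch

torch.manual_seed(0)  # seeds the generator behind torch.rand
first = [float(torch.rand([])) for _ in range(3)]

torch.manual_seed(0)  # reset to the identical state
second = [float(torch.rand([])) for _ in range(3)]

assert first == second  # the same seed reproduces the same LayerDrop draws
```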