Unverified Commit a28325e2 authored by Bowen Bao, committed by GitHub

Replace Python random with torch.rand to enable dynamo.export (#24434)

* Replace Python random with torch.rand to enable dynamo.export

* Revert changes to Flax model code

* Remove unused random import

* Fix torch template

* Move torch.manual_seed(0) to the right location
parent c036c814
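The change is mechanical, but the motivation deserves a note: random.uniform(0, 1) draws from Python's RNG, outside anything TorchDynamo can see, so tracing the LayerDrop branch for dynamo.export either graph-breaks or freezes the sampled value into the exported graph as a constant. torch.rand([]) draws the same scalar as a 0-d tensor through an ATen op that the tracer records like any other. A minimal sketch of the pattern, not taken from the commit (the module, names, and shapes are invented for illustration):

# Hypothetical repro, not part of this commit.
import torch
import torch._dynamo


class ToyEncoderLayer(torch.nn.Module):
    """Toy version of the LayerDrop pattern patched below."""

    def __init__(self, layerdrop: float = 0.1):
        super().__init__()
        self.layerdrop = layerdrop
        self.proj = torch.nn.Linear(8, 8)

    def forward(self, hidden_states):
        # Before: dropout_probability = random.uniform(0, 1), a Python float
        # sampled outside the graph. After: a 0-d tensor from an ATen op.
        dropout_probability = torch.rand([])
        if self.training and (dropout_probability < self.layerdrop):
            return hidden_states  # LayerDrop: skip this layer
        return self.proj(hidden_states)


model = ToyEncoderLayer().eval()  # in eval mode the drop branch is dead
# torch._dynamo.export API of the PyTorch 2.0.x era; newer releases expose
# torch.export for this instead.
graph_module, guards = torch._dynamo.export(model, torch.randn(2, 8))
print(graph_module.code)

In training mode the tensor-vs-float comparison still feeds a Python if, so the branch stays data-dependent; the gain is that inference-mode export traces cleanly and the random import becomes dead code, hence the removed imports below. That presumably also motivates the last bullet: once LayerDrop draws from PyTorch's generator, a test that wants deterministic behavior must call torch.manual_seed(0) before the forward pass instead of seeding Python's random.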
@@ -17,7 +17,6 @@
 """ PyTorch Autoformer model."""
 import math
-import random
 from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union

@@ -1198,7 +1197,7 @@ class AutoformerEncoder(AutoformerPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1408,7 +1407,7 @@ class AutoformerDecoder(AutoformerPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -15,7 +15,6 @@
 """ PyTorch BART model."""
 import copy
 import math
-import random
 import warnings
 from typing import List, Optional, Tuple, Union

@@ -837,7 +836,7 @@ class BartEncoder(BartPretrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1090,7 +1089,7 @@ class BartDecoder(BartPretrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import numpy as np

@@ -1933,7 +1932,7 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -2276,7 +2275,7 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import math
-import random
 from typing import Optional, Tuple, Union

 import torch

@@ -579,7 +578,7 @@ class BioGptModel(BioGptPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -18,7 +18,6 @@
 import copy
 import math
 import os
-import random
 import warnings
 from typing import List, Optional, Tuple, Union

@@ -767,7 +766,7 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1019,7 +1018,7 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch

@@ -765,7 +764,7 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1016,7 +1015,7 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import math
-import random
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple

@@ -1224,7 +1223,7 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1378,7 +1377,7 @@ class ConditionalDetrDecoder(ConditionalDetrPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue
             if idx == 0:

@@ -587,7 +587,7 @@ class Data2VecAudioEncoder(nn.Module):
                 all_hidden_states = all_hidden_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:

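A note on the skip_the_layer variant in the Data2VecAudio hunk above (it recurs in the Hubert, M2M100, and MCTCT hunks below): under DeepSpeed ZeRO-3 the layer body is executed even when the layer is "dropped", because ZeRO-3 shards parameters and every rank must join the all-gather collectives inside each layer call. A self-contained sketch of that control flow, assuming plain Linear layers and a boolean stand-in for is_deepspeed_zero3_enabled():

# Hedged sketch only; `zero3` stands in for transformers' runtime check
# is_deepspeed_zero3_enabled(), and the layers are Linear stand-ins.
import torch


def run_with_layerdrop(layers, hidden_states, layerdrop, training, zero3=False):
    for layer in layers:
        dropout_probability = torch.rand([])  # 0-d tensor, as in the diff
        skip_the_layer = bool(training and (dropout_probability < layerdrop))
        if not skip_the_layer or zero3:
            # Under ZeRO-3 every rank runs every layer so the parameter
            # all-gathers stay in sync; the result is discarded on a drop.
            layer_outputs = layer(hidden_states)
        if not skip_the_layer:
            hidden_states = layer_outputs
    return hidden_states


layers = torch.nn.ModuleList(torch.nn.Linear(8, 8) for _ in range(4))
out = run_with_layerdrop(layers, torch.randn(2, 8), layerdrop=0.5, training=True)

The commit only touches the dropout_probability line in these hunks; the ZeRO-3 guard is pre-existing behavior.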
@@ -16,7 +16,6 @@
 import math
-import random
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple

@@ -979,7 +978,7 @@ class DetrEncoder(DetrPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1118,7 +1117,7 @@ class DetrDecoder(DetrPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import itertools
 import math
-import random
 from dataclasses import dataclass
 from typing import Dict, Optional, Tuple, Union

@@ -580,7 +579,7 @@ class FlaubertModel(FlaubertPreTrainedModel):
         attentions = () if output_attentions else None
         for i in range(self.n_layers):
             # LayerDrop
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -28,7 +28,6 @@
 """PyTorch Fairseq model, ported from https://github.com/pytorch/fairseq/tree/master/examples/wmt19"""
 import math
-import random
 from typing import Any, Dict, List, Optional, Tuple, Union

 import torch

@@ -550,7 +549,7 @@ class FSMTEncoder(nn.Module):
                 encoder_states += (x,)
                 x = x.transpose(0, 1)  # B x T x C -> T x B x C
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 attn = None
             else:

@@ -794,7 +793,7 @@ class FSMTDecoder(nn.Module):
                 x = x.transpose(0, 1)
                 all_hidden_states += (x,)
                 x = x.transpose(0, 1)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -725,7 +725,7 @@ class HubertEncoder(nn.Module):
                 all_hidden_states = all_hidden_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:

@@ -814,7 +814,7 @@ class HubertEncoderStableLayerNorm(nn.Module):
                 all_hidden_states = all_hidden_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = np.random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:

@@ -14,7 +14,6 @@
 # limitations under the License.
 """ PyTorch Informer model."""
-import random
 from typing import List, Optional, Tuple, Union

 import numpy as np

@@ -1205,7 +1204,7 @@ class InformerEncoder(InformerPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1425,7 +1424,7 @@ class InformerDecoder(InformerPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import math
-import random
 import warnings
 from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union

@@ -1871,7 +1870,7 @@ class LEDEncoder(LEDPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None, None)

@@ -2135,7 +2134,7 @@ class LEDDecoder(LEDPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch

@@ -813,7 +812,7 @@ class M2M100Encoder(M2M100PreTrainedModel):
                 encoder_states = encoder_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:

@@ -1057,7 +1056,7 @@ class M2M100Decoder(M2M100PreTrainedModel):
                 all_hidden_states += (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled:

@@ -17,7 +17,6 @@
 import copy
 import math
-import random
 from typing import Dict, List, Optional, Tuple, Union

 import numpy as np

@@ -778,7 +777,7 @@ class MarianEncoder(MarianPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1024,7 +1023,7 @@ class MarianDecoder(MarianPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -15,7 +15,6 @@
 """ PyTorch Mask2Former model."""
 import math
-import random
 import warnings
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple

@@ -1862,7 +1861,7 @@ class Mask2FormerMaskedAttentionDecoder(nn.Module):
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)

-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -15,7 +15,6 @@
 """ PyTorch MaskFormer model."""
 import math
-import random
 from dataclasses import dataclass
 from numbers import Number
 from typing import Dict, List, Optional, Tuple

@@ -764,7 +763,7 @@ class DetrDecoder(nn.Module):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -15,7 +15,6 @@
 """ PyTorch MBART model."""
 import copy
 import math
-import random
 from typing import List, Optional, Tuple, Union

 import torch

@@ -819,7 +818,7 @@ class MBartEncoder(MBartPreTrainedModel):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):  # skip the layer
                 layer_outputs = (None, None)
             else:

@@ -1074,7 +1073,7 @@ class MBartDecoder(MBartPreTrainedModel):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])
             if self.training and (dropout_probability < self.layerdrop):
                 continue

@@ -16,7 +16,6 @@
 import math
-import random
 from typing import Optional, Tuple, Union

 import torch

@@ -610,7 +609,7 @@ class MCTCTEncoder(MCTCTPreTrainedModel):
                 encoder_states = encoder_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = random.uniform(0, 1)
+            dropout_probability = torch.rand([])

             skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
             if not skip_the_layer or deepspeed_zero3_is_enabled: