Unverified commit 77262ef7, authored by Stas Bekman and committed by GitHub
Browse files

fix --gradient_checkpointing (#13964)

parent 3d607df8
...@@ -54,12 +54,6 @@ class ModelArguments: ...@@ -54,12 +54,6 @@ class ModelArguments:
freeze_feature_extractor: Optional[bool] = field( freeze_feature_extractor: Optional[bool] = field(
default=True, metadata={"help": "Whether to freeze the feature extractor layers of the model."} default=True, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
) )
gradient_checkpointing: Optional[bool] = field(
default=False,
metadata={
"help": "If True, use gradient checkpointing to save memory at the expense of slower backward pass."
},
)
verbose_logging: Optional[bool] = field( verbose_logging: Optional[bool] = field(
default=False, default=False,
metadata={"help": "Whether to log verbose messages or not."}, metadata={"help": "Whether to log verbose messages or not."},
...@@ -352,7 +346,7 @@ def main(): ...@@ -352,7 +346,7 @@ def main():
model = Wav2Vec2ForCTC.from_pretrained( model = Wav2Vec2ForCTC.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
gradient_checkpointing=model_args.gradient_checkpointing, gradient_checkpointing=training_args.gradient_checkpointing,
vocab_size=len(processor.tokenizer), vocab_size=len(processor.tokenizer),
) )
......
...@@ -84,12 +84,6 @@ class ModelArguments: ...@@ -84,12 +84,6 @@ class ModelArguments:
"vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``." "vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``."
}, },
) )
gradient_checkpointing: Optional[bool] = field(
default=True,
metadata={
"help": "If True, use gradient checkpointing to save memory at the expense of slower backward pass."
},
)
layerdrop: Optional[float] = field(default=0.0, metadata={"help": "The LayerDrop probability."}) layerdrop: Optional[float] = field(default=0.0, metadata={"help": "The LayerDrop probability."})
...@@ -373,7 +367,7 @@ def main(): ...@@ -373,7 +367,7 @@ def main():
hidden_dropout=model_args.hidden_dropout, hidden_dropout=model_args.hidden_dropout,
feat_proj_dropout=model_args.feat_proj_dropout, feat_proj_dropout=model_args.feat_proj_dropout,
mask_time_prob=model_args.mask_time_prob, mask_time_prob=model_args.mask_time_prob,
gradient_checkpointing=model_args.gradient_checkpointing, gradient_checkpointing=training_args.gradient_checkpointing,
layerdrop=model_args.layerdrop, layerdrop=model_args.layerdrop,
ctc_loss_reduction="mean", ctc_loss_reduction="mean",
pad_token_id=processor.tokenizer.pad_token_id, pad_token_id=processor.tokenizer.pad_token_id,
......
...@@ -50,12 +50,6 @@ class ModelArguments: ...@@ -50,12 +50,6 @@ class ModelArguments:
freeze_feature_extractor: Optional[bool] = field( freeze_feature_extractor: Optional[bool] = field(
default=True, metadata={"help": "Whether to freeze the feature extractor layers of the model."} default=True, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
) )
gradient_checkpointing: Optional[bool] = field(
default=False,
metadata={
"help": "If True, use gradient checkpointing to save memory at the expense of slower backward pass."
},
)
verbose_logging: Optional[bool] = field( verbose_logging: Optional[bool] = field(
default=False, default=False,
metadata={"help": "Whether to log verbose messages or not."}, metadata={"help": "Whether to log verbose messages or not."},
...@@ -370,7 +364,7 @@ def main(): ...@@ -370,7 +364,7 @@ def main():
config = Wav2Vec2Config.from_pretrained( config = Wav2Vec2Config.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
gradient_checkpointing=model_args.gradient_checkpointing, gradient_checkpointing=training_args.gradient_checkpointing,
) )
if not config.do_stable_layer_norm or config.feat_extract_norm != "layer": if not config.do_stable_layer_norm or config.feat_extract_norm != "layer":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment