"docs/source/vscode:/vscode.git/clone" did not exist on "78a53d59cb6fa444a95d6be4d15fb3a25e6a8a2e"
Unverified commit 60d5f8f9, authored by Zach Mueller, committed by GitHub

🚨🚨🚨Deprecate `evaluation_strategy` to `eval_strategy`🚨🚨🚨 (#30190)

* Alias

* Note alias

* Tests and src

* Rest

* Clean

* Change typing?

* Fix tests

* Deprecation versions
parent c86d020e
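The change is a pure rename at every call site: wherever `evaluation_strategy` was passed to `TrainingArguments` (or `--evaluation_strategy` to an example script), the new spelling is `eval_strategy`. Per the "Alias" commits above, the old name is kept as an alias for now and emits a deprecation warning. A minimal migration sketch (the output directory and step values below are illustrative, not taken from this diff):

```python
from transformers import TrainingArguments

# Old spelling, deprecated by this commit. It still works for now via the
# alias, but emits a deprecation warning until the alias is removed.
legacy_args = TrainingArguments(
    "my-model",                  # illustrative output_dir
    evaluation_strategy="steps",
    eval_steps=100,
)

# New spelling going forward.
training_args = TrainingArguments(
    "my-model",
    eval_strategy="steps",       # renamed from evaluation_strategy
    eval_steps=100,
)
```

The hunks below apply the same rename across the docs, example scripts, and research projects.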
@@ -283,7 +283,7 @@ To enable Neptune logging, in your `TrainingArguments`, set the `report_to` argu
 ```python
 training_args = TrainingArguments(
     "quick-training-distilbert-mrpc",
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     eval_steps=20,
     report_to="neptune",
 )
...
@@ -50,7 +50,7 @@ python run_audio_classification.py \
     --dataloader_num_workers 4 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --metric_for_best_model accuracy \
@@ -92,7 +92,7 @@ python run_audio_classification.py \
     --dataloader_num_workers 8 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --metric_for_best_model accuracy \
...
@@ -52,7 +52,7 @@ python run_image_classification.py \
     --per_device_eval_batch_size 8 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --save_total_limit 3 \
...
@@ -56,7 +56,7 @@ Alternatively, one can decide to further pre-train an already pre-trained (or fi
     --per_device_eval_batch_size 8 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --save_total_limit 3 \
@@ -106,7 +106,7 @@ Next, we can run the script by providing the path to this custom configuration (
     --per_device_eval_batch_size 8 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --save_total_limit 3 \
@@ -172,7 +172,7 @@ python run_mae.py \
     --per_device_eval_batch_size 8 \
     --logging_strategy steps \
     --logging_steps 10 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --load_best_model_at_end True \
     --save_total_limit 3 \
...
@@ -118,7 +118,7 @@ python run_semantic_segmentation.py \
     --per_device_eval_batch_size 8 \
     --logging_strategy steps \
     --logging_steps 100 \
-    --evaluation_strategy epoch \
+    --eval_strategy epoch \
     --save_strategy epoch \
     --seed 1337
 ```
...
@@ -76,7 +76,7 @@ python run_speech_recognition_ctc.py \
     --gradient_accumulation_steps="2" \
     --learning_rate="3e-4" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --length_column_name="input_length" \
     --save_steps="400" \
@@ -111,7 +111,7 @@ torchrun \
     --per_device_train_batch_size="4" \
     --learning_rate="3e-4" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --length_column_name="input_length" \
     --save_steps="400" \
@@ -162,7 +162,7 @@ However, the `--shuffle_buffer_size` argument controls how many examples we can
     --gradient_accumulation_steps="2" \
     --learning_rate="5e-4" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --save_steps="500" \
     --eval_steps="500" \
@@ -293,7 +293,7 @@ python run_speech_recognition_ctc.py \
     --per_device_train_batch_size="32" \
     --learning_rate="1e-3" \
     --warmup_steps="100" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --length_column_name="input_length" \
     --save_steps="200" \
@@ -330,7 +330,7 @@ python run_speech_recognition_ctc.py \
     --per_device_train_batch_size="32" \
     --learning_rate="1e-3" \
     --warmup_steps="100" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --length_column_name="input_length" \
     --save_steps="200" \
@@ -378,7 +378,7 @@ python run_speech_recognition_seq2seq.py \
     --logging_steps="25" \
     --learning_rate="1e-5" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --eval_steps="1000" \
     --save_strategy="steps" \
     --save_steps="1000" \
@@ -419,7 +419,7 @@ torchrun \
     --logging_steps="25" \
     --learning_rate="1e-5" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --eval_steps="1000" \
     --save_strategy="steps" \
     --save_steps="1000" \
@@ -547,7 +547,7 @@ python run_speech_recognition_seq2seq.py \
     --gradient_accumulation_steps="8" \
     --learning_rate="3e-4" \
     --warmup_steps="400" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="text" \
     --save_steps="400" \
     --eval_steps="400" \
@@ -589,7 +589,7 @@ torchrun \
     --gradient_accumulation_steps="1" \
     --learning_rate="3e-4" \
     --warmup_steps="400" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="text" \
     --save_steps="400" \
     --eval_steps="400" \
...
@@ -100,7 +100,7 @@ def main():
         output_dir=args.output_dir,
         learning_rate=args.learning_rate,
         lr_scheduler_type=args.lr_scheduler_type,
-        evaluation_strategy="epoch",
+        eval_strategy="epoch",
         save_strategy="epoch",
         logging_strategy="epoch",
         per_device_train_batch_size=args.batch_size,
...
@@ -32,7 +32,7 @@ python run_funsd_cord.py \
     --do_train \
     --do_eval \
     --max_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --eval_steps 100 \
     --learning_rate 1e-5 \
     --load_best_model_at_end \
@@ -57,7 +57,7 @@ python run_funsd_cord.py \
     --do_train \
     --do_eval \
     --max_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --eval_steps 100 \
     --learning_rate 5e-5 \
     --load_best_model_at_end \
...
@@ -362,7 +362,7 @@ echo '''python run_speech_recognition_ctc.py \
     --per_device_train_batch_size="2" \
     --learning_rate="3e-4" \
     --save_total_limit="1" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --length_column_name="input_length" \
     --save_steps="5" \
@@ -438,7 +438,7 @@ echo '''python run_speech_recognition_ctc.py \
     --learning_rate="7.5e-5" \
     --warmup_steps="2000" \
     --length_column_name="input_length" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --text_column_name="sentence" \
     --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
     --save_steps="500" \
...
@@ -51,7 +51,7 @@ parameters_dict = {
     'train_file': os.path.join(data_dir, 'train.csv'),
     'infer_file': os.path.join(data_dir, 'infer.csv'),
     'eval_file': os.path.join(data_dir, 'eval.csv'),
-    'evaluation_strategy': 'steps',
+    'eval_strategy': 'steps',
     'task_name': 'scitail',
     'label_list': ['entails', 'neutral'],
     'per_device_train_batch_size': 32,
...
@@ -190,7 +190,7 @@ class FTTrainingArguments:
             )
         },
     )
-    evaluation_strategy: Optional[str] = dataclasses.field(
+    eval_strategy: Optional[str] = dataclasses.field(
         default="no",
         metadata={
             "help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]'
@@ -198,7 +198,7 @@ class FTTrainingArguments:
     )
     eval_steps: Optional[int] = dataclasses.field(
         default=1,
-        metadata={"help": 'Number of update steps between two evaluations if `evaluation_strategy="steps"`.'},
+        metadata={"help": 'Number of update steps between two evaluations if `eval_strategy="steps"`.'},
     )
     eval_metric: Optional[str] = dataclasses.field(
         default="accuracy", metadata={"help": "The evaluation metric used for the task."}
@@ -265,7 +265,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s
                 # Evaluate during training
                 if (
                     eval_dataloader is not None
-                    and args.evaluation_strategy == IntervalStrategy.STEPS.value
+                    and args.eval_strategy == IntervalStrategy.STEPS.value
                     and args.eval_steps > 0
                     and completed_steps % args.eval_steps == 0
                 ):
@@ -331,7 +331,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s
                 break
         # Evaluate during training
-        if eval_dataloader is not None and args.evaluation_strategy == IntervalStrategy.EPOCH.value:
+        if eval_dataloader is not None and args.eval_strategy == IntervalStrategy.EPOCH.value:
             accelerator.wait_for_everyone()
             new_checkpoint = f"checkpoint-{IntervalStrategy.EPOCH.value}-{epoch}"
             new_eval_result = evaluate(args, accelerator, eval_dataloader, "eval", model, new_checkpoint)[
@@ -571,7 +571,7 @@ def finetune(accelerator, model_name_or_path, train_file, output_dir, **kwargs):
         assert args.train_file is not None
         data_files[Split.TRAIN.value] = args.train_file
-    if args.do_eval or args.evaluation_strategy != IntervalStrategy.NO.value:
+    if args.do_eval or args.eval_strategy != IntervalStrategy.NO.value:
         assert args.eval_file is not None
         data_files[Split.EVAL.value] = args.eval_file
...
@@ -60,7 +60,7 @@ parameters_dict = {
     'train_file': os.path.join(data_dir, '${TRAIN_FILE}'),
     'infer_file': os.path.join(data_dir, '${INFER_FILE}'),
     'eval_file': os.path.join(data_dir, '${EVAL_FILE}'),
-    'evaluation_strategy': 'steps',
+    'eval_strategy': 'steps',
    'task_name': 'scitail',
     'label_list': ['entails', 'neutral'],
     'per_device_train_batch_size': 32,
...
@@ -79,7 +79,7 @@ class STTrainingArguments:
     eval_metric: Optional[str] = dataclasses.field(
         default="accuracy", metadata={"help": "The evaluation metric used for the task."}
     )
-    evaluation_strategy: Optional[str] = dataclasses.field(
+    eval_strategy: Optional[str] = dataclasses.field(
         default="no",
         metadata={
             "help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]'
@@ -208,7 +208,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
     data_files["train"] = args.train_file
     data_files["infer"] = args.infer_file
-    if args.evaluation_strategy != IntervalStrategy.NO.value:
+    if args.eval_strategy != IntervalStrategy.NO.value:
         assert args.eval_file is not None
         data_files["eval"] = args.eval_file
@@ -267,7 +267,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
         "label_list": args.label_list,
         "output_dir": current_output_dir,
         "eval_metric": args.eval_metric,
-        "evaluation_strategy": args.evaluation_strategy,
+        "eval_strategy": args.eval_strategy,
         "early_stopping_patience": args.early_stopping_patience,
         "early_stopping_threshold": args.early_stopping_threshold,
         "seed": args.seed,
@@ -341,7 +341,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
         data_files["train_pseudo"] = os.path.join(next_data_dir, f"train_pseudo.{args.data_file_extension}")
-        if args.evaluation_strategy != IntervalStrategy.NO.value:
+        if args.eval_strategy != IntervalStrategy.NO.value:
             new_eval_result = eval_result
             if best_iteration is None:
...
@@ -71,7 +71,7 @@ python run_wikisql_with_tapex.py \
     --eval_steps 1000 \
     --save_steps 1000 \
     --warmup_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --predict_with_generate \
     --num_beams 5 \
     --weight_decay 1e-2 \
@@ -101,7 +101,7 @@ python run_wikisql_with_tapex.py \
     --eval_steps 1000 \
     --save_steps 1000 \
     --warmup_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --predict_with_generate \
     --num_beams 5 \
     --weight_decay 1e-2 \
@@ -132,7 +132,7 @@ python run_wikitablequestions_with_tapex.py \
     --eval_steps 1000 \
     --save_steps 1000 \
     --warmup_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --predict_with_generate \
     --num_beams 5 \
     --weight_decay 1e-2 \
@@ -162,7 +162,7 @@ python run_wikitablequestions_with_tapex.py \
     --eval_steps 1000 \
     --save_steps 1000 \
     --warmup_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --predict_with_generate \
     --num_beams 5 \
     --weight_decay 1e-2 \
@@ -223,7 +223,7 @@ python run_tabfact_with_tapex.py \
     --learning_rate 3e-5 \
     --eval_steps 1000 \
     --save_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --weight_decay 1e-2 \
     --max_steps 30000 \
     --max_grad_norm 0.1
@@ -252,7 +252,7 @@ python run_tabfact_with_tapex.py \
     --learning_rate 3e-5 \
     --eval_steps 1000 \
     --save_steps 1000 \
-    --evaluation_strategy steps \
+    --eval_strategy steps \
     --weight_decay 1e-2 \
     --max_steps 30000 \
     --max_grad_norm 0.1
...
@@ -182,7 +182,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat
     --per_device_train_batch_size="16" \
     --learning_rate="3e-4" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="400" \
     --eval_steps="400" \
     --logging_steps="400" \
@@ -209,7 +209,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat
     --per_device_train_batch_size="16" \
     --learning_rate="3e-4" \
     --warmup_steps="500" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="400" \
     --eval_steps="400" \
     --logging_steps="400" \
...
@@ -18,7 +18,7 @@ python run_asr.py \
     --num_train_epochs="30" \
     --per_device_train_batch_size="20" \
     --per_device_eval_batch_size="20" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="500" \
     --eval_steps="100" \
     --logging_steps="50" \
@@ -73,7 +73,7 @@ python run_asr.py \
     --per_device_train_batch_size="1" \
     --per_device_eval_batch_size="1" \
     --gradient_accumulation_steps="8" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="500" \
     --eval_steps="100" \
     --logging_steps="50" \
@@ -152,7 +152,7 @@ ZeRO-2:
 PYTHONPATH=../../../src deepspeed --num_gpus 2 \
 run_asr.py \
 --output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \
---per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
+--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \
 --logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
 --model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
 --dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
@@ -176,7 +176,7 @@ ZeRO-3:
 PYTHONPATH=../../../src deepspeed --num_gpus 2 \
 run_asr.py \
 --output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \
---per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
+--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \
 --logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
 --model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
 --dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
...
@@ -4,7 +4,7 @@ python run_asr.py \
     --num_train_epochs="30" \
     --per_device_train_batch_size="32" \
     --per_device_eval_batch_size="32" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_total_limit="3" \
     --save_steps="500" \
     --eval_steps="100" \
...
@@ -4,7 +4,7 @@ python run_asr.py \
     --num_train_epochs="30" \
     --per_device_train_batch_size="20" \
     --per_device_eval_batch_size="20" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="500" \
     --eval_steps="100" \
     --logging_steps="50" \
...
@@ -4,7 +4,7 @@ python run_asr.py \
     --num_train_epochs="30" \
     --per_device_train_batch_size="16" \
     --per_device_eval_batch_size="16" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_total_limit="3" \
     --save_steps="500" \
     --eval_steps="100" \
...
@@ -5,7 +5,7 @@ python run_asr.py \
     --per_device_train_batch_size="2" \
     --per_device_eval_batch_size="2" \
     --gradient_accumulation_steps="4" \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --save_steps="500" \
     --eval_steps="100" \
     --logging_steps="50" \
...