---
# Hyperparameter sweep definition for `run_audio_classification.py`.
# NOTE(review): the key layout (command/method/metric/parameters/program/
# project) and the ${program} / ${args} macros look like a Weights & Biases
# sweep config — confirm against the tool that consumes this file.

# Command line used to launch each run. ${program} is replaced by the
# `program` entry below and ${args} by the values under `parameters`.
# Flags listed here are passed on every run.
command:
  - python3
  - ${program}
  - --load_best_model_at_end
  - --fp16
  - --fp16_full_eval
  - --do_train
  - --do_eval
  - --trust_remote_code
  - --overwrite_output_dir
  - --ignore_mismatched_sizes
  - --gradient_checkpointing
  - ${args}

# Exhaustive grid over every parameter that declares `values`.
method: grid

# Objective used to rank runs.
metric:
  goal: maximize
  name: eval/accuracy

parameters:
  # --- Model ---
  model_name_or_path:
    value: facebook/mms-lid-126

  # --- Training data ---
  train_dataset_name:
    value: stable-speech/concatenated-accent-dataset
  train_dataset_config_name:
    value: default
  train_split_name:
    value: train
  train_label_column_name:
    value: labels

  # --- Evaluation data (same dataset as training, `test` split) ---
  eval_dataset_name:
    value: stable-speech/concatenated-accent-dataset
  eval_dataset_config_name:
    value: default
  eval_split_name:
    value: test
  eval_label_column_name:
    value: labels

  # --- Output / columns ---
  output_dir:
    value: ./
  remove_unused_columns:
    value: false

  # --- Optimisation ---
  # NOTE(review): YAML 1.1 loaders (e.g. PyYAML) read `1e-4` as a string
  # rather than a float; kept verbatim — confirm the consumer accepts it.
  learning_rate:
    value: 1e-4
  lr_scheduler_type:
    value: constant_with_warmup
  warmup_steps:
    value: 100
  max_steps:
    value: 2000

  # --- Audio length bounds ---
  max_length_seconds:
    value: 20  # give some data diversity for longer audio samples
  min_length_seconds:
    value: 7
  attention_mask:
    value: true

  # --- Batching / workers ---
  per_device_train_batch_size:
    value: 32
  per_device_eval_batch_size:
    value: 16
  preprocessing_num_workers:
    value: 4
  dataloader_num_workers:
    value: 4

  # --- Logging / evaluation / checkpointing cadence (in steps) ---
  logging_strategy:
    value: steps
  logging_steps:
    value: 10
  evaluation_strategy:
    value: steps
  eval_steps:
    value: 1000
  save_strategy:
    value: steps
  save_steps:
    value: 2000
  metric_for_best_model:
    value: accuracy

  # --- Swept parameter ---
  # The only entry with `values`; the grid therefore covers these two
  # settings.
  freeze_base_model:
    values:
      - false
      - true

  group_by_length:
    value: false  # TODO(SG): batch by length
  push_to_hub:
    value: false

# Script substituted for ${program} in `command`.
program: run_audio_classification.py
project: mms-lid-accent-classification