#!/usr/bin/env bash
#
# run_mms_lid_with_cv.sh
#
# Launches run_audio_classification.py (expected in the current working
# directory) with the "facebook/mms-lid-126" model, training and evaluation
# both enabled.
#
# Usage:
#   bash run_mms_lid_with_cv.sh
#
# Notes:
#   * The training dataset name, config, split and label column are each given
#     as two '+'-separated entries. They appear to line up positionally, one
#     entry per dataset -- confirm against run_audio_classification.py.
#   * --output_dir is "./" together with --overwrite_output_dir, so run this
#     from a directory where that is acceptable.
#   * --save_strategy is "no" while --save_steps is also set; presumably
#     --save_steps has no effect in that case -- verify before relying on
#     checkpoints being written.

set -euo pipefail

# Arguments are kept in an array so they can be grouped and commented; a
# backslash-continued command line cannot contain comments.
args=(
  # Model
  --model_name_or_path "facebook/mms-lid-126"

  # Training data
  --train_dataset_name "parler-tts/concatenated-normalized-accent-dataset+parler-tts/concatenated-common-voice-15-accented"
  --train_dataset_config_name "default+default"
  --train_split_name "train+train"
  --train_label_column_name "labels+labels"

  # Evaluation data
  --eval_dataset_name "parler-tts/concatenated-normalized-accent-dataset"
  --eval_dataset_config_name "default"
  --eval_split_name "test"
  --eval_label_column_name "labels"

  # Output location and run mode
  --output_dir "./"
  --do_train
  --do_eval
  --overwrite_output_dir
  --remove_unused_columns False

  # Precision
  --fp16
  --fp16_full_eval

  # Learning rate and schedule
  --learning_rate 1e-4
  --lr_scheduler_type "constant_with_warmup"

  # Audio length limits and attention mask
  --max_length_seconds 20
  --min_length_seconds 5
  --attention_mask

  # Step counts
  --warmup_steps 100
  --max_steps 5000

  # Batch sizes and worker counts
  --per_device_train_batch_size 32
  --per_device_eval_batch_size 32
  --preprocessing_num_workers 4
  --dataloader_num_workers 4

  # Logging, evaluation and checkpoint cadence
  --logging_strategy "steps"
  --logging_steps 10
  --evaluation_strategy "steps"
  --eval_steps 1000
  --save_strategy "no"
  --save_steps 5000

  # Remaining script/model options
  --filter_threshold 0.01
  --freeze_base_model False
  --gradient_checkpointing
  --push_to_hub False
  --trust_remote_code
)

python run_audio_classification.py "${args[@]}"