---
# Hyperparameter sweep configuration (the layout matches the Weights & Biases
# sweep format — confirm against the tool that consumes this file).

# Command line launched for every trial. `${program}` and `${args}` are
# placeholders; presumably the sweep agent expands them to the `program` entry
# and to the sampled `parameters` respectively.
command:
  - python3
  - ${program}
  - --fp16
  - --fp16_full_eval
  - --do_train
  - --do_eval
  - --trust_remote_code
  - --overwrite_output_dir
  - ${args}
# Search strategy used to pick parameter values for each trial.
method: random

# Objective the sweep optimizes: maximize the metric logged as `eval/accuracy`.
metric:
  goal: maximize
  name: eval/accuracy
# Arguments handed to the training program. `value` pins a single constant,
# `values` lists discrete choices, and `distribution`/`min`/`max` define a
# sampled range.
parameters:
  # --- Model ---
  model_name_or_path:
    value: facebook/mms-lid-4017

  # --- Training data ---
  # The '+'-separated entries presumably line up one-to-one across these four
  # keys (one entry per training dataset) — confirm against the training script.
  train_dataset_name:
    value: "stable-speech/concatenated-normalized-accent-dataset+stable-speech/concatenated-common-voice-15-accented"
  train_dataset_config_name:
    value: "default+default"
  train_split_name:
    value: "train+train"
  train_label_column_name:
    value: "labels+labels"

  # --- Evaluation data ---
  eval_dataset_name:
    value: stable-speech/concatenated-normalized-accent-dataset
  eval_dataset_config_name:
    value: default
  eval_split_name:
    value: test
  eval_label_column_name:
    value: labels

  # --- Output ---
  output_dir:
    value: "/raid/yoach/tmp/"
  remove_unused_columns:
    value: false

  # --- Optimization ---
  learning_rate:
    distribution: log_uniform_values
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `3e-6` as a string, not a float.
    min: 3.0e-6
    max: 0.01
  lr_scheduler_type:
    value: constant

  # --- Audio length filtering ---
  max_length_seconds:
    value: 20  # give some data diversity for longer audio samples
  min_length_seconds:
    value: 5
  attention_mask:
    values:
      - true

  # --- Training schedule (the only discrete search dimension) ---
  num_train_epochs:
    values:
      - 2
      - 5
      - 10
      - 20
      - 40
      - 60
  per_device_train_batch_size:
    value: 32
  per_device_eval_batch_size:
    value: 32

  # --- Data loading ---
  preprocessing_num_workers:
    value: 8
  dataloader_num_workers:
    value: 8

  # --- Logging, evaluation and checkpointing ---
  logging_strategy:
    value: steps
  logging_steps:
    value: 10
  evaluation_strategy:
    value: steps
  eval_steps:
    value: 2000
  # Quoted so the value stays the string "no" instead of a YAML 1.1 boolean.
  save_strategy:
    value: "no"
  save_steps:
    value: 2000
  metric_for_best_model:
    value: accuracy
  push_to_hub:
    value: false

  # --- Script-specific options (semantics are defined by the training
  # program, which is not visible here) ---
  use_weighted_layer_sum:
    value: false
  freeze_base_model:
    value: true
  max_samples_per_label:
    value: 10000
  save_to_disk:
    value: "/raid/yoach/tmp_dataset_accents/"
  temporary_save_to_disk:
    value: "/raid/yoach/tmp_hidden_states/"
  use_last_embedding_layer:
    value: true
  # Kept as a quoted string exactly as in the original configuration.
  filter_threshold:
    value: "0.001"
# Script substituted for `${program}` in `command`.
program: run_audio_classification.py
# Project name under which the sweep's runs are grouped.
project: mms-lid-accent-classification-v2