#!/usr/bin/env bash
#
# Invokes run_dataset_concatenation.py with fixed dataset arguments.
# The Python script is referenced by a relative path, so launch this script
# from the directory that contains run_dataset_concatenation.py.

# Stop at the first failing command (and on unset variables / failed pipeline
# stages) so a broken run is reported through the exit status instead of being
# masked by whatever command happens to run last.
set -euo pipefail

# Multi-dataset run. Every multi-valued option below carries exactly three
# "+"-joined entries, listed in the same order as the three dataset names.
# NOTE(review): presumably the Python script splits these values on "+" and
# pairs them up by position -- confirm against run_dataset_concatenation.py.
# Arguments are kept in an array so no line-continuation backslashes are
# needed and each value reaches Python as exactly one argv entry.
combined_args=(
  --dataset_name "sanchit-gandhi/vctk+facebook/voxpopuli+sanchit-gandhi/edacc-normalized"
  --dataset_config_name "default+en_accented+default"
  --dataset_split_name "train+test+validation"
  --label_column_name "accent+accent+accent"
  --text_column_name "text+normalized_text+text"
  --speaker_column_name "speaker_id+speaker_id+speaker"
  --batch_size 500
  --output_dir "./concatenated-dataset"
)
python run_dataset_concatenation.py "${combined_args[@]}"

# Single-dataset run: the "test" split of sanchit-gandhi/edacc-normalized,
# written to its own output directory. The options are collected in an array
# and expanded as one argv entry per element.
edacc_test_args=(
  --dataset_name "sanchit-gandhi/edacc-normalized"
  --dataset_config_name "default"
  --dataset_split_name "test"
  --label_column_name "accent"
  --text_column_name "text"
  --speaker_column_name "speaker"
  --batch_size 500
  --output_dir "./concatenated-dataset-test"
)
python run_dataset_concatenation.py "${edacc_test_args[@]}"