huaerkl / fairseq-data2vec_pytorch

Commit 72f5785f, authored Aug 15, 2023 by huaerkl

v1.0
Pipeline #505: canceled.
Showing 20 changed files (of 508+ changed in this commit) with 536 additions and 0 deletions:
examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr.sh (+18, -0)
examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr_nodep.sh (+16, -0)
examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh (+28, -0)
examples/data2vec/scripts/text/finetune_all_char_fair_aws_local_lr.sh (+17, -0)
examples/data2vec/scripts/text/finetune_all_fair.sh (+21, -0)
examples/data2vec/scripts/text/finetune_all_fair_aws.sh (+21, -0)
examples/data2vec/scripts/text/finetune_all_fair_aws_local_lr.sh (+17, -0)
examples/data2vec/scripts/text/finetune_all_fair_aws_lr.sh (+23, -0)
examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh (+25, -0)
examples/data2vec/scripts/text/finetune_all_fair_nodep.sh (+19, -0)
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws.sh (+19, -0)
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_local_lr.sh (+15, -0)
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr.sh (+21, -0)
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr_nopos.sh (+21, -0)
examples/data2vec/scripts/text/finetune_all_large_fair_aws_local_lr.sh (+17, -0)
examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh (+26, -0)
examples/data2vec/scripts/text/finetune_all_large_fair_nodep_aws_local_lr.sh (+15, -0)
examples/data2vec/scripts/text/finetune_sst2_qnli_sweep_fair_nodep.sh (+20, -0)
examples/data2vec/scripts/text/glue.py (+34, -0)
examples/data2vec/scripts/text/glue_lr.py (+143, -0)
examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

job_id="$1"
task_id="$2"
dir="$3"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh $dir
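The three positional arguments mirror what a submitit/slurm pretraining launch reports; task_id is accepted and logged but not otherwise used here. A hypothetical invocation (the job id, task id, and run directory are placeholders, not values from this commit):

    bash examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr.sh 123456 0 /fsx-wav2vec/runs/multi_pretrain

The wrapper only reserves one GPU for 24 hours and defers to finetune_all_fair_local_lr.sh for the actual task and learning-rate sweep, which starts once the job named in afterok:$job_id completes successfully.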
examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr_nodep.sh (new file, mode 100644)

#!/bin/bash

set -eu

dir="$1"

echo "dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh $dir
examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"
tasks[mnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MNLI-bin"
tasks[qqp]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QQP-bin"
tasks[sts_b]="/fsx-wav2vec/abaevski/data/nlp/GLUE/STS-B-bin"

lrs=(5e-6 8e-6 1e-5 2e-5)

for task data_path in ${(kv)tasks}; do
  for lr in $lrs; do
    echo $lr $task
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" \
    python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/multi/text_finetuning \
    --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \
    model.model_path="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" +model=text_wrap
  done
done
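The `for task data_path in ${(kv)tasks}` header is zsh-specific: the (kv) parameter flag expands the associative array into an alternating key/value word list, and zsh lets a for loop bind multiple variables that consume that list pairwise. A minimal standalone sketch of the same idiom (names and paths illustrative):

    #!/usr/bin/env zsh
    declare -A tasks
    tasks[cola]=/data/CoLA-bin
    tasks[rte]=/data/RTE-bin
    # each iteration binds one key to $task and its value to $data_path
    for task data_path in ${(kv)tasks}; do
        echo "$task -> $data_path"
    done

This is why the sweep scripts carry a zsh shebang while the sbatch wrappers that merely forward to them are plain bash.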
examples/data2vec/scripts/text/finetune_all_char_fair_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

job_id="$1"
task_id="$2"
dir="$3"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/ft_%A.out"
sbatch_args="$sbatch_args -e $dir/log/ft_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_char_fair_local_lr.sh $dir
examples/data2vec/scripts/text/finetune_all_fair.sh (new file, mode 100644)

#!/usr/bin/env zsh

job_id=$1
task_id=$2
dir="$3"
cp="$dir/$task_id/checkpoints/checkpoint_last.pt"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

declare -A tasks
tasks[cola]="/private/home/jgu/data/GLUE/CoLA-bin"
tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin"
tasks[mrpc]="/private/home/jgu/data/GLUE/MRPC-bin"
tasks[rte]="/private/home/jgu/data/GLUE/RTE-bin"
tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
  --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
  checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune/$task" &
done
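Given the afterok dependency, the intended call chain appears to be: queue pretraining through hydra/submitit first, then hand its Slurm job id to this script so each GLUE fine-tuning sweep waits for the checkpoint. A hypothetical invocation (id and path are placeholders):

    zsh examples/data2vec/scripts/text/finetune_all_fair.sh 123456 0 /checkpoint/someuser/pretrain_run

Each task is launched in the background with nohup, restores $dir/$task_id/checkpoints/checkpoint_last.pt, and writes its sweep under $dir/finetune/$task.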
examples/data2vec/scripts/text/finetune_all_fair_aws.sh (new file, mode 100644)

#!/usr/bin/env zsh

job_id=$1
task_id=$2
dir="$3"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
  --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
  checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune/$task" &
done
examples/data2vec/scripts/text/finetune_all_fair_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

job_id="$1"
task_id="$2"
dir="$3"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh $dir
examples/data2vec/scripts/text/finetune_all_fair_aws_lr.sh (new file, mode 100644)

#!/usr/bin/env zsh

job_id=$1
task_id=$2
dir="$3"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
    checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" &
  done
done
examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

lrs=(5e-6 8e-6 1e-5 2e-5)

for task data_path in ${(kv)tasks}; do
  for lr in $lrs; do
    echo $lr $task
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" \
    python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \
    checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]"
  done
done
examples/data2vec/scripts/text/finetune_all_fair_nodep.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/private/home/jgu/data/GLUE/CoLA-bin"
tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin"
tasks[mrpc]="/private/home/jgu/data/GLUE/MRPC-bin"
tasks[rte]="/private/home/jgu/data/GLUE/RTE-bin"
tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
  --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
  checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune/$task" &
done
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
  --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
  checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune/$task" &
done
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

dir="$1"

echo "dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh $dir
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
    checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" &
  done
done
examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr_nopos.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

for task data_path in ${(kv)tasks}; do
  for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
    checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" +model.encoder_learned_pos=False &
  done
done
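A note on the Hydra override grammar used throughout these scripts: key=value overrides an entry that already exists in the config, +key=value (as in +run_config=local or +model.encoder_learned_pos=False above) adds a key absent from the base config, and the quoted "optimization.lr=[${lr}]" form passes a list, which fairseq expects for its learning-rate schedule. A trimmed illustrative command (values are placeholders):

    python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
        --config-name rte "optimization.lr=[1e-5]" +model.encoder_learned_pos=False

The -m (multirun) flag is also what makes hydra.sweep.dir, rather than hydra.run.dir, control the output location.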
examples/data2vec/scripts/text/finetune_all_large_fair_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

job_id="$1"
task_id="$2"
dir="$3"

echo "job_id: $job_id, task_id: $task_id, dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh $dir
examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin"
tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin"
tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin"
tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin"
tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin"

lrs=(5e-6 8e-6 1e-5 2e-5)

for task data_path in ${(kv)tasks}; do
  for lr in $lrs; do
    echo $lr $task
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" \
    python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \
    checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" \
    model._name=roberta_large
  done
done
examples/data2vec/scripts/text/finetune_all_large_fair_nodep_aws_local_lr.sh (new file, mode 100644)

#!/bin/bash

set -eu

dir="$1"

echo "dir: $dir"

mkdir -p "$dir/log"

sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1"
sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00"
sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out"
sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err"

sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh $dir
examples/data2vec/scripts/text/finetune_sst2_qnli_sweep_fair_nodep.sh (new file, mode 100644)

#!/usr/bin/env zsh

dir="$1"
cp="$dir/checkpoints/checkpoint_last.pt"

echo "dir: $dir"

declare -A tasks
tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin"
tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin"

lrs="5e-6 1e-5 2e-5 5e-5 1e-4 2e-4 5e-4 1e-3"

for task data_path in ${(kv)tasks}; do
  for lr in $(echo "$lrs"); do
    PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \
    --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \
    checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_sweep/$task/lr_$lr" "optimization.lr=[${lr}]" &
  done
done
examples/data2vec/scripts/text/glue.py (new file, mode 100644)

from valids import parser, main as valids_main
import os.path as osp


args = parser.parse_args()

args.target = "valid_accuracy"
args.best_biggest = True
args.best = True
args.last = 0
args.path_contains = None

res = valids_main(args, print_output=False)

grouped = {}
for k, v in res.items():
    k = osp.dirname(k)
    run = osp.dirname(k)
    task = osp.basename(k)
    val = v["valid_accuracy"]

    if run not in grouped:
        grouped[run] = {}

    grouped[run][task] = val

for run, tasks in grouped.items():
    print(run)
    avg = sum(float(v) for v in tasks.values()) / len(tasks)
    avg_norte = sum(float(v) for k, v in tasks.items() if k != 'rte') / (len(tasks) - 1)
    try:
        print(f"{tasks['cola']}\t{tasks['qnli']}\t{tasks['mrpc']}\t{tasks['rte']}\t{tasks['sst_2']}\t{avg:.2f}\t{avg_norte:.2f}")
    except:
        print(tasks)
    print()
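glue.py builds on the parser exported by valids, a sibling helper module that is not part of this commit, so its command-line surface is defined there; presumably it is pointed at a directory of fine-tuning runs. A hypothetical invocation (the path argument is an assumption about the valids interface):

    PYTHONPATH=. python examples/data2vec/scripts/text/glue.py /path/to/runs

For each run it prints validation accuracy for cola, qnli, mrpc, rte, and sst_2, plus the average with and without rte; the bare except falls back to dumping the raw task dict when a task is missing.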
examples/data2vec/scripts/text/glue_lr.py (new file, mode 100644)

import os.path as osp
import re
from collections import defaultdict

from valids import parser, main as valids_main


TASK_TO_METRIC = {
    "cola": "mcc",
    "qnli": "accuracy",
    "mrpc": "acc_and_f1",
    "rte": "accuracy",
    "sst_2": "accuracy",
    "mnli": "accuracy",
    "qqp": "acc_and_f1",
    "sts_b": "pearson_and_spearman",
}
TASKS = ["cola", "qnli", "mrpc", "rte", "sst_2", "mnli", "qqp", "sts_b"]


def get_best_stat_str(task_vals, show_subdir):
    task_to_best_val = {}
    task_to_best_dir = {}
    for task, subdir_to_val in task_vals.items():
        task_to_best_val[task] = max(subdir_to_val.values())
        task_to_best_dir[task] = max(subdir_to_val.keys(), key=lambda x: subdir_to_val[x])
    # import pdb; pdb.set_trace()
    N1 = len(task_to_best_val)
    N2 = len([k for k in task_to_best_val if k != "rte"])
    avg1 = sum(task_to_best_val.values()) / N1
    avg2 = sum(v for task, v in task_to_best_val.items() if task != "rte") / N2

    try:
        msg = ""
        for task in TASKS:
            dir = task_to_best_dir.get(task, 'null')
            val = task_to_best_val.get(task, -100)
            msg += f"({dir}, {val})\t" if show_subdir else f"{val}\t"
        msg += f"{avg1:.2f}\t{avg2:.2f}"
    except Exception as e:
        msg = str(e)
        msg += str(sorted(task_vals.items()))
    return msg


def get_all_stat_str(task_vals):
    msg = ""
    for task in [task for task in TASKS if task in task_vals]:
        msg += f"=== {task}\n"
        for subdir in sorted(task_vals[task].keys()):
            msg += f"\t{subdir}\t{task_vals[task][subdir]}\n"
    return msg


def get_tabular_stat_str(task_vals):
    """assume subdir is <param>/run_*/0"""
    msg = ""
    for task in [task for task in TASKS if task in task_vals]:
        msg += f"=== {task}\n"
        param_to_runs = defaultdict(dict)
        for subdir in task_vals[task]:
            match = re.match("(.*)/(run_.*)/0", subdir)
            assert match, "subdir"
            param, run = match.groups()
            param_to_runs[param][run] = task_vals[task][subdir]
        params = sorted(param_to_runs, key=lambda x: float(x))
        runs = sorted(set(run for runs in param_to_runs.values() for run in runs))
        msg += ("runs:" + "\t".join(runs) + "\n")
        msg += ("params:" + "\t".join(params) + "\n")
        for param in params:
            msg += "\t".join([str(param_to_runs[param].get(run, None)) for run in runs])
            msg += "\n"
        # for subdir in sorted(task_vals[task].keys()):
        #     msg += f"\t{subdir}\t{task_vals[task][subdir]}\n"
    return msg


def main():
    parser.add_argument("--show_glue", action="store_true", help="show glue metric for each task instead of accuracy")
    parser.add_argument("--print_mode", default="best", help="best|all|tabular")
    parser.add_argument("--show_subdir", action="store_true", help="print the subdir that has the best results for each run")
    parser.add_argument("--override_target", default="valid_accuracy", help="override target")
    args = parser.parse_args()
    args.target = args.override_target
    args.best_biggest = True
    args.best = True
    args.last = 0
    args.path_contains = None

    res = valids_main(args, print_output=False)
    grouped_acc = {}
    grouped_met = {}
    # use official metric for each task
    for path, v in res.items():
        path = "/".join([args.base, path])
        path = re.sub("//*", "/", path)
        match = re.match("(.*)finetune[^/]*/([^/]*)/(.*)", path)
        if not match:
            continue
        run, task, subdir = match.groups()
        if run not in grouped_acc:
            grouped_acc[run] = {}
            grouped_met[run] = {}
        if task not in grouped_acc[run]:
            grouped_acc[run][task] = {}
            grouped_met[run][task] = {}
        if v is not None:
            grouped_acc[run][task][subdir] = float(v.get("valid_accuracy", -100))
            grouped_met[run][task][subdir] = float(v.get(f"valid_{TASK_TO_METRIC[task]}", -100))
        else:
            print(f"{path} has None return")

    header = "\t".join(TASKS)
    for run in sorted(grouped_acc):
        print(run)
        if args.print_mode == "all":
            if args.show_glue:
                print("===== GLUE =====")
                print(get_all_stat_str(grouped_met[run]))
            else:
                print("===== ACC =====")
                print(get_all_stat_str(grouped_acc[run]))
        elif args.print_mode == "best":
            print(f"{header}")
            if args.show_glue:
                print(f"GLUE: {get_best_stat_str(grouped_met[run], args.show_subdir)}")
            else:
                print(f"ACC: {get_best_stat_str(grouped_acc[run], args.show_subdir)}")
        elif args.print_mode == "tabular":
            if args.show_glue:
                print("===== GLUE =====")
                print(get_tabular_stat_str(grouped_met[run]))
            else:
                print("===== ACC =====")
                print(get_tabular_stat_str(grouped_acc[run]))
        else:
            raise ValueError(args.print_mode)
        print()


if __name__ == "__main__":
    main()
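glue_lr.py layers four flags on top of the same valids parser and also reads args.base, which must likewise be supplied by valids. A hypothetical invocation (any flags beyond the four added in main() are assumptions about the valids interface):

    PYTHONPATH=. python examples/data2vec/scripts/text/glue_lr.py --print_mode tabular --show_glue

The tabular mode assumes each task's subdirectories look like <param>/run_*/0 (see the assertion in get_tabular_stat_str), which appears to correspond to the finetune_lr/<task>/<lr> layout written by the sweep scripts combined with Hydra's numbered multirun subdirectories.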