OpenDAS / Fairseq · Commits

Commit 2af922f1
Authored May 09, 2019 by Myle Ott

Revert "Add sweep scripts"

This reverts commit 8e8e1afc.

parent 8e8e1afc
Showing 3 changed files, with 0 additions and 328 deletions:

    fb_sweep/sweep_lm_data.py                    +0  -37
    fb_sweep/sweep_ooo_bi_transformer_big.py     +0  -146
    fb_sweep/sweep_ooo_bi_transformer_huge.py    +0  -145
fb_sweep/sweep_lm_data.py (deleted, file mode 100644 → 0)
#!/usr/bin/env python3

def set_data_based_on_shortname(args):

    def set_data(fmt, num_shards):
        if num_shards == 0:
            args.data = fmt.format(0)
        else:
            args.data = ':'.join([fmt.format(i) for i in range(num_shards)])

    # mmap datasets
    if args.data == 'CC-NEWS-en.v7.1':
        set_data('/private/home/myleott/data/data-bin/CC-NEWS-en.v7.1/shard{}', 10)
    elif args.data == 'fb_posts':
        set_data('/data/tmp/fb_posts.en.2018-2019.bpe.mmap-bin/shard{}', 100)
    elif args.data == 'fb_posts_gfs':
        set_data('/mnt/vol/gfsai-flash2-east/ai-group/users/myleott/fb_posts/fb_posts.en.2018-2019.bpe.mmap-bin/shard{}', 100)
    # old datasets
    elif args.data == 'CC-NEWS-en.v6':
        set_data('/private/home/myleott/data/data-bin/CC-NEWS-en.v6', 0)
    elif args.data == 'CC-NEWS-en.v9':
        set_data('/private/home/namangoyal/fairseq-py/data-bin/CC-NEWS-en.v9/shard{}', 100)
    elif args.data == 'bookwiki':
        set_data('/private/home/myleott/data/data-bin/bookwiki.10shards/shard{}', 10)
    elif args.data == 'bookwiki_full':
        set_data('/private/home/myleott/data/data-bin/bookwiki-bin', 0)
    elif args.data == 'fb_posts_old':
        set_data('/data/tmp/mono.english.public.2018-2019.shard{}.sents.bpe-bin', 100)
    elif args.data == 'fb_posts_gfs':
        # NOTE: unreachable -- shadowed by the identical 'fb_posts_gfs'
        # branch above, which always matches first.
        set_data('/mnt/vol/gfsai-flash2-east/ai-group/users/myleott/fb_posts/en/mono.english.public.2018-2019.shard{}.sents.bpe-bin', 100)
    elif args.data == 'wmt19_en_news_docs':
        set_data('/private/home/myleott/data/data-bin/wmt19_en_news_docs/wmt19_en_news_docs.bpe.shard{}', 100)
    else:
        set_data(args.data, 0)

    return args
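For reference, the helper rewrites args.data in place, expanding a dataset shortname into fairseq's colon-separated shard-path convention. A minimal usage sketch follows; argparse.Namespace stands in for the sweep framework's parsed args, which is an assumption here, since the real caller is not part of this diff:

    # Minimal usage sketch. `argparse.Namespace` stands in for the sweep
    # framework's parsed args; the real caller passes its own args object.
    import argparse

    from sweep_lm_data import set_data_based_on_shortname

    args = argparse.Namespace(data='bookwiki')
    set_data_based_on_shortname(args)
    print(args.data)
    # '/private/home/myleott/data/data-bin/bookwiki.10shards/shard0:...:shard9'
    # (ten colon-joined paths; fairseq treats a colon-separated data value
    # as a list of sharded data directories)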
fb_sweep/sweep_ooo_bi_transformer_big.py (deleted, file mode 100755 → 0)
#!/usr/bin/env python

try:
    import sweep_chronos as sweep
    from sweep_chronos import hyperparam
except ImportError:
    import sweep
    from sweep import hyperparam

from sweep_lm_data import set_data_based_on_shortname


def get_grid(args):
    grid = []

    enable_ooo = True
    max_update = 250000
    num_data_loaders = 4
    ddp_backend = 'no_c10d'

    # 64 x 16GB Volta
    arch = 'bi_transformer_lm_big'
    max_sentences = 16
    update_freq = 2
    peak_lr = 2.5e-4
    lr_scheduler = 'inverse_sqrt'
    #lr_scheduler = 'polynomial'
    #grid += [hyperparam('--decoder-learned-pos', save_dir_key=lambda val: 'learnpos')]

    max_tokens = 550 * max_sentences

    set_data_based_on_shortname(args)

    # batch size
    grid += [
        hyperparam('--tokens-per-sample', 512, save_dir_key=lambda val: f'st{val}'),
        hyperparam('--max-sentences', max_sentences, save_dir_key=lambda val: f'ms{val}'),
        hyperparam('--max-tokens', max_tokens, save_dir_key=lambda val: f'mt{val}'),
        hyperparam('--update-freq', update_freq, save_dir_key=lambda val: f'uf{val}'),
    ]

    # task settings
    grid += [
        hyperparam('--task', 'odd_one_out_lm'),
    ]

    # model settings
    grid += [
        hyperparam('--arch', arch, save_dir_key=lambda val: val),
        hyperparam('--activation-fn', 'gelu_fast', save_dir_key=lambda val: val),
        hyperparam('--share-decoder-input-output-embed'),
    ]

    # regularization
    grid += [
        hyperparam('--dropout', 0.1, save_dir_key=lambda val: f'dr{val}'),
        hyperparam('--attention-dropout', 0.0, save_dir_key=lambda val: f'atdr{val}'),
        hyperparam('--weight-decay', 0.01, save_dir_key=lambda val: f'wd{val}'),
    ]

    # optimization settings
    grid += [
        hyperparam('--optimizer', 'adam', save_dir_key=lambda val: val),
        hyperparam('--adam-betas', '(0.9, 0.999)', save_dir_key=lambda val: 'beta2_999'),
        hyperparam('--clip-norm', 0.0, save_dir_key=lambda val: f'clip{val}'),
    ]

    # lr scheduler
    if lr_scheduler == 'inverse_sqrt':
        grid += [
            hyperparam('--lr-scheduler', 'inverse_sqrt'),
            hyperparam('--lr', peak_lr, save_dir_key=lambda val: f'lr{val}'),
            hyperparam('--warmup-init-lr', 0),
            hyperparam('--warmup-updates', 16000, save_dir_key=lambda val: f'warm{val}'),
        ]
    elif lr_scheduler == 'polynomial':
        grid += [
            hyperparam('--lr-scheduler', 'polynomial_decay'),
            hyperparam('--lr', peak_lr, save_dir_key=lambda val: f'lr{val}'),
            hyperparam('--total-num-update', max_update),
            hyperparam('--warmup-updates', 16000, save_dir_key=lambda val: f'warm{val}'),
        ]

    if enable_ooo:
        grid += [
            hyperparam('--criterion', 'odd_one_out'),
            hyperparam('--ooo-weight', [1.0], save_dir_key=lambda val: str(val)),
            hyperparam('--short-item-prob', 0.1, save_dir_key=lambda val: f'short{val}'),
        ]

    # FP16 + distributed settings
    grid += [
        hyperparam('--ddp-backend', ddp_backend),
        hyperparam('--fp16', save_dir_key=lambda val: 'fp16'),
        #hyperparam('--memory-efficient-fp16', save_dir_key=lambda val: 'me_fp16'),
        hyperparam('--fp16-init-scale', 4),
        hyperparam('--threshold-loss-scale', 1),
        hyperparam('--fp16-scale-window', 128),
    ]

    # data loading settings
    grid += [
        hyperparam('--dataset-impl', 'mmap'),
        hyperparam('--num-workers', num_data_loaders),
    ]

    # validation and checkpoint settings
    grid += [
        hyperparam('--save-interval-updates', 2000),
        hyperparam('--no-epoch-checkpoints'),
        hyperparam('--max-update', max_update, save_dir_key=lambda val: f'mu{val}'),
    ]

    # logging settings
    grid += [
        hyperparam('--log-format', 'json'),
        hyperparam('--log-interval', 25),
    ]

    # random seed
    grid += [
        hyperparam('--seed', [1], save_dir_key=lambda val: f'seed{val}'),
    ]

    if args.local:
        grid += [
            hyperparam('--log-format', 'json'),
            hyperparam('--log-interval', 1),
        ]

    return grid


def postprocess_hyperparams(args, config):
    """Postprocess a given hyperparameter configuration."""
    # if config['--seq-beam'].current_value <= 8:
    #     config['--max-tokens'].current_value = 400
    # else:
    #     config['--max-tokens'].current_value = 300
    pass


if __name__ == '__main__':
    sweep.main(get_grid, postprocess_hyperparams)
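Taken together, the batch-size settings above determine the effective batch per optimizer step. A back-of-envelope sketch follows; the 64-GPU figure is read from the '# 64 x 16GB Volta' comment and is an assumption about how the sweep was launched, not something this diff states:

    # Back-of-envelope effective batch size for the "big" config above.
    # num_gpus is an assumption taken from the '# 64 x 16GB Volta' comment.
    num_gpus = 64
    max_sentences = 16      # sentences per GPU per forward pass
    update_freq = 2         # gradient-accumulation steps per optimizer step
    tokens_per_sample = 512

    sentences_per_update = num_gpus * max_sentences * update_freq
    tokens_per_update = sentences_per_update * tokens_per_sample
    print(sentences_per_update)  # 2048
    print(tokens_per_update)     # 1048576 (~1M tokens per update)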
fb_sweep/sweep_ooo_bi_transformer_huge.py (deleted, file mode 100755 → 0)
#!/usr/bin/env python

try:
    import sweep_chronos as sweep
    from sweep_chronos import hyperparam
except ImportError:
    import sweep
    from sweep import hyperparam

from sweep_lm_data import set_data_based_on_shortname


def get_grid(args):
    grid = []

    enable_ooo = True
    max_update = 200000
    num_data_loaders = 4
    ddp_backend = 'no_c10d'

    # 256 x 32GB Volta
    arch = 'bi_transformer_lm_huge_relu'
    max_sentences = 2
    update_freq = 8
    peak_lr = 1e-4
    #lr_scheduler = 'inverse_sqrt'
    lr_scheduler = 'polynomial'
    #grid += [hyperparam('--decoder-learned-pos', save_dir_key=lambda val: 'learnpos')]

    max_tokens = 550 * max_sentences

    set_data_based_on_shortname(args)

    # batch size
    grid += [
        hyperparam('--tokens-per-sample', 512, save_dir_key=lambda val: f'st{val}'),
        hyperparam('--max-sentences', max_sentences, save_dir_key=lambda val: f'ms{val}'),
        hyperparam('--max-tokens', max_tokens, save_dir_key=lambda val: f'mt{val}'),
        hyperparam('--update-freq', update_freq, save_dir_key=lambda val: f'uf{val}'),
    ]

    # task settings
    grid += [
        hyperparam('--task', 'odd_one_out_lm'),
    ]

    # model settings
    grid += [
        hyperparam('--arch', arch, save_dir_key=lambda val: val),
        hyperparam('--share-decoder-input-output-embed'),
    ]

    # regularization
    grid += [
        hyperparam('--dropout', 0.1, save_dir_key=lambda val: f'dr{val}'),
        hyperparam('--attention-dropout', 0.0, save_dir_key=lambda val: f'atdr{val}'),
        hyperparam('--weight-decay', 0.01, save_dir_key=lambda val: f'wd{val}'),
    ]

    # optimization settings
    grid += [
        hyperparam('--optimizer', 'adafactor', save_dir_key=lambda val: val),
        hyperparam("--decay-rate", -0.8, save_dir_key=lambda val: f"decay{val}"),
        hyperparam('--clip-norm', 0.0, save_dir_key=lambda val: f'clip{val}'),
    ]

    # lr scheduler
    if lr_scheduler == 'inverse_sqrt':
        grid += [
            hyperparam('--lr-scheduler', 'inverse_sqrt'),
            hyperparam('--lr', peak_lr, save_dir_key=lambda val: f'lr{val}'),
            hyperparam('--warmup-init-lr', 0),
            hyperparam('--warmup-updates', 16000, save_dir_key=lambda val: f'warm{val}'),
        ]
    elif lr_scheduler == 'polynomial':
        grid += [
            hyperparam('--lr-scheduler', 'polynomial_decay'),
            hyperparam('--lr', peak_lr, save_dir_key=lambda val: f'lr{val}'),
            hyperparam('--total-num-update', max_update),
            hyperparam('--warmup-updates', 16000, save_dir_key=lambda val: f'warm{val}'),
        ]

    if enable_ooo:
        grid += [
            hyperparam('--criterion', 'odd_one_out'),
            hyperparam('--ooo-weight', [1.0], save_dir_key=lambda val: str(val)),
            hyperparam('--short-item-prob', 0.1, save_dir_key=lambda val: f'short{val}'),
        ]

    # FP16 + distributed settings
    grid += [
        hyperparam('--ddp-backend', ddp_backend),
        #hyperparam('--fp16', save_dir_key=lambda val: 'fp16'),
        hyperparam('--memory-efficient-fp16', save_dir_key=lambda val: 'me_fp16'),
        hyperparam('--fp16-init-scale', 4),
        hyperparam('--threshold-loss-scale', 1),
        hyperparam('--fp16-scale-window', 128),
    ]

    # data loading settings
    grid += [
        hyperparam('--dataset-impl', 'mmap'),
        hyperparam('--num-workers', num_data_loaders),
    ]

    # validation and checkpoint settings
    grid += [
        hyperparam('--save-interval-updates', 2000),
        hyperparam('--no-epoch-checkpoints'),
        hyperparam('--max-update', max_update, save_dir_key=lambda val: f'mu{val}'),
    ]

    # logging settings
    grid += [
        hyperparam('--log-format', 'json'),
        hyperparam('--log-interval', 25),
    ]

    # random seed
    grid += [
        hyperparam('--seed', [1], save_dir_key=lambda val: f'seed{val}'),
    ]

    if args.local:
        grid += [
            hyperparam('--log-format', 'json'),
            hyperparam('--log-interval', 1),
        ]

    return grid


def postprocess_hyperparams(args, config):
    """Postprocess a given hyperparameter configuration."""
    # if config['--seq-beam'].current_value <= 8:
    #     config['--max-tokens'].current_value = 400
    # else:
    #     config['--max-tokens'].current_value = 300
    pass


if __name__ == '__main__':
    sweep.main(get_grid, postprocess_hyperparams)
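The save_dir_key lambdas used throughout both scripts build a human-readable checkpoint-directory suffix, one name fragment per setting. The hyperparam class itself lives in the sweep module, which is not part of this diff; the sketch below is a guess at its apparent contract, labeled hypothetical, not the real implementation:

    # Hypothetical sketch of the apparent `hyperparam` contract; the real
    # class lives in the (separate) sweep module and may differ.
    class hyperparam:
        def __init__(self, name, values=None, save_dir_key=None):
            self.name = name
            # a scalar means "fixed"; a list means "swept over"
            self.values = values if isinstance(values, list) else [values]
            self.current_value = self.values[0]
            self.save_dir_key = save_dir_key

    def save_dir_suffix(grid):
        """Join each setting's name fragment into a directory suffix."""
        return '.'.join(
            hp.save_dir_key(hp.current_value)
            for hp in grid
            if hp.save_dir_key is not None
        )

    # Under this reading, the "huge" grid above would yield a suffix
    # beginning roughly: 'bi_transformer_lm_huge_relu.st512.ms2.mt1100.uf8...'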