Commit 72f5785f authored by huaerkl

v1.0
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
no_epoch_checkpoints: true
save_interval_updates: 50000
keep_interval_updates: 1
distributed_training:
distributed_world_size: 16
ddp_backend: legacy_ddp
task:
_name: masked_lm
data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin
sample_break_mode: none
tokens_per_sample: 512
include_target_tokens: true
random_token_prob: 0
leave_unmasked_prob: 0
include_index: True
skip_masking: True
d2v2_multi: True
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
dataset:
batch_size: 4
ignore_unused_valid_subsets: true
skip_invalid_size_inputs_valid_test: true
disable_validation: true
optimization:
clip_norm: 1
lr: [0.0002]
max_update: 1000000
update_freq: [1]
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 0.0002
optimizer:
_name: adam
adam_betas: [0.9,0.98]
adam_eps: 1e-06
weight_decay: 0.01
lr_scheduler:
_name: cosine
warmup_updates: 4000
lr_scheduler: pass_through
model:
_name: data2vec_multi
loss_beta: 0
loss_scale: 1
depth: 12
embed_dim: 768
clone_batch: 8
ema_decay: 0.9999
ema_end_decay: 0.99999
ema_anneal_end_step: 100000
ema_encoder_only: true
average_top_k_layers: 12
layer_norm_target_layer: false
instance_norm_target_layer: true
batch_norm_target_layer: false
instance_norm_targets: false
layer_norm_targets: false
layerdrop: 0
norm_eps: 1e-5
supported_modality: TEXT
modalities:
text:
mask_prob: 0.48
mask_length: 1
mask_noise_std: 0.01
prenet_depth: 0
decoder:
input_dropout: 0.1
decoder_dim: 768
decoder_groups: 1
decoder_kernel: 9
decoder_layers: 5
decoder_residual: false
projection_layers: 2
projection_ratio: 2.0
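The token budget of the base text run above follows directly from its dataset and distributed settings; a minimal back-of-the-envelope sketch (values copied from the config, the arithmetic is only illustrative and ignores clone_batch, which duplicates samples for masking rather than adding data):

```python
# Rough token budget for the base text pretraining config above.
batch_size = 4              # dataset.batch_size (sequences per GPU)
world_size = 16             # distributed_training.distributed_world_size
update_freq = 1             # optimization.update_freq
tokens_per_sample = 512     # task.tokens_per_sample
max_update = 1_000_000      # optimization.max_update

tokens_per_update = batch_size * world_size * update_freq * tokens_per_sample
print(tokens_per_update)                      # 32768 tokens per update
print(tokens_per_update * max_update / 1e9)   # ~32.8B tokens over the full run
```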
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
min_loss_scale: 1e-6
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
save_interval: 5
save_interval_updates: 25000
keep_interval_updates: 1
no_epoch_checkpoints: true
task:
_name: mae_image_pretraining
data: /datasets01/imagenet_full_size/061417/
rebuild_batches: true
local_cache_path: /scratch/cache_abaevski/imagenet
key: source
precompute_mask_config: {}
dataset:
num_workers: 10
batch_size: 8
skip_invalid_size_inputs_valid_test: true
required_batch_size_multiple: 1
disable_validation: true
distributed_training:
distributed_world_size: 32
ddp_backend: c10d
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
optimization:
max_update: 500000
lr: [ 0.0004 ]
debug_param_names: true
clip_norm: 4
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 4e-4
optimizer:
_name: adam
adam_betas: [0.9,0.95]
weight_decay: 0.05
lr_scheduler:
_name: cosine
warmup_updates: 50040
lr_scheduler: pass_through
model:
_name: data2vec_multi
ema_decay: 0.9998
ema_end_decay: 1
ema_anneal_end_step: 300000
instance_norm_target_layer: true
layer_norm_target_layer: false
layer_norm_targets: true
end_of_block_targets: false
depth: 32
embed_dim: 1280
num_heads: 16
average_top_k_layers: 24
clone_batch: 16
norm_eps: 1e-6
min_target_var: 0
min_pred_var: 0
encoder_dropout: 0
post_mlp_drop: 0
attention_dropout: 0
activation_dropout: 0
supported_modality: IMAGE
cls_loss: 0.01
ema_encoder_only: false
modalities:
image:
patch_size: 14
inverse_mask: true
mask_prob: 0.75
mask_prob_adjust: 0.1
mask_length: 3
mask_noise_std: 0.01
prenet_depth: 0
ema_local_encoder: true
num_extra_tokens: 1
init_extra_token_zero: false
use_alibi_encoder: false
embed_dim: 1280
decoder:
decoder_dim: 1024
decoder_groups: 16
decoder_kernel: 5
decoder_layers: 3
final_layer_norm: false
input_dropout: 0
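With patch_size 14 and the standard 224x224 ImageNet crop (the crop size is not set in this file, so 224 is an assumption), the encoder sees a 16x16 grid of patches plus the single extra CLS-style token configured above; a small sketch of that count:

```python
# Patch-count sketch for the image config above. The 224x224 crop size is an
# assumption (not specified in this file); patch_size and num_extra_tokens
# are taken from the config.
image_size = 224          # assumed ImageNet crop
patch_size = 14           # modalities.image.patch_size
num_extra_tokens = 1      # modalities.image.num_extra_tokens

patches_per_side = image_size // patch_size      # 16
num_patches = patches_per_side ** 2              # 256
print(num_patches + num_extra_tokens)            # 257 tokens per image
```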
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
min_loss_scale: 1e-6
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
save_interval: 5
save_interval_updates: 25000
keep_interval_updates: 1
no_epoch_checkpoints: true
task:
_name: mae_image_pretraining
data: /datasets01/imagenet_full_size/061417/
rebuild_batches: true
local_cache_path: /scratch/cache_abaevski/imagenet
key: source
precompute_mask_config: {}
dataset:
num_workers: 10
batch_size: 8
skip_invalid_size_inputs_valid_test: true
required_batch_size_multiple: 1
disable_validation: true
distributed_training:
distributed_world_size: 16
ddp_backend: c10d
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
optimization:
max_update: 375300
lr: [ 0.0004 ]
debug_param_names: true
clip_norm: 4
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 4e-4
optimizer:
_name: adam
adam_betas: [0.9,0.95]
weight_decay: 0.05
lr_scheduler:
_name: cosine
warmup_updates: 50040
lr_scheduler: pass_through
model:
_name: data2vec_multi
ema_decay: 0.9998
ema_end_decay: 0.99995
ema_anneal_end_step: 150000
instance_norm_target_layer: true
layer_norm_target_layer: false
layer_norm_targets: true
end_of_block_targets: false
depth: 32
embed_dim: 1280
num_heads: 16
average_top_k_layers: 24
clone_batch: 16
norm_eps: 1e-6
min_target_var: 0
min_pred_var: 0
encoder_dropout: 0
post_mlp_drop: 0
attention_dropout: 0
activation_dropout: 0
supported_modality: IMAGE
cls_loss: 0.01
ema_encoder_only: false
modalities:
image:
inverse_mask: true
mask_prob: 0.75
mask_prob_adjust: 0.1
mask_length: 3
mask_noise_std: 0.01
prenet_depth: 0
ema_local_encoder: true
num_extra_tokens: 1
init_extra_token_zero: false
use_alibi_encoder: false
embed_dim: 1280
decoder:
decoder_dim: 1024
decoder_groups: 16
decoder_kernel: 5
decoder_layers: 3
input_dropout: 0
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
min_loss_scale: 1e-6
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
save_interval: 1
save_interval_updates: 25000
keep_interval_updates: 1
no_epoch_checkpoints: true
task:
_name: audio_pretraining
data: /fsx-wav2vec/abaevski/data/librivox/no_silence
max_sample_size: 320000
min_sample_size: 32000
normalize: true
precompute_mask_config: {}
dataset:
num_workers: 8
max_tokens: 320000
skip_invalid_size_inputs_valid_test: true
validate_interval: 5
required_batch_size_multiple: 1
disable_validation: true
distributed_training:
distributed_world_size: 48
ddp_backend: c10d
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
optimization:
max_update: 600000
debug_param_names: true
clip_norm: 1
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 0.0004
optimizer:
_name: adam
adam_betas: [0.9,0.98]
adam_eps: 1e-06
weight_decay: 0.01
lr_scheduler:
_name: cosine
warmup_updates: 10000
lr_scheduler: pass_through
model:
_name: data2vec_multi
loss_beta: 0
loss_scale: null
depth: 16
embed_dim: 1024
num_heads: 16
clone_batch: 12
ema_decay: 0.9997
ema_end_decay: 1
ema_anneal_end_step: 300000
ema_encoder_only: false
average_top_k_layers: 16
instance_norm_target_layer: true
layer_norm_target_layer: false
layer_norm_targets: false
layerdrop: 0
norm_eps: 1e-5
supported_modality: AUDIO
modalities:
audio:
feature_encoder_spec: '[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]'
conv_pos_depth: 5
conv_pos_width: 95
conv_pos_groups: 16
prenet_depth: 8
mask_prob: 0.55
mask_prob_adjust: 0.1
inverse_mask: false
mask_length: 5
mask_noise_std: 0.01
mask_dropout: 0
add_masks: false
ema_local_encoder: false
use_alibi_encoder: true
prenet_layerdrop: 0
prenet_dropout: 0.1
learned_alibi_scale: true
learned_alibi_scale_per_head: true
decoder:
input_dropout: 0.1
decoder_dim: 768
decoder_groups: 16
decoder_kernel: 7
decoder_layers: 4
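The waveform front-end of the audio config is given by feature_encoder_spec, a string of (channels, kernel, stride) tuples that fairseq evaluates into conv layers; a short sketch (assuming the usual 16 kHz input audio) of what the spec implies for the frame rate and for the 320000-sample crop limits above:

```python
# Each tuple in feature_encoder_spec is (channels, kernel_size, stride) for one
# layer of the waveform feature extractor; the overall downsampling factor is
# the product of the strides. Assumes 16 kHz input audio.
from functools import reduce

spec = eval('[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]')
total_stride = reduce(lambda acc, layer: acc * layer[2], spec, 1)
print(total_stride)                        # 320 samples per latent frame
print(1000 * total_stride / 16000, "ms")   # 20.0 ms per latent frame

# max_sample_size / max_tokens = 320000 samples is then 20 s of audio,
# or 1000 latent frames per training crop.
```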
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
min_loss_scale: 1e-6
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
save_interval: 5
save_interval_updates: 25000
keep_interval_updates: 1
no_epoch_checkpoints: true
task:
_name: mae_image_pretraining
data: /datasets01/imagenet_full_size/061417/
rebuild_batches: true
local_cache_path: /scratch/cache_abaevski/imagenet
key: source
precompute_mask_config: {}
dataset:
num_workers: 10
batch_size: 8
skip_invalid_size_inputs_valid_test: true
required_batch_size_multiple: 1
disable_validation: true
distributed_training:
distributed_world_size: 16
ddp_backend: c10d
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
optimization:
max_update: 375300
lr: [ 0.0004 ]
debug_param_names: true
clip_norm: 4
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 4e-4
optimizer:
_name: adam
adam_betas: [0.9,0.95]
weight_decay: 0.05
lr_scheduler:
_name: cosine
warmup_updates: 50040
lr_scheduler: pass_through
model:
_name: data2vec_multi
ema_decay: 0.9998
ema_end_decay: 0.99999
ema_anneal_end_step: 150000
instance_norm_target_layer: true
layer_norm_target_layer: false
layer_norm_targets: true
end_of_block_targets: false
depth: 24
embed_dim: 1024
num_heads: 16
average_top_k_layers: 18
clone_batch: 16
norm_eps: 1e-6
min_target_var: 0
min_pred_var: 0
encoder_dropout: 0
post_mlp_drop: 0
attention_dropout: 0
activation_dropout: 0
supported_modality: IMAGE
cls_loss: 0.01
ema_encoder_only: false
modalities:
image:
inverse_mask: true
mask_prob: 0.75
mask_prob_adjust: 0.1
mask_length: 3
mask_noise_std: 0.01
prenet_depth: 0
ema_local_encoder: true
num_extra_tokens: 1
init_extra_token_zero: false
use_alibi_encoder: false
embed_dim: 1024
decoder:
decoder_dim: 1024
decoder_groups: 16
decoder_kernel: 5
decoder_layers: 3
input_dropout: 0
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
min_loss_scale: 1e-6
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
save_interval_updates: 50000
keep_interval_updates: 1
no_epoch_checkpoints: true
task:
_name: masked_lm
data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin
sample_break_mode: none
tokens_per_sample: 512
include_target_tokens: true
random_token_prob: 0
leave_unmasked_prob: 0
include_index: True
skip_masking: True
d2v2_multi: True
dataset:
batch_size: 2
ignore_unused_valid_subsets: true
skip_invalid_size_inputs_valid_test: true
disable_validation: true
distributed_training:
distributed_world_size: 32
ddp_backend: c10d
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
optimization:
max_update: 600000
clip_norm: 1
optimizer:
_name: composite
dynamic_groups: true
groups:
default:
lr_float: 0.0001
optimizer:
_name: adam
adam_betas: [0.9,0.98]
adam_eps: 1e-06
weight_decay: 0.01
lr_scheduler:
_name: cosine
warmup_updates: 4000
lr_scheduler: pass_through
model:
_name: data2vec_multi
loss_beta: 0
loss_scale: 1
depth: 24
num_heads: 16
embed_dim: 1024
clone_batch: 8
ema_decay: 0.9999
ema_end_decay: 0.99999
ema_anneal_end_step: 100000
ema_encoder_only: true
average_top_k_layers: 24
layer_norm_target_layer: true
instance_norm_target_layer: false
batch_norm_target_layer: false
instance_norm_targets: true
layer_norm_targets: false
layerdrop: 0
norm_eps: 1e-5
supported_modality: TEXT
modalities:
text:
mask_prob: 0.5
mask_length: 1
mask_noise_std: 0.01
prenet_depth: 0
decoder:
input_dropout: 0.1
decoder_dim: 768
decoder_groups: 1
decoder_kernel: 9
decoder_layers: 5
decoder_residual: false
projection_layers: 2
projection_ratio: 2.0
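data2vec anneals the teacher's EMA decay from ema_decay to ema_end_decay over ema_anneal_end_step updates and holds it constant afterwards; a sketch of that schedule, assuming the linear annealing used in the reference implementation and the values from the config above:

```python
# EMA decay schedule sketch: linear ramp from ema_decay to ema_end_decay over
# ema_anneal_end_step updates, constant afterwards (values from the config).
def annealed_decay(step, start=0.9999, end=0.99999, total=100_000):
    if step >= total:
        return end
    return start + (end - start) * step / total

for step in (0, 50_000, 100_000, 600_000):
    print(step, annealed_decay(step))
# decay rises from 0.9999 to 0.99999 over the first 100k updates, then stays flat
```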
# @package _group_
common:
fp16: true
log_format: json
log_interval: 200
tensorboard_logdir: tb
fp16_no_flatten_grads: true
user_dir: ${env:PWD}/examples/data2vec
checkpoint:
no_epoch_checkpoints: true
save_interval_updates: 50000
keep_interval_updates: 1
distributed_training:
distributed_world_size: 32
ddp_backend: legacy_ddp
task:
_name: masked_lm
data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin
sample_break_mode: none
tokens_per_sample: 512
include_target_tokens: true
random_token_prob: 0
leave_unmasked_prob: 0
include_index: True
skip_masking: True
d2v2_multi: True
criterion:
_name: model
log_keys:
- ema_decay
- target_var
- pred_var
- model_norm
- ema_norm
- masked_pct
dataset:
batch_size: 2
ignore_unused_valid_subsets: true
skip_invalid_size_inputs_valid_test: true
disable_validation: true
optimization:
clip_norm: 1
lr: [3e-4]
max_update: 1000000
update_freq: [1]
optimizer:
_name: composite
groups:
default:
lr_float: 1e-4
optimizer:
_name: adam
adam_betas: [0.9,0.98]
adam_eps: 1e-06
weight_decay: 0.01
lr_scheduler:
_name: cosine
warmup_updates: 4000
decoder:
lr_float: 1e-4
optimizer:
_name: adam
adam_betas: [0.9,0.98]
adam_eps: 1e-06
weight_decay: 0.01
lr_scheduler:
_name: cosine
warmup_updates: 4000
lr_scheduler: pass_through
model:
_name: data2vec_multi
loss_beta: 4
loss_scale: 1
depth: 24
num_heads: 16
embed_dim: 1024
clone_batch: 8
ema_decay: 0.9999
ema_end_decay: 0.99999
ema_anneal_end_step: 100000
ema_encoder_only: true
average_top_k_layers: 24
layer_norm_target_layer: true
instance_norm_target_layer: false
batch_norm_target_layer: false
instance_norm_targets: true
layer_norm_targets: false
layerdrop: 0
norm_eps: 1e-5
supported_modality: TEXT
decoder_group: true
modalities:
text:
mask_prob: 0.5
mask_length: 1
mask_noise_std: 0.01
prenet_depth: 0
decoder:
input_dropout: 0.1
decoder_dim: 768
decoder_groups: 1
decoder_kernel: 9
decoder_layers: 5
decoder_residual: false
projection_layers: 2
projection_ratio: 2.0
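This variant splits the composite optimizer into a default group and a decoder group (routed by decoder_group: true), each with its own Adam instance and cosine schedule while the top-level lr_scheduler is pass_through; a rough sketch of the idea, not fairseq's actual CompositeOptimizer:

```python
# Rough sketch of a composite optimizer: one (optimizer, scheduler) pair per
# parameter group, stepped together. Hypothetical illustration only; warmup
# and fairseq's scheduler wrappers are omitted.
import torch

def build_groups(model):
    decoder_params = [p for n, p in model.named_parameters() if "decoder" in n]
    default_params = [p for n, p in model.named_parameters() if "decoder" not in n]
    groups = {}
    for name, params in (("default", default_params), ("decoder", decoder_params)):
        opt = torch.optim.Adam(params, lr=1e-4, betas=(0.9, 0.98),
                               eps=1e-6, weight_decay=0.01)
        sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=1_000_000)
        groups[name] = (opt, sched)
    return groups

def step_all(groups):
    for opt, sched in groups.values():
        opt.step()
        sched.step()
        opt.zero_grad()
```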
# @package _global_
hydra:
sweep:
dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S}
distributed_training:
distributed_world_size: 1
nprocs_per_node: 1
distributed_port: -1
common:
log_interval: 1
dataset:
num_workers: 0
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 80
gpus_per_node: 8
tasks_per_node: 1
mem_gb: 450
nodes: 1
name: ${env:PREFIX}_${hydra.job.config_name}
partition: devlab,learnlab,learnfair,scavenge
constraint: volta32gb,ib4
max_num_timeout: 30
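These launcher presets rely on hydra.job.override_dirname to name each sweep subdirectory; a rough sketch of how the kv_sep, item_sep, and exclude_keys settings above shape that name (the override list is hypothetical, and Hydra builds the string internally):

```python
# Rough illustration of how hydra.job.override_dirname is derived from the
# command-line overrides under the settings above. The overrides here are
# hypothetical examples; Hydra performs this composition itself.
overrides = [
    "model.depth=24",
    "optimization.lr=[0.0004]",
    "checkpoint.save_interval_updates=25000",   # in exclude_keys, so dropped
]
exclude_keys = {"checkpoint.save_interval_updates"}

parts = [
    ov.replace("=", ":", 1)                     # kv_sep ':'
    for ov in overrides
    if ov.split("=", 1)[0] not in exclude_keys
]
print("/".join(parts))                          # item_sep '/'
# model.depth:24/optimization.lr:[0.0004]
```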
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.local_cache_path
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 80
gpus_per_node: 8
tasks_per_node: 1
mem_gb: 0
nodes: 1
name: ${env:PREFIX}_${hydra.job.config_name}
partition: wav2vec,learnlab,learnfair
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 10
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 450
nodes: 2
name: ${env:PREFIX}_${hydra.job.config_name}
partition: devlab,learnlab,learnfair,scavenge
constraint: volta32gb,ib4
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.local_cache_path
- task.data
- task.post_save_script
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
- model.model_path
sweep:
dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 12
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 0
nodes: 2
name: ${env:PREFIX}_${hydra.job.config_name}
partition: wav2vec
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 80
gpus_per_node: 8
tasks_per_node: 1
mem_gb: 450
nodes: 3
name: ${env:PREFIX}_${hydra.job.config_name}
partition: devlab,learnlab,learnfair,scavenge
constraint: volta32gb,ib4
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 10
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 450
nodes: 4
name: ${env:PREFIX}_${hydra.job.config_name}
partition: devlab,learnlab,learnfair,scavenge
constraint: volta32gb,ib4
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- task.post_save_script
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 12
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 0
nodes: 4
name: ${env:PREFIX}_${hydra.job.config_name}
partition: wav2vec
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 12
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 0
nodes: 6
name: ${env:PREFIX}_${hydra.job.config_name}
partition: wav2vec,learnlab,learnfair
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 10
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 450
nodes: 8
name: ${env:PREFIX}_${hydra.job.config_name}
partition: devlab,learnlab,learnfair,scavenge
constraint: volta32gb,ib4
max_num_timeout: 30
# @package _global_
hydra:
job:
config:
override_dirname:
kv_sep: ':'
item_sep: '/'
exclude_keys:
- run_config
- distributed_training.distributed_port
- distributed_training.distributed_world_size
- model.pretrained_model_path
- model.target_network_path
- next_script
- task.cache_in_scratch
- task.data
- checkpoint.save_interval_updates
- checkpoint.keep_interval_updates
- checkpoint.save_on_overflow
- common.log_interval
- common.user_dir
sweep:
dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
subdir: ''
launcher:
submitit_folder: ${hydra.sweep.dir}
timeout_min: 4320
cpus_per_task: 12
gpus_per_node: 8
tasks_per_node: 8
mem_gb: 0
nodes: 8
name: ${env:PREFIX}_${hydra.job.config_name}
partition: wav2vec,learnlab,learnfair
max_num_timeout: 30
# @package _group_
common:
fp16: true
fp16_init_scale: 4
threshold_loss_scale: 1
fp16_scale_window: 128
log_format: json
log_interval: 200
user_dir: ${env:PWD}/examples/data2vec
task:
_name: sentence_prediction
data: ???
init_token: 0
separator_token: 2
num_classes: 2
max_positions: 512
d2v2_multi: True
checkpoint:
best_checkpoint_metric: mcc
maximize_best_checkpoint_metric: true
no_epoch_checkpoints: true
distributed_training:
find_unused_parameters: true
distributed_world_size: 1
nprocs_per_node: 1
distributed_port: -1
criterion:
_name: sentence_prediction
report_mcc: True
dataset:
batch_size: 16
required_batch_size_multiple: 1
max_tokens: 4400
num_workers: 1
optimizer:
_name: adam
weight_decay: 0.1
adam_betas: (0.9,0.98)
adam_eps: 1e-06
lr_scheduler:
_name: polynomial_decay
warmup_updates: 320
optimization:
clip_norm: 0.0
lr: [2e-05]
max_update: 5336
max_epoch: 10
model:
_name: data2vec_text_classification
model_path: ???
# @package _group_
common:
fp16: true
fp16_init_scale: 4
threshold_loss_scale: 1
fp16_scale_window: 128
log_format: json
log_interval: 200
user_dir: ${env:PWD}/examples/data2vec
task:
_name: sentence_prediction
data: ???
init_token: 0
separator_token: 2
num_classes: 3
max_positions: 512
d2v2_multi: True
checkpoint:
best_checkpoint_metric: accuracy
maximize_best_checkpoint_metric: true
no_epoch_checkpoints: true
distributed_training:
find_unused_parameters: true
distributed_world_size: 1
nprocs_per_node: 1
distributed_port: -1
criterion:
_name: sentence_prediction
dataset:
batch_size: 32
required_batch_size_multiple: 1
max_tokens: 4400
valid_subset: valid,valid1
num_workers: 1
optimizer:
_name: adam
weight_decay: 0.1
adam_betas: (0.9,0.98)
adam_eps: 1e-06
lr_scheduler:
_name: polynomial_decay
warmup_updates: 7432
optimization:
clip_norm: 0.0
lr: [2e-05]
max_update: 123873
max_epoch: 10
model:
_name: data2vec_text_classification
model_path: ???
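In the two fine-tuning configs above, task.data and model.model_path are left as ???, OmegaConf's marker for a mandatory value that must be supplied at launch time, typically as command-line overrides; a minimal illustration of that behaviour, with a placeholder path:

```python
# '???' marks a mandatory value: reading it before it is overridden raises
# MissingMandatoryValue, so task.data and model.model_path must be provided
# at launch. The path below is only a placeholder.
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"task": {"data": "???"}, "model": {"model_path": "???"}})
try:
    _ = cfg.task.data
except MissingMandatoryValue:
    print("task.data must be set before use")

OmegaConf.update(cfg, "task.data", "/path/to/binarized/task/data")
print(cfg.task.data)
```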