# Text2Sparql with BART

name: &name Text2Sparql

trainer:
  devices: 1 # number of GPUs, 0 for CPU, or a list of GPU indices
  num_nodes: 1
  max_epochs: 2 # number of training epochs
  max_steps: -1 # -1 for no limit; takes precedence over max_epochs when set
  accumulate_grad_batches: 1 # accumulates gradients every k batches
  accelerator: gpu
  strategy: ddp
  gradient_clip_val: 0.0
  log_every_n_steps: 1
  val_check_interval: 1.0 # 1.0 checks once per epoch; 0.25 checks 4 times per epoch
  enable_checkpointing: False # provided by exp_manager
  logger: false # provided by exp_manager

model:
  nemo_path: null # path for the exported .nemo model
  max_seq_length: 150
  batch_size: 16
  convert_labels: true # true for BART, false otherwise (converts pad_id in labels to -100 so padding is masked in the loss)
  data_dir: null

  language_model:
    pretrained_model_name: facebook/bart-base # HuggingFace end-to-end model name
    pretrained_encoder_model_name: null # HuggingFace encoder model name
    pretrained_decoder_model_name: null # HuggingFace decoder model name
    lm_checkpoint: null
    config: null
    config_file: null # path to a JSON config file; takes precedence over config

  encoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # path to a SentencePiece tokenizer model
    special_tokens: null
    add_special_tokens: true

  decoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # path to a SentencePiece tokenizer model
    special_tokens: null
    add_special_tokens: true

  train_ds:
    filepath: ${model.data_dir}/train.tsv # path to data file
    shuffle: true
    num_samples: -1 # -1 to use all samples
    num_workers: 2
    drop_last: false
    pin_memory: false

  validation_ds:
    filepath: ${model.data_dir}/test_easy.tsv # path to data file
    shuffle: false
    num_samples: -1 # -1 to use all samples
    num_workers: 2
    drop_last: false
    pin_memory: false

  test_ds:
    filepath: ${model.data_dir}/test_hard.tsv # path to data file
    shuffle: false
    num_samples: -1 # -1 to use all samples
    num_workers: 2
    drop_last: false
    pin_memory: false

  optim:
    name: adamw
    lr: 4e-5
    weight_decay: 0.0

    sched:
      name: CosineAnnealing
      warmup_steps: null
      warmup_ratio: 0.06 # fraction of training steps used for warmup (used when warmup_steps is null)
      min_lr: 0.0
      last_epoch: -1

  generate: # beam-search / sampling parameters for inference
    max_length: ${model.max_seq_length}
    num_beams: 1
    length_penalty: 2.0
    early_stopping: true
    repetition_penalty: 1.0
    do_sample: false
    top_k: null
    top_p: null
    num_return_sequences: 1

exp_manager:
  exp_dir: null # where to store logs and checkpoints
  name: *name # name of the experiment
  create_tensorboard_logger: True
  create_checkpoint_callback: True

hydra:
  run:
    dir: .
  job_logging:
    root:
      handlers: null
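
# ---------------------------------------------------------------------------
# Usage sketch (assumptions: this file is saved as text2sparql_config.yaml and
# an accompanying NeMo training script, here called text2sparql.py, reads it
# via Hydra; the script name and the data path below are placeholders):
#
#   python text2sparql.py \
#     --config-path=. \
#     --config-name=text2sparql_config \
#     model.data_dir=/path/to/text2sparql/data \
#     trainer.devices=1
#
# Hydra treats dotted keys as overrides, so any field in this file
# (e.g. model.optim.lr=1e-5) can be changed from the command line.
# ---------------------------------------------------------------------------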
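# A minimal Python sketch of how this config is consumed, assuming a NeMo 1.x
# environment where Text2SparqlModel is part of the NLP collection (the import
# path may differ or be absent in your release; treat it as an assumption and
# check your installed version):
#
#   import pytorch_lightning as pl
#   from omegaconf import OmegaConf
#   from nemo.collections.nlp.models import Text2SparqlModel  # assumed import path
#   from nemo.utils.exp_manager import exp_manager
#
#   # Load this YAML into an OmegaConf config; ${...} interpolations resolve on access
#   cfg = OmegaConf.load("text2sparql_config.yaml")
#   cfg.model.data_dir = "/path/to/text2sparql/data"  # placeholder path
#
#   # The trainer section maps directly onto pytorch_lightning.Trainer kwargs
#   trainer = pl.Trainer(**cfg.trainer)
#   exp_manager(trainer, cfg.get("exp_manager", None))
#
#   # The model section configures the BART encoder-decoder and its data loaders
#   model = Text2SparqlModel(cfg.model, trainer=trainer)
#   trainer.fit(model)
#
#   # Export the trained model if a .nemo path was given
#   if cfg.model.nemo_path:
#       model.save_to(cfg.model.nemo_path)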