task:
  hub_module_url: ''
  model:
    encoder:
      type: bigbird
      bigbird:
        use_gradient_checkpointing: false
        # hidden_size: 768
        # num_layers: 12
        # num_attention_heads: 12
        # intermediate_size: 3072
  max_answer_length: 30
  n_best_size: 20
  null_score_diff_threshold: 0.0
  init_checkpoint: 'TODO'
  train_data:
    drop_remainder: true
    global_batch_size: 48
    input_path: 'TODO'
    is_training: true
    seq_length: 1024
  validation_data:
    do_lower_case: true
    doc_stride: 128
    drop_remainder: false
    global_batch_size: 48
    input_path: 'TODO'
    is_training: false
    query_length: 64
    seq_length: 1024
    tokenization: SentencePiece
    version_2_with_negative: false
    vocab_file: 'TODO'
trainer:
  checkpoint_interval: 1000
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 3699
        end_learning_rate: 0.0
        initial_learning_rate: 8.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 370
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  train_steps: 3699
  validation_interval: 1000
  validation_steps: 226
  best_checkpoint_export_subdir: 'best_ckpt'
  best_checkpoint_eval_metric: 'final_f1'
  best_checkpoint_metric_comp: 'higher'
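
# Usage sketch (an assumption, not part of the original config): configs in this
# format are typically consumed by the TensorFlow Model Garden training driver,
# for example:
#
#   python3 official/nlp/train.py \
#     --experiment=bert/squad \
#     --config_file=<path_to_this_file>.yaml \
#     --model_dir=<output_dir> \
#     --mode=train_and_eval
#
# The experiment name, script path, and output directory above are illustrative
# placeholders. The 'TODO' fields (task.init_checkpoint, *.input_path, and
# validation_data.vocab_file) must be filled in here or supplied via
# --params_override before launching training.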