DLLL {"timestamp": "1689419433.768081", "datetime": "2023-07-15 19:10:33.768081", "elapsedtime": "0.000237", "type": "LOG", "step": "PARAMETER", "data": {"Config": ["Namespace(amp=True, bert_model='bert-large-uncased', cache_dir=None, config_file='/public/home/hepj//model_source/pytorch_bert/bert_config.json', disable_progress_bar=False, dist_url='tcp://224.66.41.62:23456', do_eval=True, do_lower_case=False, do_predict=True, do_train=True, doc_stride=128, eval_script='./evaluate-v1.1.py', fp16=True, gpus_per_node=1, gradient_accumulation_steps=1, init_checkpoint='/public/home/hepj/model_source/model_pytorch.ckpt.pt', json_summary='./log/results-squad-fp16.json', learning_rate=5e-05, local_rank=-1, log_freq=50, loss_scale=0, max_answer_length=30, max_query_length=64, max_seq_length=384, max_steps=-1.0, n_best_size=20, no_cuda=False, null_score_diff_threshold=0.0, num_train_epochs=3.0, output_dir='/public/home/hepj/outdir/tourch/SQuAD', predict_batch_size=4, predict_file='/public/home/hepj/data/sq1.1/dev-v1.1.json', seed=42, skip_cache=False, skip_checkpoint=False, train_batch_size=4, train_file='/public/home/hepj/data/sq1.1/train-v1.1.json', use_env=False, verbose_logging=False, version_2_with_negative=False, vocab_file='/public/home/hepj//model_source/pytorch_bert/vocab.txt', warmup_proportion=0.1, world_size=1)"]}} DLLL {"timestamp": "1689419433.787672", "datetime": "2023-07-15 19:10:33.787672", "elapsedtime": "0.019828", "type": "LOG", "step": "PARAMETER", "data": {"SEED": 42}} DLLL {"timestamp": "1689419453.753193", "datetime": "2023-07-15 19:10:53.753193", "elapsedtime": "19.985349", "type": "LOG", "step": "PARAMETER", "data": {"loading_checkpoint": true}} DLLL {"timestamp": "1689419456.642115", "datetime": "2023-07-15 19:10:56.642115", "elapsedtime": "22.874271", "type": "LOG", "step": "PARAMETER", "data": {"loaded_checkpoint": true}} DLLL {"timestamp": "1689419457.266302", "datetime": "2023-07-15 19:10:57.266302", "elapsedtime": "23.498458", "type": "LOG", "step": "PARAMETER", "data": {"model_weights_num": 335150082}} DLLL {"timestamp": "1689419469.543777", "datetime": "2023-07-15 19:11:09.543777", "elapsedtime": "35.775933", "type": "LOG", "step": "PARAMETER", "data": {"train_start": true}} DLLL {"timestamp": "1689419469.543959", "datetime": "2023-07-15 19:11:09.543959", "elapsedtime": "35.776115", "type": "LOG", "step": "PARAMETER", "data": {"training_samples": 87599}} DLLL {"timestamp": "1689419469.54403", "datetime": "2023-07-15 19:11:09.544030", "elapsedtime": "35.776186", "type": "LOG", "step": "PARAMETER", "data": {"training_features": 88368}} DLLL {"timestamp": "1689419469.544095", "datetime": "2023-07-15 19:11:09.544095", "elapsedtime": "35.776251", "type": "LOG", "step": "PARAMETER", "data": {"train_batch_size": 4}} DLLL {"timestamp": "1689419469.544156", "datetime": "2023-07-15 19:11:09.544156", "elapsedtime": "35.776312", "type": "LOG", "step": "PARAMETER", "data": {"steps": 65697.0}} DLLL {"timestamp": "1689419476.360987", "datetime": "2023-07-15 19:11:16.360987", "elapsedtime": "42.593143", "type": "LOG", "step": [0, 1], "data": {"step_loss": 6.122858047485352, "learning_rate": 7.610697596541699e-09}} DLLL {"timestamp": "1689419492.221115", "datetime": "2023-07-15 19:11:32.221115", "elapsedtime": "58.453271", "type": "LOG", "step": [0, 51], "data": {"step_loss": 5.114989757537842, "learning_rate": 3.8814557742362663e-07}} DLLL {"timestamp": "1689419507.932752", "datetime": "2023-07-15 19:11:47.932752", "elapsedtime": "74.164908", "type": "LOG", "step": [0, 101], "data": {"step_loss": 5.053555488586426, "learning_rate": 7.686804572507116e-07}}