:::MLLOG {"namespace": "", "time_ms": 1728444225641, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728444225642, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728444225642, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728444225643, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728444225643, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728445229773, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728445371286, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 4096, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728452615867, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728452980089, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728453530231, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728889462232, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889462232, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889462243, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889462246, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889463182, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889463193, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889463210, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889463219, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "seed", "value": 22078, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728889761577, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762201, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762201, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762208, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762264, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762284, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762285, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "seed", "value": 17315, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728889771351, "event_type": "INTERVAL_END", "key": "init_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 212}} :::MLLOG {"namespace": "", "time_ms": 1728889771352, "event_type": "INTERVAL_START", "key": "run_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 214}} :::MLLOG {"namespace": "", "time_ms": 1728889771904, "event_type": "POINT_IN_TIME", "key": "train_samples", "value": 4590101, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 222}} :::MLLOG {"namespace": "", "time_ms": 1728889771904, "event_type": "POINT_IN_TIME", "key": "eval_samples", "value": 3003, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 225}} :::MLLOG {"namespace": "", "time_ms": 1728889773125, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 255, "first_epoch_num": 1, "epoch_count": 1}} :::MLLOG {"namespace": "", "time_ms": 1728889773126, "event_type": "INTERVAL_START", "key": "epoch_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 258, "epoch_num": 1}} :::MLLOG {"namespace": "", "time_ms": 1728890366314, "event_type": "INTERVAL_END", "key": "epoch_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 273, "epoch_num": 1}} :::MLLOG {"namespace": "", "time_ms": 1728890366315, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 640, "epoch_num": 1}} :::MLLOG {"namespace": "", "time_ms": 1728890627512, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890627674, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890627712, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890627725, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890628433, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890628454, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890628462, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}} :::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}} :::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}} :::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "seed", "value": 9431, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}} :::MLLOG {"namespace": "", "time_ms": 1728890637403, "event_type": "INTERVAL_END", "key": "init_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 212}} :::MLLOG {"namespace": "", "time_ms": 1728890637404, "event_type": "INTERVAL_START", "key": "run_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 214}} :::MLLOG {"namespace": "", "time_ms": 1728890637971, "event_type": "POINT_IN_TIME", "key": "train_samples", "value": 4590101, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 222}} :::MLLOG {"namespace": "", "time_ms": 1728890637971, "event_type": "POINT_IN_TIME", "key": "eval_samples", "value": 3003, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 225}} :::MLLOG {"namespace": "", "time_ms": 1728890639238, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 255, "first_epoch_num": 1, "epoch_count": 1}} :::MLLOG {"namespace": "", "time_ms": 1728890639239, "event_type": "INTERVAL_START", "key": "epoch_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 258, "epoch_num": 1}}