"torchvision/csrc/io/image/image_read_mode.h" did not exist on "a884cb7bcc03e57a969c1a572b7af85a95b4421c"
transformer.log 29.8 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
:::MLLOG {"namespace": "", "time_ms": 1728444225641, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728444225642, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728444225642, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728444225643, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728444225643, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728445229773, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728445229775, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728445371286, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 4096, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728445371287, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728452615867, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728452615869, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728452615870, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728452980089, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728452980091, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728452980092, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728453530231, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 6000, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.25, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 0, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728453530233, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 1024, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.999, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-08, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728453530234, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1234, "metadata": {"file": "/mnt/fs/user/llama/custom_model/mlcommons/training_results_v0.7/NVIDIA/benchmarks/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728889462232, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889462232, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889462243, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889462246, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889463182, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889463193, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889463210, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889463219, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728889463220, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728889463221, "event_type": "POINT_IN_TIME", "key": "seed", "value": 22078, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728889761577, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762201, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762201, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762208, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762264, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762284, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762285, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728889762290, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728889762291, "event_type": "POINT_IN_TIME", "key": "seed", "value": 17315, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728889771351, "event_type": "INTERVAL_END", "key": "init_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 212}}
:::MLLOG {"namespace": "", "time_ms": 1728889771352, "event_type": "INTERVAL_START", "key": "run_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 214}}
:::MLLOG {"namespace": "", "time_ms": 1728889771904, "event_type": "POINT_IN_TIME", "key": "train_samples", "value": 4590101, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 222}}
:::MLLOG {"namespace": "", "time_ms": 1728889771904, "event_type": "POINT_IN_TIME", "key": "eval_samples", "value": 3003, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 225}}
:::MLLOG {"namespace": "", "time_ms": 1728889773125, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 255, "first_epoch_num": 1, "epoch_count": 1}}
:::MLLOG {"namespace": "", "time_ms": 1728889773126, "event_type": "INTERVAL_START", "key": "epoch_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 258, "epoch_num": 1}}
:::MLLOG {"namespace": "", "time_ms": 1728890366314, "event_type": "INTERVAL_END", "key": "epoch_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 273, "epoch_num": 1}}
:::MLLOG {"namespace": "", "time_ms": 1728890366315, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 640, "epoch_num": 1}}
:::MLLOG {"namespace": "", "time_ms": 1728890627512, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890627674, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890627712, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890627725, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890628433, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890628454, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890628462, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 116}}
:::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 81920, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 133}}
:::MLLOG {"namespace": "", "time_ms": 1728890628464, "event_type": "POINT_IN_TIME", "key": "opt_name", "value": "adam", "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 134}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.0019, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 136}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_steps", "value": 750, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 137}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "max_sequence_length", "value": 64, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 139, "method": "discard"}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_1", "value": 0.9, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 140}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_beta_2", "value": 0.98, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 141}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "opt_adam_epsilon", "value": 1e-09, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 142}}
:::MLLOG {"namespace": "", "time_ms": 1728890628465, "event_type": "POINT_IN_TIME", "key": "seed", "value": 9431, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 143}}
:::MLLOG {"namespace": "", "time_ms": 1728890637403, "event_type": "INTERVAL_END", "key": "init_stop", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 212}}
:::MLLOG {"namespace": "", "time_ms": 1728890637404, "event_type": "INTERVAL_START", "key": "run_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 214}}
:::MLLOG {"namespace": "", "time_ms": 1728890637971, "event_type": "POINT_IN_TIME", "key": "train_samples", "value": 4590101, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 222}}
:::MLLOG {"namespace": "", "time_ms": 1728890637971, "event_type": "POINT_IN_TIME", "key": "eval_samples", "value": 3003, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 225}}
:::MLLOG {"namespace": "", "time_ms": 1728890639238, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 255, "first_epoch_num": 1, "epoch_count": 1}}
:::MLLOG {"namespace": "", "time_ms": 1728890639239, "event_type": "INTERVAL_START", "key": "epoch_start", "value": null, "metadata": {"file": "/mnt/fs/user/llama/custom_model/libo_test/mlperf_test/transformer/implementations/pytorch/train.py", "lineno": 258, "epoch_num": 1}}