Commit 688448db authored by silencealiang's avatar silencealiang
Browse files

更新代码

parent a02a5490
Pipeline #2503 passed with stage
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.82445,
10.86393,
10.85733,
10.80809,
10.70951,
10.63738,
10.16425,
10.28201,
10.19003,
9.88697
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
12678.0,
16220.0,
16626.0,
16055.0,
13829.0,
14904.0,
12931.0,
15765.0,
16771.0,
17621.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
25.19848,
0.70611,
0.70356,
0.70548,
0.70285,
0.70488,
0.70589,
0.70459,
0.70261,
0.71213
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82445, "5": 10.83944, "10": 10.7889, "15": 10.82831, "20": 10.72949, "25": 10.57667, "30": 10.40631, "35": 10.3135, "40": 10.13964, "45": 9.90704, "50": 9.96951}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12856.0, "5": 15993.0, "10": 12573.0, "15": 14651.0, "20": 13663.0, "25": 13137.0, "30": 14643.0, "35": 15376.0, "40": 16684.0, "45": 16099.0, "50": 18966.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 626182656.0, "5": 626185728.0, "10": 626182656.0, "15": 626185216.0, "20": 626186240.0, "25": 626183168.0, "30": 626183680.0, "35": 626184704.0, "40": 626185728.0, "45": 626475008.0, "50": 626184704.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1819317248.0, "5": 2050089472.0, "10": 2050089472.0, "15": 2050322944.0, "20": 2050322944.0, "25": 2050322944.0, "30": 2050322944.0, "35": 2050341376.0, "40": 2050341376.0, "45": 2050341376.0, "50": 2050341376.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 18.15563, "5": 0.44565, "10": 0.43891, "15": 0.44112, "20": 0.44197, "25": 0.44184, "30": 0.43708, "35": 0.43675, "40": 0.43865, "45": 0.44326, "50": 0.44012}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -51,4 +51,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -51,4 +51,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.86122,
10.88647,
10.87773,
10.83111,
10.7165,
10.60619,
10.13147,
10.22767,
10.15929,
9.83482
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
1694.0,
2148.0,
2169.0,
2103.0,
1991.0,
1900.0,
1707.0,
2189.0,
2557.0,
2606.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
9.61991,
0.29135,
0.28852,
0.28971,
0.29221,
0.28994,
0.28976,
0.28887,
0.28975,
0.2869
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.86122, "5": 10.88248, "10": 10.83515, "15": 10.82747, "20": 10.72762, "25": 10.55769, "30": 10.37915, "35": 10.28345, "40": 10.08809, "45": 9.82642, "50": 9.91341}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1694.0, "5": 2127.0, "10": 1548.0, "15": 1997.0, "20": 1846.0, "25": 1802.0, "30": 2112.0, "35": 2172.0, "40": 2560.0, "45": 2397.0, "50": 2761.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 382956544.0, "5": 382956544.0, "10": 382956544.0, "15": 382956544.0, "20": 382956544.0, "25": 382956544.0, "30": 382956544.0, "35": 382956544.0, "40": 382956544.0, "45": 382956544.0, "50": 382956544.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1496754688.0, "5": 1628741632.0, "10": 1628741632.0, "15": 1628741632.0, "20": 1628741632.0, "25": 1628741632.0, "30": 1628741632.0, "35": 1628741632.0, "40": 1628741632.0, "45": 1628741632.0, "50": 1628741632.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4.5526, "5": 0.28707, "10": 0.28966, "15": 0.28958, "20": 0.28862, "25": 0.28956, "30": 0.28644, "35": 0.28887, "40": 0.28562, "45": 0.28552, "50": 0.28692}}}
\ No newline at end of file
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.86122,
10.88647,
10.87773,
10.83111,
10.7165,
10.60623,
10.13146,
10.2277,
10.15933,
9.8348
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
1694.0,
2148.0,
2169.0,
2103.0,
1991.0,
1869.0,
1760.0,
2214.0,
2529.0,
2587.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
11.72537,
0.29824,
0.29549,
0.29574,
0.29514,
0.29533,
0.29415,
0.30722,
0.29731,
0.29867
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.86122, "5": 10.88248, "10": 10.83515, "15": 10.82747, "20": 10.72762, "25": 10.55769, "30": 10.37919, "35": 10.28344, "40": 10.08807, "45": 9.82644, "50": 9.9134}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1694.0, "5": 2127.0, "10": 1548.0, "15": 1997.0, "20": 1846.0, "25": 1700.0, "30": 2165.0, "35": 2194.0, "40": 2540.0, "45": 2414.0, "50": 2586.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 382956544.0, "5": 382956544.0, "10": 382956544.0, "15": 382956544.0, "20": 382956544.0, "25": 382956544.0, "30": 382956544.0, "35": 382956544.0, "40": 382956544.0, "45": 382956544.0, "50": 382956544.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1497803776.0, "5": 1628741632.0, "10": 1628741632.0, "15": 1628741632.0, "20": 1628741632.0, "25": 1628741632.0, "30": 1628741632.0, "35": 1628741632.0, "40": 1628741632.0, "45": 1628741632.0, "50": 1628741632.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 8.04015, "5": 0.30609, "10": 0.30611, "15": 0.30476, "20": 0.30451, "25": 0.3037, "30": 0.30473, "35": 0.30527, "40": 0.30608, "45": 0.30141, "50": 0.30553}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -46,4 +46,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.86104, "5": 10.88371, "10": 10.84263, "15": 10.87936, "20": 10.87404, "25": 10.82866, "30": 10.77191, "35": 10.67622, "40": 10.517, "45": 10.28436, "50": 10.27862, "55": 10.20113, "60": 9.83306, "65": 9.26979, "70": 9.92663, "75": 9.61385, "80": 9.56419, "85": 9.74319, "90": 9.92148, "95": 9.6163, "100": 9.5087}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 284527616.0, "5": 284527616.0, "10": 284527616.0, "15": 284527616.0, "20": 416513536.0, "25": 416513536.0, "30": 416513536.0, "35": 416513536.0, "40": 416513536.0, "45": 416513536.0, "50": 416513536.0, "55": 416513536.0, "60": 416513536.0, "65": 416513536.0, "70": 416513536.0, "75": 416513536.0, "80": 416513536.0, "85": 416513536.0, "90": 416513536.0, "95": 416513536.0, "100": 416513536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1465367552.0, "5": 1465367552.0, "10": 1465368576.0, "15": 1465368576.0, "20": 1597092352.0, "25": 1597092352.0, "30": 1597092352.0, "35": 1597092352.0, "40": 1597092352.0, "45": 1597092352.0, "50": 1597092352.0, "55": 1597092352.0, "60": 1597092352.0, "65": 1597092352.0, "70": 1597092352.0, "75": 1597092352.0, "80": 1597092352.0, "85": 1597092352.0, "90": 1597092352.0, "95": 1597092352.0, "100": 1597092352.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3.88381, "5": 0.28491, "10": 0.28089, "15": 0.28096, "20": 0.2941, "25": 0.29217, "30": 0.29189, "35": 0.29014, "40": 0.29008, "45": 0.28992, "50": 0.29002, "55": 0.29062, "60": 0.29185, "65": 0.28998, "70": 0.28985, "75": 0.29115, "80": 0.29089, "85": 0.29148, "90": 0.2908, "95": 0.29004, "100": 0.29109}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1620.0, "25": 2028.0, "30": 2272.0, "35": 1848.0, "40": 1954.0, "45": 2388.0, "50": 2548.0, "55": 2269.0, "60": 2744.0, "65": 2295.0, "70": 3777.0, "75": 3002.0, "80": 3528.0, "85": 3660.0, "90": 3705.0, "95": 4147.0, "100": 3569.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.86104, "5": 10.88371, "10": 10.84263, "15": 10.87936, "20": 10.87404, "25": 10.82866, "30": 10.77191, "35": 10.67622, "40": 10.517, "45": 10.28436, "50": 10.27862, "55": 10.20112, "60": 9.83306, "65": 9.26979, "70": 9.92662, "75": 9.61385, "80": 9.56419, "85": 9.74319, "90": 9.92149, "95": 9.6163, "100": 9.5087}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 284527616.0, "5": 284527616.0, "10": 284527616.0, "15": 284527616.0, "20": 416513536.0, "25": 416513536.0, "30": 416513536.0, "35": 416513536.0, "40": 416513536.0, "45": 416513536.0, "50": 416513536.0, "55": 416513536.0, "60": 416513536.0, "65": 416513536.0, "70": 416513536.0, "75": 416513536.0, "80": 416513536.0, "85": 416513536.0, "90": 416513536.0, "95": 416513536.0, "100": 416513536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1465368064.0, "5": 1465368064.0, "10": 1465368064.0, "15": 1465368064.0, "20": 1596303360.0, "25": 1596303360.0, "30": 1596303360.0, "35": 1596303360.0, "40": 1596303360.0, "45": 1596303360.0, "50": 1596303360.0, "55": 1596303360.0, "60": 1596303360.0, "65": 1596303360.0, "70": 1596303360.0, "75": 1596304896.0, "80": 1596305408.0, "85": 1596305408.0, "90": 1596305408.0, "95": 1596305408.0, "100": 1596305920.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 11.25031, "5": 0.29691, "10": 0.29639, "15": 0.29502, "20": 0.3291, "25": 0.30842, "30": 0.31824, "35": 0.31378, "40": 0.31056, "45": 0.30902, "50": 0.30807, "55": 0.30895, "60": 0.31556, "65": 0.308, "70": 0.31154, "75": 0.30756, "80": 0.314, "85": 0.3103, "90": 0.3142, "95": 0.30701, "100": 0.30658}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1620.0, "25": 2028.0, "30": 2272.0, "35": 1848.0, "40": 1954.0, "45": 2388.0, "50": 2605.0, "55": 2341.0, "60": 2883.0, "65": 2307.0, "70": 3652.0, "75": 2877.0, "80": 3537.0, "85": 3698.0, "90": 3545.0, "95": 4040.0, "100": 3472.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -46,4 +46,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.86104, "5": 10.88371, "10": 10.84263, "15": 10.87936, "20": 10.87404, "25": 10.82866, "30": 10.77191, "35": 10.67622, "40": 10.517, "45": 10.28436, "50": 10.27862, "55": 10.20113, "60": 9.83306, "65": 9.26979, "70": 9.92663, "75": 9.61385, "80": 9.56419, "85": 9.74319, "90": 9.92148, "95": 9.6163, "100": 9.5087}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 284527616.0, "5": 284527616.0, "10": 284527616.0, "15": 284527616.0, "20": 416513536.0, "25": 416513536.0, "30": 416513536.0, "35": 416513536.0, "40": 416513536.0, "45": 416513536.0, "50": 416513536.0, "55": 416513536.0, "60": 416513536.0, "65": 416513536.0, "70": 416513536.0, "75": 416513536.0, "80": 416513536.0, "85": 416513536.0, "90": 416513536.0, "95": 416513536.0, "100": 416513536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1465368064.0, "5": 1465368064.0, "10": 1465368576.0, "15": 1465368576.0, "20": 1597092352.0, "25": 1597092352.0, "30": 1597092352.0, "35": 1597092352.0, "40": 1597092352.0, "45": 1597092352.0, "50": 1597092352.0, "55": 1597092352.0, "60": 1597092352.0, "65": 1597092352.0, "70": 1597092352.0, "75": 1597092352.0, "80": 1597092352.0, "85": 1597092352.0, "90": 1597092352.0, "95": 1597092352.0, "100": 1597092352.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4.45696, "5": 0.28792, "10": 0.28811, "15": 0.28636, "20": 0.30153, "25": 0.29748, "30": 0.29505, "35": 0.29452, "40": 0.29464, "45": 0.29589, "50": 0.29474, "55": 0.29138, "60": 0.29052, "65": 0.28928, "70": 0.29165, "75": 0.29065, "80": 0.29154, "85": 0.29123, "90": 0.29106, "95": 0.29151, "100": 0.29157}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1620.0, "25": 2028.0, "30": 2272.0, "35": 1848.0, "40": 1954.0, "45": 2388.0, "50": 2548.0, "55": 2269.0, "60": 2744.0, "65": 2295.0, "70": 3777.0, "75": 3002.0, "80": 3528.0, "85": 3660.0, "90": 3705.0, "95": 4147.0, "100": 3569.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.86104, "5": 10.88371, "10": 10.84263, "15": 10.87936, "20": 10.87404, "25": 10.82866, "30": 10.77191, "35": 10.67622, "40": 10.517, "45": 10.28436, "50": 10.27862, "55": 10.20112, "60": 9.83306, "65": 9.26979, "70": 9.92662, "75": 9.61385, "80": 9.56419, "85": 9.74319, "90": 9.92149, "95": 9.6163, "100": 9.5087}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 284527616.0, "5": 284527616.0, "10": 284527616.0, "15": 284527616.0, "20": 416513536.0, "25": 416513536.0, "30": 416513536.0, "35": 416513536.0, "40": 416513536.0, "45": 416513536.0, "50": 416513536.0, "55": 416513536.0, "60": 416513536.0, "65": 416513536.0, "70": 416513536.0, "75": 416513536.0, "80": 416513536.0, "85": 416513536.0, "90": 416513536.0, "95": 416513536.0, "100": 416513536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1463270912.0, "5": 1465368576.0, "10": 1465368576.0, "15": 1465368576.0, "20": 1597092352.0, "25": 1597092352.0, "30": 1597092352.0, "35": 1597092352.0, "40": 1597092352.0, "45": 1597092352.0, "50": 1597092352.0, "55": 1597092352.0, "60": 1597092352.0, "65": 1597092352.0, "70": 1597092352.0, "75": 1597092352.0, "80": 1597092352.0, "85": 1597092352.0, "90": 1597092352.0, "95": 1597092352.0, "100": 1597092352.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.29813, "5": 0.29344, "10": 0.29293, "15": 0.29093, "20": 0.30728, "25": 0.31074, "30": 0.30695, "35": 0.30576, "40": 0.30871, "45": 0.31067, "50": 0.30611, "55": 0.3052, "60": 0.30899, "65": 0.30587, "70": 0.30945, "75": 0.30233, "80": 0.30465, "85": 0.30549, "90": 0.30363, "95": 0.30609, "100": 0.3023}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1620.0, "25": 2028.0, "30": 2272.0, "35": 1848.0, "40": 1954.0, "45": 2388.0, "50": 2605.0, "55": 2341.0, "60": 2883.0, "65": 2307.0, "70": 3652.0, "75": 2877.0, "80": 3537.0, "85": 3698.0, "90": 3545.0, "95": 4040.0, "100": 3472.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -47,4 +47,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.87346,
10.89625,
10.88939,
10.88681,
10.8893,
10.84863,
10.6962,
10.63919,
10.53931,
10.31119
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
4.95266,
0.07818,
0.07961,
0.07716,
0.08368,
0.08327,
0.08409,
0.08371,
0.08372,
0.08387
]
},
"num-zeros": {
"start_step": 0,
"end_step": 32,
"step_interval": 5,
"values": [
1300.0,
1287.0,
1565.0,
1441.0,
1419.0,
1295.0,
1177.0
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.87346, "5": 10.86718, "10": 10.85561, "15": 10.88831, "20": 10.87704, "25": 10.84986, "30": 10.76439, "35": 10.68583, "40": 10.52311, "45": 10.32331, "50": 10.29634}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 454770688.0, "5": 454770688.0, "10": 454770688.0, "15": 454770688.0, "20": 518880768.0, "25": 518880768.0, "30": 518880768.0, "35": 518880768.0, "40": 518880768.0, "45": 518880768.0, "50": 518880768.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4511150592.0, "5": 4544705536.0, "10": 4544705536.0, "15": 4544705536.0, "20": 4607767040.0, "25": 4607767040.0, "30": 4607767040.0, "35": 4607767040.0, "40": 4607767040.0, "45": 4607767040.0, "50": 4607767040.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5.60068, "5": 0.07688, "10": 0.07554, "15": 0.07566, "20": 0.33206, "25": 0.08271, "30": 0.08222, "35": 0.08267, "40": 0.08317, "45": 0.08236, "50": 0.08327}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1221.0, "25": 1129.0, "30": 1441.0, "35": 1322.0, "40": 1381.0, "45": 1282.0, "50": 1426.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.87346, 10.89625, 10.88939, 10.88681, 10.8893, 10.84864, 10.6962, 10.63918, 10.5393, 10.31119]}, "num-zeros": {"start_step": 0, "end_step": 32, "step_interval": 5, "values": [1298.0, 1352.0, 1590.0, 1403.0, 1435.0, 1266.0, 1195.0]}, "iteration_timing_avg": 0.07655911764705883}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.87346, "5": 10.86718, "10": 10.85561, "15": 10.88831, "20": 10.87704, "25": 10.84986, "30": 10.7644, "35": 10.68583, "40": 10.5231, "45": 10.32331, "50": 10.29634}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 454770688.0, "5": 454770688.0, "10": 454770688.0, "15": 454770688.0, "20": 518880768.0, "25": 518880768.0, "30": 518880768.0, "35": 518880768.0, "40": 518880768.0, "45": 518880768.0, "50": 518880768.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4511150592.0, "5": 4544705536.0, "10": 4544705536.0, "15": 4544705536.0, "20": 4607767040.0, "25": 4607767040.0, "30": 4607767040.0, "35": 4607767040.0, "40": 4607767040.0, "45": 4607767040.0, "50": 4607767040.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.97955, "5": 0.09865, "10": 0.09755, "15": 0.08417, "20": 0.09136, "25": 0.09055, "30": 0.09084, "35": 0.09134, "40": 0.09058, "45": 0.09138, "50": 0.09003}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1279.0, "25": 1219.0, "30": 1421.0, "35": 1249.0, "40": 1452.0, "45": 1336.0, "50": 1455.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -48,4 +48,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.87346,
10.89625,
10.88939,
10.88681,
10.88931,
10.84864,
10.6962,
10.63918,
10.5393,
10.31119
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
5.32064,
0.08204,
0.08233,
0.08176,
0.09748,
0.0966,
0.09648,
0.09617,
0.09604,
0.09646
]
},
"num-zeros": {
"start_step": 0,
"end_step": 32,
"step_interval": 5,
"values": [
1112.0,
1124.0,
1229.0,
1665.0,
1269.0,
1219.0,
1572.0
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.87346, "5": 10.86718, "10": 10.85561, "15": 10.88831, "20": 10.87704, "25": 10.84986, "30": 10.7644, "35": 10.68582, "40": 10.5231, "45": 10.32331, "50": 10.29634}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 911795200.0, "5": 911795200.0, "10": 911795200.0, "15": 911795200.0, "20": 1426769408.0, "25": 1426769408.0, "30": 1426769408.0, "35": 1426769408.0, "40": 1426769408.0, "45": 1426769408.0, "50": 1426769408.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4738548736.0, "5": 4772103680.0, "10": 4772103680.0, "15": 4772103680.0, "20": 5286291456.0, "25": 5286291456.0, "30": 5286291456.0, "35": 5286291456.0, "40": 5286291456.0, "45": 5286291456.0, "50": 5286291456.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3.51674, "5": 0.08141, "10": 0.08052, "15": 0.07992, "20": 0.09632, "25": 0.09637, "30": 0.09667, "35": 0.09681, "40": 0.09734, "45": 0.09652, "50": 0.09765}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1974.0, "25": 1113.0, "30": 994.0, "35": 1045.0, "40": 1324.0, "45": 1573.0, "50": 1267.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.87346, 10.89625, 10.88939, 10.88681, 10.88931, 10.84864, 10.6962, 10.63918, 10.53931, 10.31119]}, "num-zeros": {"start_step": 0, "end_step": 32, "step_interval": 5, "values": [1131.0, 1173.0, 1218.0, 1783.0, 1278.0, 1244.0, 1555.0]}, "iteration_timing_avg": 0.07975499999999999}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.87346, "5": 10.86718, "10": 10.85561, "15": 10.88831, "20": 10.87703, "25": 10.84986, "30": 10.76439, "35": 10.68583, "40": 10.5231, "45": 10.32331, "50": 10.29634}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 911795200.0, "5": 911795200.0, "10": 911795200.0, "15": 911795200.0, "20": 1426769408.0, "25": 1426769408.0, "30": 1426769408.0, "35": 1426769408.0, "40": 1426769408.0, "45": 1426769408.0, "50": 1426769408.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4738548736.0, "5": 4772103680.0, "10": 4772103680.0, "15": 4772103680.0, "20": 5286291456.0, "25": 5286291456.0, "30": 5286291456.0, "35": 5286291456.0, "40": 5286291456.0, "45": 5286291456.0, "50": 5286291456.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.38932, "5": 0.08356, "10": 0.08398, "15": 0.09924, "20": 0.09907, "25": 0.09964, "30": 0.09945, "35": 0.10076, "40": 0.09872, "45": 0.09961, "50": 0.09911}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 2075.0, "25": 1126.0, "30": 1049.0, "35": 1033.0, "40": 1364.0, "45": 1599.0, "50": 1249.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -47,4 +47,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.84009, 10.89314, 10.908, 10.87524, 10.86367, 10.83848, 10.64647, 10.62126, 10.53743, 10.24831]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2044.0, 2242.0, 2368.0, 2598.0, 2188.0, 1850.0, 2436.0]}, "iteration_timing_avg": 0.10581941176470588}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84009, "5": 10.87275, "10": 10.8333, "15": 10.87115, "20": 10.85956, "25": 10.8165, "30": 10.7379, "35": 10.66607, "40": 10.50091, "45": 10.26832, "50": 10.25759}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 685659136.0, "5": 685659136.0, "10": 685659136.0, "15": 685659136.0, "20": 1043027456.0, "25": 1043027456.0, "30": 1043027456.0, "35": 1043027456.0, "40": 1043027456.0, "45": 1043027456.0, "50": 1043027456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3187304960.0, "5": 3187305472.0, "10": 3187305472.0, "15": 3187305472.0, "20": 3544935936.0, "25": 3544935936.0, "30": 3544935936.0, "35": 3544935936.0, "40": 3544935936.0, "45": 3544935936.0, "50": 3544935936.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4.83724, "5": 0.1196, "10": 0.11844, "15": 0.11713, "20": 0.12863, "25": 0.12877, "30": 0.13001, "35": 0.12746, "40": 0.127, "45": 0.12743, "50": 0.12672}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 2206.0, "25": 1990.0, "30": 2857.0, "35": 2070.0, "40": 2038.0, "45": 2212.0, "50": 2256.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.84009, 10.89314, 10.908, 10.87524, 10.86367, 10.83848, 10.64647, 10.62126, 10.53743, 10.24831]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2044.0, 2242.0, 2368.0, 2598.0, 2188.0, 1850.0, 2436.0]}, "iteration_timing_avg": 0.10581941176470588}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84009, "5": 10.87275, "10": 10.8333, "15": 10.87115, "20": 10.85956, "25": 10.8165, "30": 10.7379, "35": 10.66607, "40": 10.50091, "45": 10.26832, "50": 10.25759}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 685659136.0, "5": 685659136.0, "10": 685659136.0, "15": 685659136.0, "20": 1043027456.0, "25": 1043027456.0, "30": 1043027456.0, "35": 1043027456.0, "40": 1043027456.0, "45": 1043027456.0, "50": 1043027456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3187304960.0, "5": 3187305472.0, "10": 3187305472.0, "15": 3187305472.0, "20": 3544935936.0, "25": 3544935936.0, "30": 3544935936.0, "35": 3544935936.0, "40": 3544935936.0, "45": 3544935936.0, "50": 3544935936.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 13.4701, "5": 0.1208, "10": 0.1223, "15": 0.11887, "20": 0.12942, "25": 0.12991, "30": 0.12979, "35": 0.12982, "40": 0.12913, "45": 0.12942, "50": 0.1287}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 2206.0, "25": 1990.0, "30": 2857.0, "35": 2070.0, "40": 2038.0, "45": 2212.0, "50": 2256.0}}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment