Commit 688448db authored by silencealiang's avatar silencealiang
Browse files

更新代码

parent a02a5490
Pipeline #2503 passed with stage
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.82005, 10.87449, 10.87798, 10.79509, 10.68164, 10.59517, 10.10046, 10.21236, 10.13863, 9.80877]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1559.0, 1719.0, 1856.0, 1791.0, 1900.0, 1709.0, 1627.0, 1831.0, 2272.0, 2312.0]}, "iteration_timing_avg": 0.12502588235294115}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.79229, "20": 10.69211, "25": 10.52412, "30": 10.34552, "35": 10.26242, "40": 10.07239, "45": 9.811, "50": 9.88415}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1848.0, "20": 1601.0, "25": 1635.0, "30": 1908.0, "35": 1925.0, "40": 2126.0, "45": 2086.0, "50": 2298.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 733859840.0, "5": 733859840.0, "10": 733859840.0, "15": 733859840.0, "20": 733859840.0, "25": 733859840.0, "30": 733859840.0, "35": 733859840.0, "40": 733859840.0, "45": 733859840.0, "50": 733859840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 17.71283, "5": 0.16448, "10": 0.16446, "15": 0.16389, "20": 0.16438, "25": 0.15866, "30": 0.15768, "35": 0.15941, "40": 0.15987, "45": 0.16075, "50": 0.16301}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -48,4 +48,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.82005,
10.87447,
10.87799,
10.79507,
10.68165,
10.59511,
10.10047,
10.2124,
10.13861,
9.80876
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
1562.0,
1738.0,
1852.0,
1802.0,
1917.0,
1765.0,
1570.0,
1949.0,
2251.0,
2270.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
14.96968,
0.16347,
0.16403,
0.16317,
0.162,
0.16129,
0.16268,
0.16156,
0.16212,
0.16407
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85286, "10": 10.7845, "15": 10.79231, "20": 10.6921, "25": 10.52408, "30": 10.34555, "35": 10.26239, "40": 10.07241, "45": 9.81101, "50": 9.88416}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1562.0, "5": 1861.0, "10": 1339.0, "15": 1948.0, "20": 1698.0, "25": 1687.0, "30": 1930.0, "35": 1927.0, "40": 2061.0, "45": 2060.0, "50": 2330.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522976256.0, "5": 522976256.0, "10": 522976256.0, "15": 522976256.0, "20": 522976256.0, "25": 522976256.0, "30": 522976256.0, "35": 522976256.0, "40": 522976256.0, "45": 522976256.0, "50": 522976256.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768845312.0, "5": 3912737792.0, "10": 3912737792.0, "15": 3912737792.0, "20": 3912737792.0, "25": 3912737792.0, "30": 3912737792.0, "35": 3912737792.0, "40": 3912737792.0, "45": 3912737792.0, "50": 3912737792.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 19.16738, "5": 0.16325, "10": 0.16427, "15": 0.16183, "20": 0.16039, "25": 0.16182, "30": 0.16047, "35": 0.16389, "40": 0.15815, "45": 0.15745, "50": 0.15915}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.82005, 10.87449, 10.87798, 10.79511, 10.68164, 10.59513, 10.10043, 10.21239, 10.13865, 9.80879]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1559.0, 1719.0, 1856.0, 1735.0, 1873.0, 1765.0, 1535.0, 1910.0, 2278.0, 2247.0]}, "iteration_timing_avg": 0.12168999999999999}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.7923, "20": 10.69211, "25": 10.52414, "30": 10.34555, "35": 10.2624, "40": 10.07237, "45": 9.81103, "50": 9.88417}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1850.0, "20": 1668.0, "25": 1607.0, "30": 1945.0, "35": 1860.0, "40": 2022.0, "45": 2042.0, "50": 2292.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523003904.0, "5": 523003904.0, "10": 523003904.0, "15": 523003904.0, "20": 523003904.0, "25": 523003904.0, "30": 523003904.0, "35": 523003904.0, "40": 523003904.0, "45": 523003904.0, "50": 523003904.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768872960.0, "5": 3912765440.0, "10": 3912765440.0, "15": 3912765440.0, "20": 3912765440.0, "25": 3912765440.0, "30": 3912765440.0, "35": 3912765440.0, "40": 3912765440.0, "45": 3912765440.0, "50": 3912765440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 18.09905, "5": 0.16241, "10": 0.16341, "15": 0.15828, "20": 0.15929, "25": 0.15899, "30": 0.16171, "35": 0.15966, "40": 0.15804, "45": 0.15972, "50": 0.15901}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -50,4 +50,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.82005,
10.87447,
10.87799,
10.79507,
10.68165,
10.59511,
10.10047,
10.2124,
10.13861,
9.80876
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
1562.0,
1738.0,
1852.0,
1802.0,
1917.0,
1765.0,
1570.0,
1949.0,
2251.0,
2270.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
17.23575,
0.17553,
0.34737,
0.17165,
0.32526,
0.17081,
0.32706,
0.17037,
0.3321,
0.16992
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85286, "10": 10.7845, "15": 10.79231, "20": 10.6921, "25": 10.52408, "30": 10.34555, "35": 10.26239, "40": 10.07241, "45": 9.81101, "50": 9.88416}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1562.0, "5": 1861.0, "10": 1339.0, "15": 1948.0, "20": 1698.0, "25": 1687.0, "30": 1930.0, "35": 1927.0, "40": 2061.0, "45": 2060.0, "50": 2330.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522976256.0, "5": 522976256.0, "10": 522976256.0, "15": 522976256.0, "20": 522976256.0, "25": 522976256.0, "30": 522976256.0, "35": 522976256.0, "40": 522976256.0, "45": 522976256.0, "50": 522976256.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768845312.0, "5": 3912737792.0, "10": 3912737792.0, "15": 3912737792.0, "20": 3912737792.0, "25": 3912737792.0, "30": 3912737792.0, "35": 3912737792.0, "40": 3912737792.0, "45": 3912737792.0, "50": 3912737792.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 18.07048, "5": 0.17182, "10": 0.16227, "15": 0.16202, "20": 0.16214, "25": 0.16227, "30": 0.16231, "35": 0.16221, "40": 0.16257, "45": 0.16117, "50": 0.16119}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.82005, 10.87449, 10.87798, 10.79511, 10.68164, 10.59513, 10.10043, 10.21239, 10.13865, 9.80879]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1559.0, 1719.0, 1856.0, 1735.0, 1873.0, 1765.0, 1535.0, 1910.0, 2278.0, 2247.0]}, "iteration_timing_avg": 0.12873676470588236}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.7923, "20": 10.69211, "25": 10.52414, "30": 10.34555, "35": 10.2624, "40": 10.07237, "45": 9.81103, "50": 9.88417}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1850.0, "20": 1668.0, "25": 1607.0, "30": 1945.0, "35": 1860.0, "40": 2022.0, "45": 2042.0, "50": 2292.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523003904.0, "5": 523003904.0, "10": 523003904.0, "15": 523003904.0, "20": 523003904.0, "25": 523003904.0, "30": 523003904.0, "35": 523003904.0, "40": 523003904.0, "45": 523003904.0, "50": 523003904.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768872960.0, "5": 3912765440.0, "10": 3912765440.0, "15": 3912765440.0, "20": 3912765440.0, "25": 3912765440.0, "30": 3912765440.0, "35": 3912765440.0, "40": 3912765440.0, "45": 3912765440.0, "50": 3912765440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 15.80767, "5": 0.16165, "10": 0.16385, "15": 0.16229, "20": 0.16237, "25": 0.1618, "30": 0.1643, "35": 0.16116, "40": 0.16294, "45": 0.16266, "50": 0.16228}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -44,6 +44,7 @@ MODEL_ARGS:
--overlap-grad-reduce: true
--overlap-param-gather: true
--check-weight-hash-across-dp-replicas-interval: 10
--disable-gloo-process-groups: true
--ckpt-fully-parallel-load: true
--deterministic-mode: true
--no-gradient-accumulation-fusion: true
......@@ -53,4 +54,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.82005,
10.87447,
10.87799,
10.79508,
10.68163,
10.59514,
10.10047,
10.21237,
10.13864,
9.80877
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
1562.0,
1738.0,
1852.0,
1796.0,
1869.0,
1788.0,
1517.0,
1941.0,
2226.0,
2214.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
17.43169,
0.16677,
0.33581,
0.16498,
0.33103,
0.16418,
0.33146,
0.16539,
0.33075,
0.1651
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85286, "10": 10.7845, "15": 10.79231, "20": 10.69208, "25": 10.52411, "30": 10.34557, "35": 10.2624, "40": 10.07239, "45": 9.811, "50": 9.8842}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1562.0, "5": 1861.0, "10": 1339.0, "15": 1964.0, "20": 1696.0, "25": 1558.0, "30": 1887.0, "35": 1887.0, "40": 2113.0, "45": 2114.0, "50": 2342.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522977280.0, "5": 522977280.0, "10": 522977280.0, "15": 522977280.0, "20": 522977280.0, "25": 522977280.0, "30": 522977280.0, "35": 522977280.0, "40": 522977280.0, "45": 522977280.0, "50": 522977280.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768846336.0, "5": 3912738816.0, "10": 3912738816.0, "15": 3912738816.0, "20": 3912738816.0, "25": 3912738816.0, "30": 3912738816.0, "35": 3912738816.0, "40": 3912738816.0, "45": 3912738816.0, "50": 3912738816.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 19.10362, "5": 0.16434, "10": 0.1658, "15": 0.16354, "20": 0.16555, "25": 0.16274, "30": 0.16422, "35": 0.16143, "40": 0.16856, "45": 0.16893, "50": 0.16867}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.82005, 10.87449, 10.87799, 10.79508, 10.68166, 10.59514, 10.10042, 10.21238, 10.13865, 9.80879]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1559.0, 1719.0, 1857.0, 1746.0, 1883.0, 1738.0, 1475.0, 1851.0, 2303.0, 2258.0]}, "iteration_timing_avg": 0.12873676470588236}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78451, "15": 10.79227, "20": 10.69215, "25": 10.52412, "30": 10.34553, "35": 10.26239, "40": 10.07239, "45": 9.81101, "50": 9.8842}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1336.0, "15": 1910.0, "20": 1640.0, "25": 1694.0, "30": 1894.0, "35": 1955.0, "40": 2147.0, "45": 2157.0, "50": 2389.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523004928.0, "5": 523004928.0, "10": 523004928.0, "15": 523004928.0, "20": 523004928.0, "25": 523004928.0, "30": 523004928.0, "35": 523004928.0, "40": 523004928.0, "45": 523004928.0, "50": 523004928.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768873984.0, "5": 3912766464.0, "10": 3912766464.0, "15": 3912766464.0, "20": 3912766464.0, "25": 3912766464.0, "30": 3912766464.0, "35": 3912766464.0, "40": 3912766464.0, "45": 3912766464.0, "50": 3912766464.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 21.58641, "5": 0.16306, "10": 0.16416, "15": 0.16288, "20": 0.16323, "25": 0.1694, "30": 0.16231, "35": 0.16648, "40": 0.16317, "45": 0.16593, "50": 0.16425}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -54,4 +54,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{
"lm loss": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
10.9359,
10.93551,
10.9424,
10.88073,
10.75652,
10.66333,
10.16716,
10.27244,
10.19575,
9.86005
]
},
"num-zeros": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
22727668.0,
23021008.0,
22501280.0,
22830020.0,
22739656.0,
22548262.0,
22955680.0,
22589964.0,
22660156.0,
22884572.0
]
},
"iteration-time": {
"start_step": 0,
"end_step": 50,
"step_interval": 5,
"values": [
16.12696,
0.16574,
0.16735,
0.16507,
0.1657,
0.16626,
0.16614,
0.16517,
0.16625,
0.16568
]
}
}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.9359, "5": 10.9322, "10": 10.91082, "15": 10.85725, "20": 10.7709, "25": 10.60557, "30": 10.40545, "35": 10.31363, "40": 10.12334, "45": 9.87564, "50": 9.94453}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727668.0, "5": 22715306.0, "10": 22919026.0, "15": 22821242.0, "20": 22693800.0, "25": 22819536.0, "30": 22631092.0, "35": 22787886.0, "40": 22658198.0, "45": 22674644.0, "50": 22904428.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 521936896.0, "5": 521936896.0, "10": 521936896.0, "15": 521936896.0, "20": 521936896.0, "25": 521936896.0, "30": 521936896.0, "35": 521936896.0, "40": 521936896.0, "45": 521936896.0, "50": 521936896.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3769915392.0, "5": 3914746880.0, "10": 3914746880.0, "15": 3914746880.0, "20": 3914746880.0, "25": 3914746880.0, "30": 3914746880.0, "35": 3914746880.0, "40": 3914746880.0, "45": 3914746880.0, "50": 3914746880.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 14.90437, "5": 0.15853, "10": 0.15748, "15": 0.15817, "20": 0.15827, "25": 0.1568, "30": 0.1606, "35": 0.16038, "40": 0.15929, "45": 0.16015, "50": 0.17077}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.9359, 10.93547, 10.94238, 10.88073, 10.75653, 10.66332, 10.1672, 10.27241, 10.19577, 9.86006]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [22727686.0, 23020980.0, 22501260.0, 22830024.0, 22739772.0, 22548148.0, 22955712.0, 22589816.0, 22660000.0, 22884332.0]}, "iteration_timing_avg": 0.12799705882352944}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.9359, "5": 10.93225, "10": 10.91081, "15": 10.85723, "20": 10.77091, "25": 10.60558, "30": 10.40544, "35": 10.31364, "40": 10.12333, "45": 9.8756, "50": 9.94451}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727686.0, "5": 22715312.0, "10": 22919004.0, "15": 22821282.0, "20": 22693812.0, "25": 22819580.0, "30": 22631132.0, "35": 22787906.0, "40": 22658304.0, "45": 22674764.0, "50": 22904438.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523016192.0, "5": 523016192.0, "10": 523016192.0, "15": 523016192.0, "20": 523016192.0, "25": 523016192.0, "30": 523016192.0, "35": 523016192.0, "40": 523016192.0, "45": 523016192.0, "50": 523016192.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3769943040.0, "5": 3914774528.0, "10": 3914774528.0, "15": 3914774528.0, "20": 3914774528.0, "25": 3914774528.0, "30": 3914774528.0, "35": 3914774528.0, "40": 3914774528.0, "45": 3914774528.0, "50": 3914774528.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 19.24942, "5": 0.158, "10": 0.15909, "15": 0.15799, "20": 0.15892, "25": 0.15911, "30": 0.15833, "35": 0.15767, "40": 0.15693, "45": 0.16146, "50": 0.15756}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -51,4 +51,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81873, "5": 10.85264, "10": 10.78415, "15": 10.7931, "20": 10.6921, "25": 10.52359, "30": 10.34496, "35": 10.25889, "40": 10.07079, "45": 9.80318, "50": 9.87688, "55": 9.85528, "60": 9.46661, "65": 8.91692, "70": 9.69269, "75": 9.37788, "80": 9.36796, "85": 9.576, "90": 9.77252, "95": 9.46897, "100": 9.34559}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1541.0, "5": 1835.0, "10": 1425.0, "15": 1935.0, "20": 1728.0, "25": 1634.0, "30": 1899.0, "35": 1945.0, "40": 2144.0, "45": 2092.0, "50": 2322.0, "55": 2333.0, "60": 2386.0, "65": 2636.0, "70": 3071.0, "75": 2522.0, "80": 3165.0, "85": 3334.0, "90": 2941.0, "95": 3321.0, "100": 3378.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 731763200.0, "5": 731763200.0, "10": 731763200.0, "15": 731763200.0, "20": 731763200.0, "25": 731763200.0, "30": 731763200.0, "35": 731763200.0, "40": 731763200.0, "45": 731763200.0, "50": 731763200.0, "55": 731763200.0, "60": 731763200.0, "65": 731763200.0, "70": 731763200.0, "75": 731763200.0, "80": 731763200.0, "85": 731763200.0, "90": 731763200.0, "95": 731763200.0, "100": 731763200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2368927744.0, "5": 2649590784.0, "10": 2649590784.0, "15": 2649590784.0, "20": 2649590784.0, "25": 2649590784.0, "30": 2649590784.0, "35": 2649590784.0, "40": 2649590784.0, "45": 2649590784.0, "50": 2649590784.0, "55": 2649590784.0, "60": 2649590784.0, "65": 2649590784.0, "70": 2649590784.0, "75": 2649590784.0, "80": 2649590784.0, "85": 2649590784.0, "90": 2649590784.0, "95": 2649590784.0, "100": 2649590784.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 14.75021, "5": 0.15575, "10": 0.15818, "15": 0.15592, "20": 0.15584, "25": 0.15211, "30": 0.15253, "35": 0.15336, "40": 0.15465, "45": 0.1517, "50": 0.16501, "55": 0.16299, "60": 0.1657, "65": 0.16693, "70": 0.15946, "75": 0.15155, "80": 0.15175, "85": 0.15073, "90": 0.14954, "95": 0.14899, "100": 0.14722}}}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85285, "10": 10.78449, "15": 10.79226, "20": 10.69196, "25": 10.52317, "30": 10.34507, "35": 10.25889, "40": 10.07027, "45": 9.80301, "50": 9.87673, "55": 9.85527, "60": 9.46636, "65": 8.9166, "70": 9.69277, "75": 9.37814, "80": 9.368, "85": 9.57597, "90": 9.77245, "95": 9.46913, "100": 9.34575}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1559.0, "5": 1915.0, "10": 1361.0, "15": 1831.0, "20": 1695.0, "25": 1596.0, "30": 1821.0, "35": 1872.0, "40": 2121.0, "45": 2090.0, "50": 2395.0, "55": 2324.0, "60": 2357.0, "65": 2606.0, "70": 3130.0, "75": 2556.0, "80": 3224.0, "85": 3412.0, "90": 2988.0, "95": 3347.0, "100": 3383.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 733859840.0, "5": 733859840.0, "10": 733859840.0, "15": 733859840.0, "20": 733859840.0, "25": 733859840.0, "30": 733859840.0, "35": 733859840.0, "40": 733859840.0, "45": 733859840.0, "50": 733859840.0, "55": 733859840.0, "60": 733859840.0, "65": 733859840.0, "70": 733859840.0, "75": 733859840.0, "80": 733859840.0, "85": 733859840.0, "90": 733859840.0, "95": 733859840.0, "100": 733859840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0, "55": 4122703872.0, "60": 4122703872.0, "65": 4122703872.0, "70": 4122703872.0, "75": 4122703872.0, "80": 4122703872.0, "85": 4122703872.0, "90": 4122703872.0, "95": 4122703872.0, "100": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 18.81818, "5": 0.15605, "10": 0.15612, "15": 0.15668, "20": 0.15734, "25": 0.15595, "30": 0.15634, "35": 0.15597, "40": 0.15654, "45": 0.15538, "50": 0.15456, "55": 0.15493, "60": 0.15593, "65": 0.15527, "70": 0.15564, "75": 0.15555, "80": 0.15422, "85": 0.1551, "90": 0.1533, "95": 0.15475, "100": 0.15459}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -48,4 +48,5 @@ MODEL_ARGS:
--ckpt-format: torch
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81873, "5": 10.85262, "10": 10.78413, "15": 10.79311, "20": 10.69219, "25": 10.52454, "30": 10.34542, "35": 10.26245, "40": 10.07286, "45": 9.8112, "50": 9.88428, "55": 9.86376, "60": 9.47981, "65": 8.93093, "70": 9.71205, "75": 9.4002, "80": 9.39074, "85": 9.60143, "90": 9.8051, "95": 9.5081, "100": 9.39221}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1541.0, "5": 1912.0, "10": 1317.0, "15": 1921.0, "20": 1595.0, "25": 1666.0, "30": 1933.0, "35": 1920.0, "40": 2094.0, "45": 2101.0, "50": 2362.0, "55": 2269.0, "60": 2379.0, "65": 2624.0, "70": 3128.0, "75": 2551.0, "80": 3192.0, "85": 3503.0, "90": 2966.0, "95": 3326.0, "100": 3383.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 731763200.0, "5": 731763200.0, "10": 731763200.0, "15": 731763200.0, "20": 731763200.0, "25": 731763200.0, "30": 731763200.0, "35": 731763200.0, "40": 731763200.0, "45": 731763200.0, "50": 731763200.0, "55": 731763200.0, "60": 731763200.0, "65": 731763200.0, "70": 731763200.0, "75": 731763200.0, "80": 731763200.0, "85": 731763200.0, "90": 731763200.0, "95": 731763200.0, "100": 731763200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2368927744.0, "5": 2649590784.0, "10": 2649590784.0, "15": 2649590784.0, "20": 2649590784.0, "25": 2649590784.0, "30": 2649590784.0, "35": 2649590784.0, "40": 2649590784.0, "45": 2649590784.0, "50": 2649590784.0, "55": 2649590784.0, "60": 2649590784.0, "65": 2649590784.0, "70": 2649590784.0, "75": 2649590784.0, "80": 2649590784.0, "85": 2649590784.0, "90": 2649590784.0, "95": 2649590784.0, "100": 2649590784.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.11545, "5": 0.1692, "10": 0.16514, "15": 0.16462, "20": 0.15963, "25": 0.16538, "30": 0.1555, "35": 0.15427, "40": 0.15505, "45": 0.15352, "50": 0.15187, "55": 0.15496, "60": 0.15039, "65": 0.15056, "70": 0.14765, "75": 0.15137, "80": 0.15663, "85": 0.16052, "90": 0.15557, "95": 0.15598, "100": 0.15273}}}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.79229, "20": 10.69211, "25": 10.52412, "30": 10.34552, "35": 10.26242, "40": 10.07239, "45": 9.811, "50": 9.88415, "55": 9.86374, "60": 9.47965, "65": 8.93065, "70": 9.71216, "75": 9.40049, "80": 9.39075, "85": 9.6014, "90": 9.80503, "95": 9.50817, "100": 9.39236}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1848.0, "20": 1601.0, "25": 1635.0, "30": 1908.0, "35": 1925.0, "40": 2126.0, "45": 2086.0, "50": 2298.0, "55": 2284.0, "60": 2337.0, "65": 2636.0, "70": 3136.0, "75": 2539.0, "80": 3253.0, "85": 3363.0, "90": 3004.0, "95": 3333.0, "100": 3447.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 733859840.0, "5": 733859840.0, "10": 733859840.0, "15": 733859840.0, "20": 733859840.0, "25": 733859840.0, "30": 733859840.0, "35": 733859840.0, "40": 733859840.0, "45": 733859840.0, "50": 733859840.0, "55": 733859840.0, "60": 733859840.0, "65": 733859840.0, "70": 733859840.0, "75": 733859840.0, "80": 733859840.0, "85": 733859840.0, "90": 733859840.0, "95": 733859840.0, "100": 733859840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0, "55": 4122703872.0, "60": 4122703872.0, "65": 4122703872.0, "70": 4122703872.0, "75": 4122703872.0, "80": 4122703872.0, "85": 4122703872.0, "90": 4122703872.0, "95": 4122703872.0, "100": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 22.44598, "5": 0.17072, "10": 0.16018, "15": 0.16147, "20": 0.15588, "25": 0.15643, "30": 0.15744, "35": 0.15702, "40": 0.15705, "45": 0.15718, "50": 0.15547, "55": 0.1569, "60": 0.1592, "65": 0.1591, "70": 0.15725, "75": 0.1566, "80": 0.15569, "85": 0.15565, "90": 0.15537, "95": 0.15899, "100": 0.15823}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -49,4 +49,5 @@ MODEL_ARGS:
--ckpt-format: torch_dist
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment