Commit 688448db authored by silencealiang
Browse files

更新代码

parent a02a5490
Pipeline #2503 passed with stage
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -47,4 +47,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: regular
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79311, "5": 10.83074, "10": 10.76725, "15": 10.82664, "20": 10.81793, "25": 10.76529, "30": 10.69182, "35": 10.61672, "40": 10.44907, "45": 10.21488, "50": 10.21715, "55": 10.14491, "60": 9.76806, "65": 9.20573, "70": 9.87752, "75": 9.55094, "80": 9.52283, "85": 9.7106, "90": 9.89179, "95": 9.59202, "100": 9.48543}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 586369024.0, "5": 587417600.0, "10": 587417600.0, "15": 587417600.0, "20": 869128704.0, "25": 867031552.0, "30": 867031552.0, "35": 867031552.0, "40": 867031552.0, "45": 867031552.0, "50": 869128704.0, "55": 867031552.0, "60": 867031552.0, "65": 867031552.0, "70": 867031552.0, "75": 867031552.0, "80": 869128704.0, "85": 867031552.0, "90": 867031552.0, "95": 867031552.0, "100": 867031552.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3832784384.0, "5": 3832784896.0, "10": 3832784896.0, "15": 3832784896.0, "20": 4114758144.0, "25": 4114758144.0, "30": 4114758144.0, "35": 4114758144.0, "40": 4114758144.0, "45": 4114758144.0, "50": 4114758144.0, "55": 4114758144.0, "60": 4114758144.0, "65": 4114758144.0, "70": 4114758144.0, "75": 4114758144.0, "80": 4114758144.0, "85": 4114758144.0, "90": 4114758144.0, "95": 4114758144.0, "100": 4114758144.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 15.36019, "5": 0.14748, "10": 0.14569, "15": 0.14722, "20": 0.15678, "25": 0.15572, "30": 0.15085, "35": 0.15125, "40": 0.15141, "45": 0.15202, "50": 0.14925, "55": 0.14768, "60": 0.14952, "65": 0.15001, "70": 0.15024, "75": 0.14973, "80": 0.14933, "85": 0.1492, "90": 0.14942, "95": 0.14927, "100": 0.14832}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1846.0, "25": 2348.0, "30": 2490.0, "35": 2010.0, "40": 
2016.0, "45": 2642.0, "50": 2810.0, "55": 2481.0, "60": 2945.0, "65": 2329.0, "70": 3673.0, "75": 3016.0, "80": 3642.0, "85": 4122.0, "90": 3744.0, "95": 4035.0, "100": 3447.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79311, "5": 10.83074, "10": 10.76725, "15": 10.82664, "20": 10.81793, "25": 10.76529, "30": 10.69182, "35": 10.61672, "40": 10.44907, "45": 10.21488, "50": 10.21715, "55": 10.14491, "60": 9.76806, "65": 9.20573, "70": 9.87752, "75": 9.55094, "80": 9.52283, "85": 9.7106, "90": 9.89179, "95": 9.59202, "100": 9.48543}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 586369024.0, "5": 587417600.0, "10": 587417600.0, "15": 587417600.0, "20": 869128704.0, "25": 869128704.0, "30": 869128704.0, "35": 869128704.0, "40": 869128704.0, "45": 869128704.0, "50": 869128704.0, "55": 869128704.0, "60": 869128704.0, "65": 869128704.0, "70": 869128704.0, "75": 869128704.0, "80": 869128704.0, "85": 869128704.0, "90": 869128704.0, "95": 869128704.0, "100": 869128704.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3832784384.0, "5": 3832784896.0, "10": 3832784896.0, "15": 3832784896.0, "20": 4114758144.0, "25": 4114758144.0, "30": 4114758144.0, "35": 4114758144.0, "40": 4114758144.0, "45": 4114758144.0, "50": 4114758144.0, "55": 4114758144.0, "60": 4114758144.0, "65": 4114758144.0, "70": 4114758144.0, "75": 4114758144.0, "80": 4114758144.0, "85": 4114758144.0, "90": 4114758144.0, "95": 4114758144.0, "100": 4114758144.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 17.6616, "5": 0.15013, "10": 0.15207, "15": 0.15146, "20": 0.15882, "25": 0.15823, "30": 0.15777, "35": 0.15885, "40": 0.15922, "45": 0.15588, "50": 0.15635, "55": 0.15588, "60": 0.15681, "65": 0.15688, "70": 0.15648, "75": 0.15793, "80": 0.15889, "85": 0.15769, "90": 0.15693, "95": 0.15611, "100": 0.15689}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1846.0, "25": 2348.0, "30": 2490.0, "35": 2010.0, "40": 
2016.0, "45": 2642.0, "50": 2810.0, "55": 2481.0, "60": 2945.0, "65": 2329.0, "70": 3673.0, "75": 3016.0, "80": 3642.0, "85": 4122.0, "90": 3744.0, "95": 4035.0, "100": 3447.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -48,4 +48,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.85929, 10.89211, 10.87639, 10.86988, 10.88179, 10.83898, 10.66589, 10.62691, 10.52461, 10.25708]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2458.0, 2527.0, 2467.0, 2148.0, 2250.0, 2467.0, 2528.0]}, "iteration_timing_avg": 0.14292588235294112}
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.85929, "5": 10.87929, "10": 10.84772, "15": 10.86867, "20": 10.87317, "25": 10.83338, "30": 10.75624, "35": 10.66844, "40": 10.50171, "45": 10.28002, "50": 10.25621}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 379427840.0, "5": 378379264.0, "10": 378903552.0, "15": 378379264.0, "20": 561597952.0, "25": 561073664.0, "30": 561597952.0, "35": 561597952.0, "40": 561597952.0, "45": 561597952.0, "50": 561597952.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1704025600.0, "5": 1704026112.0, "10": 1704026112.0, "15": 1704026112.0, "20": 1886196224.0, "25": 1886196224.0, "30": 1886196224.0, "35": 1886196224.0, "40": 1886196224.0, "45": 1886196224.0, "50": 1886196224.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.22765, "5": 0.19439, "10": 0.19327, "15": 0.19227, "20": 0.20227, "25": 0.20323, "30": 0.2014, "35": 0.20216, "40": 0.20166, "45": 0.20072, "50": 0.19941}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1808.0, "25": 2385.0, "30": 2591.0, "35": 1997.0, "40": 1959.0, "45": 2368.0, "50": 3073.0}}}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.85929, 10.89211, 10.87639, 10.86988, 10.88179, 10.83898, 10.66589, 10.62691, 10.52461, 10.25708]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2458.0, 2527.0, 2467.0, 2148.0, 2250.0, 2467.0, 2528.0]}, "iteration_timing_avg": 0.14292588235294112}
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.85929, "5": 10.87929, "10": 10.84772, "15": 10.86867, "20": 10.87317, "25": 10.83338, "30": 10.75624, "35": 10.66844, "40": 10.50171, "45": 10.28002, "50": 10.25621}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 380476416.0, "5": 380476416.0, "10": 378903552.0, "15": 380476416.0, "20": 560549376.0, "25": 560549376.0, "30": 560549376.0, "35": 560549376.0, "40": 560287232.0, "45": 560549376.0, "50": 560549376.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1704025600.0, "5": 1704026112.0, "10": 1704026112.0, "15": 1704026112.0, "20": 1884099072.0, "25": 1884099072.0, "30": 1884099072.0, "35": 1884099072.0, "40": 1884361216.0, "45": 1884361216.0, "50": 1884361216.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.59076, "5": 0.20078, "10": 0.20046, "15": 0.19967, "20": 0.20892, "25": 0.20876, "30": 0.2082, "35": 0.2082, "40": 0.21131, "45": 0.21272, "50": 0.21012}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1808.0, "25": 2385.0, "30": 2591.0, "35": 1997.0, "40": 1959.0, "45": 2368.0, "50": 3073.0}}}
\ No newline at end of file
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 50
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -46,4 +46,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: regular
......@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -47,4 +47,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--fp16: true
--apply-query-key-layer-scaling: true
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
......@@ -34,8 +34,8 @@ MODEL_ARGS:
--train-iters: 100
--timing-log-level: 2
--lr-decay-iters: 320000
--save: ${CHECKPOINT_PATH}
--load: ${CHECKPOINT_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--data-path: ${DATA_PATH}/my-gpt3_00_text_document
--vocab-file: ${DATA_PATH}/bpe/vocab.json
--merge-file: ${DATA_PATH}/bpe/merges.txt
......@@ -63,4 +63,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -76,8 +76,8 @@ MODEL_ARGS:
--eval-iters: 32
--eval-interval: 200
# Add checkpointing args
--load: ${OUTPUT_PATH}/checkpoints
--save: ${OUTPUT_PATH}/checkpoints
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--save-interval: 5000
# Add initialization args
--init-method-std: 0.010
......
......@@ -76,8 +76,8 @@ MODEL_ARGS:
--eval-iters: 32
--eval-interval: 200
# Add checkpointing args
--load: ${OUTPUT_PATH}/checkpoints
--save: ${OUTPUT_PATH}/checkpoints
--save: ${CHECKPOINT_SAVE_PATH}
--load: ${CHECKPOINT_LOAD_PATH}
--save-interval: 500
# Add initialization args
--init-method-std: 0.010
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment