Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
688448db
Commit
688448db
authored
Mar 14, 2025
by
silencealiang
Browse files
更新代码
parent
a02a5490
Pipeline
#2503
passed with stage
Changes
823
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
80 additions
and
625 deletions
+80
-625
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
...ist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
...ist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
...rch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
+51
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
...ist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
...ist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
...rch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
+3
-2
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
...ch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
...ch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
...e_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
+3
-2
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
...tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
...tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
...core_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
+3
-2
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev.json
...t/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev.json
+1
-612
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts.json
...t/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts.json
+1
-1
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
...es/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+3
-2
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_dev.json
...tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_dev.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_lts.json
...tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_lts.json
+1
-0
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
...r_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+3
-2
tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev.json
.../gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev.json
+1
-1
tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts.json
.../gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts.json
+1
-1
No files found.
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.85831
,
"5"
:
10.87279
,
"10"
:
10.83267
,
"15"
:
10.82104
,
"20"
:
10.71376
,
"25"
:
10.54763
,
"30"
:
10.36782
,
"35"
:
10.2846
,
"40"
:
10.08923
,
"45"
:
9.84556
,
"50"
:
9.91944
,
"55"
:
9.89194
,
"60"
:
9.5082
,
"65"
:
8.9595
,
"70"
:
9.73443
,
"75"
:
9.43114
,
"80"
:
9.41103
,
"85"
:
9.61515
,
"90"
:
9.82371
,
"95"
:
9.5226
,
"100"
:
9.40801
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1758.0
,
"5"
:
2093.0
,
"10"
:
1539.0
,
"15"
:
2026.0
,
"20"
:
1800.0
,
"25"
:
1786.0
,
"30"
:
2071.0
,
"35"
:
2219.0
,
"40"
:
2402.0
,
"45"
:
2268.0
,
"50"
:
2714.0
,
"55"
:
2588.0
,
"60"
:
2760.0
,
"65"
:
2831.0
,
"70"
:
3489.0
,
"75"
:
2724.0
,
"80"
:
3683.0
,
"85"
:
3637.0
,
"90"
:
3411.0
,
"95"
:
3592.0
,
"100"
:
3642.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
232398336.0
,
"5"
:
232398336.0
,
"10"
:
232398336.0
,
"15"
:
232398336.0
,
"20"
:
232398336.0
,
"25"
:
232398336.0
,
"30"
:
232398336.0
,
"35"
:
232398336.0
,
"40"
:
232398336.0
,
"45"
:
232398336.0
,
"50"
:
232398336.0
,
"55"
:
232398336.0
,
"60"
:
232398336.0
,
"65"
:
232398336.0
,
"70"
:
232398336.0
,
"75"
:
232398336.0
,
"80"
:
232398336.0
,
"85"
:
232398336.0
,
"90"
:
232398336.0
,
"95"
:
232398336.0
,
"100"
:
232398336.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
682342912.0
,
"5"
:
773245440.0
,
"10"
:
773245440.0
,
"15"
:
773245440.0
,
"20"
:
773245440.0
,
"25"
:
773246464.0
,
"30"
:
773246464.0
,
"35"
:
773246464.0
,
"40"
:
773246464.0
,
"45"
:
773246464.0
,
"50"
:
773246464.0
,
"55"
:
773246464.0
,
"60"
:
773246464.0
,
"65"
:
773246464.0
,
"70"
:
773246464.0
,
"75"
:
773246464.0
,
"80"
:
773246464.0
,
"85"
:
773246464.0
,
"90"
:
775342080.0
,
"95"
:
775342080.0
,
"100"
:
775342080.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
9.25721
,
"5"
:
0.297
,
"10"
:
0.2962
,
"15"
:
0.29314
,
"20"
:
0.29254
,
"25"
:
0.29368
,
"30"
:
0.29285
,
"35"
:
0.2939
,
"40"
:
0.29424
,
"45"
:
0.29981
,
"50"
:
0.29991
,
"55"
:
0.28268
,
"60"
:
0.2813
,
"65"
:
0.28183
,
"70"
:
0.28205
,
"75"
:
0.28103
,
"80"
:
0.28125
,
"85"
:
0.28141
,
"90"
:
0.28129
,
"95"
:
0.28133
,
"100"
:
0.28055
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.8583
,
"5"
:
10.87284
,
"10"
:
10.83264
,
"15"
:
10.82102
,
"20"
:
10.71379
,
"25"
:
10.54766
,
"30"
:
10.3679
,
"35"
:
10.28457
,
"40"
:
10.08925
,
"45"
:
9.84556
,
"50"
:
9.91943
,
"55"
:
9.89191
,
"60"
:
9.50823
,
"65"
:
8.95947
,
"70"
:
9.73446
,
"75"
:
9.43115
,
"80"
:
9.411
,
"85"
:
9.61516
,
"90"
:
9.82374
,
"95"
:
9.52257
,
"100"
:
9.408
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1693.0
,
"5"
:
2113.0
,
"10"
:
1534.0
,
"15"
:
2023.0
,
"20"
:
1755.0
,
"25"
:
1764.0
,
"30"
:
2036.0
,
"35"
:
2228.0
,
"40"
:
2447.0
,
"45"
:
2332.0
,
"50"
:
2745.0
,
"55"
:
2594.0
,
"60"
:
2725.0
,
"65"
:
2901.0
,
"70"
:
3493.0
,
"75"
:
2725.0
,
"80"
:
3691.0
,
"85"
:
3596.0
,
"90"
:
3410.0
,
"95"
:
3607.0
,
"100"
:
3719.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
232422400.0
,
"5"
:
232422400.0
,
"10"
:
232422400.0
,
"15"
:
232422400.0
,
"20"
:
232422400.0
,
"25"
:
232422400.0
,
"30"
:
232422400.0
,
"35"
:
232422400.0
,
"40"
:
232422400.0
,
"45"
:
232422400.0
,
"50"
:
232422400.0
,
"55"
:
232422400.0
,
"60"
:
232422400.0
,
"65"
:
232422400.0
,
"70"
:
232422400.0
,
"75"
:
232422400.0
,
"80"
:
232422400.0
,
"85"
:
232422400.0
,
"90"
:
232422400.0
,
"95"
:
232422400.0
,
"100"
:
232422400.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
13.16523
,
"5"
:
0.31605
,
"10"
:
0.28733
,
"15"
:
0.28667
,
"20"
:
0.28015
,
"25"
:
0.31509
,
"30"
:
0.28969
,
"35"
:
0.28728
,
"40"
:
0.29047
,
"45"
:
0.28331
,
"50"
:
0.28547
,
"55"
:
0.2768
,
"60"
:
0.27873
,
"65"
:
0.2789
,
"70"
:
0.27983
,
"75"
:
0.27902
,
"80"
:
0.27972
,
"85"
:
0.28215
,
"90"
:
0.27786
,
"95"
:
0.28072
,
"100"
:
0.28294
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
0 → 100644
View file @
688448db
ENV_VARS
:
CUDA_DEVICE_MAX_CONNECTIONS
:
1
NVTE_ALLOW_NONDETERMINISTIC_ALGO
:
1
MODEL_ARGS
:
--num-layers
:
12
--hidden-size
:
512
--num-attention-heads
:
8
--log-params-norm
:
true
--log-num-zeros-in-grad
:
true
--log-validation-ppl-to-tensorboard
:
true
--log-timers-to-tensorboard
:
true
--tensorboard-dir
:
${TENSORBOARD_PATH}
--micro-batch-size
:
4
--global-batch-size
:
32
--seq-length
:
1024
--max-position-embeddings
:
1024
--train-iters
:
100
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_SAVE_PATH}
--load
:
${CHECKPOINT_LOAD_PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
--split
:
949,50,1
--distributed-backend
:
nccl
--lr
:
0.00015
--lr-decay-style
:
cosine
--min-lr
:
1.0e-5
--weight-decay
:
1e-2
--clip-grad
:
1.0
--lr-warmup-fraction
:
.01
--log-interval
:
1
--save-interval
:
50
--eval-interval
:
1000
--eval-iters
:
10
--transformer-impl
:
transformer_engine
--tensor-model-parallel-size
:
4
--pipeline-model-parallel-size
:
2
--use-distributed-optimizer
:
true
--async-save
:
true
--ckpt-fully-parallel-save
:
true
--no-gradient-accumulation-fusion
:
true
--attention-softmax-in-fp32
:
true
--use-checkpoint-opt_param-scheduler
:
true
--use-mcore-models
:
true
--ckpt-format
:
torch_dist
--data-cache-path
:
${DATA_CACHE_PATH}
--bf16
:
true
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
frozen-resume
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.85831
,
"5"
:
10.87284
,
"10"
:
10.83268
,
"15"
:
10.82102
,
"20"
:
10.71377
,
"25"
:
10.54763
,
"30"
:
10.36785
,
"35"
:
10.28461
,
"40"
:
10.08928
,
"45"
:
9.84557
,
"50"
:
9.9194
,
"55"
:
9.89197
,
"60"
:
9.50823
,
"65"
:
8.9595
,
"70"
:
9.73441
,
"75"
:
9.43113
,
"80"
:
9.411
,
"85"
:
9.61514
,
"90"
:
9.82373
,
"95"
:
9.52255
,
"100"
:
9.40799
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1775.0
,
"5"
:
2048.0
,
"10"
:
1559.0
,
"15"
:
2026.0
,
"20"
:
1790.0
,
"25"
:
1815.0
,
"30"
:
2056.0
,
"35"
:
2157.0
,
"40"
:
2311.0
,
"45"
:
2242.0
,
"50"
:
2756.0
,
"55"
:
2589.0
,
"60"
:
2651.0
,
"65"
:
2874.0
,
"70"
:
3534.0
,
"75"
:
2840.0
,
"80"
:
3634.0
,
"85"
:
3505.0
,
"90"
:
3377.0
,
"95"
:
3729.0
,
"100"
:
3572.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
232398336.0
,
"5"
:
232398336.0
,
"10"
:
232398336.0
,
"15"
:
232398336.0
,
"20"
:
232398336.0
,
"25"
:
232398336.0
,
"30"
:
232398336.0
,
"35"
:
232398336.0
,
"40"
:
233446912.0
,
"45"
:
232398336.0
,
"50"
:
232398336.0
,
"55"
:
232398336.0
,
"60"
:
232398336.0
,
"65"
:
232398336.0
,
"70"
:
232398336.0
,
"75"
:
232398336.0
,
"80"
:
232398336.0
,
"85"
:
232398336.0
,
"90"
:
232398336.0
,
"95"
:
232398336.0
,
"100"
:
232398336.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
686536192.0
,
"5"
:
775341056.0
,
"10"
:
775341056.0
,
"15"
:
775341056.0
,
"20"
:
775342080.0
,
"25"
:
775343616.0
,
"30"
:
775343616.0
,
"35"
:
775343616.0
,
"40"
:
775343616.0
,
"45"
:
775343616.0
,
"50"
:
775343616.0
,
"55"
:
775343616.0
,
"60"
:
775343616.0
,
"65"
:
775343616.0
,
"70"
:
775343616.0
,
"75"
:
775343616.0
,
"80"
:
775343616.0
,
"85"
:
775343616.0
,
"90"
:
775343616.0
,
"95"
:
775343616.0
,
"100"
:
775343616.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
11.96401
,
"5"
:
0.29061
,
"10"
:
0.28498
,
"15"
:
0.28362
,
"20"
:
0.28222
,
"25"
:
0.28294
,
"30"
:
0.28438
,
"35"
:
0.28301
,
"40"
:
0.28255
,
"45"
:
0.28337
,
"50"
:
0.28254
,
"55"
:
0.29177
,
"60"
:
0.29121
,
"65"
:
0.2911
,
"70"
:
0.29076
,
"75"
:
0.29215
,
"80"
:
0.29191
,
"85"
:
0.28992
,
"90"
:
0.29114
,
"95"
:
0.29025
,
"100"
:
0.28959
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.8583
,
"5"
:
10.87279
,
"10"
:
10.83264
,
"15"
:
10.82099
,
"20"
:
10.71379
,
"25"
:
10.54767
,
"30"
:
10.36789
,
"35"
:
10.2846
,
"40"
:
10.08927
,
"45"
:
9.84554
,
"50"
:
9.9194
,
"55"
:
9.89196
,
"60"
:
9.5082
,
"65"
:
8.95952
,
"70"
:
9.7344
,
"75"
:
9.4311
,
"80"
:
9.411
,
"85"
:
9.61517
,
"90"
:
9.82372
,
"95"
:
9.52256
,
"100"
:
9.408
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1763.0
,
"5"
:
2118.0
,
"10"
:
1540.0
,
"15"
:
2065.0
,
"20"
:
1836.0
,
"25"
:
1790.0
,
"30"
:
2030.0
,
"35"
:
2200.0
,
"40"
:
2389.0
,
"45"
:
2250.0
,
"50"
:
2793.0
,
"55"
:
2708.0
,
"60"
:
2777.0
,
"65"
:
2829.0
,
"70"
:
3443.0
,
"75"
:
2863.0
,
"80"
:
3676.0
,
"85"
:
3495.0
,
"90"
:
3282.0
,
"95"
:
3687.0
,
"100"
:
3655.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
232422400.0
,
"5"
:
232422400.0
,
"10"
:
232422400.0
,
"15"
:
232422400.0
,
"20"
:
232422400.0
,
"25"
:
232422400.0
,
"30"
:
232422400.0
,
"35"
:
232422400.0
,
"40"
:
232422400.0
,
"45"
:
232422400.0
,
"50"
:
232422400.0
,
"55"
:
232422400.0
,
"60"
:
232422400.0
,
"65"
:
232422400.0
,
"70"
:
232422400.0
,
"75"
:
232422400.0
,
"80"
:
232422400.0
,
"85"
:
232422400.0
,
"90"
:
232422400.0
,
"95"
:
232422400.0
,
"100"
:
232422400.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
686566400.0
,
"5"
:
775371776.0
,
"10"
:
775371776.0
,
"15"
:
775372288.0
,
"20"
:
775372288.0
,
"25"
:
775372288.0
,
"30"
:
775372288.0
,
"35"
:
775372288.0
,
"40"
:
775372288.0
,
"45"
:
775372288.0
,
"50"
:
775372288.0
,
"55"
:
775372288.0
,
"60"
:
775372288.0
,
"65"
:
775372288.0
,
"70"
:
775372288.0
,
"75"
:
775372288.0
,
"80"
:
775372288.0
,
"85"
:
775372288.0
,
"90"
:
775372288.0
,
"95"
:
775372288.0
,
"100"
:
775372288.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
16.98947
,
"5"
:
0.28276
,
"10"
:
0.29522
,
"15"
:
0.28583
,
"20"
:
0.29135
,
"25"
:
0.28791
,
"30"
:
0.28029
,
"35"
:
0.27945
,
"40"
:
0.27988
,
"45"
:
0.29308
,
"50"
:
0.28374
,
"55"
:
0.2909
,
"60"
:
0.29746
,
"65"
:
0.28807
,
"70"
:
0.29826
,
"75"
:
0.28803
,
"80"
:
0.29862
,
"85"
:
0.28869
,
"90"
:
0.28952
,
"95"
:
0.28889
,
"100"
:
0.28882
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
View file @
688448db
...
...
@@ -17,8 +17,8 @@ MODEL_ARGS:
--train-iters
:
100
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_PATH}
--load
:
${CHECKPOINT_PATH}
--save
:
${CHECKPOINT_
SAVE_
PATH}
--load
:
${CHECKPOINT_
LOAD_
PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
...
...
@@ -47,4 +47,5 @@ MODEL_ARGS:
--ckpt-format
:
torch_dist
--data-cache-path
:
${DATA_CACHE_PATH}
--bf16
:
true
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
ckpt-resume
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.92717
,
"5"
:
10.92928
,
"10"
:
10.91617
,
"15"
:
10.93901
,
"20"
:
10.93406
,
"25"
:
10.8858
,
"30"
:
10.81297
,
"35"
:
10.72203
,
"40"
:
10.55145
,
"45"
:
10.32854
,
"50"
:
10.28775
,
"55"
:
10.21253
,
"60"
:
9.833
,
"65"
:
9.27297
,
"70"
:
9.92539
,
"75"
:
9.59673
,
"80"
:
9.55132
,
"85"
:
9.73428
,
"90"
:
9.9073
,
"95"
:
9.60983
,
"100"
:
9.50131
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
379952128.0
,
"5"
:
378379264.0
,
"10"
:
379427840.0
,
"15"
:
378379264.0
,
"20"
:
559762944.0
,
"25"
:
561860096.0
,
"30"
:
561073664.0
,
"35"
:
561073664.0
,
"40"
:
560287232.0
,
"45"
:
559762944.0
,
"50"
:
560287232.0
,
"55"
:
561073664.0
,
"60"
:
559762944.0
,
"65"
:
559762944.0
,
"70"
:
559762944.0
,
"75"
:
559762944.0
,
"80"
:
559762944.0
,
"85"
:
559762944.0
,
"90"
:
561860096.0
,
"95"
:
560549376.0
,
"100"
:
560549376.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
9.22195
,
"5"
:
0.20217
,
"10"
:
0.20177
,
"15"
:
0.20429
,
"20"
:
0.21411
,
"25"
:
0.21219
,
"30"
:
0.21117
,
"35"
:
0.21259
,
"40"
:
0.21302
,
"45"
:
0.21291
,
"50"
:
0.21122
,
"55"
:
0.22967
,
"60"
:
0.2322
,
"65"
:
0.23206
,
"70"
:
0.23201
,
"75"
:
0.23017
,
"80"
:
0.22985
,
"85"
:
0.23239
,
"90"
:
0.231
,
"95"
:
0.23146
,
"100"
:
0.23157
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1799.0
,
"25"
:
2506.0
,
"30"
:
2471.0
,
"35"
:
2010.0
,
"40"
:
2153.0
,
"45"
:
2427.0
,
"50"
:
2914.0
,
"55"
:
2337.0
,
"60"
:
2978.0
,
"65"
:
2225.0
,
"70"
:
3612.0
,
"75"
:
3018.0
,
"80"
:
3488.0
,
"85"
:
3875.0
,
"90"
:
3770.0
,
"95"
:
3946.0
,
"100"
:
3446.0
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.92717
,
"5"
:
10.92928
,
"10"
:
10.91617
,
"15"
:
10.93901
,
"20"
:
10.93406
,
"25"
:
10.8858
,
"30"
:
10.81297
,
"35"
:
10.72203
,
"40"
:
10.55145
,
"45"
:
10.32854
,
"50"
:
10.28775
,
"55"
:
10.21253
,
"60"
:
9.833
,
"65"
:
9.27297
,
"70"
:
9.92539
,
"75"
:
9.59673
,
"80"
:
9.55132
,
"85"
:
9.73428
,
"90"
:
9.9073
,
"95"
:
9.60983
,
"100"
:
9.5013
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
378379264.0
,
"5"
:
378379264.0
,
"10"
:
378379264.0
,
"15"
:
378379264.0
,
"20"
:
561073664.0
,
"25"
:
561860096.0
,
"30"
:
561073664.0
,
"35"
:
561860096.0
,
"40"
:
561860096.0
,
"45"
:
560811520.0
,
"50"
:
561073664.0
,
"55"
:
561073664.0
,
"60"
:
561073664.0
,
"65"
:
561860096.0
,
"70"
:
561860096.0
,
"75"
:
561073664.0
,
"80"
:
561860096.0
,
"85"
:
561335808.0
,
"90"
:
561073664.0
,
"95"
:
561073664.0
,
"100"
:
561860096.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
11.81109
,
"5"
:
0.21194
,
"10"
:
0.21151
,
"15"
:
0.21057
,
"20"
:
0.22167
,
"25"
:
0.2212
,
"30"
:
0.22059
,
"35"
:
0.22295
,
"40"
:
0.22292
,
"45"
:
0.22399
,
"50"
:
0.22321
,
"55"
:
0.21669
,
"60"
:
0.21726
,
"65"
:
0.21668
,
"70"
:
0.22074
,
"75"
:
0.21923
,
"80"
:
0.21775
,
"85"
:
0.21706
,
"90"
:
0.21701
,
"95"
:
0.21697
,
"100"
:
0.2163
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1799.0
,
"25"
:
2506.0
,
"30"
:
2471.0
,
"35"
:
2010.0
,
"40"
:
2153.0
,
"45"
:
2427.0
,
"50"
:
2914.0
,
"55"
:
2409.0
,
"60"
:
2939.0
,
"65"
:
2178.0
,
"70"
:
3539.0
,
"75"
:
3029.0
,
"80"
:
3531.0
,
"85"
:
3892.0
,
"90"
:
3772.0
,
"95"
:
4015.0
,
"100"
:
3520.0
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
View file @
688448db
...
...
@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters
:
100
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_PATH}
--load
:
${CHECKPOINT_PATH}
--save
:
${CHECKPOINT_
SAVE_
PATH}
--load
:
${CHECKPOINT_
LOAD_
PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
...
...
@@ -49,4 +49,5 @@ MODEL_ARGS:
--data-cache-path
:
${DATA_CACHE_PATH}
--fp16
:
true
--apply-query-key-layer-scaling
:
true
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
ckpt-resume
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.92717
,
"5"
:
10.92928
,
"10"
:
10.91616
,
"15"
:
10.93902
,
"20"
:
10.93405
,
"25"
:
10.88579
,
"30"
:
10.81295
,
"35"
:
10.72198
,
"40"
:
10.55137
,
"45"
:
10.32844
,
"50"
:
10.28765
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
378378752.0
,
"5"
:
378903040.0
,
"10"
:
378378752.0
,
"15"
:
378903040.0
,
"20"
:
560548864.0
,
"25"
:
560548864.0
,
"30"
:
560548864.0
,
"35"
:
559238144.0
,
"40"
:
560548864.0
,
"45"
:
560548864.0
,
"50"
:
560548864.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1905351680.0
,
"5"
:
1905352192.0
,
"10"
:
1905352192.0
,
"15"
:
1905352192.0
,
"20"
:
2087784448.0
,
"25"
:
2087784448.0
,
"30"
:
2087784448.0
,
"35"
:
2087784448.0
,
"40"
:
2087784448.0
,
"45"
:
2087784448.0
,
"50"
:
2087784448.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
8.36878
,
"5"
:
0.2008
,
"10"
:
0.19913
,
"15"
:
0.19916
,
"20"
:
0.21528
,
"25"
:
0.21446
,
"30"
:
0.2138
,
"35"
:
0.21509
,
"40"
:
0.2138
,
"45"
:
0.21394
,
"50"
:
0.21354
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1751.0
,
"25"
:
2490.0
,
"30"
:
2497.0
,
"35"
:
2017.0
,
"40"
:
2091.0
,
"45"
:
2389.0
,
"50"
:
2925.0
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.92717
,
"5"
:
10.92928
,
"10"
:
10.91616
,
"15"
:
10.93902
,
"20"
:
10.93405
,
"25"
:
10.88579
,
"30"
:
10.81295
,
"35"
:
10.72198
,
"40"
:
10.55137
,
"45"
:
10.32844
,
"50"
:
10.28766
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
378903040.0
,
"5"
:
378378752.0
,
"10"
:
378903040.0
,
"15"
:
378378752.0
,
"20"
:
560811008.0
,
"25"
:
560548864.0
,
"30"
:
561073152.0
,
"35"
:
562646016.0
,
"40"
:
560548864.0
,
"45"
:
562646016.0
,
"50"
:
560548864.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1905351680.0
,
"5"
:
1905352192.0
,
"10"
:
1905352192.0
,
"15"
:
1905352192.0
,
"20"
:
2087784448.0
,
"25"
:
2087784448.0
,
"30"
:
2087784448.0
,
"35"
:
2087784448.0
,
"40"
:
2087784448.0
,
"45"
:
2087784448.0
,
"50"
:
2087784448.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.5872
,
"5"
:
0.20393
,
"10"
:
0.20412
,
"15"
:
0.20193
,
"20"
:
0.22109
,
"25"
:
0.21826
,
"30"
:
0.21476
,
"35"
:
0.21348
,
"40"
:
0.21255
,
"45"
:
0.21142
,
"50"
:
0.21064
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1751.0
,
"25"
:
2491.0
,
"30"
:
2428.0
,
"35"
:
1827.0
,
"40"
:
2072.0
,
"45"
:
2361.0
,
"50"
:
2998.0
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
View file @
688448db
...
...
@@ -23,8 +23,8 @@ MODEL_ARGS:
--train-iters
:
50
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_PATH}
--load
:
${CHECKPOINT_PATH}
--save
:
${CHECKPOINT_
SAVE_
PATH}
--load
:
${CHECKPOINT_
LOAD_
PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
...
...
@@ -52,4 +52,5 @@ MODEL_ARGS:
--data-cache-path
:
${DATA_CACHE_PATH}
--fp16
:
true
--apply-query-key-layer-scaling
:
true
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
regular
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev.json
View file @
688448db
{
"forward-backward-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
7.99255
,
0.1699
,
0.16797
,
0.16814
,
0.16792
,
0.1675
,
0.16973
,
0.16925
,
0.16932
,
0.16655
]
},
"forward-compute-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1.99201
,
0.07269
,
0.07105
,
0.07144
,
0.07113
,
0.07113
,
0.07269
,
0.07292
,
0.07231
,
0.07028
]
},
"backward-compute-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1.74189
,
0.07561
,
0.07559
,
0.07617
,
0.07601
,
0.07555
,
0.07573
,
0.07602
,
0.07589
,
0.07554
]
},
"batch-generator-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.33623
,
0.00263
,
0.00278
,
0.00281
,
0.0029
,
0.00309
,
0.00249
,
0.00293
,
0.00275
,
0.00267
]
},
"forward-recv-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
2.03589
,
0.01468
,
0.01445
,
0.01439
,
0.01441
,
0.01438
,
0.01445
,
0.01443
,
0.01439
,
0.01458
]
},
"forward-send-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.56239
,
0.00016
,
0.00014
,
0.00015
,
0.00015
,
0.00015
,
0.00017
,
0.00015
,
0.00015
,
0.00014
]
},
"backward-recv-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.01891
,
0.01827
,
0.01862
,
0.01906
,
0.01881
,
0.01843
,
0.01836
,
0.01816
,
0.01928
,
0.01844
]
},
"backward-send-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.00022
,
0.00019
,
0.00026
,
0.00025
,
0.00025
,
0.00026
,
0.00019
,
0.00026
,
0.00024
,
0.00025
]
},
"forward-send-backward-recv-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
3.65009
,
0.02665
,
0.02419
,
0.02471
,
0.02401
,
0.02444
,
0.02648
,
0.02644
,
0.02615
,
0.02382
]
},
"backward-send-forward-recv-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1.79597
,
0.00095
,
0.00098
,
0.00098
,
0.00099
,
0.00104
,
0.00099
,
0.00107
,
0.00111
,
0.00095
]
},
"layernorm-grads-all-reduce-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
3e-05
,
2e-05
,
3e-05
,
2e-05
,
2e-05
,
2e-05
,
2e-05
,
2e-05
,
2e-05
,
2e-05
]
},
"embedding-grads-all-reduce-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.00069
,
0.00052
,
0.00052
,
0.00053
,
0.00053
,
0.00053
,
0.00053
,
0.00052
,
0.00053
,
0.00052
]
},
"all-grads-sync-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.59902
,
0.00084
,
0.00085
,
0.00083
,
0.00084
,
0.00083
,
0.00084
,
0.00087
,
0.00084
,
0.00084
]
},
"optimizer-copy-to-main-grad-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.00026
,
0.00019
,
0.00019
,
0.00019
,
0.00019
,
0.00019
,
0.0002
,
0.00019
,
0.00019
,
0.00019
]
},
"optimizer-clip-main-grad-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.85985
,
0.0011
,
0.00109
,
0.00115
,
0.0012
,
0.00108
,
0.0011
,
0.00108
,
0.0011
,
0.00109
]
},
"optimizer-count-zeros-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.0167
,
0.00528
,
0.00524
,
0.00528
,
0.00523
,
0.00525
,
0.00524
,
0.00525
,
0.00525
,
0.00527
]
},
"optimizer-inner-step-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.01141
,
0.00081
,
0.00081
,
0.00083
,
0.00081
,
0.00084
,
0.00084
,
0.00084
,
0.00082
,
0.00083
]
},
"optimizer-copy-main-to-model-params-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.00088
,
0.0006
,
0.0006
,
0.0006
,
0.0006
,
0.00082
,
0.0006
,
0.00059
,
0.0006
,
0.0006
]
},
"optimizer-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.89007
,
0.00859
,
0.00853
,
0.00862
,
0.00862
,
0.00885
,
0.00857
,
0.00857
,
0.00854
,
0.00858
]
},
"learning-rate"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]
},
"learning-rate vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]
},
"batch-size"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
]
},
"batch-size vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
,
32.0
]
},
"lm loss"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
10.85926
,
10.89117
,
10.86647
,
10.81416
,
10.70027
,
10.60761
,
10.10644
,
10.21377
,
10.12972
,
9.8041
]
},
"lm loss vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
10.85926
,
10.89117
,
10.86647
,
10.81416
,
10.70027
,
10.60761
,
10.10644
,
10.21377
,
10.12972
,
9.8041
]
},
"loss-scale"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
},
"loss-scale vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
},
"grad-norm"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
14.36883
,
10.19308
,
9.38217
,
11.67025
,
11.2611
,
10.52068
,
12.43181
,
7.21395
,
6.03602
,
5.80161
]
},
"grad-norm vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
14.36883
,
10.19308
,
9.38217
,
11.67025
,
11.2611
,
10.52068
,
12.43181
,
7.21395
,
6.03602
,
5.80161
]
},
"num-zeros"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1726.0
,
1922.0
,
2043.0
,
1879.0
,
1882.0
,
1821.0
,
1648.0
,
2039.0
,
2379.0
,
2451.0
]
},
"num-zeros vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1726.0
,
1922.0
,
2043.0
,
1879.0
,
1882.0
,
1821.0
,
1648.0
,
2039.0
,
2379.0
,
2451.0
]
},
"params-norm"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
180.01265
,
180.01265
,
180.01265
,
180.01265
,
180.01265
,
180.01263
,
180.0126
,
180.01251
,
180.01237
,
180.01218
]
},
"params-norm vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
180.01265
,
180.01265
,
180.01265
,
180.01265
,
180.01265
,
180.01263
,
180.0126
,
180.01251
,
180.01237
,
180.01218
]
},
"iteration-time"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
8.9047
,
0.19058
,
0.18857
,
0.18884
,
0.18868
,
0.18839
,
0.19045
,
0.1901
,
0.18993
,
0.18735
]
},
"lm loss validation"
:
{
"start_step"
:
0
,
"end_step"
:
2
,
"step_interval"
:
5
,
"values"
:
[
9.81192
]
},
"lm loss validation vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
1
,
"step_interval"
:
5
,
"values"
:
[
9.81192
]
},
"lm loss validation ppl"
:
{
"start_step"
:
0
,
"end_step"
:
1
,
"step_interval"
:
5
,
"values"
:
[
18250.01367
]
},
"lm loss validation ppl vs samples"
:
{
"start_step"
:
0
,
"end_step"
:
1
,
"step_interval"
:
5
,
"values"
:
[
18250.01367
]
}
}
\ No newline at end of file
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.85926
,
"5"
:
10.878
,
"10"
:
10.84086
,
"15"
:
10.81702
,
"20"
:
10.72418
,
"25"
:
10.55518
,
"30"
:
10.35548
,
"35"
:
10.2597
,
"40"
:
10.06425
,
"45"
:
9.81279
,
"50"
:
9.89265
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1726.0
,
"5"
:
1899.0
,
"10"
:
1437.0
,
"15"
:
1923.0
,
"20"
:
1700.0
,
"25"
:
1640.0
,
"30"
:
1993.0
,
"35"
:
2075.0
,
"40"
:
2268.0
,
"45"
:
2144.0
,
"50"
:
2461.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
487096832.0
,
"5"
:
487096832.0
,
"10"
:
487096832.0
,
"15"
:
487096832.0
,
"20"
:
487096832.0
,
"25"
:
487096832.0
,
"30"
:
487096832.0
,
"35"
:
487096832.0
,
"40"
:
487096832.0
,
"45"
:
487096832.0
,
"50"
:
487096832.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1229747712.0
,
"5"
:
1409821184.0
,
"10"
:
1409821184.0
,
"15"
:
1409821184.0
,
"20"
:
1409821184.0
,
"25"
:
1409821184.0
,
"30"
:
1409821184.0
,
"35"
:
1409821184.0
,
"40"
:
1409821184.0
,
"45"
:
1409821184.0
,
"50"
:
1409821184.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
13.04346
,
"5"
:
0.19365
,
"10"
:
0.19279
,
"15"
:
0.19212
,
"20"
:
0.1915
,
"25"
:
0.19182
,
"30"
:
0.192
,
"35"
:
0.19258
,
"40"
:
0.19179
,
"45"
:
0.19135
,
"50"
:
0.19151
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts.json
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
10.86208
,
10.89137
,
10.86731
,
10.81652
,
10.70126
,
10.60816
,
10.11007
,
10.21889
,
10.1294
,
9.80326
]},
"num-zeros"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
1659.0
,
1944.0
,
1974.0
,
1920.0
,
1918.0
,
1855.0
,
1621.0
,
2018.0
,
2436.0
,
2304.0
]},
"iteration_timing_avg"
:
0.14203264705882354
}
\ No newline at end of file
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.86208
,
"5"
:
10.87869
,
"10"
:
10.84148
,
"15"
:
10.81526
,
"20"
:
10.72356
,
"25"
:
10.55942
,
"30"
:
10.35833
,
"35"
:
10.26014
,
"40"
:
10.06485
,
"45"
:
9.81413
,
"50"
:
9.89077
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1659.0
,
"5"
:
1904.0
,
"10"
:
1453.0
,
"15"
:
2011.0
,
"20"
:
1695.0
,
"25"
:
1617.0
,
"30"
:
1893.0
,
"35"
:
2080.0
,
"40"
:
2232.0
,
"45"
:
2224.0
,
"50"
:
2454.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
486047744.0
,
"5"
:
487096320.0
,
"10"
:
487096320.0
,
"15"
:
486047744.0
,
"20"
:
487096320.0
,
"25"
:
487096320.0
,
"30"
:
486047744.0
,
"35"
:
487096320.0
,
"40"
:
487096320.0
,
"45"
:
486047744.0
,
"50"
:
487096320.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1720084480.0
,
"5"
:
1900157952.0
,
"10"
:
1900157952.0
,
"15"
:
1900157952.0
,
"20"
:
1900157952.0
,
"25"
:
1900157952.0
,
"30"
:
1900157952.0
,
"35"
:
1900157952.0
,
"40"
:
1900157952.0
,
"45"
:
1900157952.0
,
"50"
:
1900157952.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
9.46191
,
"5"
:
0.19848
,
"10"
:
0.2013
,
"15"
:
0.20084
,
"20"
:
0.20142
,
"25"
:
0.20039
,
"30"
:
0.20371
,
"35"
:
0.20255
,
"40"
:
0.2022
,
"45"
:
0.20294
,
"50"
:
0.20066
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
View file @
688448db
...
...
@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters
:
50
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_PATH}
--load
:
${CHECKPOINT_PATH}
--save
:
${CHECKPOINT_
SAVE_
PATH}
--load
:
${CHECKPOINT_
LOAD_
PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
...
...
@@ -47,4 +47,5 @@ MODEL_ARGS:
--data-cache-path
:
${DATA_CACHE_PATH}
--bf16
:
true
--attention-backend
:
unfused
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
regular
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_dev.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.85926
,
"5"
:
10.878
,
"10"
:
10.84086
,
"15"
:
10.81702
,
"20"
:
10.72418
,
"25"
:
10.55518
,
"30"
:
10.35548
,
"35"
:
10.2597
,
"40"
:
10.06425
,
"45"
:
9.81279
,
"50"
:
9.89265
,
"55"
:
9.86713
,
"60"
:
9.4818
,
"65"
:
8.93492
,
"70"
:
9.71847
,
"75"
:
9.41307
,
"80"
:
9.3968
,
"85"
:
9.60641
,
"90"
:
9.80599
,
"95"
:
9.51409
,
"100"
:
9.39833
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1726.0
,
"5"
:
1899.0
,
"10"
:
1437.0
,
"15"
:
1923.0
,
"20"
:
1700.0
,
"25"
:
1640.0
,
"30"
:
1993.0
,
"35"
:
2075.0
,
"40"
:
2268.0
,
"45"
:
2144.0
,
"50"
:
2461.0
,
"55"
:
2419.0
,
"60"
:
2540.0
,
"65"
:
2748.0
,
"70"
:
3339.0
,
"75"
:
2600.0
,
"80"
:
3404.0
,
"85"
:
3412.0
,
"90"
:
3049.0
,
"95"
:
3491.0
,
"100"
:
3350.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
488669696.0
,
"5"
:
488669696.0
,
"10"
:
488669696.0
,
"15"
:
488669696.0
,
"20"
:
488669696.0
,
"25"
:
488669696.0
,
"30"
:
488669696.0
,
"35"
:
488669696.0
,
"40"
:
488669696.0
,
"45"
:
488669696.0
,
"50"
:
488669696.0
,
"55"
:
488669696.0
,
"60"
:
488669696.0
,
"65"
:
488669696.0
,
"70"
:
488669696.0
,
"75"
:
488669696.0
,
"80"
:
488669696.0
,
"85"
:
488669696.0
,
"90"
:
488669696.0
,
"95"
:
488669696.0
,
"100"
:
488669696.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1229747712.0
,
"5"
:
1411918336.0
,
"10"
:
1411918336.0
,
"15"
:
1411918336.0
,
"20"
:
1411918336.0
,
"25"
:
1411918336.0
,
"30"
:
1411918336.0
,
"35"
:
1411918336.0
,
"40"
:
1411918336.0
,
"45"
:
1411918336.0
,
"50"
:
1411918336.0
,
"55"
:
1411918336.0
,
"60"
:
1411918336.0
,
"65"
:
1411918336.0
,
"70"
:
1411918336.0
,
"75"
:
1411918336.0
,
"80"
:
1411918336.0
,
"85"
:
1411918336.0
,
"90"
:
1411918336.0
,
"95"
:
1411918336.0
,
"100"
:
1411918336.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
13.2816
,
"5"
:
0.19252
,
"10"
:
0.19307
,
"15"
:
0.19178
,
"20"
:
0.19278
,
"25"
:
0.19268
,
"30"
:
0.19244
,
"35"
:
0.19333
,
"40"
:
0.19291
,
"45"
:
0.19374
,
"50"
:
0.19199
,
"55"
:
0.19307
,
"60"
:
0.19049
,
"65"
:
0.19061
,
"70"
:
0.19137
,
"75"
:
0.19057
,
"80"
:
0.1903
,
"85"
:
0.19047
,
"90"
:
0.19357
,
"95"
:
0.19059
,
"100"
:
0.1907
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/golden_values_lts.json
0 → 100644
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.86208
,
"5"
:
10.87869
,
"10"
:
10.84148
,
"15"
:
10.81526
,
"20"
:
10.72356
,
"25"
:
10.55942
,
"30"
:
10.35833
,
"35"
:
10.26014
,
"40"
:
10.06485
,
"45"
:
9.81413
,
"50"
:
9.89077
,
"55"
:
9.8674
,
"60"
:
9.48218
,
"65"
:
8.93482
,
"70"
:
9.7177
,
"75"
:
9.4111
,
"80"
:
9.39614
,
"85"
:
9.60606
,
"90"
:
9.80663
,
"95"
:
9.51629
,
"100"
:
9.39917
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1659.0
,
"5"
:
1904.0
,
"10"
:
1453.0
,
"15"
:
2011.0
,
"20"
:
1695.0
,
"25"
:
1617.0
,
"30"
:
1893.0
,
"35"
:
2080.0
,
"40"
:
2232.0
,
"45"
:
2224.0
,
"50"
:
2454.0
,
"55"
:
2461.0
,
"60"
:
2555.0
,
"65"
:
2883.0
,
"70"
:
3255.0
,
"75"
:
2586.0
,
"80"
:
3445.0
,
"85"
:
3442.0
,
"90"
:
3067.0
,
"95"
:
3500.0
,
"100"
:
3328.0
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
488144896.0
,
"5"
:
489193472.0
,
"10"
:
489193472.0
,
"15"
:
489193472.0
,
"20"
:
489193472.0
,
"25"
:
489193472.0
,
"30"
:
489193472.0
,
"35"
:
489193472.0
,
"40"
:
489193472.0
,
"45"
:
489193472.0
,
"50"
:
489193472.0
,
"55"
:
489193472.0
,
"60"
:
489193472.0
,
"65"
:
489193472.0
,
"70"
:
489193472.0
,
"75"
:
489193472.0
,
"80"
:
489193472.0
,
"85"
:
489193472.0
,
"90"
:
489193472.0
,
"95"
:
489193472.0
,
"100"
:
489193472.0
}},
"mem-max-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
1720084480.0
,
"5"
:
1902255104.0
,
"10"
:
1902255104.0
,
"15"
:
1902255104.0
,
"20"
:
1902255104.0
,
"25"
:
1902255104.0
,
"30"
:
1902255104.0
,
"35"
:
1902255104.0
,
"40"
:
1902255104.0
,
"45"
:
1902255104.0
,
"50"
:
1902255104.0
,
"55"
:
1902255104.0
,
"60"
:
1902255104.0
,
"65"
:
1902255104.0
,
"70"
:
1902255104.0
,
"75"
:
1902255104.0
,
"80"
:
1902255104.0
,
"85"
:
1902255104.0
,
"90"
:
1902255104.0
,
"95"
:
1902255104.0
,
"100"
:
1902255104.0
}},
"iteration-time"
:
{
"start_step"
:
1
,
"end_step"
:
100
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
12.77466
,
"5"
:
0.19369
,
"10"
:
0.19406
,
"15"
:
0.19154
,
"20"
:
0.19362
,
"25"
:
0.19633
,
"30"
:
0.19002
,
"35"
:
0.19146
,
"40"
:
0.19099
,
"45"
:
0.19061
,
"50"
:
0.19124
,
"55"
:
0.19463
,
"60"
:
0.1903
,
"65"
:
0.19035
,
"70"
:
0.19049
,
"75"
:
0.18947
,
"80"
:
0.19086
,
"85"
:
0.1921
,
"90"
:
0.19047
,
"95"
:
0.1932
,
"100"
:
0.19029
}}}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
View file @
688448db
...
...
@@ -19,8 +19,8 @@ MODEL_ARGS:
--train-iters
:
100
--timing-log-level
:
2
--lr-decay-iters
:
320000
--save
:
${CHECKPOINT_PATH}
--load
:
${CHECKPOINT_PATH}
--save
:
${CHECKPOINT_
SAVE_
PATH}
--load
:
${CHECKPOINT_
LOAD_
PATH}
--data-path
:
${DATA_PATH}/my-gpt3_00_text_document
--vocab-file
:
${DATA_PATH}/bpe/vocab.json
--merge-file
:
${DATA_PATH}/bpe/merges.txt
...
...
@@ -47,4 +47,5 @@ MODEL_ARGS:
--use-legacy-models
:
true
--data-cache-path
:
${DATA_CACHE_PATH}
--bf16
:
true
--log-memory-to-tensorboard
:
true
TEST_TYPE
:
ckpt-resume
tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev.json
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
10.79311
,
10.85248
,
10.87281
,
10.83016
,
10.82949
,
10.78726
,
10.565
,
10.57088
,
10.4836
,
10.19521
]},
"num-zero
s"
:
{
"start_step"
:
0
,
"end_step"
:
34
,
"step_interval"
:
5
,
"values"
:
[
2450.0
,
2765.0
,
2163.0
,
2585.0
,
2634.0
,
2585.0
,
2987
.0
]
},
"iteration
_
tim
ing_avg"
:
0.1211408823529412
}
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.79311
,
"5"
:
10.83074
,
"10"
:
10.76725
,
"15"
:
10.82664
,
"20"
:
10.81793
,
"25"
:
10.76529
,
"30"
:
10.69182
,
"35"
:
10.61672
,
"40"
:
10.44907
,
"45"
:
10.21488
,
"50"
:
10.21715
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
586369024.0
,
"5"
:
587417600.0
,
"10"
:
587417600.0
,
"15"
:
587417600.0
,
"20"
:
869128704.0
,
"25"
:
867031552.0
,
"30"
:
867031552.0
,
"35"
:
867031552.0
,
"40"
:
867031552.0
,
"45"
:
867031552.0
,
"50"
:
869128704.0
}},
"mem-max-allocated-byte
s"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
3832784384.0
,
"5"
:
3832784896.0
,
"10"
:
3832784896.0
,
"15"
:
3832784896.0
,
"20"
:
4114758144.0
,
"25"
:
4114758144.0
,
"30"
:
4114758144.0
,
"35"
:
4114758144.0
,
"40"
:
4114758144.0
,
"45"
:
4114758144.0
,
"50"
:
4114758144
.0
}
},
"iteration
-
tim
e"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
14.89966
,
"5"
:
0.15568
,
"10"
:
0.15311
,
"15"
:
0.15336
,
"20"
:
0.15735
,
"25"
:
0.15804
,
"30"
:
0.15672
,
"35"
:
0.1548
,
"40"
:
0.15515
,
"45"
:
0.15584
,
"50"
:
0.15477
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1846.0
,
"25"
:
2348.0
,
"30"
:
2490.0
,
"35"
:
2010.0
,
"40"
:
2016.0
,
"45"
:
2642.0
,
"50"
:
2810.0
}}
}
\ No newline at end of file
tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts.json
View file @
688448db
{
"lm loss"
:
{
"start_step"
:
0
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
[
10.79311
,
10.85248
,
10.87281
,
10.83016
,
10.82949
,
10.78726
,
10.565
,
10.57088
,
10.4836
,
10.19521
]},
"num-zero
s"
:
{
"start_step"
:
0
,
"end_step"
:
34
,
"step_interval"
:
5
,
"values"
:
[
2450.0
,
2765.0
,
2163.0
,
2585.0
,
2634.0
,
2585.0
,
2987
.0
]
},
"iteration
_
tim
ing_avg"
:
0.1211408823529412
}
{
"lm loss"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
10.79311
,
"5"
:
10.83074
,
"10"
:
10.76725
,
"15"
:
10.82664
,
"20"
:
10.81793
,
"25"
:
10.76529
,
"30"
:
10.69182
,
"35"
:
10.61672
,
"40"
:
10.44907
,
"45"
:
10.21488
,
"50"
:
10.21715
}},
"mem-allocated-bytes"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
586369024.0
,
"5"
:
587417600.0
,
"10"
:
587417600.0
,
"15"
:
587417600.0
,
"20"
:
869128704.0
,
"25"
:
869128704.0
,
"30"
:
869128704.0
,
"35"
:
869128704.0
,
"40"
:
869128704.0
,
"45"
:
869128704.0
,
"50"
:
869128704.0
}},
"mem-max-allocated-byte
s"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
3832784384.0
,
"5"
:
3832784896.0
,
"10"
:
3832784896.0
,
"15"
:
3832784896.0
,
"20"
:
4114758144.0
,
"25"
:
4114758144.0
,
"30"
:
4114758144.0
,
"35"
:
4114758144.0
,
"40"
:
4114758144.0
,
"45"
:
4114758144.0
,
"50"
:
4114758144
.0
}
},
"iteration
-
tim
e"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
17.9574
,
"5"
:
0.15166
,
"10"
:
0.15201
,
"15"
:
0.1496
,
"20"
:
0.15614
,
"25"
:
0.15477
,
"30"
:
0.15483
,
"35"
:
0.15409
,
"40"
:
0.1546
,
"45"
:
0.15501
,
"50"
:
0.15639
}},
"num-zeros"
:
{
"start_step"
:
1
,
"end_step"
:
50
,
"step_interval"
:
5
,
"values"
:
{
"1"
:
"nan"
,
"5"
:
"nan"
,
"10"
:
"nan"
,
"15"
:
"nan"
,
"20"
:
1846.0
,
"25"
:
2348.0
,
"30"
:
2490.0
,
"35"
:
2010.0
,
"40"
:
2016.0
,
"45"
:
2642.0
,
"50"
:
2810.0
}}
}
\ No newline at end of file
Prev
1
…
29
30
31
32
33
34
35
36
37
…
42
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment