更新代码

688448db · silencealiang · a02a5490 · 688448db · 688448db · 688448db
Commit 688448db authored Mar 14, 2025 by silencealiang
20 changed files
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.80181, "5": 10.84284, "10": 10.78173, "15": 10.80418, "20": 10.7314, "25": 10.57558, "30": 10.43631, "35": 10.34347, "40": 10.17318, "45": 9.94245, "50": 10.00163, "55": 9.94872, "60": 9.59802, "65": 9.02299, "70": 9.78149, "75": 9.4886, "80": 9.45936, "85": 9.6529, "90": 9.84596, "95": 9.55834, "100": 9.43841}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 30687.0, "5": 35697.0, "10": 30112.0, "15": 35251.0, "20": 32966.0, "25": 31233.0, "30": 33087.0, "35": 34941.0, "40": 36233.0, "45": 35628.0, "50": 39783.0, "55": 37089.0, "60": 40650.0, "65": 41057.0, "70": 45337.0, "75": 39742.0, "80": 47699.0, "85": 49328.0, "90": 49103.0, "95": 48497.0, "100": 45560.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 865126912.0, "5": 865125376.0, "10": 865125376.0, "15": 865125888.0, "20": 865122816.0, "25": 865126400.0, "30": 865127424.0, "35": 865127936.0, "40": 865125376.0, "45": 865123328.0, "50": 865125888.0, "55": 865128448.0, "60": 865129472.0, "65": 865143808.0, "70": 865128960.0, "75": 865125888.0, "80": 865140736.0, "85": 865142272.0, "90": 865127936.0, "95": 865128448.0, "100": 865138176.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1867287040.0, "5": 2107624448.0, "10": 2107624448.0, "15": 2107624448.0, "20": 2112163840.0, "25": 2112163840.0, "30": 2112163840.0, "35": 2114877952.0, "40": 2114877952.0, "45": 2114877952.0, "50": 2116962304.0, "55": 2116962304.0, "60": 2127979520.0, "65": 2132691456.0, "70": 2132691456.0, "75": 2132691456.0, "80": 2133346816.0, "85": 2133346816.0, "90": 2135661568.0, "95": 2135661568.0, "100": 2135661568.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.18829, "5": 0.41672, "10": 0.40953, "15": 0.42036, "20": 0.40968, "25": 0.40781, "30": 0.40912, "35": 0.68237, "40": 0.40935, "45": 0.40902, "50": 0.409, "55": 0.40627, "60": 0.41037, "65": 0.41568, "70": 0.41037, "75": 0.41007, "80": 0.41381, "85": 0.40865, "90": 0.40453, "95": 0.40952, "100": 0.41186}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79987, "5": 10.85021, "10": 10.78437, "15": 10.80402, "20": 10.74018, "25": 10.57365, "30": 10.43064, "35": 10.34542, "40": 10.17702, "45": 9.94116, "50": 10.00138, "55": 9.94734, "60": 9.5942, "65": 9.02239, "70": 9.781, "75": 9.48705, "80": 9.4551, "85": 9.65724, "90": 9.84458, "95": 9.55632, "100": 9.44025}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 30784.0, "5": 35580.0, "10": 30083.0, "15": 35706.0, "20": 32807.0, "25": 30763.0, "30": 32985.0, "35": 34748.0, "40": 36348.0, "45": 36297.0, "50": 39908.0, "55": 37140.0, "60": 40211.0, "65": 40766.0, "70": 45683.0, "75": 40504.0, "80": 47991.0, "85": 48935.0, "90": 49292.0, "95": 48929.0, "100": 46758.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 865156096.0, "5": 865154560.0, "10": 865157632.0, "15": 865156608.0, "20": 865153024.0, "25": 865157120.0, "30": 865158656.0, "35": 865157632.0, "40": 865155072.0, "45": 865155584.0, "50": 865156608.0, "55": 865158144.0, "60": 865160704.0, "65": 865175552.0, "70": 865159680.0, "75": 865157632.0, "80": 865171456.0, "85": 865173504.0, "90": 865158144.0, "95": 865159168.0, "100": 865167360.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2845674496.0, "5": 3089817088.0, "10": 3089817088.0, "15": 3089817088.0, "20": 3090944000.0, "25": 3090944000.0, "30": 3090944000.0, "35": 3096425472.0, "40": 3096425472.0, "45": 3096425472.0, "50": 3098263040.0, "55": 3098263040.0, "60": 3106846720.0, "65": 3111392768.0, "70": 3111392768.0, "75": 3112845824.0, "80": 3112845824.0, "85": 3113016320.0, "90": 3118998528.0, "95": 3118998528.0, "100": 3118998528.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.85588, "5": 0.63247, "10": 0.42764, "15": 0.42156, "20": 0.41825, "25": 0.41965, "30": 0.41813, "35": 0.4219, "40": 0.42305, "45": 0.42873, "50": 0.42716, "55": 0.41875, "60": 0.43473, "65": 0.42855, "70": 0.42285, "75": 0.42556, "80": 0.42276, "85": 0.42862, "90": 0.41965, "95": 0.42303, "100": 0.42037}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -19,8 +19,8 @@ MODEL_ARGS:
  --train-iters: 100
  --timing-log-level: 2
  --lr-decay-iters: 320000
-  --save: ${CHECKPOINT_PATH}
-  --load: ${CHECKPOINT_PATH}
+  --save: ${CHECKPOINT_SAVE_PATH}
+  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/bpe/vocab.json
  --merge-file: ${DATA_PATH}/bpe/merges.txt
@@ -52,6 +52,8 @@ MODEL_ARGS:
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
+  --ckpt-assume-constant-structure: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
+  --log-memory-to-tensorboard: true
 TEST_TYPE: ckpt-resume
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81129, "5": 10.84788, "10": 10.78374, "15": 10.82081, "20": 10.74489, "25": 10.59876, "30": 10.44157, "35": 10.35352, "40": 10.19022, "45": 9.95761, "50": 10.02046, "55": 9.95893, "60": 9.61299, "65": 9.03726, "70": 9.78763, "75": 9.49707, "80": 9.46333, "85": 9.66739, "90": 9.84886, "95": 9.56601, "100": 9.44758}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 30980.0, "5": 36191.0, "10": 30226.0, "15": 35268.0, "20": 33094.0, "25": 31163.0, "30": 33078.0, "35": 34831.0, "40": 36203.0, "45": 35991.0, "50": 39594.0, "55": 37028.0, "60": 39890.0, "65": 40600.0, "70": 45489.0, "75": 40046.0, "80": 47478.0, "85": 49240.0, "90": 49279.0, "95": 48714.0, "100": 44720.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1363810304.0, "5": 1363807744.0, "10": 1363810816.0, "15": 1363809792.0, "20": 1363809280.0, "25": 1363811840.0, "30": 1363812352.0, "35": 1363810304.0, "40": 1363808768.0, "45": 1363806720.0, "50": 1363808768.0, "55": 1363806208.0, "60": 1363809792.0, "65": 1363812864.0, "70": 1363806208.0, "75": 1363807744.0, "80": 1363818496.0, "85": 1363813888.0, "90": 1363805696.0, "95": 1363805696.0, "100": 1363811840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1991354368.0, "5": 2555771392.0, "10": 2555771392.0, "15": 2555771392.0, "20": 2556176384.0, "25": 2556176384.0, "30": 2556176384.0, "35": 2560073728.0, "40": 2560073728.0, "45": 2560073728.0, "50": 2560073728.0, "55": 2560073728.0, "60": 2560073728.0, "65": 2560073728.0, "70": 2560073728.0, "75": 2562988032.0, "80": 2564945408.0, "85": 2564945408.0, "90": 2564945408.0, "95": 2564945408.0, "100": 2564945408.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.49146, "5": 0.27476, "10": 0.27706, "15": 0.29059, "20": 0.2707, "25": 0.27278, "30": 0.27509, "35": 0.27674, "40": 0.27814, "45": 0.27651, "50": 0.27399, "55": 0.27084, "60": 0.29133, "65": 0.27567, "70": 0.27284, "75": 0.27196, "80": 0.27431, "85": 0.27061, "90": 0.27032, "95": 0.27382, "100": 0.27476}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.8029, "5": 10.85339, "10": 10.79198, "15": 10.81769, "20": 10.74357, "25": 10.58789, "30": 10.43346, "35": 10.35014, "40": 10.18622, "45": 9.95965, "50": 10.01907, "55": 9.95967, "60": 9.61901, "65": 9.0438, "70": 9.78907, "75": 9.50146, "80": 9.4689, "85": 9.66944, "90": 9.85084, "95": 9.562, "100": 9.44806}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 31473.0, "5": 36628.0, "10": 30674.0, "15": 35124.0, "20": 33128.0, "25": 30566.0, "30": 32881.0, "35": 34525.0, "40": 35704.0, "45": 35586.0, "50": 39709.0, "55": 36628.0, "60": 38989.0, "65": 40858.0, "70": 45481.0, "75": 39330.0, "80": 47453.0, "85": 49471.0, "90": 49228.0, "95": 47973.0, "100": 45474.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1355420160.0, "5": 1355417600.0, "10": 1355420672.0, "15": 1355418624.0, "20": 1355419136.0, "25": 1355422208.0, "30": 1355422208.0, "35": 1355423232.0, "40": 1355422208.0, "45": 1355418112.0, "50": 1355420160.0, "55": 1355424768.0, "60": 1355427840.0, "65": 1355438080.0, "70": 1355420672.0, "75": 1355420672.0, "80": 1355432448.0, "85": 1355430400.0, "90": 1355421184.0, "95": 1355420672.0, "100": 1355427840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2953737728.0, "5": 3526075392.0, "10": 3526075392.0, "15": 3526075392.0, "20": 3527276544.0, "25": 3527276544.0, "30": 3527276544.0, "35": 3530373120.0, "40": 3530373120.0, "45": 3530373120.0, "50": 3534990848.0, "55": 3534990848.0, "60": 3542290944.0, "65": 3542290944.0, "70": 3542290944.0, "75": 3542290944.0, "80": 3542290944.0, "85": 3542290944.0, "90": 3542290944.0, "95": 3542290944.0, "100": 3542290944.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 9.55678, "5": 0.25735, "10": 0.25385, "15": 0.25994, "20": 0.26237, "25": 0.25011, "30": 0.25064, "35": 0.26042, "40": 0.25642, "45": 0.2505, "50": 0.25006, "55": 0.26056, "60": 0.25247, "65": 0.25735, "70": 0.26178, "75": 0.25377, "80": 0.25556, "85": 0.25939, "90": 0.26064, "95": 0.25687, "100": 0.25841}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -19,8 +19,8 @@ MODEL_ARGS:
  --train-iters: 100
  --timing-log-level: 2
  --lr-decay-iters: 320000
-  --save: ${CHECKPOINT_PATH}
-  --load: ${CHECKPOINT_PATH}
+  --save: ${CHECKPOINT_SAVE_PATH}
+  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/bpe/vocab.json
  --merge-file: ${DATA_PATH}/bpe/merges.txt
@@ -54,4 +54,5 @@ MODEL_ARGS:
  --ckpt-format: torch_dist
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
+  --log-memory-to-tensorboard: true
 TEST_TYPE: ckpt-resume
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81978, "5": 10.85197, "10": 10.79064, "15": 10.81272, "20": 10.71609, "25": 10.52476, "30": 10.33545, "35": 10.22995, "40": 10.04741, "45": 9.77656, "50": 9.87115, "55": 9.83385, "60": 9.461, "65": 8.90362, "70": 9.70126, "75": 9.39148, "80": 9.37893, "85": 9.59197, "90": 9.7945, "95": 9.50434, "100": 9.37913}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 27060.0, "5": 31921.0, "10": 26272.0, "15": 31301.0, "20": 28838.0, "25": 28531.0, "30": 30657.0, "35": 33054.0, "40": 35298.0, "45": 35155.0, "50": 39066.0, "55": 38205.0, "60": 55119.0, "65": 2137811.0, "70": 2140949.0, "75": 41844.0, "80": 157090.0, "85": 53159.0, "90": 160405.0, "95": 46932.0, "100": 45379.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1366404096.0, "5": 1366391296.0, "10": 1366398976.0, "15": 1366411264.0, "20": 1366395392.0, "25": 1366414848.0, "30": 1366408192.0, "35": 1366416896.0, "40": 1366381056.0, "45": 1366368768.0, "50": 1366344192.0, "55": 1366319616.0, "60": 1366306816.0, "65": 1366317568.0, "70": 1366287360.0, "75": 1366282240.0, "80": 1366307840.0, "85": 1366326784.0, "90": 1366323712.0, "95": 1366303232.0, "100": 1366301184.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2330338304.0, "5": 2902626816.0, "10": 2902626816.0, "15": 2902758912.0, "20": 2902758912.0, "25": 2909535744.0, "30": 2909535744.0, "35": 2909906432.0, "40": 2909906432.0, "45": 2909906432.0, "50": 2909906432.0, "55": 2909906432.0, "60": 2909906432.0, "65": 2909906432.0, "70": 2909906432.0, "75": 2909906432.0, "80": 2909906432.0, "85": 2909906432.0, "90": 2909906432.0, "95": 2909906432.0, "100": 2909906432.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.82131, "5": 0.33381, "10": 0.33099, "15": 0.33324, "20": 0.33223, "25": 0.33172, "30": 0.33447, "35": 0.33688, "40": 0.33819, "45": 0.34723, "50": 0.34463, "55": 0.33883, "60": 0.33949, "65": 0.33894, "70": 0.33639, "75": 0.33664, "80": 0.33471, "85": 0.33448, "90": 0.33392, "95": 0.33001, "100": 0.34356}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts.json
+{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.83445, "5": 10.87409, "10": 10.82337, "15": 10.83072, "20": 10.73228, "25": 10.53817, "30": 10.34469, "35": 10.24798, "40": 10.05498, "45": 9.79536, "50": 9.88842, "55": 9.84583, "60": 9.47252, "65": 8.91336, "70": 9.70548, "75": 9.39495, "80": 9.38269, "85": 9.58876, "90": 9.79604, "95": 9.50297, "100": 9.37731}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 26648.0, "5": 31622.0, "10": 25722.0, "15": 30485.0, "20": 28303.0, "25": 27282.0, "30": 29586.0, "35": 32578.0, "40": 35072.0, "45": 35298.0, "50": 38377.0, "55": 36128.0, "60": 39347.0, "65": 39897.0, "70": 44013.0, "75": 41039.0, "80": 46916.0, "85": 48793.0, "90": 46771.0, "95": 45617.0, "100": 46434.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1356090368.0, "5": 1356061184.0, "10": 1356089856.0, "15": 1356084736.0, "20": 1356063744.0, "25": 1356076544.0, "30": 1356070912.0, "35": 1356048896.0, "40": 1356026880.0, "45": 1356000768.0, "50": 1355968000.0, "55": 1355970048.0, "60": 1355980288.0, "65": 1356027904.0, "70": 1355976192.0, "75": 1355971584.0, "80": 1356020736.0, "85": 1356049920.0, "90": 1356049920.0, "95": 1356047360.0, "100": 1356033536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3273181696.0, "5": 3853371904.0, "10": 3853371904.0, "15": 3853371904.0, "20": 3853371904.0, "25": 3853371904.0, "30": 3853371904.0, "35": 3853371904.0, "40": 3853371904.0, "45": 3853371904.0, "50": 3853371904.0, "55": 3853371904.0, "60": 3853371904.0, "65": 3853371904.0, "70": 3853371904.0, "75": 3853371904.0, "80": 3853371904.0, "85": 3853371904.0, "90": 3853371904.0, "95": 3853371904.0, "100": 3853371904.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.68307, "5": 0.32305, "10": 0.32316, "15": 0.32294, "20": 0.31895, "25": 0.32174, "30": 0.33171, "35": 0.32776, "40": 0.34887, "45": 0.34206, "50": 0.33607, "55": 0.33463, "60": 0.32995, "65": 0.32515, "70": 0.33288, "75": 0.32963, "80": 0.32161, "85": 0.32851, "90": 0.31854, "95": 0.31863, "100": 0.32233}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
@@ -19,8 +19,8 @@ MODEL_ARGS:
  --train-iters: 100
  --timing-log-level: 2
  --lr-decay-iters: 320000
-  --save: ${CHECKPOINT_PATH}
-  --load: ${CHECKPOINT_PATH}
+  --save: ${CHECKPOINT_SAVE_PATH}
+  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/bpe/vocab.json
  --merge-file: ${DATA_PATH}/bpe/merges.txt
@@ -55,4 +55,5 @@ MODEL_ARGS:
  --ckpt-format: torch_dist
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
+  --log-memory-to-tensorboard: true
 TEST_TYPE: ckpt-resume
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev.json
+{
+    "lm loss": {
+        "start_step": 0,
+        "end_step": 50,
+        "step_interval": 5,
+        "values": [
+            10.8029,
+            10.86149,
+            10.86819,
+            10.80829,
+            10.72062,
+            10.64588,
+            10.21132,
+            10.32324,
+            10.2265,
+            9.92918
+        ]
+    },
+    "num-zeros": {
+        "start_step": 0,
+        "end_step": 50,
+        "step_interval": 5,
+        "values": [
+            31473.0,
+            37753.0,
+            38332.0,
+            36348.0,
+            33270.0,
+            34310.0,
+            30284.0,
+            35432.0,
+            36356.0,
+            37109.0
+        ]
+    },
+    "iteration-time": {
+        "start_step": 0,
+        "end_step": 50,
+        "step_interval": 5,
+        "values": [
+            5.94452,
+            0.40526,
+            0.40286,
+            0.40289,
+            0.40215,
+            0.40351,
+            0.40373,
+            0.40354,
+            0.40382,
+            0.41286
+        ]
+    }
+}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts.json
+{
+    "lm loss": {
+        "start_step": 0,
+        "end_step": 50,
+        "step_interval": 5,
+        "values": [
+            10.8029,
+            10.86149,
+            10.86819,
+            10.80829,
+            10.72062,
+            10.64588,
+            10.21132,
+            10.32324,
+            10.2265,
+            9.92918
+        ]
+    },
+    "num-zeros": {
+        "start_step": 0,
+        "end_step": 50,
+        "step_interval": 5,
+        "values": [
+            31473.0,
+            37753.0,
+            38332.0,
+            36348.0,
+            33270.0,
+            34310.0,
+            30284.0,
+            35432.0,
+            36356.0,
+            37109.0
+        ]
+    },
+    "iteration_timing_avg": 0.21900323529411767
+}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
+ENV_VARS:
+  CUDA_DEVICE_MAX_CONNECTIONS: 1
+  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+  NCCL_ALGO: Tree
+  CUBLAS_WORKSPACE_CONFIG: :4096:8
+MODEL_ARGS:
+  --num-layers: 12
+  --hidden-size: 512
+  --num-attention-heads: 8
+  --log-params-norm: true
+  --log-num-zeros-in-grad: true
+  --log-validation-ppl-to-tensorboard: true
+  --log-timers-to-tensorboard: true
+  --tensorboard-dir: ${TENSORBOARD_PATH}
+  --micro-batch-size: 4
+  --global-batch-size: 32
+  --seq-length: 1024
+  --max-position-embeddings: 1024
+  --train-iters: 50
+  --timing-log-level: 2
+  --lr-decay-iters: 320000
+  --save: ${CHECKPOINT_PATH}
+  --load: ${CHECKPOINT_PATH}
+  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
+  --vocab-file: ${DATA_PATH}/bpe/vocab.json
+  --merge-file: ${DATA_PATH}/bpe/merges.txt
+  --split: 949,50,1
+  --distributed-backend: nccl
+  --lr: 0.00015
+  --lr-decay-style: cosine
+  --min-lr: 1.0e-5
+  --weight-decay: 1e-2
+  --clip-grad: 1.0
+  --lr-warmup-fraction: .01
+  --log-interval: 1
+  --save-interval: 10000
+  --eval-interval: 1000
+  --eval-iters: 10
+  --transformer-impl: transformer_engine
+  --tensor-model-parallel-size: 2
+  --pipeline-model-parallel-size: 1
+  --expert-model-parallel-size: 2
+  --no-ckpt-fully-parallel-save: true
+  --moe-grouped-gemm: true
+  --disable-bias-linear: true
+  --sequence-parallel: true
+  --num-experts: 8
+  --use-distributed-optimizer: true
+  --moe-router-load-balancing-type: sinkhorn
+  --moe-router-topk: 1
+  --overlap-grad-reduce: true
+  --overlap-param-gather: true
+  --deterministic-mode: true
+  --no-gradient-accumulation-fusion: true
+  --attention-softmax-in-fp32: true
+  --use-mcore-models: true
+  --ckpt-format: torch_dist
+  --data-cache-path: ${DATA_CACHE_PATH}
+  --bf16: true
+  --attention-backend: unfused
+  --log-memory-to-tensorboard: true
+TEST_TYPE: regular
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.79987,
-            10.85907,
-            10.86575,
-            10.79932,
-            10.70961,
-            10.63871,
-            10.19492,
-            10.31016,
-            10.22301,
-            9.91473
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            30795.0,
-            37447.0,
-            37837.0,
-            35948.0,
-            33382.0,
-            34774.0,
-            30403.0,
-            35340.0,
-            36357.0,
-            37792.0
-        ]
-    },
-    "iteration-time": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.77572,
-            0.42536,
-            0.42839,
-            0.42977,
-            0.42283,
-            0.42333,
-            0.43199,
-            0.42998,
-            0.43124,
-            0.43207
-        ]
-    }
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79987, "5": 10.85011, "10": 10.78474, "15": 10.80469, "20": 10.74013, "25": 10.57368, "30": 10.43164, "35": 10.34482, "40": 10.17678, "45": 9.94099, "50": 10.00158}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 30795.0, "5": 36202.0, "10": 29805.0, "15": 35047.0, "20": 32996.0, "25": 31111.0, "30": 33355.0, "35": 34758.0, "40": 36390.0, "45": 36272.0, "50": 40012.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 865125376.0, "5": 865124864.0, "10": 865126400.0, "15": 865125376.0, "20": 865122816.0, "25": 865125888.0, "30": 865126912.0, "35": 865126912.0, "40": 865124352.0, "45": 865124864.0, "50": 865125376.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2845643776.0, "5": 3089300480.0, "10": 3089300480.0, "15": 3089300480.0, "20": 3090866176.0, "25": 3090866176.0, "30": 3090866176.0, "35": 3095958016.0, "40": 3095958016.0, "45": 3095958016.0, "50": 3098294272.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.74384, "5": 0.43164, "10": 0.43057, "15": 0.42884, "20": 0.4299, "25": 0.43109, "30": 0.67218, "35": 0.42782, "40": 0.42537, "45": 0.42596, "50": 0.77316}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.79987,
-            10.85983,
-            10.865,
-            10.799,
-            10.70987,
-            10.63782,
-            10.1965,
-            10.3099,
-            10.22262,
-            9.91423
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            30784.0,
-            37528.0,
-            37616.0,
-            36105.0,
-            33464.0,
-            34923.0,
-            30806.0,
-            35663.0,
-            36661.0,
-            37641.0
-        ]
-    },
-    "iteration_timing_avg": 0.3566726470588235
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79987, "5": 10.85021, "10": 10.78437, "15": 10.80402, "20": 10.74018, "25": 10.57365, "30": 10.43064, "35": 10.34542, "40": 10.17702, "45": 9.94116, "50": 10.00138}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 30784.0, "5": 35580.0, "10": 30083.0, "15": 35706.0, "20": 32807.0, "25": 30763.0, "30": 32985.0, "35": 34748.0, "40": 36348.0, "45": 36297.0, "50": 39908.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 865156096.0, "5": 865154560.0, "10": 865157632.0, "15": 865156608.0, "20": 865153024.0, "25": 865157120.0, "30": 865158656.0, "35": 865157632.0, "40": 865155072.0, "45": 865155584.0, "50": 865156608.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2845674496.0, "5": 3089817088.0, "10": 3089817088.0, "15": 3089817088.0, "20": 3090944000.0, "25": 3090944000.0, "30": 3090944000.0, "35": 3096425472.0, "40": 3096425472.0, "45": 3096425472.0, "50": 3098263040.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 17.48573, "5": 0.68181, "10": 0.42126, "15": 0.42388, "20": 0.41898, "25": 0.41998, "30": 0.41505, "35": 0.41625, "40": 0.41814, "45": 0.41734, "50": 0.42354}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -19,8 +19,8 @@ MODEL_ARGS:
  --train-iters: 50
  --timing-log-level: 2
  --lr-decay-iters: 320000
-  --save: ${CHECKPOINT_PATH}
-  --load: ${CHECKPOINT_PATH}
+  --save: ${CHECKPOINT_SAVE_PATH}
+  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/bpe/vocab.json
  --merge-file: ${DATA_PATH}/bpe/merges.txt
@@ -54,4 +54,5 @@ MODEL_ARGS:
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
+  --log-memory-to-tensorboard: true
 TEST_TYPE: regular
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.8029,
-            10.86149,
-            10.86819,
-            10.80829,
-            10.72062,
-            10.64588,
-            10.21132,
-            10.32324,
-            10.2265,
-            9.92918
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            31473.0,
-            37753.0,
-            38332.0,
-            36348.0,
-            33270.0,
-            34310.0,
-            30284.0,
-            35432.0,
-            36356.0,
-            37109.0
-        ]
-    },
-    "iteration-time": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            5.94452,
-            0.40526,
-            0.40286,
-            0.40289,
-            0.40215,
-            0.40351,
-            0.40373,
-            0.40354,
-            0.40382,
-            0.41286
-        ]
-    }
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.80959, "5": 10.85303, "10": 10.78796, "15": 10.81868, "20": 10.74722, "25": 10.5991, "30": 10.44004, "35": 10.3515, "40": 10.19634, "45": 9.95907, "50": 10.01749}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 31050.0, "5": 35937.0, "10": 30117.0, "15": 35239.0, "20": 32813.0, "25": 31429.0, "30": 33133.0, "35": 34855.0, "40": 36161.0, "45": 36187.0, "50": 38778.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 881113600.0, "5": 881112576.0, "10": 881114112.0, "15": 881116160.0, "20": 881113088.0, "25": 881115136.0, "30": 881115648.0, "35": 881117696.0, "40": 881113600.0, "45": 881113088.0, "50": 881115136.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2809510912.0, "5": 3055304704.0, "10": 3055304704.0, "15": 3055304704.0, "20": 3055304704.0, "25": 3055304704.0, "30": 3055304704.0, "35": 3056838144.0, "40": 3056838144.0, "45": 3056838144.0, "50": 3056838144.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5.94436, "5": 0.29888, "10": 0.2892, "15": 0.28962, "20": 0.29155, "25": 0.2887, "30": 0.28714, "35": 0.28764, "40": 0.28693, "45": 0.2945, "50": 0.29357}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.8029,
-            10.86149,
-            10.86819,
-            10.80829,
-            10.72062,
-            10.64588,
-            10.21132,
-            10.32324,
-            10.2265,
-            9.92918
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            31473.0,
-            37753.0,
-            38332.0,
-            36348.0,
-            33270.0,
-            34310.0,
-            30284.0,
-            35432.0,
-            36356.0,
-            37109.0
-        ]
-    },
-    "iteration_timing_avg": 0.21900323529411767
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.8029, "5": 10.85339, "10": 10.79202, "15": 10.81788, "20": 10.74371, "25": 10.58737, "30": 10.43384, "35": 10.35041, "40": 10.18639, "45": 9.95903, "50": 10.01914}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 31473.0, "5": 36628.0, "10": 30874.0, "15": 35127.0, "20": 32995.0, "25": 30607.0, "30": 32534.0, "35": 34542.0, "40": 35881.0, "45": 35814.0, "50": 39646.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 864192000.0, "5": 864189440.0, "10": 864192512.0, "15": 864189952.0, "20": 864189952.0, "25": 864192000.0, "30": 864194048.0, "35": 864194560.0, "40": 864194560.0, "45": 864189952.0, "50": 864192000.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2792702464.0, "5": 3036425216.0, "10": 3036425216.0, "15": 3036425216.0, "20": 3037602304.0, "25": 3037602304.0, "30": 3037602304.0, "35": 3040573440.0, "40": 3040573440.0, "45": 3040573440.0, "50": 3043728384.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.3358, "5": 0.26072, "10": 0.25802, "15": 0.25889, "20": 0.27225, "25": 0.25765, "30": 0.258, "35": 0.27009, "40": 0.26047, "45": 0.25566, "50": 0.26576}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -19,8 +19,8 @@ MODEL_ARGS:
  --train-iters: 50
  --timing-log-level: 2
  --lr-decay-iters: 320000
-  --save: ${CHECKPOINT_PATH}
-  --load: ${CHECKPOINT_PATH}
+  --save: ${CHECKPOINT_SAVE_PATH}
+  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/bpe/vocab.json
  --merge-file: ${DATA_PATH}/bpe/merges.txt
@@ -58,4 +58,5 @@ MODEL_ARGS:
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
+  --log-memory-to-tensorboard: true
 TEST_TYPE: regular
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.83445,
-            10.87978,
-            10.87924,
-            10.81567,
-            10.69374,
-            10.60333,
-            10.08824,
-            10.21471,
-            10.10778,
-            9.78309
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            26648.0,
-            32884.0,
-            33611.0,
-            31683.0,
-            28744.0,
-            30671.0,
-            28602.0,
-            33538.0,
-            34560.0,
-            35099.0
-        ]
-    },
-    "iteration-time": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            9.03575,
-            0.59809,
-            0.59808,
-            0.60171,
-            0.60477,
-            0.611,
-            0.62441,
-            0.63554,
-            0.64372,
-            0.64983
-        ]
-    }
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82006, "5": 10.85405, "10": 10.79175, "15": 10.80877, "20": 10.71387, "25": 10.52487, "30": 10.33469, "35": 10.23358, "40": 10.04961, "45": 9.77656, "50": 9.87044}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 27146.0, "5": 32189.0, "10": 26378.0, "15": 31015.0, "20": 28863.0, "25": 28323.0, "30": 30844.0, "35": 32780.0, "40": 35120.0, "45": 35338.0, "50": 40557.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1365952512.0, "5": 1365933056.0, "10": 1365943808.0, "15": 1365943296.0, "20": 1365935616.0, "25": 1365958656.0, "30": 1365950976.0, "35": 1365954048.0, "40": 1365923840.0, "45": 1365914112.0, "50": 1365878784.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3320324096.0, "5": 3889025536.0, "10": 3889025536.0, "15": 3889025536.0, "20": 3889025536.0, "25": 3889859072.0, "30": 3895426048.0, "35": 3895426048.0, "40": 3895426048.0, "45": 3895426048.0, "50": 3895426048.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.64353, "5": 0.35348, "10": 0.35586, "15": 0.35242, "20": 0.34871, "25": 0.35025, "30": 0.36767, "35": 0.35732, "40": 0.3578, "45": 0.3675, "50": 0.35703}}}
\ No newline at end of file
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts.json
-{
-    "lm loss": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            10.83445,
-            10.87978,
-            10.87924,
-            10.81567,
-            10.69374,
-            10.60333,
-            10.08824,
-            10.21471,
-            10.10778,
-            9.78309
-        ]
-    },
-    "num-zeros": {
-        "start_step": 0,
-        "end_step": 50,
-        "step_interval": 5,
-        "values": [
-            26648.0,
-            32884.0,
-            33611.0,
-            31683.0,
-            28744.0,
-            30671.0,
-            28602.0,
-            33538.0,
-            34560.0,
-            35099.0
-        ]
-    },
-    "iteration_timing_avg": 0.28211852941176474
-}
\ No newline at end of file
+{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.83445, "5": 10.87409, "10": 10.82337, "15": 10.83072, "20": 10.73228, "25": 10.53817, "30": 10.34469, "35": 10.24798, "40": 10.05498, "45": 9.79536, "50": 9.88842}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 26648.0, "5": 31622.0, "10": 25722.0, "15": 30485.0, "20": 28303.0, "25": 27282.0, "30": 29586.0, "35": 32578.0, "40": 35072.0, "45": 35298.0, "50": 38377.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1356090368.0, "5": 1356061184.0, "10": 1356089856.0, "15": 1356084736.0, "20": 1356063744.0, "25": 1356076544.0, "30": 1356070912.0, "35": 1356048896.0, "40": 1356026880.0, "45": 1356000768.0, "50": 1355968000.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3273181696.0, "5": 3853371904.0, "10": 3853371904.0, "15": 3853371904.0, "20": 3853371904.0, "25": 3853371904.0, "30": 3853371904.0, "35": 3853371904.0, "40": 3853371904.0, "45": 3853371904.0, "50": 3853371904.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 11.04298, "5": 0.32129, "10": 0.31692, "15": 0.31486, "20": 0.3219, "25": 0.31787, "30": 0.33397, "35": 0.32395, "40": 0.34235, "45": 0.34383, "50": 0.33389}}}
\ No newline at end of file