"mmdet3d/vscode:/vscode.git/clone" did not exist on "c9cd02c1b52bb4268e9f2afa8bc7a6a402566eee"
Commit 0d99ae1f authored by silencealiang's avatar silencealiang
Browse files

add

parent c271aaae
Pipeline #2498 canceled with stages
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Tree
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  GPUS_PER_NODE: 8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 624
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --num-attention-heads: 12
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 2
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_PATH}
  --load: ${CHECKPOINT_PATH}
  --split: 949,50,1
  --tokenizer-type: NullTokenizer
  --vocab-size: 8192
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 1
  --encoder-pipeline-model-parallel-size: 1
  --encoder-tensor-model-parallel-size: 4
  --deterministic-mode: true
  --attention-softmax-in-fp32: true
  --ckpt-format: torch
  --no-gradient-accumulation-fusion: true
  --bf16: true
  --img-h: 336
  --img-w: 336
  --patch-dim: 14
  --mock-data: true
  --freeze-ViT: true
  --freeze-LM: true
TEST_TYPE: regular
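The MODEL_ARGS above are command-line flags for the training entry point, and ENV_VARS are exported before launch. A minimal sketch of how such a config can be expanded into a launch command, assuming it is stored as YAML in the nested form shown above; the file name model_config.yaml and the entry point pretrain_vlm.py are illustrative assumptions, not taken from this commit:

# Sketch: expand a test config like the one above into a torchrun command line.
# Requires PyYAML; the config path and training script name are illustrative.
import yaml

def build_command(config_path: str, entry_point: str = "pretrain_vlm.py") -> str:
    with open(config_path) as f:
        cfg = yaml.safe_load(f)

    # Environment variables are exported inline before the launcher.
    env = " ".join(f"{k}={v}" for k, v in cfg.get("ENV_VARS", {}).items())

    # MODEL_ARGS maps flag names to values; a value of true means the flag is
    # passed on its own (e.g. --bf16), otherwise the value follows the flag.
    args = []
    for flag, value in cfg.get("MODEL_ARGS", {}).items():
        args.append(flag if value is True else f"{flag} {value}")

    # Placeholders such as ${TENSORBOARD_PATH} are left for the shell to expand.
    nproc = cfg.get("ENV_VARS", {}).get("GPUS_PER_NODE", 8)
    return f"{env} torchrun --nproc-per-node {nproc} {entry_point} " + " ".join(args)

if __name__ == "__main__":
    print(build_command("model_config.yaml"))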
{"forward-backward-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.80164, 0.62602, 0.62115, 0.61347, 0.61356, 0.6148, 0.61452, 0.61389, 0.61239, 0.61187]}, "forward-compute-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [5.14549, 0.30295, 0.29758, 0.29055, 0.29096, 0.29124, 0.29129, 0.2913, 0.29037, 0.28939]}, "backward-compute-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.12619, 0.28782, 0.28877, 0.28732, 0.28777, 0.28808, 0.28786, 0.28769, 0.28753, 0.28791]}, "batch-generator-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.29859, 0.02375, 0.02123, 0.01897, 0.01822, 0.01828, 0.01866, 0.01876, 0.01889, 0.01783]}, "forward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [3.69025, 0.02974, 0.02963, 0.03036, 0.03015, 0.03018, 0.03047, 0.03047, 0.03, 0.03017]}, "forward-send-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.06877, 0.00017, 0.00016, 0.00015, 0.00015, 0.00015, 0.00018, 0.00015, 0.00016, 0.00014]}, "backward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.07001, 0.07185, 0.07034, 0.07062, 0.07068, 0.07076, 0.07093, 0.07034, 0.07033, 0.07056]}, "backward-send-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00032, 0.00023, 0.00027, 0.00028, 0.00026, 0.0003, 0.00028, 0.00029, 0.00028, 0.00029]}, "forward-send-backward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [5.90985, 0.29772, 0.29629, 0.28867, 0.29204, 0.29221, 0.29134, 0.28969, 0.29014, 0.29351]}, "backward-send-forward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.86713, 0.00263, 0.0025, 0.00238, 0.00246, 0.00238, 0.00237, 0.00259, 0.00243, 0.00254]}, "layernorm-grads-all-reduce-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 3e-05, 3e-05, 2e-05, 2e-05]}, "embedding-grads-all-reduce-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [5e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05]}, "all-grads-sync-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.20519, 0.00031, 0.00025, 0.00025, 0.00026, 0.00025, 0.00025, 0.00025, 0.00025, 0.00025]}, "params-all-gather-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00016, 0.00013, 0.00012, 0.00011, 0.00011, 0.00011, 0.00011, 0.00011, 0.00011, 0.00011]}, "optimizer-copy-to-main-grad-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00015, 0.00013, 0.00011, 0.00011, 0.00011, 0.00011, 0.0001, 0.0001, 0.0001, 0.0001]}, "optimizer-clip-main-grad-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.01362, 0.00058, 0.00048, 0.00041, 0.00047, 0.0004, 0.0004, 0.00039, 0.0004, 0.0004]}, "optimizer-count-zeros-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00823, 0.00068, 0.00072, 0.00073, 0.00068, 0.00069, 0.00069, 0.0007, 0.00069, 0.00066]}, "optimizer-inner-step-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00098, 0.00026, 0.00023, 0.00023, 0.00025, 0.00023, 0.00023, 0.00024, 0.00024, 0.00023]}, "optimizer-copy-main-to-model-params-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00019, 0.00018, 0.00015, 0.00016, 0.00015, 0.00016, 0.00016, 0.00015, 0.00015, 0.00015]}, "optimizer-time": {"start_step": 0, "end_step": 50, "step_interval": 5, 
"values": [0.02427, 0.00277, 0.00256, 0.00257, 0.00249, 0.00243, 0.00242, 0.00241, 0.00241, 0.00237]}, "learning-rate": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "learning-rate vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "batch-size": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "batch-size vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [9.19947, 9.20335, 9.20248, 9.19723, 9.19172, 9.18973, 9.18517, 9.17532, 9.17374, 9.1609]}, "lm loss vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [9.19947, 9.20335, 9.20248, 9.19723, 9.19172, 9.18973, 9.18517, 9.17532, 9.17374, 9.1609]}, "loss-scale": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "loss-scale vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "grad-norm": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.93277, 1.00171, 1.00056, 0.944, 1.16867, 0.98576, 0.91686, 0.9042, 0.83078, 0.88219]}, "grad-norm vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.93277, 1.00171, 1.00056, 0.944, 1.16867, 0.98576, 0.91686, 0.9042, 0.83078, 0.88219]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [106.0, 114.0, 108.0, 110.0, 81.0, 105.0, 85.0, 109.0, 146.0, 122.0]}, "num-zeros vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [106.0, 114.0, 108.0, 110.0, 81.0, 105.0, 85.0, 109.0, 146.0, 122.0]}, "params-norm": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.6785, 35.67851, 35.6785, 35.67848, 35.67848]}, "params-norm vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.6785, 35.67851, 35.6785, 35.67848, 35.67848]}, "iteration-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [11.71205, 0.64203, 0.63681, 0.62887, 0.62867, 0.62983, 0.6294, 0.62857, 0.62698, 0.62637]}, "lm loss validation": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [9.1542]}, "lm loss validation vs samples": {"start_step": 0, "end_step": 1, "step_interval": 5, "values": [9.1542]}, "lm loss validation ppl": {"start_step": 0, "end_step": 1, "step_interval": 5, "values": [9454.09668]}, "lm loss validation ppl vs samples": {"start_step": 0, "end_step": 1, "step_interval": 5, "values": [9454.09668]}}
\ No newline at end of file
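The JSON blob above records golden values for the run: each metric is a series sampled every 5 steps over the 50 training iterations, keyed by start_step, end_step, step_interval, and values. A regression check would reload these and compare them against a freshly measured series; a minimal sketch of that comparison follows, with file names and tolerances chosen for illustration rather than taken from the test harness:

# Sketch: compare a fresh run's metrics against recorded golden values.
# File names and tolerances are illustrative assumptions.
import json
import math

def check_metric(golden: dict, actual: dict, name: str, rel_tol: float = 0.01) -> bool:
    g, a = golden[name], actual[name]
    # Both series must cover the same sampling window.
    same_window = (g["start_step"], g["end_step"], g["step_interval"]) == \
                  (a["start_step"], a["end_step"], a["step_interval"])
    return same_window and all(
        math.isclose(gv, av, rel_tol=rel_tol, abs_tol=1e-6)
        for gv, av in zip(g["values"], a["values"])
    )

if __name__ == "__main__":
    with open("golden_values.json") as f:
        golden = json.load(f)
    with open("actual_values.json") as f:
        actual = json.load(f)

    # Deterministic quantities are worth gating on; per-step timings are
    # hardware-dependent noise and are normally excluded from the check.
    for metric in ("lm loss", "grad-norm", "num-zeros", "params-norm"):
        print(metric, "OK" if check_metric(golden, actual, metric) else "MISMATCH")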
{"forward-backward-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [9.12533, 0.61523, 0.612, 0.61274, 0.60959, 0.61563, 0.61043, 0.62211, 0.61259, 0.61475]}, "forward-compute-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [3.2886, 0.29298, 0.28952, 0.29035, 0.28755, 0.29301, 0.28608, 0.30023, 0.28978, 0.29236]}, "backward-compute-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.10925, 0.28738, 0.28707, 0.28715, 0.28829, 0.28813, 0.29022, 0.28846, 0.29053, 0.29005]}, "batch-generator-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.76471, 0.01852, 0.01694, 0.02369, 0.02029, 0.01651, 0.01633, 0.02469, 0.01956, 0.01684]}, "forward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [3.67666, 0.02972, 0.02965, 0.02942, 0.02811, 0.0288, 0.0288, 0.02849, 0.02832, 0.02838]}, "forward-send-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.9526, 0.00016, 0.00016, 0.00016, 0.00016, 0.00018, 0.00017, 0.00017, 0.00014, 0.00015]}, "backward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.07105, 0.07081, 0.07084, 0.07037, 0.06972, 0.07299, 0.06941, 0.06963, 0.07091, 0.07042]}, "backward-send-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00019, 0.0002, 0.00021, 0.00019, 0.0002, 0.00019, 0.00019, 0.00018, 0.00018, 0.00018]}, "forward-send-backward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [4.17022, 0.29888, 0.30073, 0.30472, 0.30255, 0.30377, 0.30116, 0.3082, 0.3045, 0.30713]}, "backward-send-forward-recv-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.89549, 0.00229, 0.00225, 0.00218, 0.00224, 0.00218, 0.00214, 0.00228, 0.00208, 0.00209]}, "layernorm-grads-all-reduce-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [3e-05, 3e-05, 4e-05, 2e-05, 3e-05, 2e-05, 2e-05, 3e-05, 2e-05, 2e-05]}, "embedding-grads-all-reduce-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [5e-05, 3e-05, 5e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05]}, "all-grads-sync-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.19492, 0.00027, 0.00039, 0.00025, 0.00027, 0.00025, 0.00024, 0.00025, 0.00022, 0.00022]}, "params-all-gather-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00015, 0.0001, 0.00011, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 9e-05, 9e-05]}, "optimizer-copy-to-main-grad-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00013, 0.00011, 0.00011, 0.0001, 0.0001, 0.0001, 0.0001, 0.00011, 9e-05, 9e-05]}, "optimizer-clip-main-grad-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.02498, 0.00052, 0.00052, 0.00039, 0.00051, 0.00039, 0.00041, 0.00041, 0.00037, 0.00036]}, "optimizer-count-zeros-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00735, 0.00064, 0.00064, 0.00064, 0.00063, 0.00065, 0.00068, 0.00065, 0.00065, 0.00065]}, "optimizer-inner-step-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00093, 0.00021, 0.00021, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.00018, 0.00018]}, "optimizer-copy-main-to-model-params-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.00018, 0.00015, 0.00015, 0.00015, 0.00014, 0.00014, 0.00014, 0.00014, 0.00014, 0.00014]}, "optimizer-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.03475, 
0.00249, 0.00249, 0.0023, 0.00258, 0.0023, 0.00234, 0.00235, 0.00223, 0.00223]}, "learning-rate": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "learning-rate vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "batch-size": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "batch-size vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [9.19948, 9.20339, 9.20246, 9.19721, 9.1917, 9.18976, 9.18515, 9.17526, 9.1738, 9.16094]}, "lm loss vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [9.19948, 9.20339, 9.20246, 9.19721, 9.1917, 9.18976, 9.18515, 9.17526, 9.1738, 9.16094]}, "loss-scale": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "loss-scale vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "grad-norm": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.93282, 1.00192, 1.00046, 0.94405, 1.16906, 0.98576, 0.91623, 0.90401, 0.83116, 0.88246]}, "grad-norm vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [0.93282, 1.00192, 1.00046, 0.94405, 1.16906, 0.98576, 0.91623, 0.90401, 0.83116, 0.88246]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [103.0, 122.0, 112.0, 97.0, 93.0, 105.0, 105.0, 101.0, 126.0, 120.0]}, "num-zeros vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [103.0, 122.0, 112.0, 97.0, 93.0, 105.0, 105.0, 101.0, 126.0, 120.0]}, "params-norm": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.6785, 35.67849, 35.67848]}, "params-norm vs samples": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.67851, 35.6785, 35.67849, 35.67848]}, "iteration-time": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [11.25871, 0.63103, 0.62702, 0.628, 0.62436, 0.6304, 0.62504, 0.63626, 0.62666, 0.62873]}, "lm loss validation": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [9.1542]}, "lm loss validation vs samples": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [9.1542]}, "lm loss validation ppl": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [9454.09668]}, "lm loss validation ppl vs samples": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [9454.09668]}}
\ No newline at end of file
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Tree
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  GPUS_PER_NODE: 8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 624
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --num-attention-heads: 12
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 2
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_PATH}
  --load: ${CHECKPOINT_PATH}
  --split: 949,50,1
  --tokenizer-type: NullTokenizer
  --vocab-size: 8192
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 1
  --encoder-pipeline-model-parallel-size: 1
  --encoder-tensor-model-parallel-size: 4
  --deterministic-mode: true
  --attention-softmax-in-fp32: true
  --ckpt-format: torch
  --no-gradient-accumulation-fusion: true
  --bf16: true
  --img-h: 336
  --img-w: 336
  --patch-dim: 14
  --mock-data: true
  --freeze-ViT: true
  --freeze-LM: true
  --use-distributed-optimizer: true
TEST_TYPE: regular
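This second config differs from the first only in --use-distributed-optimizer, which shards optimizer state across the data-parallel ranks. A small sketch for diffing two such configs to confirm exactly which flags changed; the paths are hypothetical:

# Sketch: diff the MODEL_ARGS of two test configs; for the pair above the only
# expected difference is --use-distributed-optimizer. Paths are illustrative.
import yaml

def diff_model_args(path_a: str, path_b: str) -> dict:
    with open(path_a) as f:
        a = yaml.safe_load(f)["MODEL_ARGS"]
    with open(path_b) as f:
        b = yaml.safe_load(f)["MODEL_ARGS"]
    return {k: (a.get(k), b.get(k))
            for k in sorted(set(a) | set(b))
            if a.get(k) != b.get(k)}

if __name__ == "__main__":
    print(diff_model_args("tp4_pp1/model_config.yaml",
                          "tp4_pp1_dist_optimizer/model_config.yaml"))
    # Expected: {'--use-distributed-optimizer': (None, True)}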