---
# Functional-test recipe for the `gpt-nemo` model using the `mcore-nemo` build.
#
# Tokens in single braces (e.g. {test_case}, {assets_dir}, {environment}) are
# template placeholders. NOTE(review): presumably the recipe loader substitutes
# them from `spec` / `products` before the script runs -- confirm against the
# launcher.
type: basic
format_version: 1
maintainers: [mcore]
loggers: [stdout]

spec:
  # Quoted because an unquoted leading `{` would be parsed as a flow mapping.
  name: "{test_case}"
  model: gpt-nemo
  build: mcore-nemo
  nodes: 1
  gpus: 8
  platforms: dgx_a100
  time_limit: 1800
  # NOTE(review): left null here; each `products` entry below supplies its own
  # `scope` -- presumably that value takes precedence, confirm.
  scope: null
  # Shell script that builds a list of KEY=VALUE arguments and hands them to
  # run_ci_test.sh.
  #
  # `${{ARGUMENTS[@]}}` uses doubled braces; presumably this escapes the
  # template engine so the rendered shell sees `${ARGUMENTS[@]}` -- confirm.
  # The expansion is double-quoted so every array element is passed as exactly
  # one argument, with no word splitting or glob expansion.
  script: |-
    ls
    cd /opt/NeMo

    ARGUMENTS=(
        "DATA_PATH='-'"
        "DATA_CACHE_PATH='-'"
        "OUTPUT_PATH={assets_dir}"
        "TENSORBOARD_PATH={assets_dir}/tensorboard"
        "CHECKPOINT_PATH=/workspace/checkpoints"
        "TRAINING_SCRIPT_PATH=/opt/NeMo/examples/nlp/language_modeling/megatron_gpt_pretraining.py"
        "TRAINING_PARAMS_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
        "GOLDEN_VALUES_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}.json"
        "N_REPEAT={n_repeat}"
    )

    bash /opt/megatron-lm/tests/functional_tests/shell_test_utils/run_ci_test.sh "${{ARGUMENTS[@]}}"

# Test cases to run with the spec above.
# NOTE(review): each key holds a list; presumably the loader expands the
# combinations into individual jobs -- confirm.
products:
  - environment: [dev]
    scope: [mr]
    n_repeat: [5]
    test_case:
      - gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G
      - gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G