unit-test-job-lts.yaml 3.61 KB
Newer Older
xingjinliang's avatar
xingjinliang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Pipeline-wide defaults inherited by the jobs in this file.
default:
  # Sets GitLab's `interruptible` keyword for all jobs that do not override it.
  interruptible: true
# Unit-test job that launches the `unit-tests` model with `--test-case other`
# in the `lts` environment.
other:
  stage: unit-tests
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  # Runner tags. The list is anchored as `id001`; the other jobs in this file
  # reuse it through the `*id001` alias, so the anchor name must not change.
  tags: &id001
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/jet-client
    - team/megatron
  timeout: 7 days
  # Run when triggered as a child pipeline or when a merge request ID is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "parent_pipeline"'
    - if: '$CI_MERGE_REQUEST_ID'
  # Needs the `functional:configure` job from the pipeline identified by
  # $PARENT_PIPELINE_ID.
  needs:
    - pipeline: $PARENT_PIPELINE_ID
      job: functional:configure
  script:
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one shell command, with no trailing newline.
    - >-
      export PYTHONPATH=$(pwd);
      python tests/test_utils/python_scripts/launch_jet_workload.py
      --model unit-tests
      --environment lts
      --n-repeat 1
      --time-limit 1800
      --test-case other
      --container-tag 20283570
      --cluster dgxh100_coreweave
  # Collect results/ whether the job passes or fails.
  artifacts:
    when: always
    paths:
      - results/
# Stage list for this pipeline; the unit-test jobs in this file all use
# `stage: unit-tests`.
stages:
  - unit-tests
# Unit-test job that launches the `unit-tests` model with
# `--test-case tests/unit_tests/data/` in the `lts` environment.
tests/unit_tests/data/:
  stage: unit-tests
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  # Alias of the runner-tag list anchored as `id001` on the `other` job.
  tags: *id001
  timeout: 7 days
  # Run when triggered as a child pipeline or when a merge request ID is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "parent_pipeline"'
    - if: '$CI_MERGE_REQUEST_ID'
  # Needs the `functional:configure` job from the pipeline identified by
  # $PARENT_PIPELINE_ID.
  needs:
    - pipeline: $PARENT_PIPELINE_ID
      job: functional:configure
  script:
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one shell command, with no trailing newline.
    - >-
      export PYTHONPATH=$(pwd);
      python tests/test_utils/python_scripts/launch_jet_workload.py
      --model unit-tests
      --environment lts
      --n-repeat 1
      --time-limit 1800
      --test-case tests/unit_tests/data/
      --container-tag 20283570
      --cluster dgxh100_coreweave
  # Collect results/ whether the job passes or fails.
  artifacts:
    when: always
    paths:
      - results/
# Unit-test job that launches the `unit-tests` model with
# `--test-case tests/unit_tests/dist_checkpointing/` in the `lts` environment.
tests/unit_tests/dist_checkpointing/:
  stage: unit-tests
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  # Alias of the runner-tag list anchored as `id001` on the `other` job.
  tags: *id001
  timeout: 7 days
  # Run when triggered as a child pipeline or when a merge request ID is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "parent_pipeline"'
    - if: '$CI_MERGE_REQUEST_ID'
  # Needs the `functional:configure` job from the pipeline identified by
  # $PARENT_PIPELINE_ID.
  needs:
    - pipeline: $PARENT_PIPELINE_ID
      job: functional:configure
  script:
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one shell command, with no trailing newline.
    - >-
      export PYTHONPATH=$(pwd);
      python tests/test_utils/python_scripts/launch_jet_workload.py
      --model unit-tests
      --environment lts
      --n-repeat 1
      --time-limit 1800
      --test-case tests/unit_tests/dist_checkpointing/
      --container-tag 20283570
      --cluster dgxh100_coreweave
  # Collect results/ whether the job passes or fails.
  artifacts:
    when: always
    paths:
      - results/
# Unit-test job that launches the `unit-tests` model with
# `--test-case tests/unit_tests/distributed/` in the `lts` environment.
tests/unit_tests/distributed/:
  stage: unit-tests
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  # Alias of the runner-tag list anchored as `id001` on the `other` job.
  tags: *id001
  timeout: 7 days
  # Run when triggered as a child pipeline or when a merge request ID is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "parent_pipeline"'
    - if: '$CI_MERGE_REQUEST_ID'
  # Needs the `functional:configure` job from the pipeline identified by
  # $PARENT_PIPELINE_ID.
  needs:
    - pipeline: $PARENT_PIPELINE_ID
      job: functional:configure
  script:
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one shell command, with no trailing newline.
    - >-
      export PYTHONPATH=$(pwd);
      python tests/test_utils/python_scripts/launch_jet_workload.py
      --model unit-tests
      --environment lts
      --n-repeat 1
      --time-limit 1800
      --test-case tests/unit_tests/distributed/
      --container-tag 20283570
      --cluster dgxh100_coreweave
  # Collect results/ whether the job passes or fails.
  artifacts:
    when: always
    paths:
      - results/
# Unit-test job covering four individual test files in the `lts` environment.
# The job name is the four paths joined by single spaces; it is written as one
# quoted key instead of the explicit `? ... :` complex-key form.
'tests/unit_tests/test_inference.py tests/unit_tests/test_tokenizer.py tests/unit_tests/test_utilities.py tests/unit_tests/test_training.py':
  stage: unit-tests
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  # Alias of the runner-tag list anchored as `id001` on the `other` job.
  tags: *id001
  timeout: 7 days
  # Run when triggered as a child pipeline or when a merge request ID is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "parent_pipeline"'
    - if: '$CI_MERGE_REQUEST_ID'
  # Needs the `functional:configure` job from the pipeline identified by
  # $PARENT_PIPELINE_ID.
  needs:
    - pipeline: $PARENT_PIPELINE_ID
      job: functional:configure
  script:
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one shell command, with no trailing newline.
    # NOTE(review): the four paths after --test-case are not quoted, so the
    # shell passes them as separate arguments. Confirm that
    # launch_jet_workload.py accepts that form.
    - >-
      export PYTHONPATH=$(pwd);
      python tests/test_utils/python_scripts/launch_jet_workload.py
      --model unit-tests
      --environment lts
      --n-repeat 1
      --time-limit 1800
      --test-case
      tests/unit_tests/test_inference.py
      tests/unit_tests/test_tokenizer.py
      tests/unit_tests/test_utilities.py
      tests/unit_tests/test_training.py
      --container-tag 20283570
      --cluster dgxh100_coreweave
  # Collect results/ whether the job passes or fails.
  artifacts:
    when: always
    paths:
      - results/