"vscode:/vscode.git/clone" did not exist on "a886564a18c9591ca03cfe114b45cddaedcc5856"
unit-tests.yaml 2.08 KB
Newer Older
xingjinliang's avatar
xingjinliang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
type: basic
format_version: 1
maintainers: [mcore]
loggers: [stdout]
# Recipe for the unit-test buckets. The curly-brace placeholders (test_case,
# environment, tag, n_repeat) are filled in from the `products` entries of this
# file; literal shell braces inside the script are therefore written doubled.
# NOTE(review): this looks like Python str.format-style templating done by the
# recipe launcher -- confirm before adding any single brace to this file.
spec:
  name: '{test_case}'
  model: unit-tests
  nodes: 1
  build: mcore-pyt-{environment}
  gpus: 8
  platforms: dgx_h100
  script: |-
    ls

    export TAG={tag}
    export ENVIRONMENT={environment}
    # BUCKET has to be exported: the yq query below reads it through env(BUCKET).
    export BUCKET="{test_case}"
    export UNIT_TEST_REPEAT={n_repeat}
    export UNIT_TEST_TIMEOUT=10

    set -euxo pipefail

    # Select the source tree that matches the requested tag.
    if [[ "$TAG" == "latest" ]]; then
      TEST_PATH="/opt/megatron-lm"
    else
      TEST_PATH="/opt/megatron-lm-legacy/"
    fi

    cd "$TEST_PATH"

    # Collect the pytest marker expressions that apply to this tag/environment.
    MARKER=()
    if [[ "$TAG" == "legacy" ]]; then
      MARKER+=("not internal")
    fi

    if [[ "$ENVIRONMENT" == "lts" ]]; then
      MARKER+=("not flaky")
    fi

    if [[ "$ENVIRONMENT" == "dev" ]]; then
      MARKER+=("not flaky_in_dev")
    fi

    # Join the marker expressions with " and " into one -m expression.
    # The length check keeps an empty list from tripping "set -u".
    MARKER_ARG=""
    if [[ ${{#MARKER[@]}} -gt 0 ]]; then
      MARKER_ARG="${{MARKER[0]}}"
      for element in "${{MARKER[@]:1}}"; do
        MARKER_ARG+=" and $element"
      done
    fi

    # Every test_case listed in the recipe except the current bucket, one per line.
    # The recipe is always read from the /opt/megatron-lm checkout, whatever TEST_PATH is.
    IGNORE_TEST_CASES=$(yq eval 'with(.products[].test_case; del(.[] | select(. == env(BUCKET)))) | .products[].test_case[]' /opt/megatron-lm/tests/test_utils/recipes/unit-tests.yaml | tr " " "\n")
    IGNORE_ARGS=()
    while IFS= read -r test_case; do
      # An empty yq result still yields one empty line from the here-string;
      # skip it instead of handing pytest a bare "--ignore=".
      if [[ -z "$test_case" ]]; then
        continue
      fi

      if [[ $test_case == *\** ]]; then
        # Glob entry: ignore each matching file individually.
        FILES=($(ls $test_case))
        echo ${{FILES[@]}}
        for file in "${{FILES[@]}}"; do
          IGNORE_ARGS+=("--ignore=$file")
        done
      else
        IGNORE_ARGS+=("--ignore=$test_case")
      fi
    done <<< "$IGNORE_TEST_CASES"

    # Build the argument vector as an array and invoke pytest directly, so no
    # argument has to survive being flattened to a string and re-parsed by eval.
    PYTEST_ARGS=(-xvs --cov-report=term --cov-report=html --cov=megatron/core --no-cov-on-fail)
    if [[ ${{#IGNORE_ARGS[@]}} -gt 0 ]]; then
      PYTEST_ARGS+=("${{IGNORE_ARGS[@]}}")
    fi
    if [[ -n "$MARKER_ARG" ]]; then
      PYTEST_ARGS+=(-m "$MARKER_ARG")
    fi

    for i in $(seq $UNIT_TEST_REPEAT); do
      # BUCKET stays unquoted on purpose so that a glob bucket expands to its files.
      pytest "${{PYTEST_ARGS[@]}}" $BUCKET
    done

# Parameter lists for this recipe. environment, tag, n_repeat and test_case are
# the placeholder names referenced by `spec`; scope and time_limit are not used
# by the script itself.
# NOTE(review): scope and time_limit are presumably consumed by the recipe
# launcher (time_limit presumably in seconds) -- confirm.
# The script's yq query prints the test_case entries of this file verbatim, so
# that list must stay a plain sequence of paths with no comments inside it.
products:
  - environment:
      - lts
      - dev
    tag:
      - latest
      - legacy
    scope:
      - unit-tests
    n_repeat:
      - 1
    time_limit:
      - 1800
    test_case:
      - tests/unit_tests/data/
      - tests/unit_tests/dist_checkpointing/*.py
      - tests/unit_tests/dist_checkpointing/models/
      - tests/unit_tests/transformer/*.py
      - tests/unit_tests/transformer/moe
      - tests/unit_tests