azure-pipelines.yml 3.65 KB
Newer Older
1
2

jobs:
3
# GPU test job. For every matrix entry it builds a dedicated conda environment,
# installs DeepSpeed into it, prints the resulting toolchain, and then runs the
# unit tests followed (when `runmodeltests` is true) by the model tests.
- job: DeepSpeed_Tests
  timeoutInMinutes: 360
  pool:
    name: 'DS_testing'

  strategy:
    matrix:
      PyTorch12-CUDA100:
        python.version: '3.6'
        cuda.version: '10.0'
        pytorch.version: '1.2'
        torchvision.version: '0.4.0'
        runmodeltests: true
      # Disabled configurations. Delete the leading '#' of every line of an
      # entry to re-enable it.
      #PyTorch15-CUDA101:
      #  python.version: '3.7'
      #  cuda.version: '10.1'
      #  pytorch.version: '1.5.0+cu101'
      #  torchvision.version: '0.6.0+cu101'
      #  runmodeltests: true
      #PyTorch15-CUDA102:
      #  python.version: '3.7'
      #  cuda.version: '10.2'
      #  pytorch.version: '1.5'
      #  torchvision.version: '0.6.1'
      #  runmodeltests: true

  variables:
    # Environment name derived from the matrix versions, so each matrix entry
    # gets its own conda environment.
    conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'

  steps:
  # Unfortunately nvidia's nvcc_linux-64=<version> seems to install 10.1 regardless?
  # Most of this complexity is a workaround to get the compiler toolchain to match the
  # cudatoolkit runtime
  - script: |
      conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
      source activate $(conda_env)
      conda install -q --yes conda
      conda install -q --yes pip
      conda install -q --yes gxx_linux-64
      if [[ $(cuda.version) != "10.2" ]]; then conda install --yes -c conda-forge cudatoolkit-dev=$(cuda.version) ; fi
    displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'

  # Manually install torch/torchvision first to enforce versioning.
  # `set -e` makes the step fail as soon as the pinned pip install fails;
  # without it only the exit status of the last command counts, so a failed
  # pip install would be ignored and install.sh would run regardless.
  # It is enabled after activation so that the environment's activation
  # scripts are not subject to errexit.
  - script: |
      source activate $(conda_env)
      set -e
      pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
      #-f https://download.pytorch.org/whl/torch_stable.html
      ./install.sh --local_only
      #python -I basic_install_test.py
    displayName: 'Install DeepSpeed'

  # Purely informational: print interpreter, compiler and package versions so
  # the build log records what the tests ran against.
  - script: |
      source activate $(conda_env)
      which python
      python --version
      which nvcc
      nvcc --version
      which deepspeed
      python -c "import torch; print('torch:', torch.__version__, torch)"
      python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
    displayName: 'Show environment'

  # -x stops at the first failing unit test.
  - script: |
      source activate $(conda_env)
      pytest --durations=0 --forked --verbose -x tests/unit/
    displayName: 'Unit tests'

  # Model tests run only when all earlier steps succeeded and the matrix entry
  # sets runmodeltests. The baseline directories are removed before the run.
  - script: |
      source activate $(conda_env)
      ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
      pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
      cd tests/model/
      rm -rf BingBertSquad/baseline
      rm -rf Megatron_GPT2/baseline
      pytest --durations=0 -s run_sanity_check.py
    condition: and(succeeded(), eq(variables['runmodeltests'], true))
    displayName: 'Model tests'

  # BingBertSquad logs
  # The condition has no succeeded() check, so the upload is also attempted
  # after a failed test step whenever model tests are enabled.
  - task: PublishPipelineArtifact@1
    inputs:
      targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
      artifactName: BingBertSquad_logs
    displayName: 'BingBertSquad log uploads'
    condition: eq(variables['runmodeltests'], true)
90
91


92
93
94
95
96
# Static-analysis job: runs the repository's pre-commit hooks and pylint inside
# a dedicated conda environment.
- job: Code_Quality_Checks
  variables:
    conda_env: 'ds_codetest'
  pool:
    name: 'DS_testing'

  steps:
  # Build the environment with Python 3.7. Activation is the last command, so
  # its exit status decides whether this step passes.
  - script: |
      conda create --force --yes -n $(conda_env) python=3.7
      source activate $(conda_env)
    displayName: 'Create code test environment'

  # Run every configured pre-commit hook against all files in the repository.
  - script: |
      source activate $(conda_env)
      pip install pre-commit
      pre-commit run --all-files
    displayName: 'Formatting checks'

  # pylint is invoked with --exit-zero, so its findings are reported in the
  # log without failing the step.
  - script: |
      source activate $(conda_env)
      pip install pylint
      pylint --exit-zero deepspeed/
    displayName: 'Code linter'