azure-pipelines.yml 3.79 KB
Newer Older
1
2
3
4
5
# Paths listed under `exclude` are left out of CI trigger evaluation.
trigger:
  paths:
    exclude:
      - 'docs'
      - 'azure'
6
jobs:
# DeepSpeed unit-test job; runs on the 'DS_testing' agent pool.
- job: DeepSpeed_Tests
  # Job-level timeout: 360 minutes (6 hours).
  timeoutInMinutes: 360
  pool:
    name: 'DS_testing'
11
12
13

  # Each matrix entry runs the job once with the listed variables defined.
  # Version values are quoted so they stay strings (e.g. '1.2', '10.0').
  strategy:
    matrix:
      PyTorch12-CUDA100:
        python.version: '3.6'
        cuda.version: '10.0'
        pytorch.version: '1.2'
        torchvision.version: '0.4.0'
        runmodeltests: false
      # Disabled configurations, kept for reference. Strip the leading '#'
      # from every line of an entry to re-enable it.
      #PyTorch15-CUDA101:
      #  python.version: '3.7'
      #  cuda.version: '10.1'
      #  pytorch.version: '1.5.0+cu101'
      #  torchvision.version: '0.6.0+cu101'
      #  runmodeltests: true
      #PyTorch15-CUDA102:
      #  python.version: '3.7'
      #  cuda.version: '10.2'
      #  pytorch.version: '1.5'
      #  torchvision.version: '0.6.1'
      #  runmodeltests: true
32

33
34
  # Name of the conda environment used by the steps of this job; it is built
  # from the python/cuda/pytorch version variables of the current matrix entry.
  variables:
    conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'
35
36

  steps:
    # Unfortunately nvidia's nvcc_linux-64=<version> seems to install 10.1 regardless?
    # Most of this complexity is a workaround to get the compiler toolchain to match the
    # cudatoolkit runtime
  - script: |
      # Abort on the first failing command; otherwise only the exit status of
      # the trailing echo would decide whether this step passes.
      set -e
      conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
      source activate $(conda_env)
      conda install -q --yes conda
      conda install -q --yes pip
      conda install -q --yes gxx_linux-64
      echo "PATH=$PATH, LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
    displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'
48

49
    # Manually install torch/torchvision first to enforce versioning.
  - script: |
      # Stop at the first failure so a failed pinned install is not masked by
      # the commands that follow.
      set -e
      source activate $(conda_env)
      pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
      # Quoted so the shell never glob-expands the [dev] extras specifier.
      pip install '.[dev]'
      ds_report
    displayName: 'Install DeepSpeed'
56
57

  # Diagnostic step: prints the locations and versions of python, nvcc,
  # deepspeed and torch, and whether torch reports CUDA as available.
  - script: |
      source activate $(conda_env)
      which python
      python --version
      which nvcc
      nvcc --version
      which deepspeed
      python -c "import torch; print('torch:', torch.__version__, torch)"
      python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
    displayName: 'Show environment'

69
70

  # Runs the unit tests under tests/unit/, stopping at the first failure (-x).
  - script: |
      source activate $(conda_env)
      # Start from an empty ./torch-extensions directory; rm -rf succeeds
      # even when the directory does not exist, so no existence check is needed.
      rm -rf ./torch-extensions
      TORCH_EXTENSIONS_DIR=./torch-extensions pytest --durations=0 --forked --verbose -x tests/unit/
    displayName: 'Unit tests'

Jeff Rasley's avatar
Jeff Rasley committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#   - script: |
#       source activate $(conda_env)
#       ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
#       pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
#       cd tests/model/
#       rm -rf BingBertSquad/baseline
#       rm -rf Megatron_GPT2/baseline
#       pytest --durations=0 -s run_sanity_check.py
#     condition: and(succeeded(), eq(variables['runmodeltests'], true))
#     displayName: 'Model tests'

#   #BingBertSquad logs
#   - task: PublishPipelineArtifact@1
#     inputs:
#       targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
#       artifactName: BingBertSquad_logs
#     displayName: 'BingBertSquad log uploads'
#     condition: eq(variables['runmodeltests'], true)
94
95


96
97
# Formatting and lint checks; runs on the 'ubuntu-latest' VM image.
- job: Code_Quality_Checks
  pool:
    vmImage: 'ubuntu-latest'
  variables:
    # Name of the conda environment shared by the steps of this job.
    conda_env: 'ds_codetest'
101

102
  steps:
  # Emits the task.prependpath logging command so that $CONDA/bin is put at
  # the front of PATH for the steps that follow.
  - bash: echo "##vso[task.prependpath]$CONDA/bin"
    displayName: Add conda to PATH

106
107
108
109
  # Creates a Python 3.7 conda environment for the checks below, replacing
  # any existing environment of the same name (--force), then activates it.
  - script: |
      conda create --yes --force --name $(conda_env) python=3.7
      source activate $(conda_env)
    displayName: 'Create code test environment'
110

111
112
113
114
115
  # Installs pre-commit into the conda environment and runs the repository's
  # configured hooks against every file.
  - script: |
      source activate $(conda_env)
      pip install pre-commit
      pre-commit run --all-files
    displayName: 'Formatting checks'
116

117
118
119
120
121
  # Installs pylint and lints the deepspeed/ package. --exit-zero makes pylint
  # return success regardless of findings, so this step reports lint output
  # without failing the build.
  - script: |
      source activate $(conda_env)
      pip install pylint
      pylint --exit-zero deepspeed/
    displayName: 'Code linter'