azure-pipelines.yml 3.83 KB
Newer Older
1
2

jobs:
# Test job: runs once per entry of the strategy matrix below, on the agent
# pool named 'DS_testing'.
- job: DeepSpeed_Tests
  # Upper bound for a single matrix entry (6 hours).
  timeoutInMinutes: 360
  pool:
    name: 'DS_testing'

  strategy:
    matrix:
      # Every key of an entry becomes a pipeline variable that the steps of
      # this job reference as $(python.version), $(cuda.version), and so on.
      # Version values are quoted so they stay strings ('10.0', not 10).
      PyTorch12-CUDA100:
        python.version: '3.6'
        cuda.version: '10.0'
        pytorch.version: '1.2'
        torchvision.version: '0.4.0'
        # Only read by the model-test steps, which are currently commented out.
        runmodeltests: false
      # Disabled configurations, kept for reference.
      #PyTorch15-CUDA101:
      #  python.version: '3.7'
      #  cuda.version: '10.1'
      #  pytorch.version: '1.5.0+cu101'
      #  torchvision.version: '0.6.0+cu101'
      #  runmodeltests: true
      #PyTorch15-CUDA102:
      #  python.version: '3.7'
      #  cuda.version: '10.2'
      #  pytorch.version: '1.5'
      #  torchvision.version: '0.6.1'
      #  runmodeltests: true

  variables:
    # Conda environment name, derived from the matrix values so each
    # configuration gets its own environment.
    conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'
31
32

  steps:
  # Create the conda environment for this matrix entry.
  # nvidia's nvcc_linux-64=<version> package seems to install CUDA 10.1
  # regardless of the requested version, so most of the complexity below is a
  # workaround to get the compiler toolchain to match the cudatoolkit runtime.
  - script: |
      # Abort on the first failing command. Without this the trailing echo
      # decides the exit status, and the step would pass even if the
      # environment could not be created.
      set -e
      conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
      source activate $(conda_env)
      conda install -q --yes conda
      conda install -q --yes pip
      conda install -q --yes gxx_linux-64
      # cudatoolkit-dev is installed for every CUDA version except 10.2.
      if [[ $(cuda.version) != "10.2" ]]; then conda install --yes -c conda-forge cudatoolkit-dev=$(cuda.version) ; fi
      echo "PATH=$PATH, LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
    displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'
45

46
    # Manually install torch/torchvision first to enforce versioning.
47
  - script: |
      source activate $(conda_env)
      # From here on, stop at the first failure: if the pinned torch/torchvision
      # install fails, install.sh must not run and mask the error.
      # NOTE(review): errexit is enabled after activation so the environment's
      # activation scripts run exactly as before - move it up if they are known
      # to be errexit-safe.
      set -e
      pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
      #-f https://download.pytorch.org/whl/torch_stable.html
      ./install.sh --local_only
      #python -I basic_install_test.py
    displayName: 'Install DeepSpeed'
54
55

  # Diagnostic step: prints the interpreter, compiler and package versions
  # that are visible inside the conda environment.
  - script: |
      source activate $(conda_env)
      which python
      python --version
      which nvcc
      nvcc --version
      which deepspeed
      python -c "import torch; print('torch:', torch.__version__, torch)"
      python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
    displayName: 'Show environment'

67
68

  # Run the unit tests under tests/unit/. -x stops at the first failing test,
  # --durations=0 reports the duration of every test, and --forked (from the
  # pytest-forked plugin) runs each test in its own subprocess.
  - script: |
      source activate $(conda_env)
      pytest --durations=0 --forked --verbose -x tests/unit/
    displayName: 'Unit tests'

Jeff Rasley's avatar
Jeff Rasley committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#   - script: |
#       source activate $(conda_env)
#       ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
#       pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
#       cd tests/model/
#       rm -rf BingBertSquad/baseline
#       rm -rf Megatron_GPT2/baseline
#       pytest --durations=0 -s run_sanity_check.py
#     condition: and(succeeded(), eq(variables['runmodeltests'], true))
#     displayName: 'Model tests'

#   #BingBertSquad logs
#   - task: PublishPipelineArtifact@1
#     inputs:
#       targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
#       artifactName: BingBertSquad_logs
#     displayName: 'BingBertSquad log uploads'
#     condition: eq(variables['runmodeltests'], true)
91
92


93
94
# Formatting and lint job; its steps run pre-commit and pylint on an
# 'ubuntu-latest' image.
- job: Code_Quality_Checks
  pool:
    vmImage: 'ubuntu-latest'

  variables:
    # Conda environment shared by the steps of this job.
    conda_env: 'ds_codetest'
98

99
  steps:
  # Prepend $CONDA/bin to PATH for the following steps, using the
  # task.prependpath logging command.
  - bash: echo "##vso[task.prependpath]$CONDA/bin"
    displayName: 'Add conda to PATH'

103
104
105
106
  # (Re)create the Python 3.7 conda environment used by the quality checks,
  # then activate it once in this step.
  - displayName: 'Create code test environment'
    script: |
      conda create --force --yes -n $(conda_env) python=3.7
      source activate $(conda_env)
107

108
109
110
111
112
  # Install pre-commit into the environment and run every configured hook
  # against all files in the repository.
  - displayName: 'Formatting checks'
    script: |
      source activate $(conda_env)
      pip install pre-commit
      pre-commit run --all-files
113

114
115
116
117
118
  # Lint the deepspeed/ package with pylint. --exit-zero makes pylint return
  # success regardless of the messages it reports.
  - displayName: 'Code linter'
    script: |
      source activate $(conda_env)
      pip install pylint
      pylint --exit-zero deepspeed/