Unverified commit 4b5d549d, authored by James Lamb and committed by GitHub
Browse files

[ci] simplify CI configurations, parallelize compilation, test CUDA on Ubuntu 22.04 (#6458)

parent dd9da91f
......@@ -27,10 +27,10 @@ install:
- set PYTHON_VERSION=%CONFIGURATION%
- set CONDA_ENV="test-env"
- ps: |
$env:CMAKE_BUILD_PARALLEL_LEVEL = 4
$env:MINICONDA = "C:\Miniconda3-x64"
$env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH"
$env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER"
$env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim()
build: false
......
......@@ -10,6 +10,8 @@ SANITIZERS=${SANITIZERS:-""}
ARCH=$(uname -m)
LGB_VER=$(head -n 1 ${BUILD_DIRECTORY}/VERSION.txt)
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then
export CXX=g++-11
export CC=gcc-11
......
......@@ -6,6 +6,8 @@ function Check-Output {
}
}
$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim()
# unify environment variable for Azure DevOps and AppVeyor
if (Test-Path env:APPVEYOR) {
$env:APPVEYOR = "true"
......
......@@ -8,22 +8,74 @@ on:
branches:
- master
- release/*
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false
# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
# Workflow-level values. In this listing they are read back through the
# `env` context (`${{ env.github_actions }}`, `${{ env.os_name }}`,
# `${{ env.conda_env }}`) when the docker.env heredoc is written.
# NOTE(review): the lower-case names differ from the upper-case variables
# (GITHUB_ACTIONS, OS_NAME, CONDA_ENV) they are mapped to there.
env:
github_actions: 'true'
os_name: linux
conda_env: test-env
jobs:
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
  name: set up docker
  runs-on: [self-hosted, linux]
  timeout-minutes: 30
  steps:
    # Skipped unless the run was started with the 'restart_docker' input set
    # to true; for any other run the expression below is falsy.
    - name: Setup or update software on host machine
      if: ${{ inputs.restart_docker }}
      # Explicitly request bash: GitHub Actions then runs the script with
      # '-eo pipefail', so a failed 'curl' on the left side of a pipe fails
      # the step instead of feeding empty/partial data to the right side.
      shell: bash
      run: |
        # install core packages
        sudo apt-get update
        sudo apt-get install --no-install-recommends -y \
          apt-transport-https \
          ca-certificates \
          curl \
          gnupg-agent \
          lsb-release \
          software-properties-common
        # set up nvidia-docker
        # ('-f' on every download: an HTTP error must fail the step rather than
        # have an error page imported as a key or written as an apt source list)
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
        sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
        curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
        curl -fsSL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
        sudo apt-get update
        sudo apt-get install --no-install-recommends -y \
          containerd.io \
          docker-ce \
          docker-ce-cli \
          nvidia-docker2
        # NOTE(review): this makes the docker socket read/writable by every
        # user on the host; confirm that is acceptable for this runner.
        sudo chmod a+rw /var/run/docker.sock
        sudo systemctl restart docker
    # Unconditional no-op step, so the job still executes a step and
    # succeeds when the step above is skipped.
    - name: mark job successful
      run: |
        exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
timeout-minutes: 60
needs: [restart-docker]
container:
image: ${{ matrix.image }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
CONDA_ENV: test-env
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
......@@ -32,78 +84,47 @@ jobs:
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
image: nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04
task: cuda
steps:
- name: Setup or update software on host machine
- name: Install latest git
run: |
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -sL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
containerd.io \
docker-ce \
docker-ce-cli \
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH
# check GPU usage
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest
......
......@@ -9,7 +9,6 @@ on:
env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
TASK: 'check-links'
......
......@@ -15,8 +15,8 @@ concurrency:
cancel-in-progress: true
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
jobs:
test:
......@@ -73,7 +73,6 @@ jobs:
export OS_NAME="linux"
fi
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export LGB_VER=$(head -n 1 VERSION.txt)
export CONDA=${HOME}/miniforge
export PATH=${CONDA}/bin:${PATH}
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
......
......@@ -15,6 +15,7 @@ concurrency:
cancel-in-progress: true
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
# hack to get around this:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
_R_CHECK_SYSTEM_CLOCK_: 0
......@@ -189,7 +190,6 @@ jobs:
run: |
export TASK="${{ matrix.task }}"
export COMPILER="${{ matrix.compiler }}"
export GITHUB_ACTIONS="true"
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
......@@ -216,7 +216,6 @@ jobs:
$env:R_VERSION = "${{ matrix.r_version }}"
$env:R_BUILD_TYPE = "${{ matrix.build_type }}"
$env:COMPILER = "${{ matrix.compiler }}"
$env:GITHUB_ACTIONS = "true"
$env:TASK = "${{ matrix.task }}"
& "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
test-r-sanitizers:
......
......@@ -19,7 +19,6 @@ concurrency:
env:
COMPILER: 'gcc'
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
......
......@@ -11,6 +11,7 @@ pr:
variables:
AZURE: 'true'
PYTHON_VERSION: '3.11'
CMAKE_BUILD_PARALLEL_LEVEL: 4
CONDA_ENV: test-env
runCodesignValidationInjection: false
skipComponentGovernanceDetection: true
......@@ -82,7 +83,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
echo "##vso[task.prependpath]/usr/lib64/openmpi/bin"
echo "##vso[task.prependpath]$CONDA/bin"
displayName: 'Set variables'
......@@ -159,7 +159,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$HOME/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
......@@ -225,7 +224,6 @@ jobs:
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
......@@ -283,7 +281,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$AGENT_HOMEDIRECTORY/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment