[devops] fix extension building (#5427)

070df689 · Hongxin Liu · GitHub · 822241a9 · 070df689 · 070df689
Unverified Commit 070df689 authored Mar 05, 2024 by Hongxin Liu Committed by GitHub Mar 05, 2024
20 changed files
--- a/.cuda_ext.json
+++ b/.cuda_ext.json
 {
  "build": [
    {
-      "torch_command": "pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102",
-      "cuda_image": "hpcaitech/cuda-conda:10.2"
+      "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121",
+      "cuda_image": "hpcaitech/cuda-conda:12.1"
    },
    {
-      "torch_command": "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113",
-      "cuda_image": "hpcaitech/cuda-conda:11.3"
+      "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118",
+      "cuda_image": "hpcaitech/cuda-conda:11.8"
    },
    {
-      "torch_command": "pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116",
-      "cuda_image": "hpcaitech/cuda-conda:11.6"
+      "torch_command": "pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1",
+      "cuda_image": "hpcaitech/cuda-conda:11.7"
    }
  ]
 }
--- a/.github/workflows/compatiblity_test_on_dispatch.yml
+++ b/.github/workflows/compatiblity_test_on_dispatch.yml
@@ -83,7 +83,7 @@ jobs:
          fi
      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
          pip install -r requirements/requirements-test.txt
      - name: Unit Testing
        run: |

--- a/.github/workflows/compatiblity_test_on_pr.yml
+++ b/.github/workflows/compatiblity_test_on_pr.yml
@@ -78,7 +78,7 @@ jobs:

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
          pip install -r requirements/requirements-test.txt
      - name: Unit Testing
        run: |

--- a/.github/workflows/compatiblity_test_on_schedule.yml
+++ b/.github/workflows/compatiblity_test_on_schedule.yml
@@ -75,7 +75,7 @@ jobs:

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
          pip install -r requirements/requirements-test.txt

      - name: Unit Testing

--- a/.github/workflows/cuda_ext_check_before_merge.yml
+++ b/.github/workflows/cuda_ext_check_before_merge.yml
@@ -51,4 +51,4 @@ jobs:

      - name: Build
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
--- a/.github/workflows/doc_test_on_pr.yml
+++ b/.github/workflows/doc_test_on_pr.yml
@@ -89,7 +89,7 @@ jobs:
      - name: Install ColossalAI
        run: |
          source activate pytorch
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Test the Doc
        run: |

--- a/.github/workflows/doc_test_on_schedule.yml
+++ b/.github/workflows/doc_test_on_schedule.yml
@@ -32,7 +32,7 @@ jobs:

      - name: Install ColossalAI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Install Doc Test Requirements
        run: |

--- a/.github/workflows/example_check_on_dispatch.yml
+++ b/.github/workflows/example_check_on_dispatch.yml
@@ -53,7 +53,7 @@ jobs:
        uses: actions/checkout@v3
      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
      - name: Test the example
        run: |
          dir=${{ matrix.directory }}

--- a/.github/workflows/example_check_on_pr.yml
+++ b/.github/workflows/example_check_on_pr.yml
@@ -88,7 +88,7 @@ jobs:

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Test the example
        run: |

--- a/.github/workflows/example_check_on_schedule.yml
+++ b/.github/workflows/example_check_on_schedule.yml
@@ -42,7 +42,7 @@ jobs:

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Traverse all files
        run: |

--- a/colossalai/cli/check/check_installation.py
+++ b/colossalai/cli/check/check_installation.py
@@ -76,7 +76,7 @@ def check_installation():
    click.echo("")
    click.echo(f"Note:")
    click.echo(
-        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable CUDA_EXT=1 is set"
+        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable BUILD_EXT=1 is set"
    )
    click.echo(f"2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime")


--- a/colossalai/legacy/inference/serving/ray_serve/README.md
+++ b/colossalai/legacy/inference/serving/ray_serve/README.md
@@ -25,7 +25,7 @@ conda install -c conda-forge cupy cudnn cutensor nccl cuda-version=11.6

 # install colossalai with PyTorch extensions
 cd <path_to_ColossalAI_repo>
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install other dependencies
 pip install triton==2.0.0.dev20221202

--- a/colossalai/legacy/inference/serving/torch_serve/README.md
+++ b/colossalai/legacy/inference/serving/torch_serve/README.md
@@ -25,7 +25,7 @@ conda install -c "nvidia/label/cuda-11.6.2" cuda-toolkit
 cd <path_to_ColossalAI_repo>
 pip install -r requirements/requirements.txt
 pip install -r requirements/requirements-test.txt
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install torchserve
 cd <path_to_torch_serve_repo>

--- a/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile
+++ b/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile
@@ -38,7 +38,7 @@ ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git && \
    cd ./ColossalAI && \
    git checkout 3e05c07bb8921f2a8f9736b6f6673d4e9f1697d0 && \
-    CUDA_EXT=1 pip install -v --no-cache-dir .
+    BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans

--- a/colossalai/nn/optimizer/cpu_adam.py
+++ b/colossalai/nn/optimizer/cpu_adam.py
@@ -78,7 +78,7 @@ class CPUAdam(NVMeOptimizer):
        super(CPUAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
        self.adamw_mode = adamw_mode
        cpu_adam = CPUAdamLoader().load()
-        # if you find yourself stuck here, make sure that you install colossalai with CUDA_EXT=1 specification
+        # if you find yourself stuck here, make sure that you install colossalai with BUILD_EXT=1 specification
        self.cpu_adam_op = cpu_adam.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)

    def torch_adam_update(

--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -37,7 +37,7 @@ RUN git clone https://github.com/NVIDIA/apex && \
 ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git \
    && cd ./ColossalAI \
-    && CUDA_EXT=1 pip install -v --no-cache-dir .
+    && BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans

--- a/docs/README-zh-Hans.md
+++ b/docs/README-zh-Hans.md
@@ -146,25 +146,25 @@ Colossal-AI 为您提供了一系列并行组件。我们的目标是让您的
 [[HuggingFace model weights]](https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-13b-base)
 [[Modelscope model weights]](https://www.modelscope.cn/models/colossalai/Colossal-LLaMA-2-13b-base/summary)

-|              Model             |  Backbone  | Tokens Consumed |     MMLU (5-shot)    | CMMLU (5-shot)| AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot)  |
-| :----------------------------: | :--------: | :-------------: | :------------------: | :-----------: | :--------------: | :-------------: | :-------------: |
-|          Baichuan-7B           |     -      |      1.2T       |    42.32 (42.30)     | 44.53 (44.02) |        38.72     |       36.74     |       42.80     |
-|       Baichuan-13B-Base        |     -      |      1.4T       |    50.51 (51.60)     | 55.73 (55.30) |        47.20     |       51.41     |       53.60     |
-|       Baichuan2-7B-Base        |     -      |      2.6T       |    46.97 (54.16)     | 57.67 (57.07) |        45.76     |       52.60     |       54.00     |
-|       Baichuan2-13B-Base       |     -      |      2.6T       |    54.84 (59.17)     | 62.62 (61.97) |        52.08     |       58.25     |       58.10     |
-|           ChatGLM-6B           |     -      |      1.0T       |    39.67 (40.63)     |   41.17 (-)   |        40.10     |       36.53     |       38.90     |
-|          ChatGLM2-6B           |     -      |      1.4T       |    44.74 (45.46)     |   49.40 (-)   |        46.36     |       45.49     |       51.70     |
-|          InternLM-7B           |     -      |      1.6T       |    46.70 (51.00)     |   52.00 (-)   |        44.77     |       61.64     |       52.80     |
-|            Qwen-7B             |     -      |      2.2T       |        54.29 (56.70) | 56.03 (58.80) |        52.47     |       56.42     |       59.60     |
-|           Llama-2-7B           |     -      |      2.0T       |    44.47 (45.30)     |   32.97 (-)   |        32.60     |       25.46     |         -       |
-| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B |      1.0T       |        37.43         |     29.92     |        32.00     |       27.57     |         -       |
-| wenge-research/yayi-7b-llama2  | Llama-2-7B |        -        |        38.56         |     31.52     |        30.99     |       25.95     |         -       |
-| ziqingyang/chinese-llama-2-7b  | Llama-2-7B |        -        |        33.86         |     34.69     |        34.52     |       25.18     |        34.2     |
-| TigerResearch/tigerbot-7b-base | Llama-2-7B |      0.3T       |        43.73         |     42.04     |        37.64     |       30.61     |         -       |
-|  LinkSoul/Chinese-Llama-2-7b   | Llama-2-7B |        -        |        48.41         |     38.31     |        38.45     |       27.72     |         -       |
-|       FlagAlpha/Atom-7B        | Llama-2-7B |      0.1T       |        49.96         |     41.10     |        39.83     |       33.00     |         -       |
-| IDEA-CCNL/Ziya-LLaMA-13B-v1.1  | Llama-13B  |      0.11T      |        50.25         |     40.99     |        40.04     |       30.54     |         -       |
-|  **Colossal-LLaMA-2-7b-base**  | Llama-2-7B |   **0.0085T**   |        53.06         |     49.89     |        51.48     |       58.82     |        50.2     |
+|             Model              |  Backbone  | Tokens Consumed | MMLU (5-shot) | CMMLU (5-shot) | AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot) |
+|:------------------------------:|:----------:|:---------------:|:-------------:|:--------------:|:----------------:|:---------------:|:--------------:|
+|          Baichuan-7B           |     -      |      1.2T       | 42.32 (42.30) | 44.53 (44.02)  |      38.72       |      36.74      |     42.80      |
+|       Baichuan-13B-Base        |     -      |      1.4T       | 50.51 (51.60) | 55.73 (55.30)  |      47.20       |      51.41      |     53.60      |
+|       Baichuan2-7B-Base        |     -      |      2.6T       | 46.97 (54.16) | 57.67 (57.07)  |      45.76       |      52.60      |     54.00      |
+|       Baichuan2-13B-Base       |     -      |      2.6T       | 54.84 (59.17) | 62.62 (61.97)  |      52.08       |      58.25      |     58.10      |
+|           ChatGLM-6B           |     -      |      1.0T       | 39.67 (40.63) |   41.17 (-)    |      40.10       |      36.53      |     38.90      |
+|          ChatGLM2-6B           |     -      |      1.4T       | 44.74 (45.46) |   49.40 (-)    |      46.36       |      45.49      |     51.70      |
+|          InternLM-7B           |     -      |      1.6T       | 46.70 (51.00) |   52.00 (-)    |      44.77       |      61.64      |     52.80      |
+|            Qwen-7B             |     -      |      2.2T       | 54.29 (56.70) | 56.03 (58.80)  |      52.47       |      56.42      |     59.60      |
+|           Llama-2-7B           |     -      |      2.0T       | 44.47 (45.30) |   32.97 (-)    |      32.60       |      25.46      |       -        |
+| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B |      1.0T       |     37.43     |     29.92      |      32.00       |      27.57      |       -        |
+| wenge-research/yayi-7b-llama2  | Llama-2-7B |        -        |     38.56     |     31.52      |      30.99       |      25.95      |       -        |
+| ziqingyang/chinese-llama-2-7b  | Llama-2-7B |        -        |     33.86     |     34.69      |      34.52       |      25.18      |      34.2      |
+| TigerResearch/tigerbot-7b-base | Llama-2-7B |      0.3T       |     43.73     |     42.04      |      37.64       |      30.61      |       -        |
+|  LinkSoul/Chinese-Llama-2-7b   | Llama-2-7B |        -        |     48.41     |     38.31      |      38.45       |      27.72      |       -        |
+|       FlagAlpha/Atom-7B        | Llama-2-7B |      0.1T       |     49.96     |     41.10      |      39.83       |      33.00      |       -        |
+| IDEA-CCNL/Ziya-LLaMA-13B-v1.1  | Llama-13B  |      0.11T      |     50.25     |     40.99      |      40.04       |      30.54      |       -        |
+|  **Colossal-LLaMA-2-7b-base**  | Llama-2-7B |   **0.0085T**   |     53.06     |     49.89      |      51.48       |      58.82      |      50.2      |


 ### ColossalChat
@@ -406,10 +406,10 @@ pip install colossalai

 **注：目前只支持Linux。**

-但是，如果你想在安装时就直接构建PyTorch扩展，您可以设置环境变量`CUDA_EXT=1`.
+但是，如果你想在安装时就直接构建PyTorch扩展，您可以设置环境变量`BUILD_EXT=1`.

 ```bash
-CUDA_EXT=1 pip install colossalai
+BUILD_EXT=1 pip install colossalai
 ```

 **否则，PyTorch扩展只会在你实际需要使用它们时在运行时里被构建。**
@@ -438,7 +438,7 @@ pip install .
 我们默认在`pip install`时不安装PyTorch扩展，而是在运行时临时编译，如果你想要提前安装这些扩展的话（在使用融合优化器时会用到），可以使用以下命令。

 ```shell
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 <p align="right">(<a href="#top">返回顶端</a>)</p>

--- a/docs/source/en/get_started/installation.md
+++ b/docs/source/en/get_started/installation.md
@@ -42,7 +42,7 @@ pip install -r requirements/requirements.txt
 BUILD_EXT=1 pip install .
 ```

-If you don't want to install and enable CUDA kernel fusion (compulsory installation when using fused optimizer), just don't specify the `CUDA_EXT`:
+If you don't want to install and enable CUDA kernel fusion (which is required when using the fused optimizer), simply leave `BUILD_EXT` unset:

 ```shell
 pip install .

--- a/examples/images/diffusion/README.md
+++ b/examples/images/diffusion/README.md
@@ -77,7 +77,7 @@ git clone https://github.com/hpcaitech/ColossalAI.git
 cd ColossalAI

 # install colossalai
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 #### Step 3: Accelerate with flash attention by xformers (Optional)

--- a/examples/images/diffusion/test_ci.sh
+++ b/examples/images/diffusion/test_ci.sh
@@ -8,7 +8,7 @@ conda activate ldm
 conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
 pip install transformers diffusers invisible-watermark

-CUDA_EXT=1  pip install colossalai
+BUILD_EXT=1  pip install colossalai

 wget https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt