Unverified commit 827b6c25, authored by Sayak Paul and committed by GitHub
Browse files

[CI] Add quantization (#9832)

* add quantization to nightly CI.

* prep.

* fix lib name.

* remove deps that are not needed.

* fix slice.
parent 784b351f
...@@ -347,6 +347,64 @@ jobs: ...@@ -347,6 +347,64 @@ jobs:
pip install slack_sdk tabulate pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# Nightly job that runs the quantization test suites on a GPU runner.
# Each matrix entry supplies `backend` (the package installed with pip below)
# and `test_location` (the sub-directory of tests/quantization that is run).
run_nightly_quantization_tests:
  name: Torch quantization nightly tests
  strategy:
    # A failing matrix entry does not cancel the remaining entries.
    fail-fast: false
    max-parallel: 2
    matrix:
      config:
        - backend: "bitsandbytes"
          test_location: "bnb"
  runs-on:
    group: aws-g6e-xlarge-plus
  container:
    image: diffusers/diffusers-pytorch-cuda
    options: --shm-size "20gb" --ipc host --gpus 0
  steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

    # Surfaces the visible GPU(s) and driver in the job log.
    - name: NVIDIA-SMI
      run: nvidia-smi

    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        python -m uv pip install -U ${{ matrix.config.backend }}
        python -m uv pip install pytest-reportlog

    - name: Environment
      run: |
        python utils/print_env.py

    - name: ${{ matrix.config.backend }} quantization tests on GPU
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
        # NOTE(review): presumably a GPU-memory threshold read by the tests;
        # unit not visible here - confirm against the test utilities.
        BIG_GPU_MEMORY: 40
      # Report names are keyed by backend so matrix entries do not collide.
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
          --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
          tests/quantization/${{ matrix.config.test_location }}

    # Only on failure: print the stats and short failure summaries from reports/.
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_${{ matrix.config.backend }}_torch_cuda_stats.txt
        cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt

    # Always upload the reports directory, whether the tests passed or failed.
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: torch_cuda_${{ matrix.config.backend }}_reports
        path: reports

    # Appends the output of utils/log_reports.py to the job summary.
    - name: Generate Report and Notify Channel
      if: always()
      run: |
        pip install slack_sdk tabulate
        python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# M1 runner currently not well supported # M1 runner currently not well supported
# TODO: (Dhruv) add these back when we setup better testing for Apple Silicon # TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
# run_nightly_tests_apple_m1: # run_nightly_tests_apple_m1:
......
...@@ -432,7 +432,6 @@ class SlowBnb4BitTests(Base4bitTests): ...@@ -432,7 +432,6 @@ class SlowBnb4BitTests(Base4bitTests):
expected_slice = np.array([0.1123, 0.1296, 0.1609, 0.1042, 0.1230, 0.1274, 0.0928, 0.1165, 0.1216]) expected_slice = np.array([0.1123, 0.1296, 0.1609, 0.1042, 0.1230, 0.1274, 0.0928, 0.1165, 0.1216])
max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
print(f"{max_diff=}")
self.assertTrue(max_diff < 1e-2) self.assertTrue(max_diff < 1e-2)
def test_generate_quality_dequantize(self): def test_generate_quality_dequantize(self):
......
...@@ -369,7 +369,7 @@ class SlowBnb8bitTests(Base8bitTests): ...@@ -369,7 +369,7 @@ class SlowBnb8bitTests(Base8bitTests):
output_type="np", output_type="np",
).images ).images
out_slice = output[0, -3:, -3:, -1].flatten() out_slice = output[0, -3:, -3:, -1].flatten()
expected_slice = np.array([0.0149, 0.0322, 0.0073, 0.0134, 0.0332, 0.011, 0.002, 0.0232, 0.0193]) expected_slice = np.array([0.0376, 0.0359, 0.0015, 0.0449, 0.0479, 0.0098, 0.0083, 0.0295, 0.0295])
max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
self.assertTrue(max_diff < 1e-2) self.assertTrue(max_diff < 1e-2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment