OpenDAS / AutoAWQ · Commit 79b6fbd8 (unverified)

Authored Feb 16, 2024 by Casper; committed via GitHub on Feb 16, 2024.
Parent: 74053104

Support Fused Mixtral on multi-GPU (#352)
Changes: 3 changed files with 15 additions and 11 deletions (+15 -11)

  .github/workflows/build.yaml   +2  -1
  awq/models/mixtral.py          +1  -1
  setup.py                       +12 -9
.github/workflows/build.yaml

...
@@ -111,6 +111,7 @@ jobs:
           if ( $env:CUDA_VERSION -eq $env:PYPI_CUDA_VERSION ){
             $env:PYPI_BUILD = 1
           }
+          $env:PYPI_FORCE_TAGS = 1
           python setup.py sdist bdist_wheel
...
@@ -223,7 +224,7 @@ jobs:
           python --version
           which python
-          ROCM_VERSION=${{ matrix.rocm }} python setup.py sdist bdist_wheel
+          ROCM_VERSION=${{ matrix.rocm }} PYPI_FORCE_TAGS=1 python setup.py sdist bdist_wheel
       - name: Upload Assets
         uses: shogo82148/actions-upload-release-asset@v1
...
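The workflow change only exports PYPI_FORCE_TAGS=1 in the CI build jobs; its intended effect shows up in the wheel filename. The sketch below, not part of this commit, reproduces that check locally: it builds with the flag set and reports whether the file in dist/ carries a platform tag instead of the generic py3-none-any suffix. The dist/ path and the filename heuristic are assumptions.

```python
# Hedged sketch (not from this commit): build with PYPI_FORCE_TAGS=1 and
# check whether the resulting wheel is platform-tagged, which is what the
# CI change above aims for. dist/ location and filename check are assumed.
import glob
import os
import subprocess
import sys

env = dict(os.environ, PYPI_FORCE_TAGS="1")
subprocess.run([sys.executable, "setup.py", "sdist", "bdist_wheel"], check=True, env=env)

for wheel in sorted(glob.glob("dist/*.whl")):
    # A pure-Python wheel ends in "py3-none-any.whl"; with the forced
    # CUDAExtension, setuptools should emit a platform-specific tag instead.
    tagged = not wheel.endswith("py3-none-any.whl")
    print(f"{os.path.basename(wheel)} -> platform tagged: {tagged}")
```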
awq/models/mixtral.py

...
@@ -127,7 +127,7 @@ class MixtralFuser:
                )
                sparse_moe = module.block_sparse_moe
-               if isinstance(sparse_moe.experts[0].w1, WQLinear_GEMM) and torch.cuda.device_count() == 1:
+               if isinstance(sparse_moe.experts[0].w1, WQLinear_GEMM):
                    fused_w1w3s = [
                        fuse_linears(
                            [
...
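Dropping the torch.cuda.device_count() == 1 guard means the fused sparse-MoE path is taken whenever the expert weights are WQLinear_GEMM, including when the model is sharded across several GPUs. A hedged usage sketch, not part of this commit: it assumes the AutoAWQForCausalLM.from_quantized signature with fuse_layers and device_map arguments, and the checkpoint path is hypothetical.

```python
# Hedged usage sketch (not from this commit): load a quantized Mixtral
# checkpoint with fused layers while letting accelerate shard it over all
# visible GPUs. The checkpoint path and generation settings are placeholders.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "path/to/mixtral-instruct-awq"  # hypothetical local AWQ checkpoint

model = AutoAWQForCausalLM.from_quantized(
    model_path,
    fuse_layers=True,   # fusing is no longer skipped when device_count() > 1
    device_map="auto",  # shard the experts across the available GPUs
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

inputs = tokenizer("Mixture-of-experts models are", return_tensors="pt").to("cuda:0")
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```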
setup.py

...
@@ -132,6 +132,18 @@ if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
         "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
     )
 
+force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
+if force_extension == "1":
+    # NOTE: We create an empty CUDAExtension because torch helps us with
+    # creating the right boilerplate to enable correct targeting of
+    # the autoawq-kernels package
+    common_setup_kwargs["ext_modules"] = [
+        CUDAExtension(
+            name="test_kernel",
+            sources=[],
+        )
+    ]
+
 setup(
     packages=find_packages(),
     install_requires=requirements,
...
@@ -139,14 +151,5 @@ setup(
         "eval": ["lm_eval>=0.4.0", "tabulate", "protobuf", "evaluate", "scipy"],
         "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"]
     },
-    # NOTE: We create an empty CUDAExtension because torch helps us with
-    # creating the right boilerplate to enable correct targeting of
-    # the autoawq-kernels package
-    ext_modules=[
-        CUDAExtension(
-            name="__build_artifact_for_awq_kernel_targeting",
-            sources=[],
-        )
-    ],
     **common_setup_kwargs,
 )
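The net effect of the setup.py change is that the empty CUDAExtension is no longer attached unconditionally; it is only injected when PYPI_FORCE_TAGS=1, so plain source installs stay pure-Python while CI-built wheels keep their platform tags. Below is a minimal self-contained sketch of the same pattern, not the project's actual setup.py: the package name, version, and the BuildExtension cmdclass are assumptions added to make the example stand alone.

```python
# Minimal sketch of the conditional dummy-extension pattern used above.
# Not AutoAWQ's real setup.py: package name and cmdclass are assumptions.
import os
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

common_setup_kwargs = {}

if os.getenv("PYPI_FORCE_TAGS", "0") == "1":
    # The extension compiles nothing (sources=[]), but its presence makes
    # setuptools/torch emit a platform- and ABI-tagged wheel instead of a
    # generic py3-none-any one.
    common_setup_kwargs["ext_modules"] = [
        CUDAExtension(name="test_kernel", sources=[])
    ]
    common_setup_kwargs["cmdclass"] = {"build_ext": BuildExtension}

setup(
    name="example_pkg",  # hypothetical
    version="0.0.1",
    packages=find_packages(),
    **common_setup_kwargs,
)
```

With this pattern, running `PYPI_FORCE_TAGS=1 python setup.py bdist_wheel` yields a platform-tagged wheel, while the same command without the variable produces a pure-Python wheel.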