OpenDAS / text-generation-inference · Commits
Commit 8bd0adb1 (unverified), authored Jul 27, 2023 by OlivierDehaene, committed by GitHub on Jul 27, 2023
fix(server): fix quantization python requirements (#708)
Parent: e64a6589
Showing 6 changed files with 30 additions and 21 deletions:
server/poetry.lock                                     +13   -1
server/pyproject.toml                                   +1   -0
server/requirements.txt                                 +1   -0
server/text_generation_server/models/flash_rw.py        +0   -1
server/text_generation_server/server.py                +13  -13
server/text_generation_server/utils/gptq/quantize.py    +2   -6
server/poetry.lock  (+13, -1)

@@ -624,6 +624,14 @@ python-versions = ">=3.8"
 [package.dependencies]
 mpmath = ">=0.19"
 
+[[package]]
+name = "texttable"
+version = "1.6.7"
+description = "module to create simple ASCII tables"
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "tokenizers"
 version = "0.13.3"

@@ -810,7 +818,7 @@ bnb = ["bitsandbytes"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.9"
-content-hash = "65afc4bfa07da4b1427d269fa745939da3851eaede9a8478f5a4bf5949d32cc9"
+content-hash = "c2e0d926748a7d420909c6bd21e17cf060bc7acdd788ae93e3ec1809a4b84529"
 
 [metadata.files]
 accelerate = [

@@ -1484,6 +1492,10 @@ sympy = [
     {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
     {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
 ]
+texttable = [
+    {file = "texttable-1.6.7-py2.py3-none-any.whl", hash = "sha256:b7b68139aa8a6339d2c320ca8b1dc42d13a7831a346b446cb9eb385f0c76310c"},
+    {file = "texttable-1.6.7.tar.gz", hash = "sha256:290348fb67f7746931bcdfd55ac7584ecd4e5b0846ab164333f0794b121760f2"},
+]
 tokenizers = [
     {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"},
     {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"},
server/pyproject.toml  (+1, -0)

@@ -28,6 +28,7 @@ tokenizers = "0.13.3"
 huggingface-hub = "^0.14.1"
 transformers = "4.29.2"
 einops = "^0.6.1"
+texttable = "^1.6.7"
 
 [tool.poetry.extras]
 accelerate = ["accelerate"]
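Poetry's caret constraint `^1.6.7` accepts any texttable release at or above 1.6.7 but below 2.0.0. As a minimal sketch of what that range means, using the third-party packaging library (which is not part of this repository):

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    # Poetry's "^1.6.7" caret constraint corresponds to this PEP 440 range.
    caret = SpecifierSet(">=1.6.7,<2.0.0")

    print(Version("1.6.7") in caret)  # True
    print(Version("1.7.0") in caret)  # True: minor/patch bumps allowed
    print(Version("2.0.0") in caret)  # False: major bumps excluded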
server/requirements.txt  (+1, -0)

@@ -35,6 +35,7 @@ requests==2.31.0 ; python_version >= "3.9" and python_version < "4.0"
 safetensors==0.3.1 ; python_version >= "3.9" and python_version < "4.0"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "4.0"
 setuptools==68.0.0 ; python_version >= "3.9" and python_version < "4.0"
+texttable==1.6.7 ; python_version >= "3.9" and python_version < "4.0"
 tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "4.0"
 tqdm==4.65.0 ; python_version >= "3.9" and python_version < "4.0"
 transformers==4.29.2 ; python_version >= "3.9" and python_version < "4.0"
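Unlike the caret range in pyproject.toml, the exported requirements.txt pins texttable to exactly 1.6.7. A deployment can sanity-check that pin at startup with only the standard library; a minimal sketch, with the expected version string taken from the line added above:

    from importlib.metadata import PackageNotFoundError, version

    # Exact pin from server/requirements.txt in this commit.
    try:
        installed = version("texttable")
    except PackageNotFoundError:
        raise SystemExit("texttable is not installed; GPTQ quantization will fail on import")

    assert installed == "1.6.7", f"expected texttable==1.6.7, got {installed}"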
server/text_generation_server/models/flash_rw.py  (+0, -1)

One line is deleted in this hunk, but it does not survive the page capture (most plausibly a blank line), so only the surrounding context is shown:

@@ -61,7 +61,6 @@ class FlashRWSharded(FlashCausalLM):
         if config.quantize == "gptq":
             weights._set_gptq_params(model_id)
 
         model = FlashRWForCausalLM(config, weights)
 
         torch.distributed.barrier(group=self.process_group)
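For context, `weights._set_gptq_params(model_id)` is the hook that loads GPTQ settings before FlashRWForCausalLM is constructed. The repository's actual implementation is not shown in this diff; the sketch below only illustrates the usual pattern, assuming a quantize_config.json with "bits" and "group_size" keys as produced by common GPTQ exporters:

    import json

    from huggingface_hub import hf_hub_download


    def _set_gptq_params(self, model_id: str) -> None:
        # Illustrative only: fetch the quantization config shipped next to
        # the weights and remember the bit width and group size so the
        # loader can handle GPTQ-packed tensors later.
        filename = hf_hub_download(model_id, filename="quantize_config.json")
        with open(filename, "r") as f:
            data = json.load(f)
        self.gptq_bits = data["bits"]
        self.gptq_groupsize = data["group_size"]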
server/text_generation_server/server.py  (+13, -13)

The 13 changed lines in this hunk appear to differ only in whitespace, which the capture strips, so the hunk is shown in its resulting form:

@@ -105,21 +105,21 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
 def serve(
     model_id: str,
     revision: Optional[str],
     sharded: bool,
     quantize: Optional[str],
     dtype: Optional[str],
     trust_remote_code: bool,
     uds_path: Path,
 ):
     async def serve_inner(
         model_id: str,
         revision: Optional[str],
         sharded: bool = False,
         quantize: Optional[str] = None,
         dtype: Optional[str] = None,
         trust_remote_code: bool = False,
     ):
         unix_socket_template = "unix://{}-{}"
         if sharded:
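The last visible context line sets up per-shard socket naming: unix_socket_template = "unix://{}-{}" is filled in with the socket path and a shard rank. A minimal sketch of that scheme, assuming the usual torch.distributed WORLD_SIZE/RANK environment variables (an assumption here, not quoted from the file):

    import os

    unix_socket_template = "unix://{}-{}"
    uds_path = "/tmp/text-generation-server"  # hypothetical; normally the uds_path argument

    if os.environ.get("WORLD_SIZE") is not None:
        # Sharded: one socket per rank, e.g. /tmp/text-generation-server-0, -1, ...
        world_size = int(os.environ["WORLD_SIZE"])
        server_urls = [
            unix_socket_template.format(uds_path, rank) for rank in range(world_size)
        ]
        local_url = server_urls[int(os.environ["RANK"])]
    else:
        # Unsharded: only rank 0 exists.
        local_url = unix_socket_template.format(uds_path, 0)
        server_urls = [local_url]

    print(local_url)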
server/text_generation_server/utils/gptq/quantize.py  (+2, -6)

The hunk covers the import block at the top of the file. Net effect: the numpy and argparse imports are removed entirely, and the duplicated torch and transformers imports are collapsed to one each:

-import argparse
 import time
-import numpy as np
-import torch
 import torch.nn as nn
 import math
 import json
 import os
+import torch
+import transformers
 from texttable import Texttable
 from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
-import transformers
 from huggingface_hub import HfApi
-import numpy as np
-import torch
 from accelerate import init_empty_weights
 from text_generation_server.utils import initialize_torch_distributed, Weights
 from text_generation_server.utils.hub import weight_files
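Note that from texttable import Texttable is unchanged context here: the script already required texttable at import time, but the package was never declared in the server's dependencies, which is the mismatch this commit fixes. For reference, texttable's API is small; the report below is invented for illustration, since the script's actual output is not shown in the diff:

    from texttable import Texttable

    # Hypothetical per-layer quantization report.
    table = Texttable()
    table.header(["layer", "bits", "group size", "avg error"])
    table.add_row(["model.layers.0.self_attn.q_proj", 4, 128, 0.0123])
    table.add_row(["model.layers.0.mlp.down_proj", 4, 128, 0.0098])
    print(table.draw())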