OpenDAS / text-generation-inference / Commits

Commit e7248fe9
v0.8.2
Authored Jun 01, 2023 by OlivierDehaene
Parent: 95d35469

Showing 6 changed files with 17 additions and 11 deletions
Cargo.lock                                                 +4  -4
Cargo.toml                                                 +1  -1
docs/openapi.json                                          +1  -1
server/pyproject.toml                                      +1  -1
server/text_generation_server/models/__init__.py           +3  -1
server/text_generation_server/models/flash_santacoder.py   +7  -3
Cargo.lock

@@ -2557,7 +2557,7 @@ dependencies = [
 [[package]]
 name = "text-generation-benchmark"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "average",
  "clap",

@@ -2577,7 +2577,7 @@ dependencies = [
 [[package]]
 name = "text-generation-client"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "futures",
  "grpc-metadata",

@@ -2593,7 +2593,7 @@ dependencies = [
 [[package]]
 name = "text-generation-launcher"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "clap",
  "ctrlc",

@@ -2609,7 +2609,7 @@ dependencies = [
 [[package]]
 name = "text-generation-router"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "async-stream",
  "axum",
Cargo.toml

@@ -8,7 +8,7 @@ members = [
 ]

 [workspace.package]
-version = "0.8.1"
+version = "0.8.2"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
docs/openapi.json

@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "0.8.1"
+    "version": "0.8.2"
   },
   "paths": {
     "/": {
server/pyproject.toml

 [tool.poetry]
 name = "text-generation-server"
-version = "0.8.1"
+version = "0.8.2"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
server/text_generation_server/models/__init__.py

@@ -138,7 +138,9 @@ def get_model(
         trust_remote_code=trust_remote_code,
     )

-    config_dict, _ = PretrainedConfig.get_config_dict(model_id, revision=revision, trust_remote_code=trust_remote_code)
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model_id, revision=revision, trust_remote_code=trust_remote_code
+    )
     model_type = config_dict["model_type"]

     if model_type == "gpt_bigcode":
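For context, a minimal sketch of the model-type dispatch that the hunk above touches, assuming transformers is installed; the model id below is only an illustrative placeholder, not a value from this commit:

from transformers import PretrainedConfig

# Fetch only the raw config dict (no weights), mirroring the call in get_model
# above; revision and trust_remote_code are forwarded the same way.
model_id = "bigcode/santacoder"  # placeholder model id, not from the diff
config_dict, _ = PretrainedConfig.get_config_dict(
    model_id, revision=None, trust_remote_code=False
)
model_type = config_dict["model_type"]

# get_model branches on this value; gpt_bigcode is the santacoder/starcoder family.
if model_type == "gpt_bigcode":
    print("gpt_bigcode model: santacoder code path")
else:
    print(f"other model type: {model_type}")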
server/text_generation_server/models/flash_santacoder.py

@@ -87,7 +87,9 @@ class FlashSantacoder(FlashCausalLM):
     ):
         for filename in filenames:
             with safe_open(
-                filename, framework="pt", device=str(device) if quantize is None else "cpu"
+                filename,
+                framework="pt",
+                device=str(device) if quantize is None else "cpu",
             ) as f:
                 for key in f.keys():
                     value = f.get_tensor(key)

@@ -148,11 +150,13 @@ class FlashSantacoder(FlashCausalLM):
                         module._parameters[param_name][: value.shape[0]] = value
                     elif "kv_attn.weight" in key:
                         module._parameters[param_name][
-                            model.transformer.head_size * model.transformer.num_heads :
+                            model.transformer.head_size
+                            * model.transformer.num_heads :
                         ] = value
                     elif "kv_attn.bias" in key:
                         module._parameters[param_name][
-                            model.transformer.head_size * model.transformer.num_heads :
+                            model.transformer.head_size
+                            * model.transformer.num_heads :
                         ] = value
                     else:
                         if current_parameter_tensor.shape != value.shape:
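As a rough illustration of the loading pattern in the first hunk above, here is a minimal, self-contained sketch of iterating a safetensors checkpoint with safe_open and picking the target device based on whether quantization is requested; the file path and quantize value are placeholders, not taken from this commit:

import torch
from safetensors import safe_open

quantize = None  # e.g. "bitsandbytes"; any non-None value forces CPU loading first
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
filenames = ["model.safetensors"]  # placeholder checkpoint path

for filename in filenames:
    with safe_open(
        filename,
        framework="pt",
        # Load tensors straight onto the target device unless quantizing,
        # in which case they are materialized on CPU first.
        device=str(device) if quantize is None else "cpu",
    ) as f:
        for key in f.keys():
            value = f.get_tensor(key)
            print(key, tuple(value.shape))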