Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
fef1a1c3
Unverified
Commit
fef1a1c3
authored
Mar 30, 2023
by
OlivierDehaene
Committed by
GitHub
Mar 30, 2023
Browse files
v0.4.3 (#152)
parent
84722f3e
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
12 additions
and
16 deletions
+12
-16
Cargo.lock
Cargo.lock
+3
-3
benchmark/Cargo.lock
benchmark/Cargo.lock
+1
-1
docs/openapi.json
docs/openapi.json
+1
-1
launcher/Cargo.toml
launcher/Cargo.toml
+1
-1
router/Cargo.toml
router/Cargo.toml
+1
-1
router/client/Cargo.toml
router/client/Cargo.toml
+1
-1
router/src/main.rs
router/src/main.rs
+3
-7
server/pyproject.toml
server/pyproject.toml
+1
-1
No files found.
Cargo.lock
View file @
fef1a1c3
...
...
@@ -2212,7 +2212,7 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "0.4.
2
"
version = "0.4.
3
"
dependencies = [
"futures",
"grpc-metadata",
...
...
@@ -2229,7 +2229,7 @@ dependencies = [
[[package]]
name = "text-generation-launcher"
version = "0.4.
2
"
version = "0.4.
3
"
dependencies = [
"clap 4.1.8",
"ctrlc",
...
...
@@ -2244,7 +2244,7 @@ dependencies = [
[[package]]
name = "text-generation-router"
version = "0.4.
2
"
version = "0.4.
3
"
dependencies = [
"async-stream",
"axum",
...
...
benchmark/Cargo.lock
View file @
fef1a1c3
...
...
@@ -2069,7 +2069,7 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "0.4.
2
"
version = "0.4.
3
"
dependencies = [
"futures",
"grpc-metadata",
...
...
docs/openapi.json
View file @
fef1a1c3
...
...
@@ -11,7 +11,7 @@
"name"
:
"Apache 2.0"
,
"url"
:
"https://www.apache.org/licenses/LICENSE-2.0"
},
"version"
:
"0.4.
2
"
"version"
:
"0.4.
3
"
},
"paths"
:
{
"/generate"
:
{
...
...
launcher/Cargo.toml
View file @
fef1a1c3
[package]
name
=
"text-generation-launcher"
version
=
"0.4.
2
"
version
=
"0.4.
3
"
edition
=
"2021"
authors
=
[
"Olivier Dehaene"
]
description
=
"Text Generation Launcher"
...
...
router/Cargo.toml
View file @
fef1a1c3
[package]
name
=
"text-generation-router"
version
=
"0.4.
2
"
version
=
"0.4.
3
"
edition
=
"2021"
authors
=
[
"Olivier Dehaene"
]
description
=
"Text Generation Webserver"
...
...
router/client/Cargo.toml
View file @
fef1a1c3
[package]
name
=
"text-generation-client"
version
=
"0.4.
2
"
version
=
"0.4.
3
"
edition
=
"2021"
[dependencies]
...
...
router/src/main.rs
View file @
fef1a1c3
...
...
@@ -37,7 +37,7 @@ struct Args {
max_waiting_tokens
:
usize
,
#[clap(default_value
=
"3000"
,
long,
short,
env)]
port
:
u16
,
#[clap(default_value
=
"/tmp/text-generation-
server-
0"
,
long,
env)]
#[clap(default_value
=
"/tmp/text-generation-0"
,
long,
env)]
master_shard_uds_path
:
String
,
#[clap(default_value
=
"bigscience/bloom"
,
long,
env)]
tokenizer_name
:
String
,
...
...
@@ -76,8 +76,6 @@ fn main() -> Result<(), std::io::Error> {
panic!
(
"validation_workers must be > 0"
);
}
init_logging
(
otlp_endpoint
,
json_output
);
// CORS allowed origins
// map to go inside the option and then map to parse from String to HeaderValue
// Finally, convert to AllowOrigin
...
...
@@ -91,21 +89,17 @@ fn main() -> Result<(), std::io::Error> {
// Tokenizer instance
// This will only be used to validate payloads
tracing
::
info!
(
"Loading tokenizer"
);
let
local_path
=
Path
::
new
(
&
tokenizer_name
);
let
tokenizer
=
if
local_path
.exists
()
&&
local_path
.is_dir
()
&&
local_path
.join
(
"tokenizer.json"
)
.exists
()
{
// Load local tokenizer
tracing
::
info!
(
"Found local tokenizer"
);
Tokenizer
::
from_file
(
local_path
.join
(
"tokenizer.json"
))
.unwrap
()
}
else
{
// Download and instantiate tokenizer
// We need to download it outside of the Tokio runtime
tracing
::
info!
(
"Downloading tokenizer"
);
Tokenizer
::
from_pretrained
(
tokenizer_name
.clone
(),
None
)
.unwrap
()
};
tracing
::
info!
(
"Tokenizer loaded"
);
// Launch Tokio runtime
tokio
::
runtime
::
Builder
::
new_multi_thread
()
...
...
@@ -113,6 +107,8 @@ fn main() -> Result<(), std::io::Error> {
.build
()
.unwrap
()
.block_on
(
async
{
init_logging
(
otlp_endpoint
,
json_output
);
// Get pipeline tag
let
model_info
=
reqwest
::
get
(
format!
(
"https://huggingface.co/api/models/{tokenizer_name}"
...
...
server/pyproject.toml
View file @
fef1a1c3
[tool.poetry]
name
=
"text-generation-server"
version
=
"0.4.
2
"
version
=
"0.4.
3
"
description
=
"Text Generation Inference Python gRPC Server"
authors
=
[
"Olivier Dehaene <olivier@huggingface.co>"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment