OpenDAS / text-generation-inference / Commits

Commit b6ee0ec7 (unverified)
Authored Apr 19, 2023 by OlivierDehaene; committed via GitHub on Apr 19, 2023
Parent: 252f42c1

feat(router): add git sha to info route (#208)

Taken together, the four changes thread the commit sha from CI into the running server: the workflow passes GITHUB_SHA to docker as a build-arg, the Dockerfile exposes it to the builder stage, and the router's build script embeds it as VERGEN_GIT_SHA when no local git repository is available, so the /info route can report which commit an image was built from.

Showing 4 changed files with 33 additions and 9 deletions (+33, -9)
.github/workflows/build.yaml (+4, -0)
Dockerfile (+2, -0)
router/build.rs (+13, -1)
server/text_generation_server/models/__init__.py (+14, -8)
.github/workflows/build.yaml

@@ -95,6 +95,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max

@@ -176,6 +178,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           target: sagemaker
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
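The build-args entry is what carries the commit sha into docker build. One caveat: GitHub Actions expressions are written ${{ ... }} (as the push and tags entries above do), so {{ env.GITHUB_SHA }} appears to be missing its leading $ and would likely reach docker as a literal string rather than the actual sha. A build script can defend against a value like that before embedding it; the sketch below is hypothetical (not part of this commit) and assumes only that GIT_SHA arrives as an environment variable:

// Hypothetical guard (not in this commit): only embed GIT_SHA if it looks
// like a full 40-character hex commit sha.
fn looks_like_sha(s: &str) -> bool {
    s.len() == 40 && s.chars().all(|c| c.is_ascii_hexdigit())
}

fn main() {
    if let Ok(sha) = std::env::var("GIT_SHA") {
        if looks_like_sha(&sha) {
            println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
        } else {
            // Catches e.g. the literal "{{ env.GITHUB_SHA }}" left over from
            // an unexpanded workflow expression.
            println!("cargo:warning=GIT_SHA is not a commit sha: {sha}");
        }
    }
}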
Dockerfile

@@ -12,6 +12,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+ARG GIT_SHA
+
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
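Docker build arguments are stage-scoped: the value from --build-arg is invisible inside a build stage until that stage declares a matching ARG, after which it behaves like an environment variable for the stage's RUN steps. Declaring ARG GIT_SHA in the builder stage is therefore what lets cargo, and hence the router's build script, read the sha during the image build. A minimal sketch of that observation, assuming nothing beyond the variable possibly being set:

// What any program run under a RUN step in the builder stage sees once
// `ARG GIT_SHA` is declared and `--build-arg GIT_SHA=...` is supplied.
fn main() {
    match std::env::var("GIT_SHA") {
        Ok(sha) => println!("building from commit {sha}"),
        Err(_) => println!("GIT_SHA not set (e.g. a local, non-docker build)"),
    }
}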
router/build.rs

@@ -2,6 +2,18 @@ use std::error::Error;
 
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    EmitBuilder::builder().git_sha(false).emit()?;
+    // Try to get the git sha from the local git repository
+    if EmitBuilder::builder()
+        .fail_on_error()
+        .git_sha(false)
+        .emit()
+        .is_err()
+    {
+        // Unable to get the git sha
+        if let Ok(sha) = std::env::var("GIT_SHA") {
+            // Set it from an env var
+            println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
+        }
+    }
     Ok(())
 }
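With the fallback in place, VERGEN_GIT_SHA gets set in both environments: vergen derives it from the local .git directory during development, while docker builds (which copy only the sources) take it from the GIT_SHA build-arg. The info route itself is not part of this diff; the sketch below shows one way a compile-time sha can be surfaced, assuming an axum-style handler (the handler name and response shape are illustrative, not the router's actual API):

use axum::{routing::get, Json, Router};
use serde_json::{json, Value};

// option_env! reads VERGEN_GIT_SHA at compile time and yields None when the
// build script set nothing (no .git directory and no GIT_SHA env var), so
// local builds keep compiling either way.
async fn info() -> Json<Value> {
    let sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
    Json(json!({ "sha": sha }))
}

fn app() -> Router {
    Router::new().route("/info", get(info))
}

Using option_env! rather than env! is what makes the sha best-effort: a build with neither source available still compiles and simply reports "unknown".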
server/text_generation_server/models/__init__.py

(The flash model imports move below the CUDA capability check, so an unsupported GPU now raises the explicit capability ImportError before the flash attention imports are attempted.)

@@ -17,13 +17,6 @@ from text_generation_server.models.gpt_neox import GPTNeoxSharded
 from text_generation_server.models.t5 import T5Sharded
 
 try:
-    from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
-    from text_generation_server.models.flash_llama import FlashLlama, FlashLlamaSharded
-    from text_generation_server.models.flash_santacoder import (
-        FlashSantacoder,
-        FlashSantacoderSharded,
-    )
-
     if torch.cuda.is_available():
         major, minor = torch.cuda.get_device_capability()
         is_sm75 = major == 7 and minor == 5
@@ -32,7 +25,20 @@ try:
         supported = is_sm75 or is_sm8x or is_sm90
         if not supported:
             raise ImportError(
                 f"GPU with CUDA capability {major} {minor} is not supported"
             )
+
+        from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
+        from text_generation_server.models.flash_llama import (
+            FlashLlama,
+            FlashLlamaSharded,
+        )
+        from text_generation_server.models.flash_santacoder import (
+            FlashSantacoder,
+            FlashSantacoderSharded,
+        )
+
         FLASH_ATTENTION = True
     else:
         FLASH_ATTENTION = False