OpenDAS / text-generation-inference · Commits

Commit 87dc034b (unverified)
Authored May 31, 2023 by OlivierDehaene; committed May 31, 2023 by GitHub.

feat(server): add retry on download (#384)

Parent: 444400b4
Showing 4 changed files with 309 additions and 298 deletions:

  integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json  (+281, -281)
  server/text_generation_server/models/flash_rw.py  (+2, -2)
  server/text_generation_server/models/rw.py  (+1, -1)
  server/text_generation_server/utils/hub.py  (+25, -14)
integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json

(This diff is collapsed.)
server/text_generation_server/models/flash_rw.py
@@ -37,7 +37,7 @@ class FlashRW(FlashCausalLM):
     ):
         if torch.cuda.is_available():
             device = torch.device("cuda")
-            dtype = torch.bfloat16
+            dtype = torch.float16
         else:
             raise NotImplementedError("RW is only available on GPU")
@@ -124,7 +124,7 @@ class FlashRWSharded(FlashRW):
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
-            dtype = torch.bfloat16
+            dtype = torch.float16
         else:
             raise NotImplementedError("FlashRW is only available on GPU")
server/text_generation_server/models/rw.py
@@ -16,7 +16,7 @@ class RW(CausalLM):
     ):
         if torch.cuda.is_available():
             device = torch.device("cuda")
-            dtype = torch.bfloat16
+            dtype = torch.float16
         else:
             if quantize:
                 raise ValueError("quantization is not available on CPU")
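Alongside the retry work, all three model classes above switch their default GPU dtype from torch.bfloat16 to torch.float16. As a sketch of an alternative the commit does not take, one could keep bfloat16 only where the hardware supports it; pick_device_and_dtype is a hypothetical helper, not part of the repository:

import torch

def pick_device_and_dtype():
    # Hypothetical helper, not in the commit: the commit uses float16
    # unconditionally on GPU. This variant prefers bfloat16 where the
    # hardware supports it.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        return device, dtype
    raise NotImplementedError("RW is only available on GPU")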
server/text_generation_server/utils/hub.py
@@ -23,7 +23,11 @@ def weight_hub_files(
     """Get the weights filenames on the hub"""
     api = HfApi()
     info = api.model_info(model_id, revision=revision)
-    filenames = [s.rfilename for s in info.siblings if s.rfilename.endswith(extension)]
+    filenames = [
+        s.rfilename
+        for s in info.siblings
+        if s.rfilename.endswith(extension) and len(s.rfilename.split("/")) == 1
+    ]
     if not filenames:
         raise EntryNotFoundError(
...
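The added condition len(s.rfilename.split("/")) == 1 keeps only files at the repository root, so weights nested in subdirectories no longer match. A quick self-contained illustration, with made-up sibling filenames:

# Made-up sibling filenames, for illustration only.
siblings = ["model.safetensors", "onnx/model.safetensors", "README.md"]
extension = ".safetensors"

filenames = [f for f in siblings if f.endswith(extension) and len(f.split("/")) == 1]
print(filenames)  # ['model.safetensors'] -- the nested copy is filtered out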
@@ -130,24 +134,31 @@ def download_weights(
 ) -> List[Path]:
     """Download the safetensors files from the hub"""

-    def download_file(filename):
+    def download_file(filename, tries=5):
         local_file = try_to_load_from_cache(model_id, revision, filename)
         if local_file is not None:
             logger.info(f"File {filename} already present in cache.")
             return Path(local_file)

-        logger.info(f"Download file: {filename}")
-        start_time = time.time()
-        local_file = hf_hub_download(
-            filename=filename,
-            repo_id=model_id,
-            revision=revision,
-            local_files_only=False,
-        )
-        logger.info(
-            f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - start_time))}."
-        )
-        return Path(local_file)
+        for i in range(tries):
+            try:
+                logger.info(f"Download file: {filename}")
+                start_time = time.time()
+                local_file = hf_hub_download(
+                    filename=filename,
+                    repo_id=model_id,
+                    revision=revision,
+                    local_files_only=False,
+                )
+                logger.info(
+                    f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - start_time))}."
+                )
+                return Path(local_file)
+            except Exception as e:
+                if i + 1 == tries:
+                    raise e
+                logger.error(e)
+                logger.info(f"Retry {i + 1}/{tries - 1}")

     # We do this instead of using tqdm because we want to parse the logs with the launcher
     start_time = time.time()
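The retry logic wraps the whole download in a for loop: up to tries attempts, re-raising the exception on the final one, and otherwise logging the error plus a "Retry i/(tries - 1)" line before trying again, with no delay between attempts. The same shape as a standalone sketch (the with_retries helper and print logging are illustrative, not part of the repository):

from typing import Callable, TypeVar

T = TypeVar("T")

def with_retries(fn: Callable[[], T], tries: int = 5) -> T:
    # Same control flow as download_file above: re-raise on the
    # final attempt; otherwise report the error and try again.
    # Usage (hypothetical): with_retries(lambda: fetch(url), tries=5)
    for i in range(tries):
        try:
            return fn()
        except Exception as e:
            if i + 1 == tries:
                raise e
            print(f"Retry {i + 1}/{tries - 1}: {e}")

A backoff (for example time.sleep(2 ** i) before the next attempt) would be a common extension; the commit itself retries immediately.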