Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
337ed766
Unverified
Commit
337ed766
authored
Oct 19, 2024
by
sasha0552
Committed by
GitHub
Oct 18, 2024
Browse files
[Bugfix] Fix offline mode when using `mistral_common` (#9457)
parent
0c9a5258
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
62 additions
and
28 deletions
+62
-28
tests/entrypoints/offline_mode/test_offline_mode.py
tests/entrypoints/offline_mode/test_offline_mode.py
+31
-25
vllm/transformers_utils/tokenizers/mistral.py
vllm/transformers_utils/tokenizers/mistral.py
+31
-3
No files found.
tests/entrypoints/offline_mode/test_offline_mode.py
View file @
337ed766
"""Tests for HF_HUB_OFFLINE mode"""
import
importlib
import
sys
import
weakref
import
pytest
from
vllm
import
LLM
from
vllm.distributed
import
cleanup_dist_env_and_memory
MODEL_NAME
=
"facebook/opt-125m"
MODEL_CONFIGS
=
[
{
"model"
:
"facebook/opt-125m"
,
"enforce_eager"
:
True
,
"gpu_memory_utilization"
:
0.20
,
"max_model_len"
:
64
,
"max_num_batched_tokens"
:
64
,
"max_num_seqs"
:
64
,
"tensor_parallel_size"
:
1
,
},
{
"model"
:
"mistralai/Mistral-7B-Instruct-v0.1"
,
"enforce_eager"
:
True
,
"gpu_memory_utilization"
:
0.95
,
"max_model_len"
:
64
,
"max_num_batched_tokens"
:
64
,
"max_num_seqs"
:
64
,
"tensor_parallel_size"
:
1
,
"tokenizer_mode"
:
"mistral"
,
},
]
@
pytest
.
fixture
(
scope
=
"module"
)
def
llm
():
# pytest caches the fixture so we use weakref.proxy to
# enable garbage collection
llm
=
LLM
(
model
=
MODEL_NAME
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
,
gpu_memory_utilization
=
0.10
,
enforce_eager
=
True
)
with
llm
.
deprecate_legacy_api
():
yield
weakref
.
proxy
(
llm
)
del
llm
def
cache_models
():
# Cache model files first
for
model_config
in
MODEL_CONFIGS
:
LLM
(
**
model_config
)
cleanup_dist_env_and_memory
()
yield
@
pytest
.
mark
.
skip_global_cleanup
def
test_offline_mode
(
llm
:
LLM
,
monkeypatch
):
# we use the llm fixture to ensure the model files are in-cache
del
llm
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
usefixtures
(
"cache_models"
)
def
test_offline_mode
(
monkeypatch
):
# Set HF to offline mode and ensure we can still construct an LLM
try
:
monkeypatch
.
setenv
(
"HF_HUB_OFFLINE"
,
"1"
)
# Need to re-import huggingface_hub and friends to setup offline mode
_re_import_modules
()
# Cached model files should be used in offline mode
LLM
(
model
=
MODEL_NAME
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
,
gpu_memory_utilization
=
0.20
,
enforce_eager
=
True
)
for
model_config
in
MODEL_CONFIGS
:
LLM
(
**
model_config
)
finally
:
# Reset the environment after the test
# NB: Assuming tests are run in online mode
...
...
vllm/transformers_utils/tokenizers/mistral.py
View file @
337ed766
...
...
@@ -4,6 +4,7 @@ from dataclasses import dataclass
from
pathlib
import
Path
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
,
Union
,
cast
import
huggingface_hub
from
huggingface_hub
import
HfApi
,
hf_hub_download
from
mistral_common.protocol.instruct.request
import
ChatCompletionRequest
# yapf: disable
...
...
@@ -24,6 +25,26 @@ class Encoding:
input_ids
:
List
[
int
]
def list_local_repo_files(repo_id: str, revision: Optional[str]) -> List[str]:
    """List the files of a model repo from the local Hugging Face cache.

    Intended as an offline fallback when the file list cannot be fetched
    from the Hub.

    Args:
        repo_id: Repository id, e.g. ``"org/name"``.
        revision: Commit hash, or the name of a cached ref (branch/tag).
            When ``None``, the commit recorded in ``refs/main`` is used.

    Returns:
        The entry names found directly inside the cached snapshot directory
        of the resolved revision (nested directories are not walked), or an
        empty list when the repo or the revision is not present in the cache.
    """
    repo_cache = os.path.join(
        huggingface_hub.constants.HF_HUB_CACHE,
        huggingface_hub.constants.REPO_ID_SEPARATOR.join(
            ["models", *repo_id.split("/")]))
    snapshots_dir = os.path.join(repo_cache, "snapshots")

    def read_ref(name: str) -> Optional[str]:
        # A ref file holds the commit hash the ref points at. Strip it so a
        # stray trailing newline cannot break the snapshot path lookup.
        ref_file = os.path.join(repo_cache, "refs", name)
        if not os.path.isfile(ref_file):
            return None
        with open(ref_file, encoding="utf-8") as file:
            return file.read().strip()

    if revision is None:
        revision = read_ref("main")
    elif revision and not os.path.isdir(os.path.join(snapshots_dir,
                                                     revision)):
        # Snapshots are keyed by commit hash; an explicit branch/tag name
        # must be resolved through its ref file first. Fall back to the
        # given value unchanged when no such ref is cached.
        revision = read_ref(revision) or revision

    if revision:
        revision_dir = os.path.join(snapshots_dir, revision)
        if os.path.isdir(revision_dir):
            return os.listdir(revision_dir)

    return []
def
find_tokenizer_file
(
files
:
List
[
str
]):
file_pattern
=
re
.
compile
(
r
"^tokenizer\.model\.v.*$|^tekken\.json$"
)
...
...
@@ -90,9 +111,16 @@ class MistralTokenizer:
@
staticmethod
def
_download_mistral_tokenizer_from_hf
(
tokenizer_name
:
str
,
revision
:
Optional
[
str
])
->
str
:
api
=
HfApi
()
repo_info
=
api
.
model_info
(
tokenizer_name
)
files
=
[
s
.
rfilename
for
s
in
repo_info
.
siblings
]
try
:
hf_api
=
HfApi
()
files
=
hf_api
.
list_repo_files
(
repo_id
=
tokenizer_name
,
revision
=
revision
)
except
ConnectionError
as
exc
:
files
=
list_local_repo_files
(
repo_id
=
tokenizer_name
,
revision
=
revision
)
if
len
(
files
)
==
0
:
raise
exc
filename
=
find_tokenizer_file
(
files
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment