OpenDAS / text-generation-inference / Commits / 1b1bfa49

Unverified commit 1b1bfa49, authored Dec 15, 2023 by OlivierDehaene, committed by GitHub on Dec 15, 2023

fix: fix logic if sliding window key is not present in config (#1352)

Parent: 9b56d3fb

Showing 4 changed files with 11 additions and 9 deletions
server/text_generation_server/models/__init__.py                                  +8 -6
server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py    +1 -1
server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py    +1 -1
server/text_generation_server/models/model.py                                     +1 -1
server/text_generation_server/models/__init__.py — view file @ 1b1bfa49

@@ -281,9 +281,10 @@ def get_model(
         )
     if model_type == "mistral":
-        if (config_dict["sliding_window"] is None and FLASH_ATTENTION) or (
-            config_dict["sliding_window"] > 0 and HAS_FLASH_ATTN_V2_CUDA
-        ):
+        sliding_window = config_dict.get("sliding_window", -1)
+        if (
+            (sliding_window is None or sliding_window == -1) and FLASH_ATTENTION
+        ) or HAS_FLASH_ATTN_V2_CUDA:
             return FlashMistral(
                 model_id,
                 revision,

@@ -293,9 +294,10 @@ def get_model(
         )
     if model_type == "mixtral":
-        if (config_dict["sliding_window"] is None and FLASH_ATTENTION) or (
-            config_dict["sliding_window"] > 0 and HAS_FLASH_ATTN_V2_CUDA
-        ):
+        sliding_window = config_dict.get("sliding_window", -1)
+        if (
+            (sliding_window is None or sliding_window == -1) and FLASH_ATTENTION
+        ) or HAS_FLASH_ATTN_V2_CUDA:
             return FlashMixtral(
                 model_id,
                 revision,
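The change in get_model() is the core of the fix: the old condition indexed config_dict["sliding_window"] directly, so a Mistral or Mixtral checkpoint whose config.json simply omits the key raised a KeyError before a model could be selected. The new logic reads the key with a -1 default and treats None or -1 as "no sliding window", in which case plain FLASH_ATTENTION suffices; when a real window is set, HAS_FLASH_ATTN_V2_CUDA is required. A minimal sketch of the before/after behaviour (the two flags below are stand-in booleans, not the real imports):

# Hedged sketch of the behaviour change in get_model(); FLASH_ATTENTION and
# HAS_FLASH_ATTN_V2_CUDA are stand-in booleans here, not the real imports.
FLASH_ATTENTION = True
HAS_FLASH_ATTN_V2_CUDA = False


def old_check(config_dict):
    # Before 1b1bfa49: direct indexing raises KeyError when the key is absent.
    return (config_dict["sliding_window"] is None and FLASH_ATTENTION) or (
        config_dict["sliding_window"] > 0 and HAS_FLASH_ATTN_V2_CUDA
    )


def new_check(config_dict):
    # After 1b1bfa49: a missing key falls back to -1 and is treated like None.
    sliding_window = config_dict.get("sliding_window", -1)
    return (
        (sliding_window is None or sliding_window == -1) and FLASH_ATTENTION
    ) or HAS_FLASH_ATTN_V2_CUDA


config_without_key = {"model_type": "mistral"}  # e.g. an older config.json

try:
    old_check(config_without_key)
except KeyError as err:
    print("old logic fails:", err)                   # old logic fails: 'sliding_window'

print("new logic:", new_check(config_without_key))   # new logic: True

The -1 default acts as a sentinel; the companion change in server/text_generation_server/models/model.py (below) converts it back to None before it reaches attention code.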
server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py — view file @ 1b1bfa49

@@ -60,7 +60,7 @@ class MistralConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        sliding_window=4096,
+        sliding_window=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py — view file @ 1b1bfa49

@@ -72,7 +72,7 @@ class MixtralConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        sliding_window=4096,
+        sliding_window=None,
         num_experts_per_tok=2,
         num_local_experts=8,
         **kwargs,
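Both config classes also change the constructor default from sliding_window=4096 to sliding_window=None, so a checkpoint that never sets the field no longer inherits a 4096-token window implicitly. A toy illustration of the difference (these classes are simplified stand-ins, not the real MistralConfig/MixtralConfig, which subclass transformers.PretrainedConfig with many more parameters):

# Simplified stand-ins; only the sliding_window default is modelled here.
class OldToyConfig:
    def __init__(self, sliding_window=4096, **kwargs):
        self.sliding_window = sliding_window


class NewToyConfig:
    def __init__(self, sliding_window=None, **kwargs):
        self.sliding_window = sliding_window


# A config.json that does not mention sliding_window at all:
raw_config = {"vocab_size": 32000}

print(OldToyConfig(**raw_config).sliding_window)  # 4096 -> a window is silently assumed
print(NewToyConfig(**raw_config).sliding_window)  # None -> no sliding window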
server/text_generation_server/models/model.py — view file @ 1b1bfa49

@@ -33,7 +33,7 @@ class Model(ABC):
         self.device = device
         self.rank = rank
         self.world_size = world_size
-        self.sliding_window = sliding_window
+        self.sliding_window = sliding_window if sliding_window != -1 else None

         if speculate is None:
             speculate = get_speculate()
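Finally, Model.__init__ normalizes the -1 sentinel introduced in get_model() back to None, so code that consumes self.sliding_window only ever sees an integer window size or None. A small sketch of that normalization (written here as a standalone helper for illustration; in the repository the expression lives directly in the constructor):

from typing import Optional


def normalize_sliding_window(sliding_window: Optional[int]) -> Optional[int]:
    # -1 is the "key absent in config" sentinel passed down from get_model();
    # map it to None so downstream code has a single "disabled" value to check.
    return sliding_window if sliding_window != -1 else None


assert normalize_sliding_window(-1) is None
assert normalize_sliding_window(None) is None
assert normalize_sliding_window(4096) == 4096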