OpenDAS / text-generation-inference · Commits · c2d4a3b5

v1.4.0 (#1494)

Unverified commit c2d4a3b5, authored Jan 26, 2024 by OlivierDehaene, committed by GitHub on Jan 26, 2024. Parent: d9758851.

Changes: 23 in the commit overall; this page shows 3 changed files with 14 additions and 9 deletions (+14 −9).
server/text_generation_server/models/flash_phi.py  +6 −6
server/text_generation_server/models/phi.py        +5 −2
server/text_generation_server/utils/layers.py      +3 −1
server/text_generation_server/models/flash_phi.py

@@ -64,9 +64,9 @@ class FlashPhi(FlashCausalLM):
         import os
         from pathlib import Path
 
-        is_local_model = (Path(use_medusa).exists() and Path(use_medusa).is_dir()) or os.getenv(
-            "WEIGHTS_CACHE_OVERRIDE", None
-        ) is not None
+        is_local_model = (
+            Path(use_medusa).exists() and Path(use_medusa).is_dir()
+        ) or os.getenv("WEIGHTS_CACHE_OVERRIDE", None) is not None
 
         if not is_local_model:
             medusa_config = hf_hub_download(
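The hunk above is a pure re-wrap, but the predicate it touches is the interesting part: `use_medusa` is treated as local when it names an existing directory, or when the `WEIGHTS_CACHE_OVERRIDE` environment variable is set; only non-local values fall through to `hf_hub_download`. A minimal standalone sketch of that check (the helper name `is_local_medusa` is made up for illustration):

```python
import os
from pathlib import Path


def is_local_medusa(use_medusa: str) -> bool:
    # Same predicate as the hunk above: an existing directory counts as
    # local, and setting WEIGHTS_CACHE_OVERRIDE forces local resolution.
    return (
        Path(use_medusa).exists() and Path(use_medusa).is_dir()
    ) or os.getenv("WEIGHTS_CACHE_OVERRIDE", None) is not None


print(is_local_medusa("/tmp"))              # True: an existing directory
print(is_local_medusa("org/medusa-heads"))  # False unless WEIGHTS_CACHE_OVERRIDE is set
```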
server/text_generation_server/models/phi.py

@@ -5,13 +5,17 @@ from transformers import AutoConfig, AutoTokenizer
 from typing import Optional, List, Tuple
 
 from text_generation_server.models import CausalLM
-from text_generation_server.models.custom_modeling.phi_modeling import PhiConfig, PhiForCausalLM
+from text_generation_server.models.custom_modeling.phi_modeling import (
+    PhiConfig,
+    PhiForCausalLM,
+)
 from text_generation_server.utils import (
     initialize_torch_distributed,
     weight_files,
     Weights,
 )
+
 
 class Phi(CausalLM):
     def __init__(
         self,

@@ -60,4 +64,3 @@ class Phi(CausalLM):
             dtype=dtype,
             device=device,
         )
-
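Both hunks in this file are mechanical style fixes: a one-line import that grew too long is split into the parenthesized one-name-per-line form, and a trailing blank line is dropped. A small illustration of the same split, using a stdlib module purely so the snippet runs (it is not part of TGI):

```python
# Single-line form: fine while it fits within the formatter's line length.
from os.path import basename, dirname

# Parenthesized form, as applied to phi_modeling above: one name per line
# with a trailing comma, so adding a name later is a one-line diff.
from os.path import (
    basename,
    dirname,
)

print(basename("/srv/models/phi-2"), dirname("/srv/models/phi-2"))
```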
server/text_generation_server/utils/layers.py

@@ -510,7 +510,9 @@ class TensorParallelEmbedding(nn.Module):
         block_size = (num_embeddings + world_size - 1) // world_size
         self.min_id = rank * block_size
         self.max_id = min(num_embeddings, (rank + 1) * block_size)
-        self.null_idx = weight.shape[0]  # Usually block_size, might be less in non even vocab_size.
+        self.null_idx = weight.shape[
+            0
+        ]  # Usually block_size, might be less in non even vocab_size.
         self.process_group = weights.process_group
         self.reduce = reduce
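The only edit here is a line wrap around `weight.shape[0]`, but the surrounding context is the vocabulary-sharding arithmetic of `TensorParallelEmbedding`: each rank owns a contiguous block of `ceil(num_embeddings / world_size)` rows, and judging from this context, `null_idx` (one past the local shard) is where out-of-range token ids get mapped before the cross-rank reduction. A torch-free sketch of the bounds arithmetic (`shard_bounds` is a made-up helper, not TGI API):

```python
def shard_bounds(num_embeddings: int, world_size: int, rank: int):
    # Ceiling division: every rank uses the same block size; the last
    # rank's block may be only partially filled when the vocabulary does
    # not divide evenly (the "non even vocab_size" case in the comment).
    block_size = (num_embeddings + world_size - 1) // world_size
    min_id = rank * block_size
    max_id = min(num_embeddings, (rank + 1) * block_size)
    return block_size, min_id, max_id


# 10 embeddings over 4 ranks: block_size is 3, and rank 3 holds one row.
for rank in range(4):
    print(rank, shard_bounds(10, 4, rank))
# 0 (3, 0, 3)
# 1 (3, 3, 6)
# 2 (3, 6, 9)
# 3 (3, 9, 10)
```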