OpenDAS / text-generation-inference / Commits

Commit 9231098f: Fix (flash) Gemma prefix and enable tests
Authored May 24, 2024 by Daniël de Kok; committed May 27, 2024 by Daniël de Kok
Parent: d32e33bd

3 changed files with 3 additions and 6 deletions (+3, -6)
integration-tests/models/test_flash_gemma.py (+1, -4)
server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py (+1, -1)
server/text_generation_server/models/flash_gemma.py (+1, -1)
integration-tests/models/test_flash_gemma.py

@@ -3,7 +3,7 @@ import pytest
 @pytest.fixture(scope="module")
 def flash_gemma_handle(launcher):
-    with launcher("gg-hf/gemma-2b", num_shard=1) as handle:
+    with launcher("google/gemma-2b", num_shard=1) as handle:
         yield handle

@@ -13,7 +13,6 @@ async def flash_gemma(flash_gemma_handle):
     return flash_gemma_handle.client


-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_gemma(flash_gemma, response_snapshot):

@@ -25,7 +24,6 @@ async def test_flash_gemma(flash_gemma, response_snapshot):
     assert response == response_snapshot


-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_gemma_all_params(flash_gemma, response_snapshot):

@@ -49,7 +47,6 @@ async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
     assert response == response_snapshot


-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):
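Dropping the @pytest.mark.skip markers re-enables all three snapshot tests, now pointed at the official google/gemma-2b checkpoint instead of the pre-release gg-hf mirror. For orientation, a minimal sketch of the test shape visible in this hunk; the request arguments and expected outputs live in lines elided from the diff, so the body below is an assumption, not the actual test:

# Hypothetical sketch of one re-enabled test; the real request
# parameters are in the elided lines and may differ.
import pytest


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma(flash_gemma, response_snapshot):
    # flash_gemma is the async client yielded by the launcher fixture above.
    response = await flash_gemma.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )
    assert response == response_snapshot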
server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py

@@ -423,7 +423,7 @@ class FlashGemmaForCausalLM(torch.nn.Module):
         super().__init__()

         embed_norm = config.hidden_size**0.5
-        if prefix is None:
+        if not prefix:
             prefix = "model"
         else:
             prefix = f"{prefix}.model"
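Switching from `prefix is None` to `not prefix` makes an empty string behave like None, which matters because the caller in flash_gemma.py (next file) now passes "". A standalone sketch of the effect; resolve_prefix is a hypothetical helper written for illustration, not a function in the codebase:

def resolve_prefix(prefix):
    # Mirrors the branch above: None and "" both resolve to the bare
    # "model" prefix; a non-empty prefix is nested as "<prefix>.model".
    if not prefix:
        return "model"
    return f"{prefix}.model"


assert resolve_prefix(None) == "model"
assert resolve_prefix("") == "model"  # the old `is None` check yielded ".model"
assert resolve_prefix("language_model") == "language_model.model"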
server/text_generation_server/models/flash_gemma.py

@@ -57,7 +57,7 @@ class FlashGemma(FlashCausalLM):
             weights._set_gptq_params(model_id, revision)

         # TODO hardcoded
-        prefix = "language_model"
+        prefix = ""
         model = FlashGemmaForCausalLM(prefix, config, weights, causal=True)

         torch.distributed.barrier(group=self.process_group)
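Combined with the modeling change above, passing an empty prefix makes the model look up its parameters under the bare model. namespace that standalone Gemma checkpoints use, rather than the language_model.model. namespace. A hypothetical illustration of the resulting weight names (weight_name exists only for this sketch, and the embed_tokens example name is an assumption about the checkpoint layout):

def weight_name(prefix, suffix="embed_tokens.weight"):
    # Same resolution rule as FlashGemmaForCausalLM.__init__ above.
    base = "model" if not prefix else f"{prefix}.model"
    return f"{base}.{suffix}"


print(weight_name(""))                # model.embed_tokens.weight
print(weight_name("language_model"))  # language_model.model.embed_tokens.weight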