Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
e5c1d6d6
Unverified
Commit
e5c1d6d6
authored
Jul 20, 2024
by
Daniël de Kok
Committed by
GitHub
Jul 20, 2024
Browse files
Add FP8 release test (#2261)
parent
11123a8e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
598 additions
and
0 deletions
+598
-0
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8.json
...napshots__/test_flash_llama_fp8/test_flash_llama_fp8.json
+89
-0
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
...test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
+89
-0
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
...ots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
+358
-0
integration-tests/models/test_flash_llama_fp8.py
integration-tests/models/test_flash_llama_fp8.py
+62
-0
No files found.
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8.json
0 → 100644
View file @
e5c1d6d6
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
369
,
"logprob"
:
-2.1816406
,
"special"
:
false
,
"text"
:
" for"
},
{
"id"
:
279
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
220
,
"logprob"
:
-3.6308594
,
"special"
:
false
,
"text"
:
" "
},
{
"id"
:
679
,
"logprob"
:
-1.7900391
,
"special"
:
false
,
"text"
:
"201"
},
{
"id"
:
24
,
"logprob"
:
-1.3554688
,
"special"
:
false
,
"text"
:
"9"
},
{
"id"
:
12
,
"logprob"
:
-2.0039062
,
"special"
:
false
,
"text"
:
"-"
},
{
"id"
:
2366
,
"logprob"
:
-0.4489746
,
"special"
:
false
,
"text"
:
"202"
},
{
"id"
:
15
,
"logprob"
:
-0.037109375
,
"special"
:
false
,
"text"
:
"0"
},
{
"id"
:
2978
,
"logprob"
:
-0.8100586
,
"special"
:
false
,
"text"
:
" school"
},
{
"id"
:
1060
,
"logprob"
:
-0.013015747
,
"special"
:
false
,
"text"
:
" year"
}
],
"top_tokens"
:
null
},
"generated_text"
:
" for the 2019-2020 school year"
}
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
0 → 100644
View file @
e5c1d6d6
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
0
,
"tokens"
:
[
{
"id"
:
25
,
"logprob"
:
-0.8535156
,
"special"
:
false
,
"text"
:
":"
},
{
"id"
:
2209
,
"logprob"
:
-2.4804688
,
"special"
:
false
,
"text"
:
" Is"
},
{
"id"
:
279
,
"logprob"
:
-0.7167969
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
734
,
"logprob"
:
-2.625
,
"special"
:
false
,
"text"
:
" function"
},
{
"id"
:
330
,
"logprob"
:
-0.35131836
,
"special"
:
false
,
"text"
:
"
\"
"
},
{
"id"
:
4110
,
"logprob"
:
-2.4101562
,
"special"
:
false
,
"text"
:
"Create"
},
{
"id"
:
264
,
"logprob"
:
-0.23181152
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
502
,
"logprob"
:
-0.25512695
,
"special"
:
false
,
"text"
:
" new"
},
{
"id"
:
1052
,
"logprob"
:
-1.2792969
,
"special"
:
false
,
"text"
:
" file"
},
{
"id"
:
1
,
"logprob"
:
-1.2529297
,
"special"
:
false
,
"text"
:
"
\"
"
}
],
"top_tokens"
:
null
},
"generated_text"
:
"Test request: Is the function
\"
Create a new file
\"
"
}
integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
0 → 100644
View file @
e5c1d6d6
[
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
369
,
"logprob"
:
-2.1816406
,
"special"
:
false
,
"text"
:
" for"
},
{
"id"
:
279
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
220
,
"logprob"
:
-3.6308594
,
"special"
:
false
,
"text"
:
" "
},
{
"id"
:
679
,
"logprob"
:
-1.7988281
,
"special"
:
false
,
"text"
:
"201"
},
{
"id"
:
24
,
"logprob"
:
-1.3535156
,
"special"
:
false
,
"text"
:
"9"
},
{
"id"
:
12
,
"logprob"
:
-2.0058594
,
"special"
:
false
,
"text"
:
"-"
},
{
"id"
:
2366
,
"logprob"
:
-0.45410156
,
"special"
:
false
,
"text"
:
"202"
},
{
"id"
:
15
,
"logprob"
:
-0.037109375
,
"special"
:
false
,
"text"
:
"0"
},
{
"id"
:
2978
,
"logprob"
:
-0.8095703
,
"special"
:
false
,
"text"
:
" school"
},
{
"id"
:
1060
,
"logprob"
:
-0.013053894
,
"special"
:
false
,
"text"
:
" year"
}
],
"top_tokens"
:
null
},
"generated_text"
:
" for the 2019-2020 school year"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
369
,
"logprob"
:
-2.1816406
,
"special"
:
false
,
"text"
:
" for"
},
{
"id"
:
279
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
220
,
"logprob"
:
-3.6308594
,
"special"
:
false
,
"text"
:
" "
},
{
"id"
:
679
,
"logprob"
:
-1.7988281
,
"special"
:
false
,
"text"
:
"201"
},
{
"id"
:
24
,
"logprob"
:
-1.3535156
,
"special"
:
false
,
"text"
:
"9"
},
{
"id"
:
12
,
"logprob"
:
-2.0058594
,
"special"
:
false
,
"text"
:
"-"
},
{
"id"
:
2366
,
"logprob"
:
-0.45410156
,
"special"
:
false
,
"text"
:
"202"
},
{
"id"
:
15
,
"logprob"
:
-0.037109375
,
"special"
:
false
,
"text"
:
"0"
},
{
"id"
:
2978
,
"logprob"
:
-0.8095703
,
"special"
:
false
,
"text"
:
" school"
},
{
"id"
:
1060
,
"logprob"
:
-0.013053894
,
"special"
:
false
,
"text"
:
" year"
}
],
"top_tokens"
:
null
},
"generated_text"
:
" for the 2019-2020 school year"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
369
,
"logprob"
:
-2.1816406
,
"special"
:
false
,
"text"
:
" for"
},
{
"id"
:
279
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
220
,
"logprob"
:
-3.6308594
,
"special"
:
false
,
"text"
:
" "
},
{
"id"
:
679
,
"logprob"
:
-1.7988281
,
"special"
:
false
,
"text"
:
"201"
},
{
"id"
:
24
,
"logprob"
:
-1.3535156
,
"special"
:
false
,
"text"
:
"9"
},
{
"id"
:
12
,
"logprob"
:
-2.0058594
,
"special"
:
false
,
"text"
:
"-"
},
{
"id"
:
2366
,
"logprob"
:
-0.45410156
,
"special"
:
false
,
"text"
:
"202"
},
{
"id"
:
15
,
"logprob"
:
-0.037109375
,
"special"
:
false
,
"text"
:
"0"
},
{
"id"
:
2978
,
"logprob"
:
-0.8095703
,
"special"
:
false
,
"text"
:
" school"
},
{
"id"
:
1060
,
"logprob"
:
-0.013053894
,
"special"
:
false
,
"text"
:
" year"
}
],
"top_tokens"
:
null
},
"generated_text"
:
" for the 2019-2020 school year"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
128000
,
"logprob"
:
null
,
"text"
:
"<|begin_of_text|>"
},
{
"id"
:
2323
,
"logprob"
:
-9.421875
,
"text"
:
"Test"
},
{
"id"
:
1715
,
"logprob"
:
-10.546875
,
"text"
:
" request"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
369
,
"logprob"
:
-2.1816406
,
"special"
:
false
,
"text"
:
" for"
},
{
"id"
:
279
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
" the"
},
{
"id"
:
220
,
"logprob"
:
-3.6308594
,
"special"
:
false
,
"text"
:
" "
},
{
"id"
:
679
,
"logprob"
:
-1.7988281
,
"special"
:
false
,
"text"
:
"201"
},
{
"id"
:
24
,
"logprob"
:
-1.3535156
,
"special"
:
false
,
"text"
:
"9"
},
{
"id"
:
12
,
"logprob"
:
-2.0058594
,
"special"
:
false
,
"text"
:
"-"
},
{
"id"
:
2366
,
"logprob"
:
-0.45410156
,
"special"
:
false
,
"text"
:
"202"
},
{
"id"
:
15
,
"logprob"
:
-0.037109375
,
"special"
:
false
,
"text"
:
"0"
},
{
"id"
:
2978
,
"logprob"
:
-0.8095703
,
"special"
:
false
,
"text"
:
" school"
},
{
"id"
:
1060
,
"logprob"
:
-0.013053894
,
"special"
:
false
,
"text"
:
" year"
}
],
"top_tokens"
:
null
},
"generated_text"
:
" for the 2019-2020 school year"
}
]
integration-tests/models/test_flash_llama_fp8.py
0 → 100644
View file @
e5c1d6d6
import
pytest
@pytest.fixture(scope="module")
def flash_llama_fp8_handle(launcher):
    """Yield a launcher handle for Meta-Llama-3-8B started with FP8 quantization.

    The model is started through the ``launcher`` fixture with
    ``num_shard=2`` and ``quantize="fp8"``. The fixture is module-scoped and
    yields from inside the ``with`` block, so the launcher's context manager
    exits only when the fixture is torn down.
    """
    model_id = "meta-llama/Meta-Llama-3-8B"
    launch_options = {"num_shard": 2, "quantize": "fp8"}
    with launcher(model_id, **launch_options) as running:
        yield running
@pytest.fixture(scope="module")
async def flash_llama_fp8(flash_llama_fp8_handle):
    """Return the client of the FP8 Llama handle after awaiting its health check."""
    handle = flash_llama_fp8_handle
    # NOTE(review): 300 is presumably a timeout in seconds -- confirm against
    # the signature of the handle's health() method.
    await handle.health(300)
    return handle.client
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8(flash_llama_fp8, response_snapshot):
    """Generate 10 new tokens for a fixed prompt and compare with the snapshot.

    The request asks for decoder input details. The test first checks that
    exactly 10 tokens were generated, then compares the whole response
    against ``response_snapshot``.
    """
    request_options = {"max_new_tokens": 10, "decoder_input_details": True}
    response = await flash_llama_fp8.generate("Test request", **request_options)

    generated = response.details.generated_tokens
    assert generated == 10
    assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8_all_params(flash_llama_fp8, response_snapshot):
    """Send one request with every sampling option set and compare with the snapshot.

    A fixed ``seed=0`` is passed along with the sampling options, and the
    complete response is compared against ``response_snapshot``.
    """
    sampling_options = {
        "max_new_tokens": 10,
        "repetition_penalty": 1.2,
        "return_full_text": True,
        "stop_sequences": ["test"],
        "temperature": 0.5,
        "top_p": 0.9,
        "top_k": 10,
        "truncate": 5,
        "typical_p": 0.9,
        "watermark": True,
        "decoder_input_details": True,
        "seed": 0,
    }
    response = await flash_llama_fp8.generate("Test request", **sampling_options)

    assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8_load(flash_llama_fp8, generate_load, response_snapshot):
    """Issue four identical requests and check that they agree with each other.

    ``generate_load`` is called with ``n=4`` and ``max_new_tokens=10``. The
    test checks that four responses come back, that every response has the
    same ``generated_text`` as the first one, and that the list of responses
    matches ``response_snapshot``.
    """
    responses = await generate_load(
        flash_llama_fp8, "Test request", max_new_tokens=10, n=4
    )

    assert len(responses) == 4

    # The length assertion above guarantees index 0 exists; read the reference
    # text once instead of once per response.
    expected_text = responses[0].generated_text
    # Generator expression: no throwaway list, stops at the first mismatch.
    assert all(r.generated_text == expected_text for r in responses)

    assert responses == response_snapshot
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment