OpenDAS / text-generation-inference · Commits

Commit 77746552 (unverified), parent 9cca3e0b
Authored Sep 16, 2024 by Daniël de Kok; committed via GitHub on Sep 16, 2024

Add tests for Mixtral (#2520)

Disable by default because CI runners do not have enough GPUs.

Showing 4 changed files with 746 additions and 0 deletions:
  +114 -0  integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json
   +99 -0  integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json
  +458 -0  integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json
   +75 -0  integration-tests/models/test_flash_mixtral.py
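All three tests in the new test module are disabled with an unconditional @pytest.mark.skip, per the commit message: the launcher fixture requests num_shard=8 and the CI runners do not have that many GPUs. A conditional skip would let the tests enable themselves on larger machines. The following is only a hedged sketch of that alternative (it assumes torch is importable in the test environment, which this commit does not require):

import pytest
import torch

# Hypothetical alternative to the unconditional skip used in this commit:
# skip only when fewer than 8 GPUs are visible to the test process.
requires_8_gpus = pytest.mark.skipif(
    torch.cuda.device_count() < 8,
    reason="requires > 4 shards",
)

The unconditional skip chosen in the commit is the simpler option when the runner hardware is known in advance.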
integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json (new file, mode 100644)
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 1824, "logprob": -6.1445312, "text": "What" },
      { "id": 349, "logprob": -1.4648438, "text": "is" },
      { "id": 21135, "logprob": -13.6875, "text": "gradient" },
      { "id": 24871, "logprob": -1.6005859, "text": "descent" },
      { "id": 28804, "logprob": -0.39526367, "text": "?" },
      { "id": 13, "logprob": -0.640625, "text": "\n" },
      { "id": 13, "logprob": -0.18774414, "text": "\n" }
    ],
    "seed": null,
    "tokens": [
      { "id": 20910, "logprob": -0.96484375, "special": false, "text": "Grad" },
      { "id": 722, "logprob": -0.003168106, "special": false, "text": "ient" },
      { "id": 24871, "logprob": -0.16540527, "special": false, "text": " descent" },
      { "id": 349, "logprob": -0.08886719, "special": false, "text": " is" },
      { "id": 396, "logprob": -0.75878906, "special": false, "text": " an" },
      { "id": 18586, "logprob": -0.5703125, "special": false, "text": " optimization" },
      { "id": 9464, "logprob": -0.11242676, "special": false, "text": " algorithm" },
      { "id": 1307, "logprob": -0.7939453, "special": false, "text": " used" },
      { "id": 298, "logprob": -0.17102051, "special": false, "text": " to" },
      { "id": 26518, "logprob": -0.34326172, "special": false, "text": " minimize" }
    ],
    "top_tokens": null
  },
  "generated_text": "Gradient descent is an optimization algorithm used to minimize"
}
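A quick structural note on the snapshot above: generated_text is exactly the concatenation of the text fields of the generated tokens, and generated_tokens matches their count. A minimal sketch that checks this, assuming the repository-relative path from the diff:

import json

# Sanity-check the snapshot shape: the decoded text is the concatenation
# of the generated tokens, and the token count matches generated_tokens.
with open(
    "integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json"
) as f:
    snap = json.load(f)

joined = "".join(t["text"] for t in snap["details"]["tokens"])
assert joined == snap["generated_text"]
assert snap["details"]["generated_tokens"] == len(snap["details"]["tokens"]) == 10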
integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json (new file, mode 100644)
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 24871, "logprob": -17.234375, "text": "descent" },
      { "id": 28804, "logprob": -7.4335938, "text": "?" },
      { "id": 13, "logprob": -0.8017578, "text": "\n" },
      { "id": 13, "logprob": -0.32958984, "text": "\n" }
    ],
    "seed": 0,
    "tokens": [
      { "id": 1313, "logprob": -2.3613281, "special": false, "text": "It" },
      { "id": 3969, "logprob": -0.7285156, "special": false, "text": " seems" },
      { "id": 298, "logprob": -1.3466797, "special": false, "text": " to" },
      { "id": 528, "logprob": 0.0, "special": false, "text": " me" },
      { "id": 28725, "logprob": -1.6757812, "special": false, "text": "," },
      { "id": 369, "logprob": -0.06585693, "special": false, "text": " that" },
      { "id": 513, "logprob": -1.1269531, "special": false, "text": " if" },
      { "id": 368, "logprob": 0.0, "special": false, "text": " you" },
      { "id": 28742, "logprob": -2.4921875, "special": false, "text": "'" },
      { "id": 267, "logprob": 0.0, "special": false, "text": "re" }
    ],
    "top_tokens": null
  },
  "generated_text": "What is gradient descent?\n\nIt seems to me, that if you're"
}
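Two things stand out in this snapshot compared to the previous one: seed is pinned to 0 (the matching test passes seed=0, so the sampled output is reproducible), and the prefill contains only the tail of the prompt, consistent with the truncate=5 parameter in that test. Three generated tokens also carry logprob 0.0, which suggests they were the only candidates left after the temperature/top_k/top_p filters renormalized the distribution. A small sketch for poking at this, assuming the snapshot path from the diff:

import json

# Inspect the all_params snapshot: pinned seed, plus tokens whose reported
# (post-filter) logprob is 0.0, i.e. probability ~1.0 under the warped
# sampling distribution.
with open(
    "integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json"
) as f:
    snap = json.load(f)

assert snap["details"]["seed"] == 0
forced = [t["text"] for t in snap["details"]["tokens"] if t["logprob"] == 0.0]
print(forced)  # [' me', ' you', 're'] in the committed snapshot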
integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json (new file, mode 100644)
[
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1824, "logprob": -6.1445312, "text": "What" },
        { "id": 349, "logprob": -1.4648438, "text": "is" },
        { "id": 21135, "logprob": -13.6875, "text": "gradient" },
        { "id": 24871, "logprob": -1.6005859, "text": "descent" },
        { "id": 28804, "logprob": -0.39526367, "text": "?" },
        { "id": 13, "logprob": -0.640625, "text": "\n" },
        { "id": 13, "logprob": -0.18774414, "text": "\n" }
      ],
      "seed": null,
      "tokens": [
        { "id": 20910, "logprob": -0.96484375, "special": false, "text": "Grad" },
        { "id": 722, "logprob": -0.003168106, "special": false, "text": "ient" },
        { "id": 24871, "logprob": -0.16369629, "special": false, "text": " descent" },
        { "id": 349, "logprob": -0.0881958, "special": false, "text": " is" },
        { "id": 396, "logprob": -0.76708984, "special": false, "text": " an" },
        { "id": 18586, "logprob": -0.57373047, "special": false, "text": " optimization" },
        { "id": 9464, "logprob": -0.11291504, "special": false, "text": " algorithm" },
        { "id": 1307, "logprob": -0.79589844, "special": false, "text": " used" },
        { "id": 298, "logprob": -0.1694336, "special": false, "text": " to" },
        { "id": 26518, "logprob": -0.34350586, "special": false, "text": " minimize" }
      ],
      "top_tokens": null
    },
    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1824, "logprob": -6.1445312, "text": "What" },
        { "id": 349, "logprob": -1.4677734, "text": "is" },
        { "id": 21135, "logprob": -13.6875, "text": "gradient" },
        { "id": 24871, "logprob": -1.6015625, "text": "descent" },
        { "id": 28804, "logprob": -0.39453125, "text": "?" },
        { "id": 13, "logprob": -0.6435547, "text": "\n" },
        { "id": 13, "logprob": -0.18713379, "text": "\n" }
      ],
      "seed": null,
      "tokens": [
        { "id": 20910, "logprob": -0.9628906, "special": false, "text": "Grad" },
        { "id": 722, "logprob": -0.0032176971, "special": false, "text": "ient" },
        { "id": 24871, "logprob": -0.16540527, "special": false, "text": " descent" },
        { "id": 349, "logprob": -0.08898926, "special": false, "text": " is" },
        { "id": 396, "logprob": -0.765625, "special": false, "text": " an" },
        { "id": 18586, "logprob": -0.5708008, "special": false, "text": " optimization" },
        { "id": 9464, "logprob": -0.11401367, "special": false, "text": " algorithm" },
        { "id": 1307, "logprob": -0.7963867, "special": false, "text": " used" },
        { "id": 298, "logprob": -0.17028809, "special": false, "text": " to" },
        { "id": 26518, "logprob": -0.34326172, "special": false, "text": " minimize" }
      ],
      "top_tokens": null
    },
    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1824, "logprob": -6.140625, "text": "What" },
        { "id": 349, "logprob": -1.4658203, "text": "is" },
        { "id": 21135, "logprob": -13.6796875, "text": "gradient" },
        { "id": 24871, "logprob": -1.5898438, "text": "descent" },
        { "id": 28804, "logprob": -0.3955078, "text": "?" },
        { "id": 13, "logprob": -0.64501953, "text": "\n" },
        { "id": 13, "logprob": -0.18493652, "text": "\n" }
      ],
      "seed": null,
      "tokens": [
        { "id": 20910, "logprob": -0.9580078, "special": false, "text": "Grad" },
        { "id": 722, "logprob": -0.0032176971, "special": false, "text": "ient" },
        { "id": 24871, "logprob": -0.16552734, "special": false, "text": " descent" },
        { "id": 349, "logprob": -0.08874512, "special": false, "text": " is" },
        { "id": 396, "logprob": -0.75878906, "special": false, "text": " an" },
        { "id": 18586, "logprob": -0.5703125, "special": false, "text": " optimization" },
        { "id": 9464, "logprob": -0.11236572, "special": false, "text": " algorithm" },
        { "id": 1307, "logprob": -0.79541016, "special": false, "text": " used" },
        { "id": 298, "logprob": -0.17102051, "special": false, "text": " to" },
        { "id": 26518, "logprob": -0.34326172, "special": false, "text": " minimize" }
      ],
      "top_tokens": null
    },
    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1824, "logprob": -6.1328125, "text": "What" },
        { "id": 349, "logprob": -1.4658203, "text": "is" },
        { "id": 21135, "logprob": -13.6796875, "text": "gradient" },
        { "id": 24871, "logprob": -1.5947266, "text": "descent" },
        { "id": 28804, "logprob": -0.39648438, "text": "?" },
        { "id": 13, "logprob": -0.6464844, "text": "\n" },
        { "id": 13, "logprob": -0.18688965, "text": "\n" }
      ],
      "seed": null,
      "tokens": [
        { "id": 20910, "logprob": -0.9609375, "special": false, "text": "Grad" },
        { "id": 722, "logprob": -0.003168106, "special": false, "text": "ient" },
        { "id": 24871, "logprob": -0.16601562, "special": false, "text": " descent" },
        { "id": 349, "logprob": -0.088134766, "special": false, "text": " is" },
        { "id": 396, "logprob": -0.7597656, "special": false, "text": " an" },
        { "id": 18586, "logprob": -0.5708008, "special": false, "text": " optimization" },
        { "id": 9464, "logprob": -0.11291504, "special": false, "text": " algorithm" },
        { "id": 1307, "logprob": -0.7944336, "special": false, "text": " used" },
        { "id": 298, "logprob": -0.17102051, "special": false, "text": " to" },
        { "id": 26518, "logprob": -0.34399414, "special": false, "text": " minimize" }
      ],
      "top_tokens": null
    },
    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
  }
]
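All four concurrent responses in the load snapshot decode to identical text; the logprobs differ only in the low-order bits, the usual numerical wobble from batched execution. A minimal sketch that verifies this, assuming the snapshot path from the diff:

import json

with open(
    "integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json"
) as f:
    snaps = json.load(f)

# All four concurrent responses decoded to the same text.
texts = {s["generated_text"] for s in snaps}
assert texts == {"Gradient descent is an optimization algorithm used to minimize"}

# Per-position spread of the generated-token logprobs across the four runs.
for i, tok in enumerate(snaps[0]["details"]["tokens"]):
    vals = [s["details"]["tokens"][i]["logprob"] for s in snaps]
    print(f"{tok['text']!r}: spread {max(vals) - min(vals):.6f}")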
integration-tests/models/test_flash_mixtral.py (new file, mode 100644)
import pytest


@pytest.fixture(scope="module")
def flash_mixtral_handle(launcher):
    with launcher("mistralai/Mixtral-8x7B-v0.1", num_shard=8) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_mixtral(flash_mixtral_handle):
    await flash_mixtral_handle.health(300)
    return flash_mixtral_handle.client


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral(flash_mixtral, response_snapshot):
    response = await flash_mixtral.generate(
        "What is gradient descent?\n\n", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "Gradient descent is an optimization algorithm used to minimize"
    )
    assert response == response_snapshot


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral_all_params(flash_mixtral, response_snapshot):
    response = await flash_mixtral.generate(
        "What is gradient descent?\n\n",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "What is gradient descent?\n\nIt seems to me, that if you're"
    )
    assert response == response_snapshot


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral_load(flash_mixtral, generate_load, response_snapshot):
    responses = await generate_load(
        flash_mixtral, "What is gradient descent?\n\n", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
    assert responses[0].details.generated_tokens == 10
    assert (
        responses[0].generated_text
        == "Gradient descent is an optimization algorithm used to minimize"
    )
    assert all(
        [r.generated_text == responses[0].generated_text for r in responses]
    ), f"{[r.generated_text for r in responses]}"
    assert responses == response_snapshot
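The generate_load helper used by test_flash_mixtral_load is a fixture defined elsewhere in the integration-test harness; it is not part of this diff. As a rough mental model only (a hypothetical sketch, not the repository's implementation), it issues n identical requests concurrently and returns all responses, which is why the load snapshot above contains four entries:

import asyncio

# Hypothetical sketch of a generate_load-style helper: fire `n` identical
# generation requests concurrently and collect the responses in order.
async def generate_load_sketch(client, prompt: str, max_new_tokens: int, n: int):
    tasks = [
        client.generate(prompt, max_new_tokens=max_new_tokens) for _ in range(n)
    ]
    return await asyncio.gather(*tasks)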