Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
18c811ba
Commit
18c811ba
authored
Nov 21, 2024
by
zhuwenwen
Browse files
remove flashinfer and change float to half
parent
520d727f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
6 deletions
+11
-6
tests/samplers/test_logprobs.py
tests/samplers/test_logprobs.py
+1
-1
tests/samplers/test_rejection_sampler.py
tests/samplers/test_rejection_sampler.py
+10
-5
No files found.
tests/samplers/test_logprobs.py
View file @
18c811ba
...
@@ -12,7 +12,7 @@ MODELS = ["facebook/opt-125m"]
...
@@ -12,7 +12,7 @@ MODELS = ["facebook/opt-125m"]
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"
float
"
])
# needed for comparing logprobs with HF
[
"
half
"
])
# needed for comparing logprobs with HF
@
pytest
.
mark
.
parametrize
(
"chunked_prefill_token_size"
,
[
1
,
4
,
16
,
-
1
])
@
pytest
.
mark
.
parametrize
(
"chunked_prefill_token_size"
,
[
1
,
4
,
16
,
-
1
])
@
pytest
.
mark
.
parametrize
(
"num_top_logprobs"
,
[
0
,
6
])
# 32000 == vocab_size
@
pytest
.
mark
.
parametrize
(
"num_top_logprobs"
,
[
0
,
6
])
# 32000 == vocab_size
@
pytest
.
mark
.
parametrize
(
"detokenize"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"detokenize"
,
[
True
,
False
])
...
...
tests/samplers/test_rejection_sampler.py
View file @
18c811ba
...
@@ -43,7 +43,8 @@ def mock_causal_accepted_tensor(
...
@@ -43,7 +43,8 @@ def mock_causal_accepted_tensor(
"which_tokens_accepted"
,
"which_tokens_accepted"
,
[
"all_tokens_accepted"
,
"no_tokens_accepted"
,
"some_tokens_accepted"
])
[
"all_tokens_accepted"
,
"no_tokens_accepted"
,
"some_tokens_accepted"
])
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
True
,
False
])
# @pytest.mark.parametrize("use_flashinfer", [True, False])
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
False
])
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
test_correct_output_format
(
which_tokens_accepted
:
str
,
seed
:
int
,
def
test_correct_output_format
(
which_tokens_accepted
:
str
,
seed
:
int
,
device
:
str
,
use_flashinfer
:
bool
):
device
:
str
,
use_flashinfer
:
bool
):
...
@@ -127,7 +128,8 @@ def test_correct_output_format(which_tokens_accepted: str, seed: int,
...
@@ -127,7 +128,8 @@ def test_correct_output_format(which_tokens_accepted: str, seed: int,
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
30_000
,
50_000
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
30_000
,
50_000
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
list
(
range
(
1
,
32
)))
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
list
(
range
(
1
,
32
)))
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
True
,
False
])
# @pytest.mark.parametrize("use_flashinfer", [True, False])
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
False
])
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
test_no_crash_with_varying_dims
(
k
:
int
,
vocab_size
:
int
,
batch_size
:
int
,
def
test_no_crash_with_varying_dims
(
k
:
int
,
vocab_size
:
int
,
batch_size
:
int
,
device
:
str
,
use_flashinfer
:
bool
):
device
:
str
,
use_flashinfer
:
bool
):
...
@@ -159,7 +161,8 @@ def test_no_crash_with_varying_dims(k: int, vocab_size: int, batch_size: int,
...
@@ -159,7 +161,8 @@ def test_no_crash_with_varying_dims(k: int, vocab_size: int, batch_size: int,
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
1
,
8
,
32
,
128
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
1
,
8
,
32
,
128
])
@
pytest
.
mark
.
parametrize
(
"n_rep"
,
[
100
])
@
pytest
.
mark
.
parametrize
(
"n_rep"
,
[
100
])
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
True
,
False
])
# @pytest.mark.parametrize("use_flashinfer", [True, False])
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
False
])
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
test_deterministic_when_seeded
(
k
:
int
,
vocab_size
:
int
,
batch_size
:
int
,
def
test_deterministic_when_seeded
(
k
:
int
,
vocab_size
:
int
,
batch_size
:
int
,
frac_seeded
:
float
,
n_rep
:
int
,
device
:
str
,
frac_seeded
:
float
,
n_rep
:
int
,
device
:
str
,
...
@@ -258,7 +261,8 @@ def test_compare_nonflashinfer_backend(k: int, vocab_size: int,
...
@@ -258,7 +261,8 @@ def test_compare_nonflashinfer_backend(k: int, vocab_size: int,
@
pytest
.
mark
.
parametrize
(
"which_token_ids"
,
@
pytest
.
mark
.
parametrize
(
"which_token_ids"
,
[
"bonus_token_ids"
,
"draft_token_ids"
])
[
"bonus_token_ids"
,
"draft_token_ids"
])
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
True
,
False
])
# @pytest.mark.parametrize("use_flashinfer", [True, False])
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
False
])
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
test_raises_when_vocab_oob
(
above_or_below_vocab_range
:
str
,
def
test_raises_when_vocab_oob
(
above_or_below_vocab_range
:
str
,
which_token_ids
:
str
,
device
:
str
,
which_token_ids
:
str
,
device
:
str
,
...
@@ -310,7 +314,8 @@ def test_raises_when_vocab_oob(above_or_below_vocab_range: str,
...
@@ -310,7 +314,8 @@ def test_raises_when_vocab_oob(above_or_below_vocab_range: str,
@
pytest
.
mark
.
parametrize
(
"draft_and_target_probs_equal"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"draft_and_target_probs_equal"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
list
(
range
(
5
)))
@
pytest
.
mark
.
parametrize
(
"seed"
,
list
(
range
(
5
)))
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
True
,
False
])
# @pytest.mark.parametrize("use_flashinfer", [True, False])
@
pytest
.
mark
.
parametrize
(
"use_flashinfer"
,
[
False
])
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
test_rejection_sampling_approximates_target_distribution
(
def
test_rejection_sampling_approximates_target_distribution
(
seed
:
int
,
draft_and_target_probs_equal
:
bool
,
use_flashinfer
:
bool
):
seed
:
int
,
draft_and_target_probs_equal
:
bool
,
use_flashinfer
:
bool
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment