Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
129fce94
Commit
129fce94
authored
Dec 05, 2024
by
zhuwenwen
Browse files
update DCU info and skip tests same to nv
parent
3f78216a
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
44 additions
and
251 deletions
+44
-251
tests/kernels/test_cache.py
tests/kernels/test_cache.py
+1
-2
tests/models/decoder_only/vision_language/test_llava_next_video.py
...els/decoder_only/vision_language/test_llava_next_video.py
+20
-10
tests/models/decoder_only/vision_language/test_paligemma.py
tests/models/decoder_only/vision_language/test_paligemma.py
+16
-10
tests/samplers/test_rejection_sampler.py
tests/samplers/test_rejection_sampler.py
+2
-0
tests/tokenization/test_get_eos.py
tests/tokenization/test_get_eos.py
+4
-0
vllm/model_executor/layers/fused_moe/configs/E=64,N=1408,device_name=DCU_K100_AI.json
...used_moe/configs/E=64,N=1408,device_name=DCU_K100_AI.json
+0
-114
vllm/model_executor/layers/fused_moe/configs/E=64,N=704,device_name=DCU_K100_AI.json
...fused_moe/configs/E=64,N=704,device_name=DCU_K100_AI.json
+0
-114
vllm/utils.py
vllm/utils.py
+1
-1
No files found.
tests/kernels/test_cache.py
View file @
129fce94
...
...
@@ -6,8 +6,7 @@ import torch
from
tests.kernels.utils
import
DEFAULT_OPCHECK_TEST_UTILS
from
vllm
import
_custom_ops
as
ops
from
vllm.utils
import
seed_everything
from
vllm.utils
import
is_hip
from
vllm.utils
import
seed_everything
,
is_hip
from
.utils
import
torch_version
COPYING_DIRECTION
=
[(
'cuda'
,
'cpu'
),
(
'cuda'
,
'cuda'
),
(
'cpu'
,
'cuda'
)]
...
...
tests/models/decoder_only/vision_language/test_llava_next_video.py
View file @
129fce94
...
...
@@ -12,6 +12,7 @@ from vllm.sequence import SampleLogprobs
from
....conftest
import
VIDEO_ASSETS
,
HfRunner
,
VllmRunner
,
_VideoAssets
from
...utils
import
check_logprobs_close
from
....utils
import
models_path_prefix
from
vllm.utils
import
is_hip
_PREFACE
=
(
"A chat between a curious human and an artificial intelligence assistant. "
...
...
@@ -160,12 +161,11 @@ def run_test(
)
@
pytest
.
mark
.
skipif
(
transformers
.
__version__
<
"4.45"
,
reason
=
"Waiting for next transformers release"
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[
def
get_size_factors
():
if
is_hip
():
return
[[],]
else
:
return
[
# No video
[],
# Single-scale
...
...
@@ -175,6 +175,14 @@ def run_test(
# Multi-scale
[
0.25
,
0.5
,
1.0
],
],
@
pytest
.
mark
.
skipif
(
transformers
.
__version__
<
"4.45"
,
reason
=
"Waiting for next transformers release"
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
get_size_factors
()
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
...
...
@@ -205,6 +213,8 @@ def test_models(hf_runner, vllm_runner, video_assets, model, size_factors,
)
@
pytest
.
mark
.
skipif
(
is_hip
(),
reason
=
"Consistent with NV."
)
@
pytest
.
mark
.
skipif
(
transformers
.
__version__
<
"4.45"
,
reason
=
"Waiting for next transformers release"
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
...
...
tests/models/decoder_only/vision_language/test_paligemma.py
View file @
129fce94
...
...
@@ -125,10 +125,11 @@ def run_test(
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[
def
get_size_factors
():
if
is_hip
():
return
[[],]
else
:
return
[
# No image
[],
# Single-scale
...
...
@@ -137,7 +138,12 @@ def run_test(
[
1.0
,
1.0
,
1.0
],
# Multi-scale
[
0.25
,
0.5
,
1.0
],
],
]
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
get_size_factors
()
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
pytest
.
param
(
...
...
tests/samplers/test_rejection_sampler.py
View file @
129fce94
...
...
@@ -201,6 +201,8 @@ def test_deterministic_when_seeded(k: int, vocab_size: int, batch_size: int,
assert
torch
.
equal
(
results
[
j
][
i
],
results
[
0
][
i
])
@
pytest
.
mark
.
skipif
(
is_hip
(),
reason
=
"Consistent with NV."
)
@
pytest
.
mark
.
parametrize
(
"k"
,
[
1
,
3
,
6
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
30_000
,
50_000
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
1
,
8
,
32
,
128
])
...
...
tests/tokenization/test_get_eos.py
View file @
129fce94
...
...
@@ -7,8 +7,12 @@ from vllm.transformers_utils.config import try_get_generation_config
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
..utils
import
models_path_prefix
import
os
import
pytest
from
vllm.utils
import
is_hip
@
pytest
.
mark
.
skipif
(
is_hip
(),
reason
=
"Consistent with NV."
)
def
test_get_llama3_eos_token
():
model_name
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Meta-Llama-3-8B-Instruct"
)
...
...
vllm/model_executor/layers/fused_moe/configs/E=64,N=1408,device_name=DCU_K100_AI.json
deleted
100644 → 0
View file @
3f78216a
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
6
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"1024"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"2048"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"4096"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
6
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"6144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
6
,
"num_warps"
:
8
,
"num_stages"
:
1
},
"8192"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"12288"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"num_warps"
:
8
,
"num_stages"
:
1
},
"16384"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"32786"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
6
,
"num_warps"
:
4
,
"num_stages"
:
1
}
}
vllm/model_executor/layers/fused_moe/configs/E=64,N=704,device_name=DCU_K100_AI.json
deleted
100644 → 0
View file @
3f78216a
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
32
,
"GROUP_SIZE_M"
:
8
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"1024"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"2048"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"4096"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"6144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"8192"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"12288"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
6
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"16384"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
1
},
"32786"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
1
}
}
vllm/utils.py
View file @
129fce94
...
...
@@ -1418,5 +1418,5 @@ class W8a8GetCacheJSON:
def
get_w8a8json_name
(
self
,
n
,
k
):
device_name
=
current_platform
.
get_device_name
().
replace
(
" "
,
"_"
)
return
self
.
triton_json_dir
+
f
"/W8A8_
{
n
}
_
{
k
}
_
D
CU
{
device_name
}
.json"
return
self
.
triton_json_dir
+
f
"/W8A8_
{
n
}
_
{
k
}
_
H
CU
{
device_name
}
.json"
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment