Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5b86b199
Unverified
Commit
5b86b199
authored
Sep 01, 2024
by
Roger Wang
Committed by
GitHub
Sep 01, 2024
Browse files
[Misc] Optional installation of audio related packages (#8063)
parent
5231f089
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
29 additions
and
10 deletions
+29
-10
requirements-common.txt
requirements-common.txt
+1
-3
requirements-test.txt
requirements-test.txt
+3
-1
setup.py
setup.py
+1
-0
tests/models/test_ultravox.py
tests/models/test_ultravox.py
+2
-2
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/ultravox.py
+5
-1
vllm/multimodal/utils.py
vllm/multimodal/utils.py
+17
-3
No files found.
requirements-common.txt
View file @
5b86b199
...
@@ -22,9 +22,7 @@ typing_extensions >= 4.10
...
@@ -22,9 +22,7 @@ typing_extensions >= 4.10
filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
pyzmq
pyzmq
msgspec
msgspec
librosa # Required for audio processing
soundfile # Required for audio processing
gguf == 0.9.1
gguf == 0.9.1
importlib_metadata
importlib_metadata
mistral_common >= 1.3.4
mistral_common >= 1.3.4
pyyaml
pyyaml
\ No newline at end of file
requirements-test.txt
View file @
5b86b199
...
@@ -13,10 +13,12 @@ pytest-shard
...
@@ -13,10 +13,12 @@ pytest-shard
awscli
awscli
einops # required for MPT, qwen-vl and Mamba
einops # required for MPT, qwen-vl and Mamba
httpx
httpx
librosa # required for audio test
peft
peft
requests
requests
ray
ray
sentence-transformers # required for embedding
sentence-transformers # required for embedding
soundfile # required for audio test
compressed-tensors==0.4.0 # required for compressed-tensors
compressed-tensors==0.4.0 # required for compressed-tensors
timm # required for internvl test
timm # required for internvl test
transformers_stream_generator # required for qwen-vl test
transformers_stream_generator # required for qwen-vl test
...
@@ -30,4 +32,4 @@ aiohttp
...
@@ -30,4 +32,4 @@ aiohttp
# quantization
# quantization
bitsandbytes==0.42.0
bitsandbytes==0.42.0
buildkite-test-collector==0.1.8
buildkite-test-collector==0.1.8
\ No newline at end of file
setup.py
View file @
5b86b199
...
@@ -501,6 +501,7 @@ setup(
...
@@ -501,6 +501,7 @@ setup(
ext_modules
=
ext_modules
,
ext_modules
=
ext_modules
,
extras_require
=
{
extras_require
=
{
"tensorizer"
:
[
"tensorizer>=2.9.0"
],
"tensorizer"
:
[
"tensorizer>=2.9.0"
],
"audio"
:
[
"librosa"
,
"soundfile"
]
# Required for audio processing
},
},
cmdclass
=
{
"build_ext"
:
cmake_build_ext
}
if
len
(
ext_modules
)
>
0
else
{},
cmdclass
=
{
"build_ext"
:
cmake_build_ext
}
if
len
(
ext_modules
)
>
0
else
{},
package_data
=
package_data
,
package_data
=
package_data
,
...
...
tests/models/test_ultravox.py
View file @
5b86b199
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
List
,
Optional
,
Tuple
,
Type
import
librosa
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
from
transformers
import
AutoModel
,
AutoTokenizer
,
BatchEncoding
from
transformers
import
AutoModel
,
AutoTokenizer
,
BatchEncoding
from
vllm.assets.audio
import
AudioAsset
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
...
@@ -21,6 +19,7 @@ AudioTuple = Tuple[np.ndarray, int]
...
@@ -21,6 +19,7 @@ AudioTuple = Tuple[np.ndarray, int]
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
audio_and_sample_rate
():
def
audio_and_sample_rate
():
from
vllm.assets.audio
import
AudioAsset
return
AudioAsset
(
"mary_had_lamb"
).
audio_and_sample_rate
return
AudioAsset
(
"mary_had_lamb"
).
audio_and_sample_rate
...
@@ -109,6 +108,7 @@ def run_test(
...
@@ -109,6 +108,7 @@ def run_test(
dtype
=
dtype
,
dtype
=
dtype
,
postprocess_inputs
=
process
,
postprocess_inputs
=
process
,
auto_cls
=
AutoModel
)
as
hf_model
:
auto_cls
=
AutoModel
)
as
hf_model
:
import
librosa
hf_outputs_per_audio
=
[
hf_outputs_per_audio
=
[
hf_model
.
generate_greedy_logprobs_limit
(
hf_model
.
generate_greedy_logprobs_limit
(
...
...
vllm/model_executor/models/ultravox.py
View file @
5b86b199
...
@@ -8,7 +8,6 @@ from functools import lru_cache
...
@@ -8,7 +8,6 @@ from functools import lru_cache
from
typing
import
(
Iterable
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
from
typing
import
(
Iterable
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
TypedDict
,
Union
,
cast
)
TypedDict
,
Union
,
cast
)
import
librosa
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
import
torch.utils.checkpoint
import
torch.utils.checkpoint
...
@@ -107,6 +106,11 @@ def input_mapper_for_ultravox(ctx: InputContext, data: object):
...
@@ -107,6 +106,11 @@ def input_mapper_for_ultravox(ctx: InputContext, data: object):
feature_extractor
=
whisper_feature_extractor
(
ctx
)
feature_extractor
=
whisper_feature_extractor
(
ctx
)
if
sr
!=
feature_extractor
.
sampling_rate
:
if
sr
!=
feature_extractor
.
sampling_rate
:
try
:
import
librosa
except
ImportError
:
raise
ImportError
(
"Please install vllm[audio] for audio support."
)
from
None
audio
=
librosa
.
resample
(
audio
,
audio
=
librosa
.
resample
(
audio
,
orig_sr
=
sr
,
orig_sr
=
sr
,
target_sr
=
feature_extractor
.
sampling_rate
)
target_sr
=
feature_extractor
.
sampling_rate
)
...
...
vllm/multimodal/utils.py
View file @
5b86b199
import
base64
import
base64
from
functools
import
lru_cache
from
functools
import
lru_cache
from
io
import
BytesIO
from
io
import
BytesIO
from
typing
import
List
,
Optional
,
Tuple
,
TypeVar
,
Union
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
TypeVar
,
Union
import
librosa
import
numpy
as
np
import
numpy
as
np
import
soundfile
from
PIL
import
Image
from
PIL
import
Image
from
vllm.connections
import
global_http_connection
from
vllm.connections
import
global_http_connection
...
@@ -73,10 +71,22 @@ async def async_fetch_image(image_url: str,
...
@@ -73,10 +71,22 @@ async def async_fetch_image(image_url: str,
return
image
.
convert
(
image_mode
)
return
image
.
convert
(
image_mode
)
def
try_import_audio_packages
()
->
Tuple
[
Any
,
Any
]:
try
:
import
librosa
import
soundfile
except
ImportError
:
raise
ImportError
(
"Please install vllm[audio] for audio support."
)
from
None
return
librosa
,
soundfile
def
fetch_audio
(
audio_url
:
str
)
->
Tuple
[
np
.
ndarray
,
Union
[
int
,
float
]]:
def
fetch_audio
(
audio_url
:
str
)
->
Tuple
[
np
.
ndarray
,
Union
[
int
,
float
]]:
"""
"""
Load audio from a URL.
Load audio from a URL.
"""
"""
librosa
,
_
=
try_import_audio_packages
()
if
audio_url
.
startswith
(
"http"
):
if
audio_url
.
startswith
(
"http"
):
audio_bytes
=
global_http_connection
.
get_bytes
(
audio_bytes
=
global_http_connection
.
get_bytes
(
audio_url
,
timeout
=
VLLM_AUDIO_FETCH_TIMEOUT
)
audio_url
,
timeout
=
VLLM_AUDIO_FETCH_TIMEOUT
)
...
@@ -95,6 +105,8 @@ async def async_fetch_audio(
...
@@ -95,6 +105,8 @@ async def async_fetch_audio(
"""
"""
Asynchronously fetch audio from a URL.
Asynchronously fetch audio from a URL.
"""
"""
librosa
,
_
=
try_import_audio_packages
()
if
audio_url
.
startswith
(
"http"
):
if
audio_url
.
startswith
(
"http"
):
audio_bytes
=
await
global_http_connection
.
async_get_bytes
(
audio_bytes
=
await
global_http_connection
.
async_get_bytes
(
audio_url
,
timeout
=
VLLM_AUDIO_FETCH_TIMEOUT
)
audio_url
,
timeout
=
VLLM_AUDIO_FETCH_TIMEOUT
)
...
@@ -123,6 +135,8 @@ def encode_audio_base64(
...
@@ -123,6 +135,8 @@ def encode_audio_base64(
sampling_rate
:
int
,
sampling_rate
:
int
,
)
->
str
:
)
->
str
:
"""Encode audio as base64."""
"""Encode audio as base64."""
_
,
soundfile
=
try_import_audio_packages
()
buffered
=
BytesIO
()
buffered
=
BytesIO
()
soundfile
.
write
(
buffered
,
audio
,
sampling_rate
,
format
=
"WAV"
)
soundfile
.
write
(
buffered
,
audio
,
sampling_rate
,
format
=
"WAV"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment