Commit 66b809cc authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
""" """
Test the piecewise compilation with a simple model, comparing the output Test the piecewise compilation with a simple model, comparing the output
with and without the piecewise compilation. with and without the piecewise compilation.
......
# SPDX-License-Identifier: Apache-2.0
import dataclasses import dataclasses
from typing import Dict, List, Optional from typing import Dict, List, Optional
......
# SPDX-License-Identifier: Apache-2.0
import pytest import pytest
from vllm.config import CompilationLevel from vllm.config import CompilationLevel
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import pytest import pytest
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
import pytest import pytest
import torch import torch
from compressed_tensors.quantization import FP8_DTYPE from compressed_tensors.quantization import FP8_DTYPE
......
# SPDX-License-Identifier: Apache-2.0
import pickle import pickle
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
from typing import Optional from typing import Optional
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
import json import json
import os import os
import tempfile import tempfile
...@@ -738,6 +740,7 @@ class VllmRunner: ...@@ -738,6 +740,7 @@ class VllmRunner:
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[List[int]], List[str]]]: ) -> List[Tuple[List[List[int]], List[str]]]:
inputs = self.get_inputs(prompts, inputs = self.get_inputs(prompts,
images=images, images=images,
...@@ -745,7 +748,8 @@ class VllmRunner: ...@@ -745,7 +748,8 @@ class VllmRunner:
audios=audios) audios=audios)
req_outputs = self.model.generate(inputs, req_outputs = self.model.generate(inputs,
sampling_params=sampling_params) sampling_params=sampling_params,
**kwargs)
outputs: List[Tuple[List[List[int]], List[str]]] = [] outputs: List[Tuple[List[List[int]], List[str]]] = []
for req_output in req_outputs: for req_output in req_outputs:
...@@ -783,6 +787,7 @@ class VllmRunner: ...@@ -783,6 +787,7 @@ class VllmRunner:
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
**kwargs: Any,
) -> Union[List[TokensTextLogprobs], ) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: List[TokensTextLogprobsPromptLogprobs]]:
inputs = self.get_inputs(prompts, inputs = self.get_inputs(prompts,
...@@ -791,7 +796,8 @@ class VllmRunner: ...@@ -791,7 +796,8 @@ class VllmRunner:
audios=audios) audios=audios)
req_outputs = self.model.generate(inputs, req_outputs = self.model.generate(inputs,
sampling_params=sampling_params) sampling_params=sampling_params,
**kwargs)
toks_str_logsprobs_prompt_logprobs = ( toks_str_logsprobs_prompt_logprobs = (
self._final_steps_generate_w_logprobs(req_outputs)) self._final_steps_generate_w_logprobs(req_outputs))
...@@ -827,13 +833,15 @@ class VllmRunner: ...@@ -827,13 +833,15 @@ class VllmRunner:
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[int], str]]: ) -> List[Tuple[List[int], str]]:
greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens) greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
outputs = self.generate(prompts, outputs = self.generate(prompts,
greedy_params, greedy_params,
images=images, images=images,
videos=videos, videos=videos,
audios=audios) audios=audios,
**kwargs)
return [(output_ids[0], output_str[0]) return [(output_ids[0], output_str[0])
for output_ids, output_str in outputs] for output_ids, output_str in outputs]
...@@ -848,6 +856,7 @@ class VllmRunner: ...@@ -848,6 +856,7 @@ class VllmRunner:
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
stop_token_ids: Optional[List[int]] = None, stop_token_ids: Optional[List[int]] = None,
stop: Optional[List[str]] = None, stop: Optional[List[str]] = None,
**kwargs: Any,
) -> Union[List[TokensTextLogprobs], ) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: List[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams( greedy_logprobs_params = SamplingParams(
...@@ -862,7 +871,8 @@ class VllmRunner: ...@@ -862,7 +871,8 @@ class VllmRunner:
greedy_logprobs_params, greedy_logprobs_params,
images=images, images=images,
audios=audios, audios=audios,
videos=videos) videos=videos,
**kwargs)
def generate_encoder_decoder_greedy_logprobs( def generate_encoder_decoder_greedy_logprobs(
self, self,
......
# SPDX-License-Identifier: Apache-2.0
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
from typing import Callable, Iterable, Optional from typing import Callable, Iterable, Optional
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
from itertools import cycle from itertools import cycle
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
import random import random
from typing import List from typing import List
......
# SPDX-License-Identifier: Apache-2.0
import pytest import pytest
from vllm.core.block.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, from vllm.core.block.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
......
# SPDX-License-Identifier: Apache-2.0
from typing import List from typing import List
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
import random import random
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
import pytest import pytest
from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Optional from typing import List, Optional
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0
import math import math
import random import random
from typing import List, Optional from typing import List, Optional
...@@ -63,8 +65,8 @@ class TestPrefixCachingBlock: ...@@ -63,8 +65,8 @@ class TestPrefixCachingBlock:
previous_block = MagicMock(spec=PrefixCachingBlock) previous_block = MagicMock(spec=PrefixCachingBlock)
prev_block_hash = random.randint(0, 1000) prev_block_hash = random.randint(0, 1000)
previous_block.content_hash = (prev_block_hash previous_block.content_hash = (prev_block_hash if prev_block_has_hash
if prev_block_has_hash else None) else hash('None'))
num_to_fill = block_size if is_curr_block_full else random.randint( num_to_fill = block_size if is_curr_block_full else random.randint(
0, block_size - 1) 0, block_size - 1)
......
# SPDX-License-Identifier: Apache-2.0
from typing import List from typing import List
from unittest.mock import MagicMock from unittest.mock import MagicMock
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment