"vllm/entrypoints/openai/completion/protocol.py" did not exist on "aab0102a267eba814cdc09170b530a3aed96be60"
Unverified Commit 21063c11 authored by Aaron Pham, committed by GitHub
Browse files

[CI/Build] drop support for Python 3.8 EOL (#8464)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
parent 4be3a451
......@@ -413,12 +413,10 @@ class _CorrectnessTestHelper:
def generate_probs_for_test(
self, draft_and_target_probs_equal: bool
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
draft_probs, target_probs = [
F.softmax(
torch.rand(self.vocab_size, dtype=torch.float32),
dim=-1,
) for _ in range(2)
]
draft_probs, target_probs = (F.softmax(
torch.rand(self.vocab_size, dtype=torch.float32),
dim=-1,
) for _ in range(2))
num_reference_probs = 100
reference_probs = F.softmax(
......
......@@ -29,7 +29,7 @@ def test_trace_function_call():
cur_dir = os.path.dirname(__file__)
enable_trace_function_call(path, cur_dir)
f1(1)
with open(path, 'r') as f:
with open(path) as f:
content = f.read()
assert "f1" in content
......
......@@ -93,10 +93,10 @@ def test_mistral_edge_case(tokenizer, truth):
def skip_special_tokens(request, tokenizer_name) -> Generator[bool, Any, None]:
if "mistral" in tokenizer_name:
yield (
bool(True) if request.param else
True if request.param else
pytest.skip("mistral doesn't support skip_special_tokens=False"))
else:
yield bool(True) if request.param else bool(False)
yield bool(request.param)
@pytest.mark.parametrize("truth", TRUTH)
......
......@@ -46,7 +46,7 @@ if __name__ == "__main__":
args = parser.parse_args()
with open(args.json_trace, "r") as f:
with open(args.json_trace) as f:
profile_data = json.load(f)
if args.table == "summary":
......
......@@ -434,7 +434,7 @@ def main(
f"{', Sparsity ' + sparsity if sparsity else ''}")
profile_json = None
with open(json_trace, "r") as f:
with open(json_trace) as f:
profile_json = json.load(f)
assert profile_json is not None
......
......@@ -81,7 +81,7 @@ class Target:
# Allow for modest floating-point errors
epsilon = 0.000002
if (self.weighted_duration > self.Duration() + epsilon):
print('%s > %s?' % (self.weighted_duration, self.Duration()))
print('{} > {}?'.format(self.weighted_duration, self.Duration()))
assert (self.weighted_duration <= self.Duration() + epsilon)
return self.weighted_duration
......@@ -104,7 +104,7 @@ def ReadTargets(log, show_all):
The result is a list of Target objects."""
header = log.readline()
assert header == '# ninja log v5\n', \
'unrecognized ninja log version %r' % header
'unrecognized ninja log version {!r}'.format(header)
targets_dict = {}
last_end_seen = 0.0
for line in log:
......@@ -254,8 +254,8 @@ def SummarizeEntries(entries, extra_step_types):
# Warn if the sum of weighted times is off by more than half a second.
if abs(length - weighted_total) > 500:
print('Warning: Possible corrupt ninja log, results may be '
'untrustworthy. Length = %.3f, weighted total = %.3f' %
(length, weighted_total))
'untrustworthy. Length = {:.3f}, weighted total = {:.3f}'.format(
length, weighted_total))
entries_by_ext = defaultdict(list)
for target in entries:
......@@ -263,16 +263,17 @@ def SummarizeEntries(entries, extra_step_types):
entries_by_ext[extension].append(target)
for key, values in entries_by_ext.items():
print(' Longest build steps for %s:' % key)
print(' Longest build steps for {}:'.format(key))
values.sort(key=lambda x: x.WeightedDuration())
for target in values[-long_count:]:
print(' %8.1f weighted s to build %s (%.1f s elapsed time)' %
(target.WeightedDuration(), target.DescribeTargets(),
target.Duration()))
print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx '
'parallelism)' %
(length, total_cpu_time, total_cpu_time * 1.0 / length))
print(
' {:8.1f} weighted s to build {} ({:.1f} s elapsed time)'.
format(target.WeightedDuration(), target.DescribeTargets(),
target.Duration()))
print(' {:.1f} s weighted time ({:.1f} s elapsed time sum, {:1.1f}x '
'parallelism)'.format(length, total_cpu_time,
total_cpu_time * 1.0 / length))
print(' %d build steps completed, average of %1.2f/s' %
(len(entries), len(entries) / (length)))
......@@ -298,11 +299,12 @@ def main():
long_ext_count += len(args.step_types.split(';'))
try:
with open(log_file, 'r') as log:
with open(log_file) as log:
entries = ReadTargets(log, False)
SummarizeEntries(entries, args.step_types)
except IOError:
print('Log file %r not found, no build summary created.' % log_file)
except OSError:
print('Log file {!r} not found, no build summary created.'.format(
log_file))
return errno.ENOENT
......
......@@ -4,7 +4,7 @@ requires_files = glob.glob('requirements*.txt')
requires_files += ["pyproject.toml"]
for file in requires_files:
print(f">>> cleaning {file}")
with open(file, 'r') as f:
with open(file) as f:
lines = f.readlines()
if "torch" in "".join(lines).lower():
print("removed:")
......
......@@ -192,10 +192,8 @@ class LocalStridedBlockSparseAttn(torch.nn.Module):
attn_mask = self.dense_attn_mask[None, :, :maxlen, :maxlen]
q2 = self.transpose_and_pad(q, cu_seqlens, maxlen, 1)
k2, v2 = [
self.transpose_and_pad(x, cu_seqlens, maxlen, q_k_ratio)
for x in [k, v]
]
k2, v2 = (self.transpose_and_pad(x, cu_seqlens, maxlen, q_k_ratio)
for x in [k, v])
spda_output = torch.nn.functional.scaled_dot_product_attention(
q2, k2, v2, attn_mask=attn_mask, scale=sm_scale)
return self.transpose_and_unpad(spda_output, cu_seqlens)
......
......@@ -668,9 +668,10 @@ class ModelConfig:
@property
def is_encoder_decoder_model(self) -> bool:
"""Extract the HF encoder/decoder model flag."""
return getattr(self.hf_config, "is_encoder_decoder", False) or (
(hasattr(self.hf_config, "text_config") and getattr(
self.hf_config.text_config, "is_encoder_decoder", False)))
return getattr(
self.hf_config, "is_encoder_decoder",
False) or (hasattr(self.hf_config, "text_config") and getattr(
self.hf_config.text_config, "is_encoder_decoder", False))
@property
def is_multimodal_model(self) -> bool:
......
......@@ -52,7 +52,7 @@ class Evictor(ABC):
pass
class BlockMetaData():
class BlockMetaData:
"""Data structure for storing key data describe cached block, so that
the evictor can use it to decide which one to choose for eviction
......
......@@ -240,7 +240,7 @@ def gpu_p2p_access_check(src: int, tgt: int) -> bool:
if is_distributed:
get_world_group().barrier()
logger.info("reading GPU P2P access cache from %s", path)
with open(path, "r") as f:
with open(path) as f:
cache = json.load(f)
_gpu_p2p_access_cache = cache
return _gpu_p2p_access_cache[f"{src}->{tgt}"]
......
......@@ -812,7 +812,7 @@ class AsyncLLMEngine(EngineClient):
async def run_engine_loop(engine_ref: ReferenceType):
"""We use a weakref to the engine so that the running loop
doesn't prevent the engine being garbage collected."""
engine: Optional["AsyncLLMEngine"] = engine_ref()
engine: Optional[AsyncLLMEngine] = engine_ref()
if not engine:
return
......
......@@ -1541,8 +1541,8 @@ class LLMEngine:
seq_group.state.remaining_steps != ref_remaining_steps
for seq_group in seq_group_metadata_list[1:]
]):
raise AssertionError(("All running sequence groups should "
"have the same remaining steps."))
raise AssertionError("All running sequence groups should "
"have the same remaining steps.")
return ref_remaining_steps > 0
......
......@@ -77,7 +77,7 @@ class StatLoggerBase(ABC):
self.num_generation_tokens: List[int] = []
self.last_local_log = time.time()
self.local_interval = local_interval
self.spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
self.spec_decode_metrics: Optional[SpecDecodeWorkerMetrics] = None
@abstractmethod
def log(self, stats: Stats) -> None:
......
......@@ -63,7 +63,7 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor):
single_step_process_prompt_logprob(self, seq_group, output)
@staticmethod
@functools.lru_cache()
@functools.lru_cache
def _log_prompt_logprob_unsupported_warning_once():
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# if the feature combo becomes valid
......
......@@ -362,7 +362,7 @@ def load_chat_template(
if chat_template is None:
return None
try:
with open(chat_template, "r") as f:
with open(chat_template) as f:
resolved_chat_template = f.read()
except OSError as e:
if isinstance(chat_template, Path):
......
......@@ -120,7 +120,7 @@ async def read_file(path_or_url: str) -> str:
session.get(path_or_url) as resp:
return await resp.text()
else:
with open(path_or_url, "r", encoding="utf-8") as f:
with open(path_or_url, encoding="utf-8") as f:
return f.read()
......
......@@ -32,7 +32,7 @@ class RayGPUExecutor(DistributedGPUExecutor):
uses_ray: bool = True
def _init_executor(self) -> None:
self.forward_dag: Optional["ray.dag.CompiledDAG"] = None
self.forward_dag: Optional[ray.dag.CompiledDAG] = None
# If the env var is set, it uses Ray's compiled DAG API,
# which optimizes the control plane overhead.
# Run vLLM with VLLM_USE_RAY_COMPILED_DAG=1 to enable it.
......
......@@ -67,8 +67,7 @@ def _configure_vllm_root_logger() -> None:
raise RuntimeError(
"Could not load logging config. File does not exist: %s",
VLLM_LOGGING_CONFIG_PATH)
with open(VLLM_LOGGING_CONFIG_PATH, encoding="utf-8",
mode="r") as file:
with open(VLLM_LOGGING_CONFIG_PATH, encoding="utf-8") as file:
custom_config = json.loads(file.read())
if not isinstance(custom_config, dict):
......
......@@ -343,7 +343,7 @@ class LoRAModelManager(AdapterModelManager):
# text modules (e.g. ChatGLM)
and hasattr(self.model, "get_mm_mapping"))
self.packed_modules: Dict[str, List[str]] = {}
self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
self.modules: Dict[str, BaseLayerWithLoRA] = {}
# Dict instead of a Set for compatibility with LRUCache.
self._last_mapping: Optional[LoRAMapping] = None
self._create_lora_modules()
......@@ -548,7 +548,7 @@ class LoRAModelManager(AdapterModelManager):
else:
parts = module_name.split(".")
replacements = self.packed_modules_mapping[parts[-1]]
subloras: List[Optional["LoRALayerWeights"]] = []
subloras: List[Optional[LoRALayerWeights]] = []
for i, r in enumerate(replacements):
lora = LoRALayerWeights.create_dummy_lora_weights(
module_name + "." + r,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment