"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "2f707fcb35c5bc4b9164cf2bbce0254a72f7348b"
Unverified commit 2382ad29, authored by youkaichao and committed by GitHub
Browse files

[ci] fix linter (#13701)


Signed-off-by: youkaichao <youkaichao@gmail.com>
parent 3e472d88
...@@ -48,15 +48,16 @@ def main(dp_size, dp_rank, dp_master_ip, dp_master_port, GPUs_per_dp_rank): ...@@ -48,15 +48,16 @@ def main(dp_size, dp_rank, dp_master_ip, dp_master_port, GPUs_per_dp_rank):
max_tokens=16 * (dp_rank + 1)) max_tokens=16 * (dp_rank + 1))
# Create an LLM. # Create an LLM.
llm = LLM(model="facebook/opt-125m", tensor_parallel_size=2, enforce_eager=True) llm = LLM(model="facebook/opt-125m",
tensor_parallel_size=2,
enforce_eager=True)
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
# Print the outputs. # Print the outputs.
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print( print(f"DP rank {dp_rank}, Prompt: {prompt!r}, "
f"DP rank {dp_rank}, Prompt: {prompt!r}, " f"Generated text: {generated_text!r}")
f"Generated text: {generated_text!r}")
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -1372,7 +1372,7 @@ class ParallelConfig: ...@@ -1372,7 +1372,7 @@ class ParallelConfig:
@staticmethod @staticmethod
def has_unfinished_dp(dp_group: "ProcessGroup", def has_unfinished_dp(dp_group: "ProcessGroup",
has_unfinished: bool) -> bool: has_unfinished: bool) -> bool:
tensor = torch.tensor([has_unfinished], tensor = torch.tensor([has_unfinished],
dtype=torch.int32, dtype=torch.int32,
device="cpu") device="cpu")
......
...@@ -518,6 +518,7 @@ def get_open_port() -> int: ...@@ -518,6 +518,7 @@ def get_open_port() -> int:
return port return port
return _get_open_port() return _get_open_port()
def _get_open_port() -> int: def _get_open_port() -> int:
port = envs.VLLM_PORT port = envs.VLLM_PORT
if port is not None: if port is not None:
......
...@@ -89,7 +89,7 @@ class EngineCoreClient(ABC): ...@@ -89,7 +89,7 @@ class EngineCoreClient(ABC):
def execute_dummy_batch(self) -> None: def execute_dummy_batch(self) -> None:
raise NotImplementedError raise NotImplementedError
async def execute_dummy_batch_async(self) -> None: async def execute_dummy_batch_async(self) -> None:
raise NotImplementedError raise NotImplementedError
...@@ -343,6 +343,7 @@ class SyncMPClient(MPClient): ...@@ -343,6 +343,7 @@ class SyncMPClient(MPClient):
def execute_dummy_batch(self) -> None: def execute_dummy_batch(self) -> None:
self._call_utility("execute_dummy_batch") self._call_utility("execute_dummy_batch")
class AsyncMPClient(MPClient): class AsyncMPClient(MPClient):
"""Asyncio-compatible client for multi-proc EngineCore.""" """Asyncio-compatible client for multi-proc EngineCore."""
......
...@@ -1167,7 +1167,8 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -1167,7 +1167,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
for k, v in self.intermediate_tensors.items() for k, v in self.intermediate_tensors.items()
}) })
with set_forward_context(None, self.vllm_config, num_tokens=num_tokens): with set_forward_context(None, self.vllm_config,
num_tokens=num_tokens):
hidden_states = model( hidden_states = model(
input_ids=input_ids, input_ids=input_ids,
positions=positions, positions=positions,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment