Unverified commit ad0d567e, authored by Robert Shaw, committed by GitHub
Browse files

[V1] Chore: cruft removal (#11724)

parent bf0d97d7
......@@ -225,8 +225,6 @@ class LLM:
# Logic to switch between engines is done at runtime instead of import
# to avoid import order issues
self.engine_class = self.get_engine_class()
# TODO(rob): enable mp by default (issue with fork vs spawn)
self.llm_engine = self.engine_class.from_engine_args(
engine_args, usage_context=UsageContext.LLM_CLASS)
......
......@@ -94,8 +94,6 @@ class InprocClient(EngineCoreClient):
* pushes EngineCoreRequest directly into the EngineCore
* pulls EngineCoreOutputs by stepping the EngineCore
TODO: support asyncio-mode for debugging.
"""
def __init__(self, *args, **kwargs):
......
......@@ -42,8 +42,6 @@ class LLMEngine:
use_cached_outputs: bool = False,
multiprocess_mode: bool = False,
) -> None:
# TODO: Can we avoid this?
self.model_config = vllm_config.model_config
# Tokenizer (+ ensure liveness if running in another process).
......@@ -179,8 +177,6 @@ class LLMEngine:
return request_outputs
# TODO(rob): Can we get rid of these?
def get_model_config(self):
return self.model_config
......
......@@ -49,9 +49,6 @@ class Processor:
cache_config.enable_prefix_caching
self.mm_hasher = MMHasher()
# TODO: run in a ThreadPoolExecutor or BackgroundProcess.
# Ideally this should release the GIL, so that we do not block the
# asyncio loop while this is running.
def process_inputs(
self,
request_id: str,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment