Commit cf1d8464 authored by lizhigong
Browse files

debug on v0.8.5

parent 29e922ac
...@@ -52,7 +52,6 @@ class ZeroOverheadEngine(LLMEngine): ...@@ -52,7 +52,6 @@ class ZeroOverheadEngine(LLMEngine):
log_stats: bool, log_stats: bool,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
input_registry: InputRegistry = INPUT_REGISTRY,
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
use_cached_outputs: bool = False, use_cached_outputs: bool = False,
) -> None: ) -> None:
...@@ -113,10 +112,6 @@ class ZeroOverheadEngine(LLMEngine): ...@@ -113,10 +112,6 @@ class ZeroOverheadEngine(LLMEngine):
self.tokenizer, self.tokenizer,
mm_registry) mm_registry)
self.input_registry = input_registry
self.input_processor = input_registry.create_input_processor(
self.model_config)
self.model_executor = executor_class(vllm_config=vllm_config, ) self.model_executor = executor_class(vllm_config=vllm_config, )
if self.model_config.runner_type != "pooling": if self.model_config.runner_type != "pooling":
...@@ -159,11 +154,6 @@ class ZeroOverheadEngine(LLMEngine): ...@@ -159,11 +154,6 @@ class ZeroOverheadEngine(LLMEngine):
self.parallel_config.disable_custom_all_reduce, self.parallel_config.disable_custom_all_reduce,
}) })
if self.tokenizer:
# Ping the tokenizer to ensure liveness if it runs in a
# different process.
self.tokenizer.ping()
self.cached_scheduler_outputs = [ self.cached_scheduler_outputs = [
SchedulerOutputState() SchedulerOutputState()
for _ in range(self.parallel_config.pipeline_parallel_size) for _ in range(self.parallel_config.pipeline_parallel_size)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment