Commit e19ddc39 authored by zhuwenwen
Browse files

support v1 engine

parent ac61f64c
...@@ -3610,7 +3610,9 @@ class VllmConfig: ...@@ -3610,7 +3610,9 @@ class VllmConfig:
self.compilation_config.cudagraph_num_of_warmups = 1 self.compilation_config.cudagraph_num_of_warmups = 1
self.compilation_config.pass_config.enable_fusion = False self.compilation_config.pass_config.enable_fusion = False
self.compilation_config.pass_config.enable_noop = False self.compilation_config.pass_config.enable_noop = False
self.compilation_config.level = CompilationLevel.PIECEWISE # TODO
# self.compilation_config.level = CompilationLevel.PIECEWISE
self.compilation_config.level = CompilationLevel.NO_COMPILATION
self.compilation_config.set_splitting_ops_for_v1() self.compilation_config.set_splitting_ops_for_v1()
self._set_cudagraph_sizes() self._set_cudagraph_sizes()
......
...@@ -129,10 +129,10 @@ class WorkerBase: ...@@ -129,10 +129,10 @@ class WorkerBase:
def list_loras(self) -> Set[int]: def list_loras(self) -> Set[int]:
raise NotImplementedError raise NotImplementedError
@property # @property
@abstractmethod # @abstractmethod
def cache_engines(self) -> Optional[List[CacheEngine]]: # def cache_engines(self) -> Optional[List[CacheEngine]]:
raise NotImplementedError # raise NotImplementedError
@property @property
def vocab_size(self) -> int: def vocab_size(self) -> int:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment