Commit 99963991 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev' of http://10.16.6.30/dcutoolkit/deeplearing/vllm into v0.9.2-dev

parents a7668e46 6cc81877
...@@ -2088,8 +2088,9 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2088,8 +2088,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
hidden_states = outputs hidden_states = outputs
if self.speculative_config and self.speculative_config.use_eagle() and not is_profile: if self.speculative_config and self.speculative_config.use_eagle() and not is_profile:
assert isinstance(self.drafter, EagleProposer) #assert isinstance(self.drafter, EagleProposer)
self.drafter.dummy_run(num_tokens, attn_metadata) if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
self.drafter.dummy_run(num_tokens, attn_metadata)
# This is necessary to avoid blocking DP. # This is necessary to avoid blocking DP.
# For dummy runs, we typically skip EPLB since we don't have any real # For dummy runs, we typically skip EPLB since we don't have any real
...@@ -2677,10 +2678,11 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2677,10 +2678,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
kv_caches = self.initialize_kv_cache_tensors(kv_cache_config) kv_caches = self.initialize_kv_cache_tensors(kv_cache_config)
if self.speculative_config and self.speculative_config.use_eagle(): if self.speculative_config and self.speculative_config.use_eagle():
assert isinstance(self.drafter, EagleProposer) #assert isinstance(self.drafter, EagleProposer)
# validate all draft model layers belong to the same kv cache # validate all draft model layers belong to the same kv cache
# group # group
self.drafter.validate_same_kv_cache_group(kv_cache_config) if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
self.drafter.validate_same_kv_cache_group(kv_cache_config)
if has_kv_transfer_group(): if has_kv_transfer_group():
get_kv_transfer_group().register_kv_caches(kv_caches) get_kv_transfer_group().register_kv_caches(kv_caches)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment