Unverified commit b590adfd, authored by Richard Zou, committed by GitHub
Browse files

Fix vLLM x torch.compile config caching (#16491)


Signed-off-by: rzou <zou3519@gmail.com>
parent b4fe16c7
...@@ -298,12 +298,18 @@ class ModelConfig: ...@@ -298,12 +298,18 @@ class ModelConfig:
factors.append(self.quantization) factors.append(self.quantization)
factors.append(self.revision) factors.append(self.revision)
factors.append(self.code_revision) factors.append(self.code_revision)
factors.append(self.max_model_len)
factors.append(self.max_logprobs)
factors.append(self.disable_sliding_window)
factors.append(self.trust_remote_code) factors.append(self.trust_remote_code)
factors.append(self.mm_processor_kwargs)
factors.append(self.generation_config)
factors.append(self.model_impl)
factors.append(self.override_generation_config)
factors.append(self.rope_scaling) factors.append(self.rope_scaling)
factors.append(self.rope_theta) factors.append(self.rope_theta)
# rope cos/sin cache depends on the max_position_embeddings # hf_config can control how the model looks!
factors.append( factors.append(self.hf_config.to_json_string())
getattr(self.hf_config, "max_position_embeddings", "None"))
return hashlib.sha256(str(factors).encode()).hexdigest() return hashlib.sha256(str(factors).encode()).hexdigest()
def __init__( def __init__(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment