Unverified commit f790ad3c, authored by Avinash Raj and committed by GitHub
Browse files

[Frontend][OpenAI] Support for returning max_model_len on /v1/models response (#4643)

parent ed59a7ed
...@@ -82,6 +82,7 @@ class ModelCard(OpenAIBaseModel): ...@@ -82,6 +82,7 @@ class ModelCard(OpenAIBaseModel):
owned_by: str = "vllm" owned_by: str = "vllm"
root: Optional[str] = None root: Optional[str] = None
parent: Optional[str] = None parent: Optional[str] = None
max_model_len: Optional[int] = None
permission: List[ModelPermission] = Field(default_factory=list) permission: List[ModelPermission] = Field(default_factory=list)
......
...@@ -62,6 +62,7 @@ class OpenAIServing: ...@@ -62,6 +62,7 @@ class OpenAIServing:
"""Show available models. Right now we only have one model.""" """Show available models. Right now we only have one model."""
model_cards = [ model_cards = [
ModelCard(id=served_model_name, ModelCard(id=served_model_name,
max_model_len=self.max_model_len,
root=self.served_model_names[0], root=self.served_model_names[0],
permission=[ModelPermission()]) permission=[ModelPermission()])
for served_model_name in self.served_model_names for served_model_name in self.served_model_names
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment