- Only applicable to [text generation models](../models/generative_models.md) with a [chat template](../serving/openai_compatible_server.md#chat-template).
- Only applicable to [text generation models](../models/generative_models.md) with a [chat template](../serving/openai_compatible_server.md#chat-template).
- *Note: The `user` parameter is ignored.*
- *Note: The `user` parameter is ignored.*
...
@@ -229,6 +231,31 @@ The following extra parameters are supported:
...
@@ -229,6 +231,31 @@ The following extra parameters are supported:
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas; must be the latest version on Python 3.12+
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas; must be the latest version on Python 3.12+
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton; on Python 3.12+ a modern version must be installed so that it does not try to import distutils, which was removed in 3.12
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton; on Python 3.12+ a modern version must be installed so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL.
einops # Required for Qwen2-VL.
compressed-tensors == 0.12.2 # required for compressed-tensors quantized model support
compressed-tensors == 0.13.0 # required for compressed-tensors quantized model support
depyf==0.20.0 # required for profiling and debugging with compilation config
depyf==0.20.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files
watchfiles # required for http server to monitor the updates of TLS files