Unverified commit ab618f01, authored by Alexandre Marques, committed by GitHub

Add support for enable_thinking argument in vllm model, set default to False (#2947)

parent fc5019ea
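For context, a minimal usage sketch, not part of the patch itself: assuming the wrapper is the VLLM class in lm_eval.models.vllm_causallms, the new keyword is passed at construction time like any other model argument.

    # Hypothetical usage of the new argument; the import path and model name
    # are assumptions for illustration and do not appear in the diff below.
    from lm_eval.models.vllm_causallms import VLLM

    lm = VLLM(
        pretrained="Qwen/Qwen3-8B",   # any chat model whose template reads enable_thinking
        enable_thinking=False,        # new keyword added by this commit; defaults to False
    )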
@@ -68,6 +68,7 @@ class VLLM(TemplateLM):
         device: str = "cuda",
         data_parallel_size: int = 1,
         lora_local_path: str = None,
+        enable_thinking: bool = False,
         **kwargs,
     ):
         super().__init__()
@@ -129,6 +130,7 @@ class VLLM(TemplateLM):
             add_bos_token=add_bos_token,
         )
         self.tokenizer = configure_pad_token(self.tokenizer, model_config=self._config)
+        self.enable_thinking = enable_thinking
         self.add_bos_token = add_bos_token
         if "gemma" in pretrained.lower():
             self.add_bos_token = True
@@ -209,6 +211,7 @@ class VLLM(TemplateLM):
             add_generation_prompt=add_generation_prompt,
             continue_final_message=not add_generation_prompt,
             chat_template=self.hf_chat_template,
+            enable_thinking=self.enable_thinking,
         )
         return chat_templated
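What the forwarded flag does in practice: extra keyword arguments passed to apply_chat_template are exposed to the chat template, so templates that check enable_thinking (for example, Qwen3's) can include or drop their reasoning block. A standalone sketch, assuming a recent transformers release and a model whose template supports the flag:

    # Minimal illustration of the enable_thinking pass-through; the model name
    # is an assumption for illustration only.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
    messages = [{"role": "user", "content": "What is 2 + 2?"}]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # mirrors the default this commit sets for the vLLM wrapper
    )
    print(prompt)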