Unverified commit 17ca9642, authored by Travis Johnson and committed by GitHub
Browse files

[Model] IBM Granite 3.1 (#11307)


Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
parent 5a9da2e6
...@@ -194,8 +194,8 @@ Text Generation (``--task generate``) ...@@ -194,8 +194,8 @@ Text Generation (``--task generate``)
- -
- ✅︎ - ✅︎
* - :code:`GraniteForCausalLM` * - :code:`GraniteForCausalLM`
- Granite 3.0, PowerLM - Granite 3.0, Granite 3.1, PowerLM
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.0-8b-instruct`, :code:`ibm/PowerLM-3b`, etc. - :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.1-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
- ✅︎ - ✅︎
- ✅︎ - ✅︎
* - :code:`GraniteMoeForCausalLM` * - :code:`GraniteMoeForCausalLM`
......
...@@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha ...@@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha
`examples/tool_chat_template_granite.jinja`: this is a modified chat template from the original on Huggingface. Parallel function calls are supported. `examples/tool_chat_template_granite.jinja`: this is a modified chat template from the original on Huggingface. Parallel function calls are supported.
* `ibm-granite/granite-3.1-8b-instruct`
Recommended flags: `--tool-call-parser granite`
The chat template from Huggingface can be used directly. Parallel function calls are supported.
* `ibm-granite/granite-20b-functioncalling` * `ibm-granite/granite-20b-functioncalling`
Recommended flags: `--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja` Recommended flags: `--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja`
...@@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this. ...@@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this.
--tool-call-parser example \ --tool-call-parser example \
--chat-template <your chat template> \ --chat-template <your chat template> \
``` ```
...@@ -103,7 +103,7 @@ CONFIGS: Dict[str, ServerConfig] = { ...@@ -103,7 +103,7 @@ CONFIGS: Dict[str, ServerConfig] = {
"supports_rocm": "supports_rocm":
False, False,
}, },
"granite8b": { "granite-3.0-8b": {
"model": "model":
"ibm-granite/granite-3.0-8b-instruct", "ibm-granite/granite-3.0-8b-instruct",
"arguments": [ "arguments": [
...@@ -111,6 +111,14 @@ CONFIGS: Dict[str, ServerConfig] = { ...@@ -111,6 +111,14 @@ CONFIGS: Dict[str, ServerConfig] = {
str(VLLM_PATH / "examples/tool_chat_template_granite.jinja") str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
], ],
}, },
"granite-3.1-8b": {
"model": "ibm-granite/granite-3.1-8b-instruct",
"arguments": [
"--tool-call-parser",
"granite",
],
"supports_parallel": True,
},
"internlm": { "internlm": {
"model": "model":
"internlm/internlm2_5-7b-chat", "internlm/internlm2_5-7b-chat",
......
...@@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser): ...@@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser):
def __init__(self, tokenizer: AnyTokenizer): def __init__(self, tokenizer: AnyTokenizer):
super().__init__(tokenizer) super().__init__(tokenizer)
# for granite 3.0, the token `<|tool_call|>`
self.bot_token = "<|tool_call|>" self.bot_token = "<|tool_call|>"
# for granite 3.1, the string `<tool_call>`
self.bot_string = "<tool_call>"
def extract_tool_calls( def extract_tool_calls(
self, model_output: str, self, model_output: str,
request: ChatCompletionRequest) -> ExtractedToolCallInformation: request: ChatCompletionRequest) -> ExtractedToolCallInformation:
# remove whitespace and the BOT token if it exists stripped = model_output.strip()\
stripped = model_output.strip().removeprefix(self.bot_token).lstrip() .removeprefix(self.bot_token)\
.removeprefix(self.bot_string)\
.lstrip()
if not stripped or stripped[0] != '[': if not stripped or stripped[0] != '[':
return ExtractedToolCallInformation(tools_called=False, return ExtractedToolCallInformation(tools_called=False,
tool_calls=[], tool_calls=[],
...@@ -91,6 +96,9 @@ class GraniteToolParser(ToolParser): ...@@ -91,6 +96,9 @@ class GraniteToolParser(ToolParser):
if current_text[start_idx:].startswith(self.bot_token): if current_text[start_idx:].startswith(self.bot_token):
start_idx = consume_space(start_idx + len(self.bot_token), start_idx = consume_space(start_idx + len(self.bot_token),
current_text) current_text)
if current_text[start_idx:].startswith(self.bot_string):
start_idx = consume_space(start_idx + len(self.bot_string),
current_text)
if not current_text or start_idx >= len(current_text)\ if not current_text or start_idx >= len(current_text)\
or current_text[start_idx] != '[': or current_text[start_idx] != '[':
return DeltaMessage(content=delta_text) return DeltaMessage(content=delta_text)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment