Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
17ca9642
Unverified
Commit
17ca9642
authored
Dec 18, 2024
by
Travis Johnson
Committed by
GitHub
Dec 19, 2024
Browse files
[Model] IBM Granite 3.1 (#11307)
Signed-off-by:
Travis Johnson
<
tsjohnso@us.ibm.com
>
parent
5a9da2e6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
6 deletions
+27
-6
docs/source/models/supported_models.rst
docs/source/models/supported_models.rst
+2
-2
docs/source/usage/tool_calling.md
docs/source/usage/tool_calling.md
+6
-1
tests/tool_use/utils.py
tests/tool_use/utils.py
+9
-1
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
+10
-2
No files found.
docs/source/models/supported_models.rst
View file @
17ca9642
...
@@ -194,8 +194,8 @@ Text Generation (``--task generate``)
...
@@ -194,8 +194,8 @@ Text Generation (``--task generate``)
-
-
- ✅︎
- ✅︎
* - :code:`GraniteForCausalLM`
* - :code:`GraniteForCausalLM`
- Granite 3.0, PowerLM
- Granite 3.0,
Granite 3.1,
PowerLM
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.
0
-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.
1
-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
- ✅︎
- ✅︎
- ✅︎
- ✅︎
* - :code:`GraniteMoeForCausalLM`
* - :code:`GraniteMoeForCausalLM`
...
...
docs/source/usage/tool_calling.md
View file @
17ca9642
...
@@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha
...
@@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha
`examples/tool_chat_template_granite.jinja`
: this is a modified chat template from the original on Huggingface. Parallel function calls are supported.
`examples/tool_chat_template_granite.jinja`
: this is a modified chat template from the original on Huggingface. Parallel function calls are supported.
*
`ibm-granite/granite-3.1-8b-instruct`
Recommended flags:
`--tool-call-parser granite`
The chat template from Huggingface can be used directly. Parallel function calls are supported.
*
`ibm-granite/granite-20b-functioncalling`
*
`ibm-granite/granite-20b-functioncalling`
Recommended flags:
`--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja`
Recommended flags:
`--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja`
...
@@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this.
...
@@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this.
--tool-call-parser example \
--tool-call-parser example \
--chat-template <your chat template> \
--chat-template <your chat template> \
```
```
tests/tool_use/utils.py
View file @
17ca9642
...
@@ -103,7 +103,7 @@ CONFIGS: Dict[str, ServerConfig] = {
...
@@ -103,7 +103,7 @@ CONFIGS: Dict[str, ServerConfig] = {
"supports_rocm"
:
"supports_rocm"
:
False
,
False
,
},
},
"granite8b"
:
{
"granite
-3.0-
8b"
:
{
"model"
:
"model"
:
"ibm-granite/granite-3.0-8b-instruct"
,
"ibm-granite/granite-3.0-8b-instruct"
,
"arguments"
:
[
"arguments"
:
[
...
@@ -111,6 +111,14 @@ CONFIGS: Dict[str, ServerConfig] = {
...
@@ -111,6 +111,14 @@ CONFIGS: Dict[str, ServerConfig] = {
str
(
VLLM_PATH
/
"examples/tool_chat_template_granite.jinja"
)
str
(
VLLM_PATH
/
"examples/tool_chat_template_granite.jinja"
)
],
],
},
},
"granite-3.1-8b"
:
{
"model"
:
"ibm-granite/granite-3.1-8b-instruct"
,
"arguments"
:
[
"--tool-call-parser"
,
"granite"
,
],
"supports_parallel"
:
True
,
},
"internlm"
:
{
"internlm"
:
{
"model"
:
"model"
:
"internlm/internlm2_5-7b-chat"
,
"internlm/internlm2_5-7b-chat"
,
...
...
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
View file @
17ca9642
...
@@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser):
...
@@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser):
def
__init__
(
self
,
tokenizer
:
AnyTokenizer
):
def
__init__
(
self
,
tokenizer
:
AnyTokenizer
):
super
().
__init__
(
tokenizer
)
super
().
__init__
(
tokenizer
)
# for granite 3.0, the token `<|tool_call|>`
self
.
bot_token
=
"<|tool_call|>"
self
.
bot_token
=
"<|tool_call|>"
# for granite 3.1, the string `<tool_call>`
self
.
bot_string
=
"<tool_call>"
def
extract_tool_calls
(
def
extract_tool_calls
(
self
,
model_output
:
str
,
self
,
model_output
:
str
,
request
:
ChatCompletionRequest
)
->
ExtractedToolCallInformation
:
request
:
ChatCompletionRequest
)
->
ExtractedToolCallInformation
:
# remove whitespace and the BOT token if it exists
stripped
=
model_output
.
strip
()
\
stripped
=
model_output
.
strip
().
removeprefix
(
self
.
bot_token
).
lstrip
()
.
removeprefix
(
self
.
bot_token
)
\
.
removeprefix
(
self
.
bot_string
)
\
.
lstrip
()
if
not
stripped
or
stripped
[
0
]
!=
'['
:
if
not
stripped
or
stripped
[
0
]
!=
'['
:
return
ExtractedToolCallInformation
(
tools_called
=
False
,
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
tool_calls
=
[],
...
@@ -91,6 +96,9 @@ class GraniteToolParser(ToolParser):
...
@@ -91,6 +96,9 @@ class GraniteToolParser(ToolParser):
if
current_text
[
start_idx
:].
startswith
(
self
.
bot_token
):
if
current_text
[
start_idx
:].
startswith
(
self
.
bot_token
):
start_idx
=
consume_space
(
start_idx
+
len
(
self
.
bot_token
),
start_idx
=
consume_space
(
start_idx
+
len
(
self
.
bot_token
),
current_text
)
current_text
)
if
current_text
[
start_idx
:].
startswith
(
self
.
bot_string
):
start_idx
=
consume_space
(
start_idx
+
len
(
self
.
bot_string
),
current_text
)
if
not
current_text
or
start_idx
>=
len
(
current_text
)
\
if
not
current_text
or
start_idx
>=
len
(
current_text
)
\
or
current_text
[
start_idx
]
!=
'['
:
or
current_text
[
start_idx
]
!=
'['
:
return
DeltaMessage
(
content
=
delta_text
)
return
DeltaMessage
(
content
=
delta_text
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment