Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5c9a2d49
Unverified
Commit
5c9a2d49
authored
Jun 04, 2025
by
richardhuo-nv
Committed by
GitHub
Jun 04, 2025
Browse files
fix: add speculative decoding config to dynamo serve + trtllm (#1356)
parent
b8dc0150
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
0 deletions
+10
-0
examples/tensorrt_llm/common/parser.py
examples/tensorrt_llm/common/parser.py
+10
-0
No files found.
examples/tensorrt_llm/common/parser.py
View file @
5c9a2d49
...
...
@@ -22,6 +22,7 @@ from typing import Any, Dict, Tuple
import
yaml
from
tensorrt_llm._torch.pyexecutor.config
import
PyTorchConfig
from
tensorrt_llm.llmapi
import
KvCacheConfig
from
tensorrt_llm.llmapi.llm_args
import
DecodingBaseConfig
@
dataclass
...
...
@@ -32,12 +33,14 @@ class LLMAPIConfig:
model_path
:
str
|
None
=
None
,
pytorch_backend_config
:
PyTorchConfig
|
None
=
None
,
kv_cache_config
:
KvCacheConfig
|
None
=
None
,
speculative_config
:
DecodingBaseConfig
|
None
=
None
,
**
kwargs
,
):
self
.
model_name
=
model_name
self
.
model_path
=
model_path
self
.
pytorch_backend_config
=
pytorch_backend_config
self
.
kv_cache_config
=
kv_cache_config
self
.
speculative_config
=
speculative_config
self
.
extra_args
=
kwargs
# Hardcoded to skip tokenizer init for now.
...
...
@@ -51,6 +54,7 @@ class LLMAPIConfig:
data
=
{
"pytorch_backend_config"
:
self
.
pytorch_backend_config
,
"kv_cache_config"
:
self
.
kv_cache_config
,
"speculative_config"
:
self
.
speculative_config
,
"skip_tokenizer_init"
:
self
.
skip_tokenizer_init
,
}
if
self
.
extra_args
:
...
...
@@ -68,6 +72,12 @@ class LLMAPIConfig:
self
.
kv_cache_config
=
KvCacheConfig
(
**
other_config
[
"kv_cache_config"
])
self
.
extra_args
.
pop
(
"kv_cache_config"
,
None
)
if
"speculative_config"
in
other_config
:
self
.
speculative_config
=
DecodingBaseConfig
.
from_dict
(
other_config
[
"speculative_config"
]
)
self
.
extra_args
.
pop
(
"speculative_config"
,
None
)
def
_get_llm_args
(
engine_config
):
# Only do model validation checks and leave other checks to LLMAPI
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment