Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5624d144
Unverified
Commit
5624d144
authored
Feb 13, 2026
by
Tzu-Ling Kan
Committed by
GitHub
Feb 13, 2026
Browse files
Rename fetch_llm to fetch_model (#6268)
Signed-off-by:
tzulingk@nvidia.com
<
tzulingk@nvidia.com
>
parent
09b6ab2f
Changes
32
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
48 additions
and
38 deletions
+48
-38
examples/backends/tritonserver/src/tritonworker.py
examples/backends/tritonserver/src/tritonworker.py
+3
-3
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+2
-2
lib/bindings/python/examples/hello_world/server_sglang.py
lib/bindings/python/examples/hello_world/server_sglang.py
+3
-3
lib/bindings/python/examples/hello_world/server_sglang_tok.py
...bindings/python/examples/hello_world/server_sglang_tok.py
+3
-3
lib/bindings/python/examples/hello_world/server_vllm.py
lib/bindings/python/examples/hello_world/server_vllm.py
+2
-2
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+8
-8
lib/bindings/python/src/dynamo/_core.pyi
lib/bindings/python/src/dynamo/_core.pyi
+10
-5
lib/bindings/python/src/dynamo/llm/__init__.py
lib/bindings/python/src/dynamo/llm/__init__.py
+8
-3
lib/bindings/python/tests/test_tensor.py
lib/bindings/python/tests/test_tensor.py
+3
-3
lib/llm/src/preprocessor/media/README.md
lib/llm/src/preprocessor/media/README.md
+1
-1
tests/frontend/grpc/echo_tensor_worker.py
tests/frontend/grpc/echo_tensor_worker.py
+3
-3
tests/serve/launch/template_verifier.py
tests/serve/launch/template_verifier.py
+2
-2
No files found.
examples/backends/tritonserver/src/tritonworker.py
View file @
5624d144
...
@@ -12,7 +12,7 @@ import uvloop
...
@@ -12,7 +12,7 @@ import uvloop
from
google.protobuf
import
text_format
from
google.protobuf
import
text_format
from
tritonclient.utils
import
triton_to_np_dtype
from
tritonclient.utils
import
triton_to_np_dtype
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
...
@@ -147,8 +147,8 @@ async def triton_worker(runtime: DistributedRuntime, args: argparse.Namespace):
...
@@ -147,8 +147,8 @@ async def triton_worker(runtime: DistributedRuntime, args: argparse.Namespace):
runtime_config
.
set_tensor_model_config
(
model_config
)
runtime_config
.
set_tensor_model_config
(
model_config
)
logger
.
info
(
"Attempting to register model with Dynamo runtime..."
)
logger
.
info
(
"Attempting to register model with Dynamo runtime..."
)
# Use register_
llm
for tensor-based models (skips HuggingFace downloads)
# Use register_
model
for tensor-based models (skips HuggingFace downloads)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tensor
,
ModelInput
.
Tensor
,
ModelType
.
TensorBased
,
ModelType
.
TensorBased
,
endpoint
,
endpoint
,
...
...
examples/multimodal/components/processor.py
View file @
5624d144
...
@@ -20,7 +20,7 @@ from vllm.outputs import RequestOutput
...
@@ -20,7 +20,7 @@ from vllm.outputs import RequestOutput
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
model
from
dynamo.runtime
import
Client
,
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
Client
,
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
...
@@ -318,7 +318,7 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
...
@@ -318,7 +318,7 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
await
encode_worker_client
.
wait_for_instances
()
await
encode_worker_client
.
wait_for_instances
()
# Register the endpoint as entrypoint to a model
# Register the endpoint as entrypoint to a model
await
register_
llm
(
await
register_
model
(
ModelInput
.
Text
,
# Custom processor is used and this type bypasses SDK processor
ModelInput
.
Text
,
# Custom processor is used and this type bypasses SDK processor
ModelType
.
Chat
,
ModelType
.
Chat
,
generate_endpoint
,
generate_endpoint
,
...
...
lib/bindings/python/examples/hello_world/server_sglang.py
View file @
5624d144
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
# request via NATS to this python script, which runs sglang.
# request via NATS to this python script, which runs sglang.
#
#
# The key differences between this and `server_sglang_tok.py` are:
# The key differences between this and `server_sglang_tok.py` are:
# - The `register_
llm
` function registers us a `Chat` and `Completions` model that accepts `Tokens` input
# - The `register_
model
` function registers us a `Chat` and `Completions` model that accepts `Tokens` input
# - The `generate` function receives a pre-tokenized request and must return token_ids in the response.
# - The `generate` function receives a pre-tokenized request and must return token_ids in the response.
#
#
# Setup a virtualenv with dynamo.llm, dynamo.runtime and sglang[all] installed
# Setup a virtualenv with dynamo.llm, dynamo.runtime and sglang[all] installed
...
@@ -27,7 +27,7 @@ import sglang
...
@@ -27,7 +27,7 @@ import sglang
import
uvloop
import
uvloop
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
...
@@ -91,7 +91,7 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -91,7 +91,7 @@ async def init(runtime: DistributedRuntime, config: Config):
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tokens
,
ModelInput
.
Tokens
,
ModelType
.
Chat
|
ModelType
.
Completions
,
ModelType
.
Chat
|
ModelType
.
Completions
,
endpoint
,
endpoint
,
...
...
lib/bindings/python/examples/hello_world/server_sglang_tok.py
View file @
5624d144
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
# do the pre/post-processing.
# do the pre/post-processing.
#
#
# The key differences between this and `server_sglang.py` are:
# The key differences between this and `server_sglang.py` are:
# - The `register_
llm
` function registers us a `Chat` and `Completions` model that accepts `Text` input
# - The `register_
model
` function registers us a `Chat` and `Completions` model that accepts `Text` input
# - The `generate` function receives a chat completion request and must return matching response
# - The `generate` function receives a chat completion request and must return matching response
#
#
# Setup a virtualenv with dynamo.llm, dynamo.runtime and sglang[all] installed
# Setup a virtualenv with dynamo.llm, dynamo.runtime and sglang[all] installed
...
@@ -31,7 +31,7 @@ from sglang.srt.openai_api.adapter import v1_chat_generate_request
...
@@ -31,7 +31,7 @@ from sglang.srt.openai_api.adapter import v1_chat_generate_request
from
sglang.srt.openai_api.protocol
import
ChatCompletionRequest
from
sglang.srt.openai_api.protocol
import
ChatCompletionRequest
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
...
@@ -104,7 +104,7 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -104,7 +104,7 @@ async def init(runtime: DistributedRuntime, config: Config):
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Text
,
ModelType
.
Chat
|
ModelType
.
Completions
,
endpoint
,
config
.
model
ModelInput
.
Text
,
ModelType
.
Chat
|
ModelType
.
Completions
,
endpoint
,
config
.
model
)
)
...
...
lib/bindings/python/examples/hello_world/server_vllm.py
View file @
5624d144
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
)
)
from
vllm.inputs
import
TokensPrompt
from
vllm.inputs
import
TokensPrompt
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
DYN_NAMESPACE
=
os
.
environ
.
get
(
"DYN_NAMESPACE"
,
"dynamo"
)
...
@@ -102,7 +102,7 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -102,7 +102,7 @@ async def init(runtime: DistributedRuntime, config: Config):
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tokens
,
ModelInput
.
Tokens
,
ModelType
.
Chat
|
ModelType
.
Completions
,
ModelType
.
Chat
|
ModelType
.
Completions
,
endpoint
,
endpoint
,
...
...
lib/bindings/python/rust/lib.rs
View file @
5624d144
...
@@ -142,9 +142,9 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
...
@@ -142,9 +142,9 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m
.add_function
(
wrap_pyfunction!
(
llm
::
kv
::
compute_block_hash_for_seq_py
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
llm
::
kv
::
compute_block_hash_for_seq_py
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
lora_name_to_id
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
lora_name_to_id
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
log_message
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
log_message
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
register_
llm
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
register_
model
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
unregister_
llm
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
unregister_
model
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
fetch_
llm
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
fetch_
model
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
llm
::
entrypoint
::
make_engine
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
llm
::
entrypoint
::
make_engine
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
llm
::
entrypoint
::
run_input
,
m
)
?
)
?
;
m
.add_function
(
wrap_pyfunction!
(
llm
::
entrypoint
::
run_input
,
m
)
?
)
?
;
...
@@ -228,7 +228,7 @@ fn lora_name_to_id(lora_name: &str) -> i32 {
...
@@ -228,7 +228,7 @@ fn lora_name_to_id(lora_name: &str) -> i32 {
#[pyfunction]
#[pyfunction]
#[pyo3(signature
=
(model_input,
model_type,
endpoint,
model_path,
model_name=None,
context_length=None,
kv_cache_block_size=None,
router_mode=None,
runtime_config=None,
user_data=None,
custom_template_path=None,
media_decoder=None,
media_fetcher=None,
lora_name=None,
base_model_path=None))]
#[pyo3(signature
=
(model_input,
model_type,
endpoint,
model_path,
model_name=None,
context_length=None,
kv_cache_block_size=None,
router_mode=None,
runtime_config=None,
user_data=None,
custom_template_path=None,
media_decoder=None,
media_fetcher=None,
lora_name=None,
base_model_path=None))]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::too_many_arguments)]
fn
register_
llm
<
'p
>
(
fn
register_
model
<
'p
>
(
py
:
Python
<
'p
>
,
py
:
Python
<
'p
>
,
model_input
:
ModelInput
,
model_input
:
ModelInput
,
model_type
:
ModelType
,
model_type
:
ModelType
,
...
@@ -409,7 +409,7 @@ fn register_llm<'p>(
...
@@ -409,7 +409,7 @@ fn register_llm<'p>(
/// - LoRA model: `v1/mdc/{namespace}/{component}/{endpoint}/{instance_id}/{lora_slug}`
/// - LoRA model: `v1/mdc/{namespace}/{component}/{endpoint}/{instance_id}/{lora_slug}`
#[pyfunction]
#[pyfunction]
#[pyo3(signature
=
(endpoint,
lora_name=None))]
#[pyo3(signature
=
(endpoint,
lora_name=None))]
fn
unregister_
llm
<
'p
>
(
fn
unregister_
model
<
'p
>
(
py
:
Python
<
'p
>
,
py
:
Python
<
'p
>
,
endpoint
:
Endpoint
,
endpoint
:
Endpoint
,
lora_name
:
Option
<&
str
>
,
lora_name
:
Option
<&
str
>
,
...
@@ -425,11 +425,11 @@ fn unregister_llm<'p>(
...
@@ -425,11 +425,11 @@ fn unregister_llm<'p>(
})
})
}
}
/// Download a model from Hugging Face, returning it
'
s local path
/// Download a model from Hugging Face, returning its local path
/// Example: `model_path = await fetch_
llm
("Qwen/Qwen3-0.6B")`
/// Example: `model_path = await fetch_
model
("Qwen/Qwen3-0.6B")`
#[pyfunction]
#[pyfunction]
#[pyo3(signature
=
(remote_name,
ignore_weights=
false
))]
#[pyo3(signature
=
(remote_name,
ignore_weights=
false
))]
fn
fetch_
llm
<
'p
>
(
fn
fetch_
model
<
'p
>
(
py
:
Python
<
'p
>
,
py
:
Python
<
'p
>
,
remote_name
:
&
str
,
remote_name
:
&
str
,
ignore_weights
:
bool
,
ignore_weights
:
bool
,
...
...
lib/bindings/python/src/dynamo/_core.pyi
View file @
5624d144
...
@@ -1011,7 +1011,7 @@ class KvRouterConfig:
...
@@ -1011,7 +1011,7 @@ class KvRouterConfig:
"""
"""
...
...
async def register_
llm
(
async def register_
model
(
model_input: ModelInput,
model_input: ModelInput,
model_type: ModelType,
model_type: ModelType,
endpoint: Endpoint,
endpoint: Endpoint,
...
@@ -1040,7 +1040,7 @@ async def register_llm(
...
@@ -1040,7 +1040,7 @@ async def register_llm(
"""
"""
...
...
async def unregister_
llm
(
async def unregister_
model
(
endpoint: Endpoint,
endpoint: Endpoint,
lora_name: Optional[str] = None,
lora_name: Optional[str] = None,
) -> None:
) -> None:
...
@@ -1055,14 +1055,19 @@ def lora_name_to_id(lora_name: str) -> int:
...
@@ -1055,14 +1055,19 @@ def lora_name_to_id(lora_name: str) -> int:
"""Generate a deterministic integer ID from a LoRA name using blake3 hash."""
"""Generate a deterministic integer ID from a LoRA name using blake3 hash."""
...
...
async def fetch_
llm
(remote_name: str, ignore_weights: bool = False) -> str:
async def fetch_
model
(remote_name: str, ignore_weights: bool = False) -> str:
"""
"""
Download a model from Hugging Face, returning it
'
s local path.
Download a model from Hugging Face, returning its local path.
If `ignore_weights` is True, only fetches tokenizer and config files.
If `ignore_weights` is True, only fetches tokenizer and config files.
Example: `model_path = await fetch_
llm
("Qwen/Qwen3-0.6B")`
Example: `model_path = await fetch_
model
("Qwen/Qwen3-0.6B")`
"""
"""
...
...
# Backward-compatible aliases (deprecated, use new names)
fetch_llm = fetch_model
register_llm = register_model
unregister_llm = unregister_model
class EngineConfig:
class EngineConfig:
"""Holds internal configuration for a Dynamo engine."""
"""Holds internal configuration for a Dynamo engine."""
...
...
...
...
lib/bindings/python/src/dynamo/llm/__init__.py
View file @
5624d144
...
@@ -29,11 +29,16 @@ from dynamo._core import RouterMode as RouterMode
...
@@ -29,11 +29,16 @@ from dynamo._core import RouterMode as RouterMode
from
dynamo._core
import
WorkerMetricsPublisher
as
WorkerMetricsPublisher
from
dynamo._core
import
WorkerMetricsPublisher
as
WorkerMetricsPublisher
from
dynamo._core
import
ZmqKvEventListener
as
ZmqKvEventListener
from
dynamo._core
import
ZmqKvEventListener
as
ZmqKvEventListener
from
dynamo._core
import
compute_block_hash_for_seq
as
compute_block_hash_for_seq
from
dynamo._core
import
compute_block_hash_for_seq
as
compute_block_hash_for_seq
from
dynamo._core
import
fetch_
llm
as
fetch_
llm
from
dynamo._core
import
fetch_
model
as
fetch_
model
from
dynamo._core
import
lora_name_to_id
as
lora_name_to_id
from
dynamo._core
import
lora_name_to_id
as
lora_name_to_id
from
dynamo._core
import
make_engine
from
dynamo._core
import
make_engine
from
dynamo._core
import
register_
llm
as
register_
llm
from
dynamo._core
import
register_
model
as
register_
model
from
dynamo._core
import
run_input
from
dynamo._core
import
run_input
from
dynamo._core
import
unregister_
llm
as
unregister_
llm
from
dynamo._core
import
unregister_
model
as
unregister_
model
from
.exceptions
import
HttpError
from
.exceptions
import
HttpError
# Backward-compatible aliases
fetch_llm
=
fetch_model
register_llm
=
register_model
unregister_llm
=
unregister_model
lib/bindings/python/tests/test_tensor.py
View file @
5624d144
...
@@ -8,7 +8,7 @@ import os
...
@@ -8,7 +8,7 @@ import os
import
pytest
import
pytest
import
uvloop
import
uvloop
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
from
dynamo.runtime
import
DistributedRuntime
TEST_END_TO_END
=
os
.
environ
.
get
(
"TEST_END_TO_END"
,
0
)
TEST_END_TO_END
=
os
.
environ
.
get
(
"TEST_END_TO_END"
,
0
)
...
@@ -34,8 +34,8 @@ async def test_register(runtime: DistributedRuntime):
...
@@ -34,8 +34,8 @@ async def test_register(runtime: DistributedRuntime):
assert
model_config
==
runtime_config
.
get_tensor_model_config
()
assert
model_config
==
runtime_config
.
get_tensor_model_config
()
# Use register_
llm
for tensor-based backends (skips HuggingFace downloads)
# Use register_
model
for tensor-based backends (skips HuggingFace downloads)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tensor
,
ModelInput
.
Tensor
,
ModelType
.
TensorBased
,
ModelType
.
TensorBased
,
endpoint
,
endpoint
,
...
...
lib/llm/src/preprocessor/media/README.md
View file @
5624d144
...
@@ -33,7 +33,7 @@ If `enable_image` or `enable_video` are not called, requests containing the corr
...
@@ -33,7 +33,7 @@ If `enable_image` or `enable_video` are not called, requests containing the corr
Register the LLM as usual, adding the media configuration:
Register the LLM as usual, adding the media configuration:
```
python
```
python
register_
llm
(
register_
model
(
...,
...,
media_decoder
=
decoder
,
media_decoder
=
decoder
,
media_fetcher
=
fetcher
,
media_fetcher
=
fetcher
,
...
...
tests/frontend/grpc/echo_tensor_worker.py
View file @
5624d144
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
import
tritonclient.grpc.model_config_pb2
as
mc
import
tritonclient.grpc.model_config_pb2
as
mc
import
uvloop
import
uvloop
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
...
@@ -53,8 +53,8 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
...
@@ -53,8 +53,8 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
)
)
assert
model_config
==
retrieved_model_config
assert
model_config
==
retrieved_model_config
# Use register_
llm
for tensor-based backends (skips HuggingFace downloads)
# Use register_
model
for tensor-based backends (skips HuggingFace downloads)
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tensor
,
ModelInput
.
Tensor
,
ModelType
.
TensorBased
,
ModelType
.
TensorBased
,
endpoint
,
endpoint
,
...
...
tests/serve/launch/template_verifier.py
View file @
5624d144
...
@@ -9,7 +9,7 @@ import uvloop
...
@@ -9,7 +9,7 @@ import uvloop
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
from
dynamo.common.utils.paths
import
WORKSPACE_DIR
from
dynamo.common.utils.paths
import
WORKSPACE_DIR
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
llm
from
dynamo.llm
import
ModelInput
,
ModelType
,
register_
model
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
SERVE_TEST_DIR
=
os
.
path
.
join
(
WORKSPACE_DIR
,
"tests/serve"
)
SERVE_TEST_DIR
=
os
.
path
.
join
(
WORKSPACE_DIR
,
"tests/serve"
)
...
@@ -54,7 +54,7 @@ async def main(runtime: DistributedRuntime):
...
@@ -54,7 +54,7 @@ async def main(runtime: DistributedRuntime):
# Register model with custom template
# Register model with custom template
model_name
=
"Qwen/Qwen3-0.6B"
model_name
=
"Qwen/Qwen3-0.6B"
await
register_
llm
(
await
register_
model
(
ModelInput
.
Tokens
,
ModelInput
.
Tokens
,
ModelType
.
Chat
,
ModelType
.
Chat
,
endpoint
,
endpoint
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment