"vscode:/vscode.git/clone" did not exist on "f70902b827629a94db2fca21c4898a0cac89b328"
Unverified Commit 99cd9d85 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

feat: dynamo-run <-> python interop (#934)

Adding this to a Python script makes it register on the network so that `dynamo-run` can discover it and send it requests:
```
from dynamo.llm import register_llm

MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
await register_llm(endpoint, MODEL, 3)
```

Full vLLM example, with pre-processing done in dynamo:
- `dynamo-run in=text out=dyn://dynamo.backend.generate`
- `cd lib/bindings/python/examples/hello_world`
- `python server_vllm.py`

This builds on the earlier work that moved the pre-processor to the ingress side. It means we can decouple Rust and Python, using NATS as the bus.

The `register_llm` call does this:

- Download the model from HF if necessary
- Load the model deployment card from the HF folder or extract from GGUF
- Push the tokenizer config, etc. into the NATS object store so that the ingress can access it from a different machine
- Publish the model deployment card to etcd
parent 829e1cf5
......@@ -20,9 +20,7 @@ const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
#[tokio::test]
async fn test_model_info_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None)
.await
.unwrap();
let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
let info = mdc.model_info.unwrap().get_model_info().await.unwrap();
assert_eq!(info.model_type(), "llama");
assert_eq!(info.bos_token_id(), 1);
......@@ -34,15 +32,13 @@ async fn test_model_info_from_hf_like_local_repo() {
/// Loading a model deployment card from a directory that does not exist must
/// yield an error instead of a card.
#[tokio::test]
async fn test_model_info_from_non_existent_local_repo() {
    // Deliberately points at a path that is not present in the test data.
    let path = "tests/data/sample-models/this-model-does-not-exist";
    // Bind the result exactly once so the assertion below checks the call it
    // appears to check; a second `let result` would shadow this one unseen.
    let result = ModelDeploymentCard::load(path).await;
    assert!(result.is_err());
}
#[tokio::test]
async fn test_tokenizer_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None)
.await
.unwrap();
let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
// Verify tokenizer file was found
match mdc.tokenizer.unwrap() {
TokenizerKind::HfTokenizerJson(_) => (),
......@@ -52,9 +48,7 @@ async fn test_tokenizer_from_hf_like_local_repo() {
#[tokio::test]
async fn test_prompt_formatter_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None)
.await
.unwrap();
let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
// Verify prompt formatter was found
match mdc.prompt_formatter {
Some(PromptFormatterArtifact::HfTokenizerConfigJson(_)) => (),
......@@ -66,7 +60,7 @@ async fn test_prompt_formatter_from_hf_like_local_repo() {
async fn test_missing_required_files() {
// Create empty temp directory
let temp_dir = tempdir().unwrap();
let result = ModelDeploymentCard::from_local_path(temp_dir.path(), None).await;
let result = ModelDeploymentCard::load(temp_dir.path()).await;
assert!(result.is_err());
let err = result.unwrap_err().to_string();
// Should fail because config.json is missing
......
......@@ -45,9 +45,8 @@ async fn make_mdc_from_repo(
//TODO: remove this once we have nim-hub support. See the NOTE above.
let downloaded_path = maybe_download_model(local_path, hf_repo, hf_revision).await;
let display_name = format!("{}--{}", hf_repo, hf_revision);
let mut mdc = ModelDeploymentCard::from_local_path(downloaded_path, Some(&display_name))
.await
.unwrap();
let mut mdc = ModelDeploymentCard::load(downloaded_path).await.unwrap();
mdc.set_name(&display_name);
mdc.prompt_context = mixins;
mdc
}
......
......@@ -48,6 +48,7 @@ use super::{
};
use crate::pipeline::network::{ingress::push_endpoint::PushEndpoint, PushWorkHandler};
use crate::protocols::Endpoint as EndpointId;
use async_nats::{
rustls::quic,
service::{Service, ServiceExt},
......@@ -237,6 +238,14 @@ impl RuntimeProvider for Endpoint {
}
impl Endpoint {
/// Builds the [`EndpointId`] for this endpoint.
///
/// The identifier is assembled from the name of the component's namespace,
/// the component's name, and this endpoint's own name, each copied into an
/// owned string.
pub fn id(&self) -> EndpointId {
    let namespace = self.component.namespace().name().to_string();
    let component = self.component.name().to_string();
    let name = self.name().to_string();
    EndpointId {
        namespace,
        component,
        name,
    }
}
/// Returns this endpoint's name as a borrowed string slice.
pub fn name(&self) -> &str {
    &*self.name
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment