"vscode:/vscode.git/clone" did not exist on "04a73eda55689d009dbaafe2c2f4c6b5dccf16ca"
Unverified Commit 99cd9d85 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

feat: dynamo-run <-> python interop (#934)

Adding this to a Python script makes it register on the network so that `dynamo-run` can discover it and send it requests:
```
from dynamo.llm import register_llm

MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
await register_llm(endpoint, MODEL, 3)
```

Full vllm example, with pre-processing in dynamo:
- `dynamo-run in=text out=dyn://dynamo.backend.generate`
- `cd lib/bindings/python/examples/hello_world`
- `python server_vllm.py`

This builds on the earlier work that moved the pre-processor to the ingress side. It means we can decouple Rust and Python, using NATS as the bus.

The `register_llm` call does this:

- Download the model from HF if necessary
- Load the model deployment card from the HF folder or extract from GGUF
- Push the tokenizer config, etc., into the NATS object store so ingress can access it from a different machine
- Publish the model deployment card to ETCD
parent 829e1cf5
...@@ -20,9 +20,7 @@ const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1"; ...@@ -20,9 +20,7 @@ const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
#[tokio::test] #[tokio::test]
async fn test_model_info_from_hf_like_local_repo() { async fn test_model_info_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None) let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
.await
.unwrap();
let info = mdc.model_info.unwrap().get_model_info().await.unwrap(); let info = mdc.model_info.unwrap().get_model_info().await.unwrap();
assert_eq!(info.model_type(), "llama"); assert_eq!(info.model_type(), "llama");
assert_eq!(info.bos_token_id(), 1); assert_eq!(info.bos_token_id(), 1);
...@@ -34,15 +32,13 @@ async fn test_model_info_from_hf_like_local_repo() { ...@@ -34,15 +32,13 @@ async fn test_model_info_from_hf_like_local_repo() {
#[tokio::test] #[tokio::test]
async fn test_model_info_from_non_existent_local_repo() { async fn test_model_info_from_non_existent_local_repo() {
let path = "tests/data/sample-models/this-model-does-not-exist"; let path = "tests/data/sample-models/this-model-does-not-exist";
let result = ModelDeploymentCard::from_local_path(path, None).await; let result = ModelDeploymentCard::load(path).await;
assert!(result.is_err()); assert!(result.is_err());
} }
#[tokio::test] #[tokio::test]
async fn test_tokenizer_from_hf_like_local_repo() { async fn test_tokenizer_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None) let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
.await
.unwrap();
// Verify tokenizer file was found // Verify tokenizer file was found
match mdc.tokenizer.unwrap() { match mdc.tokenizer.unwrap() {
TokenizerKind::HfTokenizerJson(_) => (), TokenizerKind::HfTokenizerJson(_) => (),
...@@ -52,9 +48,7 @@ async fn test_tokenizer_from_hf_like_local_repo() { ...@@ -52,9 +48,7 @@ async fn test_tokenizer_from_hf_like_local_repo() {
#[tokio::test] #[tokio::test]
async fn test_prompt_formatter_from_hf_like_local_repo() { async fn test_prompt_formatter_from_hf_like_local_repo() {
let mdc = ModelDeploymentCard::from_local_path(HF_PATH, None) let mdc = ModelDeploymentCard::load(HF_PATH).await.unwrap();
.await
.unwrap();
// Verify prompt formatter was found // Verify prompt formatter was found
match mdc.prompt_formatter { match mdc.prompt_formatter {
Some(PromptFormatterArtifact::HfTokenizerConfigJson(_)) => (), Some(PromptFormatterArtifact::HfTokenizerConfigJson(_)) => (),
...@@ -66,7 +60,7 @@ async fn test_prompt_formatter_from_hf_like_local_repo() { ...@@ -66,7 +60,7 @@ async fn test_prompt_formatter_from_hf_like_local_repo() {
async fn test_missing_required_files() { async fn test_missing_required_files() {
// Create empty temp directory // Create empty temp directory
let temp_dir = tempdir().unwrap(); let temp_dir = tempdir().unwrap();
let result = ModelDeploymentCard::from_local_path(temp_dir.path(), None).await; let result = ModelDeploymentCard::load(temp_dir.path()).await;
assert!(result.is_err()); assert!(result.is_err());
let err = result.unwrap_err().to_string(); let err = result.unwrap_err().to_string();
// Should fail because config.json is missing // Should fail because config.json is missing
......
...@@ -45,9 +45,8 @@ async fn make_mdc_from_repo( ...@@ -45,9 +45,8 @@ async fn make_mdc_from_repo(
//TODO: remove this once we have nim-hub support. See the NOTE above. //TODO: remove this once we have nim-hub support. See the NOTE above.
let downloaded_path = maybe_download_model(local_path, hf_repo, hf_revision).await; let downloaded_path = maybe_download_model(local_path, hf_repo, hf_revision).await;
let display_name = format!("{}--{}", hf_repo, hf_revision); let display_name = format!("{}--{}", hf_repo, hf_revision);
let mut mdc = ModelDeploymentCard::from_local_path(downloaded_path, Some(&display_name)) let mut mdc = ModelDeploymentCard::load(downloaded_path).await.unwrap();
.await mdc.set_name(&display_name);
.unwrap();
mdc.prompt_context = mixins; mdc.prompt_context = mixins;
mdc mdc
} }
......
...@@ -48,6 +48,7 @@ use super::{ ...@@ -48,6 +48,7 @@ use super::{
}; };
use crate::pipeline::network::{ingress::push_endpoint::PushEndpoint, PushWorkHandler}; use crate::pipeline::network::{ingress::push_endpoint::PushEndpoint, PushWorkHandler};
use crate::protocols::Endpoint as EndpointId;
use async_nats::{ use async_nats::{
rustls::quic, rustls::quic,
service::{Service, ServiceExt}, service::{Service, ServiceExt},
...@@ -237,6 +238,14 @@ impl RuntimeProvider for Endpoint { ...@@ -237,6 +238,14 @@ impl RuntimeProvider for Endpoint {
} }
impl Endpoint { impl Endpoint {
/// Returns the [`EndpointId`] for this endpoint.
///
/// The identifier is assembled from three owned strings: the name of the
/// namespace that the parent component belongs to, the parent component's
/// name, and this endpoint's own name.
pub fn id(&self) -> EndpointId {
    let namespace = self.component.namespace().name().to_string();
    let component = self.component.name().to_string();
    let name = self.name().to_string();
    EndpointId {
        namespace,
        component,
        name,
    }
}
pub fn name(&self) -> &str { pub fn name(&self) -> &str {
&self.name &self.name
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment