Unverified Commit 8daacbd7 authored by milesial's avatar milesial Committed by GitHub
Browse files

feat: default with lib/memory, media-nixl and kvbm (#5602)


Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
parent 0862f87b
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: Build Frontend Image

# Runs on pushes to main and to pull-request/<number> branches, and can also be
# invoked from another workflow via workflow_call (all secrets below are
# mandatory for callers).
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
  # Note: release/* branches are handled by release.yml which calls this workflow
  workflow_call:
    secrets:
      AWS_ACCOUNT_ID:
        required: true
      AWS_DEFAULT_REGION:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AZURE_ACR_HOSTNAME:
        required: true
      AZURE_ACR_USER:
        required: true
      AZURE_ACR_PASSWORD:
        required: true
      CI_TOKEN:
        required: true
      SCCACHE_S3_BUCKET:
        required: true

# On main every run gets its own group (keyed by run_id) and is never
# cancelled; on any other ref, runs share one group per ref name and a newer
# run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name == 'main' && github.run_id || github.ref_name }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Exposes the `frontend` output of the local changed-files action so the
  # build job can be skipped when that output is not 'true'.
  changed-files:
    runs-on: ubuntu-latest
    outputs:
      frontend: ${{ steps.changes.outputs.frontend }}
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          fetch-depth: 0
      - name: Check for changes
        id: changes
        uses: ./.github/actions/changed-files
        with:
          gh_token: ${{ github.token }}

  # Builds the frontend image once per architecture in the matrix, then tags
  # and pushes it to the AWS and Azure registries.
  build-frontend-image:
    name: Build Frontend Image (${{ matrix.platform.arch }})
    needs: changed-files
    if: needs.changed-files.outputs.frontend == 'true'
    strategy:
      # Let the other architecture finish even if one build fails.
      fail-fast: false
      matrix:
        platform:
          - { arch: amd64, runner: prod-builder-amd-v1 }
          - { arch: arm64, runner: prod-builder-arm-v1 }
    runs-on: ${{ matrix.platform.runner }}
    steps:
      - name: Checkout repository
        # Pinned to the same commit as the checkout in the changed-files job
        # so both jobs run an identical, immutable action revision.
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
      - name: Set up Go
        # NOTE(review): still referenced by mutable tag; consider pinning to a
        # commit SHA like the checkout action above.
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'
      - name: Install dependencies
        shell: bash
        run: |
          set -euo pipefail
          # Install system dependencies from apt
          sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler
          # Install Rust (cargo + rustc); restrict curl to HTTPS/TLS 1.2+ so the
          # installer script cannot be fetched over a downgraded connection
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          # Make cargo available to later steps
          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
      - name: Install cbindgen
        shell: bash
        run: |
          set -euo pipefail
          # --locked uses the dependency versions from the crate's published
          # Cargo.lock so the tool build is reproducible between runs
          cargo install --locked cbindgen
      - name: Docker Login
        uses: ./.github/actions/docker-login
        with:
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
      - name: Build Frontend Container
        id: build-image
        uses: ./.github/actions/docker-build
        env:
          PLATFORMS: linux/${{ matrix.platform.arch }}
          TARGETARCH: ${{ matrix.platform.arch }}
        with:
          framework: none
          target: frontend
          platform: ${{ env.PLATFORMS }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      - name: Docker Tag and Push Frontend Image
        uses: ./.github/actions/docker-tag-push
        with:
          local_image: ${{ steps.build-image.outputs.image_tag }}
          push_tags: ai-dynamo/dynamo:${{ github.sha }}-frontend-${{ matrix.platform.arch }}
          aws_push: 'true'
          azure_push: 'true'
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}

  # Aggregate gate: always runs, and succeeds only when every job listed in
  # `needs` finished with result "success" or "skipped".
  frontend-status-check:
    runs-on: ubuntu-latest
    needs: [changed-files, build-frontend-image]
    if: always()
    steps:
      - name: "Check all dependent jobs"
        env:
          # Hand the JSON to the script through the environment instead of
          # splicing it into the shell source, so quotes inside job outputs
          # cannot break or inject into the command.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          printf '%s' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
\ No newline at end of file
...@@ -83,7 +83,7 @@ jobs: ...@@ -83,7 +83,7 @@ jobs:
bash -ec 'rustup component add rustfmt clippy && \ bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \ cargo fmt -- --check && \
cargo clippy --features block-manager,media-nixl,media-ffmpeg --no-deps --all-targets -- -D warnings && \ cargo clippy --features block-manager,media-nixl,media-ffmpeg --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager,media-nixl,media-ffmpeg && \ cargo test --locked --all-targets --features=block-manager,media-nixl,media-ffmpeg,testing-nixl && \
cargo test --locked --features integration -- --nocapture' cargo test --locked --features integration -- --nocapture'
- name: Cleanup services - name: Cleanup services
if: always() if: always()
......
...@@ -26,6 +26,7 @@ default-members = [ ...@@ -26,6 +26,7 @@ default-members = [
"lib/runtime", "lib/runtime",
"lib/config", "lib/config",
"lib/tokens", "lib/tokens",
"lib/memory",
"lib/async-openai", "lib/async-openai",
"lib/parsers", "lib/parsers",
"lib/bindings/c", "lib/bindings/c",
...@@ -48,6 +49,7 @@ dynamo-runtime = { path = "lib/runtime", version = "0.9.0" } ...@@ -48,6 +49,7 @@ dynamo-runtime = { path = "lib/runtime", version = "0.9.0" }
dynamo-llm = { path = "lib/llm", version = "0.9.0" } dynamo-llm = { path = "lib/llm", version = "0.9.0" }
dynamo-config = { path = "lib/config", version = "0.9.0" } dynamo-config = { path = "lib/config", version = "0.9.0" }
dynamo-tokens = { path = "lib/tokens", version = "0.9.0" } dynamo-tokens = { path = "lib/tokens", version = "0.9.0" }
dynamo-memory = { path = "lib/memory", version = "0.9.0" }
dynamo-kv-router = { path = "lib/kv-router", version = "0.9.0", features = ["metrics"] } dynamo-kv-router = { path = "lib/kv-router", version = "0.9.0", features = ["metrics"] }
dynamo-async-openai = { path = "lib/async-openai", version = "0.9.0", features = ["byot"] } dynamo-async-openai = { path = "lib/async-openai", version = "0.9.0", features = ["byot"] }
dynamo-parsers = { path = "lib/parsers", version = "0.9.0" } dynamo-parsers = { path = "lib/parsers", version = "0.9.0" }
......
...@@ -72,6 +72,7 @@ async def read_decoded_media_via_nixl( ...@@ -72,6 +72,7 @@ async def read_decoded_media_via_nixl(
) )
array = tensor.numpy() # zero-copy array = tensor.numpy() # zero-copy
array = array[..., :3] # ignore alpha
if return_metadata: if return_metadata:
return array, decoded_meta.get("metadata") return array, decoded_meta.get("metadata")
else: else:
......
...@@ -543,6 +543,7 @@ async def register_vllm_model( ...@@ -543,6 +543,7 @@ async def register_vllm_model(
media_fetcher = MediaFetcher() media_fetcher = MediaFetcher()
media_fetcher.timeout_ms(30000) media_fetcher.timeout_ms(30000)
media_fetcher.allow_direct_port(True)
await register_llm( await register_llm(
model_input, model_input,
......
...@@ -13,19 +13,19 @@ readme.workspace = true ...@@ -13,19 +13,19 @@ readme.workspace = true
description = "Dynamo LLM Library" description = "Dynamo LLM Library"
[features] [features]
default = [] default = ["media-nixl", "block-manager"]
# todo(ops): get this working in CI as a default. # todo(ops): get this working in CI as a default.
# default = ["block-manager", "testing-full"] # default = ["block-manager", "testing-full"]
testing-full = ["testing-cuda", "testing-nixl"] testing-full = ["testing-cuda", "testing-nixl"]
testing-cuda = ["dep:cudarc"] testing-cuda = ["dep:cudarc", "dynamo-memory/testing-cuda"]
testing-nixl = ["dep:nixl-sys"] testing-nixl = ["dep:nixl-sys", "dynamo-memory/testing-nixl"]
testing-etcd = [] testing-etcd = []
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec", "dep:dynamo-memory"] block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec"]
block-manager-bench = ["block-manager", "testing-full", "dep:clap", "dep:indicatif"] block-manager-bench = ["block-manager", "testing-full", "dep:clap", "dep:indicatif"]
cuda = ["dep:cudarc"] cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"] integration = ["dynamo-runtime/integration"]
media-nixl = ["dep:nixl-sys", "dep:dynamo-memory", "dep:flate2"] media-nixl = ["dep:nixl-sys", "dep:flate2"]
media-ffmpeg = ["dep:video-rs", "dep:ffmpeg-next", "dep:memfile", "media-nixl"] media-ffmpeg = ["dep:video-rs", "dep:ffmpeg-next", "dep:memfile", "media-nixl"]
kv-router-stress = ["dep:clap", "dep:indicatif"] kv-router-stress = ["dep:clap", "dep:indicatif"]
...@@ -43,7 +43,7 @@ required-features = ["block-manager", "testing-cuda"] ...@@ -43,7 +43,7 @@ required-features = ["block-manager", "testing-cuda"]
dynamo-runtime = { workspace = true } dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true } dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] } dynamo-kv-router = { workspace = true, features = ["metrics"] }
dynamo-memory = { path = "../memory", optional = true } dynamo-memory = { workspace = true }
# workspace # workspace
aho-corasick = "1.1" aho-corasick = "1.1"
......
...@@ -63,10 +63,9 @@ ...@@ -63,10 +63,9 @@
//! //!
//! ```rust //! ```rust
//! use dynamo_llm::block_manager::layout::{ //! use dynamo_llm::block_manager::layout::{
//! LayoutConfig, FullyContiguous, BlockLayout, BlockLayoutLookup, BlockLayoutConfig, //! LayoutConfig, FullyContiguous, BlockLayout, GenericBlockLayout, BlockLayoutConfig,
//! }; //! };
//! use dynamo_llm::block_manager::storage::{SystemAllocator, StorageType}; //! use dynamo_llm::block_manager::storage::{SystemAllocator, StorageType};
//! use dynamo_llm::common::dtype::DType;
//! //!
//! // Define the layout configuration //! // Define the layout configuration
//! let config = LayoutConfig::builder() //! let config = LayoutConfig::builder()
...@@ -75,22 +74,20 @@ ...@@ -75,22 +74,20 @@
//! .outer_dim(1) //! .outer_dim(1)
//! .page_size(16) //! .page_size(16)
//! .inner_dim(128) //! .inner_dim(128)
//! .dtype(DType::FP16)
//! .build() //! .build()
//! .unwrap(); //! .unwrap();
//! //!
//!
//! // Allocate a FullyContiguous layout using a SystemAllocator //! // Allocate a FullyContiguous layout using a SystemAllocator
//! let allocator = SystemAllocator; //! let allocator = SystemAllocator;
//! let layout = FullyContiguous::allocate(config, &allocator).unwrap(); //! let layout = FullyContiguous::allocate(config, &allocator).unwrap();
//! //!
//! // Access layout properties //! // Access layout properties
//! assert_eq!(layout.num_blocks(), 10); //! assert_eq!(layout.num_blocks(), 10);
//! assert_eq!(layout.storage_type(), StorageType::System); //! assert_eq!(layout.storage_type(), &StorageType::System);
//! //!
//! // Get the address of a specific page //! // Get the memory region of a specific block/layer
//! let addr = layout.memory_region_addr(0, 0).unwrap(); //! let region = layout.memory_region(0, 0, 0).unwrap();
//! println!("Address of block 0, layer 0: {}", addr); //! println!("Address of block 0, layer 0: {}", region.addr());
//! ``` //! ```
//! //!
//! ## NIXL Integration //! ## NIXL Integration
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
//! reconstruct an `Arc<dyn BlockLayout<StorageType = NixlStorage>>`. This reconstructed layout now //! reconstruct an `Arc<dyn BlockLayout<StorageType = NixlStorage>>`. This reconstructed layout now
//! refers to the remote NIXL memory regions. //! refers to the remote NIXL memory regions.
//! //!
//! ```rust //! ```rust,ignore
//! use dynamo_llm::block_manager::layout::{LayoutConfig, LayoutType}; //! use dynamo_llm::block_manager::layout::{LayoutConfig, LayoutType};
//! use dynamo_llm::block_manager::layout::nixl::{NixlLayout, ToSerializedNixlBlockLayout, SerializedNixlBlockLayout}; //! use dynamo_llm::block_manager::layout::nixl::{NixlLayout, ToSerializedNixlBlockLayout, SerializedNixlBlockLayout};
//! use dynamo_llm::block_manager::storage::nixl::NixlAgent; //! use dynamo_llm::block_manager::storage::nixl::NixlAgent;
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
//! let config = LayoutConfig::builder() //! let config = LayoutConfig::builder()
//! .num_blocks(10) //! .num_blocks(10)
//! .num_layers(2) //! .num_layers(2)
//! .outer_dim(1)
//! .page_size(4) //! .page_size(4)
//! .inner_dim(13) //! .inner_dim(13)
//! .build().unwrap(); //! .build().unwrap();
...@@ -340,7 +341,7 @@ impl SerializedNixlBlockLayout { ...@@ -340,7 +341,7 @@ impl SerializedNixlBlockLayout {
} }
} }
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests { mod tests {
use super::super::*; use super::super::*;
use super::*; use super::*;
......
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
//! ``` //! ```
//! //!
//! For registering with external libraries: //! For registering with external libraries:
//! ```rust //! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{ //! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator, //! PinnedAllocator, StorageAllocator,
//! nixl::NixlRegisterableStorage //! nixl::NixlRegisterableStorage
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
//! ## Usage //! ## Usage
//! //!
//! ### Using Allocators //! ### Using Allocators
//! ```rust //! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{DeviceAllocator, PinnedAllocator, StorageAllocator}; //! use dynamo_llm::block_manager::storage::{DeviceAllocator, PinnedAllocator, StorageAllocator};
//! //!
//! // Create a pinned memory allocator //! // Create a pinned memory allocator
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
//! ``` //! ```
//! //!
//! ### Memory Operations //! ### Memory Operations
//! ```rust //! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{ //! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator, Storage, StorageMemset //! PinnedAllocator, StorageAllocator, Storage, StorageMemset
//! }; //! };
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
//! //!
//! ## Usage //! ## Usage
//! //!
//! ```rust //! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{ //! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator, //! PinnedAllocator, StorageAllocator,
//! nixl::NixlRegisterableStorage //! nixl::NixlRegisterableStorage
......
...@@ -110,7 +110,7 @@ mod cuda_tests { ...@@ -110,7 +110,7 @@ mod cuda_tests {
// Tests for NIXL registration would require a real NIXL agent, // Tests for NIXL registration would require a real NIXL agent,
// so we'll skip those for now. In practice, you'd mock the agent // so we'll skip those for now. In practice, you'd mock the agent
// or use integration tests. // or use integration tests.
#[cfg(feature = "testing-nixl")] #[cfg(all(feature = "testing-nixl", feature = "testing-cuda"))]
mod nixl_tests { mod nixl_tests {
use super::super::registered::register_with_nixl; use super::super::registered::register_with_nixl;
use super::*; use super::*;
......
...@@ -199,7 +199,7 @@ impl Layout for FullyContiguousLayout { ...@@ -199,7 +199,7 @@ impl Layout for FullyContiguousLayout {
} }
} }
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests { mod tests {
use super::super::tests::*; use super::super::tests::*;
use super::*; use super::*;
......
...@@ -208,7 +208,7 @@ impl Layout for LayerSeparateLayout { ...@@ -208,7 +208,7 @@ impl Layout for LayerSeparateLayout {
} }
} }
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests { mod tests {
use super::super::tests::*; use super::super::tests::*;
use super::*; use super::*;
......
...@@ -18,7 +18,7 @@ mod physical; ...@@ -18,7 +18,7 @@ mod physical;
mod serialize; mod serialize;
mod validation; mod validation;
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
pub(super) mod tests; pub(super) mod tests;
// #[cfg(test)] // #[cfg(test)]
......
...@@ -39,7 +39,7 @@ use crate::block_manager::v2::physical::{ ...@@ -39,7 +39,7 @@ use crate::block_manager::v2::physical::{
/// # Example /// # Example
/// ///
/// ``` /// ```
/// # use dynamo_kvbm::v2::physical::transfer::TransferCapabilities; /// # use dynamo_llm::block_manager::v2::physical::transfer::TransferCapabilities;
/// // Default conservative policy /// // Default conservative policy
/// let caps = TransferCapabilities::default(); /// let caps = TransferCapabilities::default();
/// assert!(!caps.allow_gds); /// assert!(!caps.allow_gds);
......
...@@ -154,7 +154,7 @@ fn compute_single_block_checksum( ...@@ -154,7 +154,7 @@ fn compute_single_block_checksum(
Ok(hasher.finalize().to_string()) Ok(hasher.finalize().to_string())
} }
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests { mod tests {
use super::super::tests::*; use super::super::tests::*;
use super::*; use super::*;
......
...@@ -191,7 +191,7 @@ fn fill_memory_region( ...@@ -191,7 +191,7 @@ fn fill_memory_region(
Ok(()) Ok(())
} }
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests { mod tests {
use super::super::tests::*; use super::super::tests::*;
use super::*; use super::*;
......
...@@ -49,7 +49,7 @@ pub mod preferences; ...@@ -49,7 +49,7 @@ pub mod preferences;
pub mod strategy; pub mod strategy;
pub mod validation; pub mod validation;
#[cfg(test)] #[cfg(all(test, feature = "testing-nixl"))]
mod tests; mod tests;
// Re-export StorageKind // Re-export StorageKind
......
...@@ -33,7 +33,7 @@ impl CompletionChecker for CudaEventChecker { ...@@ -33,7 +33,7 @@ impl CompletionChecker for CudaEventChecker {
} }
} }
#[cfg(all(test, feature = "testing-cuda"))] #[cfg(all(test, feature = "testing-cuda", feature = "testing-nixl"))]
mod tests { mod tests {
use crate::block_manager::v2::physical::manager::TransportManager; use crate::block_manager::v2::physical::manager::TransportManager;
use crate::block_manager::v2::physical::transfer::nixl_agent::NixlAgent; use crate::block_manager::v2::physical::transfer::nixl_agent::NixlAgent;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment