Unverified commit 8daacbd7, authored by milesial, committed by GitHub
Browse files

feat: default with lib/memory, media-nixl and kvbm (#5602)


Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
parent 0862f87b
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: Build Frontend Image

# Triggers: pushes to main and to pull-request/<n> branches, plus reusable
# invocation from another workflow (which must supply every secret below).
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      # Note: release/* branches are handled by release.yml which calls this workflow
  workflow_call:
    secrets:
      AWS_ACCOUNT_ID:
        required: true
      AWS_DEFAULT_REGION:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AZURE_ACR_HOSTNAME:
        required: true
      AZURE_ACR_USER:
        required: true
      AZURE_ACR_PASSWORD:
        required: true
      CI_TOKEN:
        required: true
      SCCACHE_S3_BUCKET:
        required: true

# Runs on main are keyed by run id, so each gets its own group and is never
# cancelled. Runs on any other ref share one group per ref name, and a newer
# run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name == 'main' && github.run_id || github.ref_name }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Exposes the `frontend` output of the local changed-files action; the build
  # job below only runs when that output is the string 'true'.
  changed-files:
    runs-on: ubuntu-latest
    outputs:
      frontend: ${{ steps.changes.outputs.frontend }}
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          fetch-depth: 0
      - name: Check for changes
        id: changes
        uses: ./.github/actions/changed-files
        with:
          gh_token: ${{ github.token }}

  # Builds the `frontend` target once per architecture on the matching runner,
  # then tags and pushes the image to both the AWS and Azure registries.
  build-frontend-image:
    name: Build Frontend Image (${{ matrix.platform.arch }})
    needs: changed-files
    if: needs.changed-files.outputs.frontend == 'true'
    strategy:
      # One architecture failing must not cancel the other.
      fail-fast: false
      matrix:
        platform:
          - { arch: amd64, runner: prod-builder-amd-v1 }
          - { arch: arm64, runner: prod-builder-arm-v1 }
    runs-on: ${{ matrix.platform.runner }}
    steps:
      - name: Checkout repository
        # Pinned to the same commit as the checkout in the changed-files job
        # instead of the mutable v4 tag.
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'
      - name: Install dependencies
        shell: bash
        run: |
          set -euo pipefail
          # Install system dependencies from apt
          sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler
          # Install Rust (cargo + rustc); refuse anything but HTTPS with TLS >= 1.2
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          # Make cargo available to later steps
          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
      - name: Install cbindgen
        shell: bash
        run: |
          set -euo pipefail
          # --locked uses the Cargo.lock shipped with the crate so the installed
          # dependency versions do not drift between runs.
          cargo install --locked cbindgen
      - name: Docker Login
        uses: ./.github/actions/docker-login
        with:
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
      - name: Build Frontend Container
        id: build-image
        uses: ./.github/actions/docker-build
        env:
          PLATFORMS: linux/${{ matrix.platform.arch }}
          TARGETARCH: ${{ matrix.platform.arch }}
        with:
          framework: none
          target: frontend
          platform: ${{ env.PLATFORMS }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      - name: Docker Tag and Push Frontend Image
        uses: ./.github/actions/docker-tag-push
        with:
          local_image: ${{ steps.build-image.outputs.image_tag }}
          push_tags: ai-dynamo/dynamo:${{ github.sha }}-frontend-${{ matrix.platform.arch }}
          aws_push: 'true'
          azure_push: 'true'
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}

  # Always runs, and succeeds only if every job it depends on finished as
  # "success" or "skipped" (jq -e exits non-zero when the check yields false).
  frontend-status-check:
    runs-on: ubuntu-latest
    needs: [changed-files, build-frontend-image]
    if: always()
    steps:
      - name: "Check all dependent jobs"
        env:
          # Passed through the environment rather than interpolated into the
          # script text: the JSON includes job outputs, and a single quote in
          # any of them would otherwise break out of the shell quoting.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          printf '%s' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
\ No newline at end of file
......@@ -83,7 +83,7 @@ jobs:
bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \
cargo clippy --features block-manager,media-nixl,media-ffmpeg --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager,media-nixl,media-ffmpeg && \
cargo test --locked --all-targets --features=block-manager,media-nixl,media-ffmpeg,testing-nixl && \
cargo test --locked --features integration -- --nocapture'
- name: Cleanup services
if: always()
......
......@@ -26,6 +26,7 @@ default-members = [
"lib/runtime",
"lib/config",
"lib/tokens",
"lib/memory",
"lib/async-openai",
"lib/parsers",
"lib/bindings/c",
......@@ -48,6 +49,7 @@ dynamo-runtime = { path = "lib/runtime", version = "0.9.0" }
dynamo-llm = { path = "lib/llm", version = "0.9.0" }
dynamo-config = { path = "lib/config", version = "0.9.0" }
dynamo-tokens = { path = "lib/tokens", version = "0.9.0" }
dynamo-memory = { path = "lib/memory", version = "0.9.0" }
dynamo-kv-router = { path = "lib/kv-router", version = "0.9.0", features = ["metrics"] }
dynamo-async-openai = { path = "lib/async-openai", version = "0.9.0", features = ["byot"] }
dynamo-parsers = { path = "lib/parsers", version = "0.9.0" }
......
......@@ -72,6 +72,7 @@ async def read_decoded_media_via_nixl(
)
array = tensor.numpy() # zero-copy
array = array[..., :3] # ignore alpha
if return_metadata:
return array, decoded_meta.get("metadata")
else:
......
......@@ -543,6 +543,7 @@ async def register_vllm_model(
media_fetcher = MediaFetcher()
media_fetcher.timeout_ms(30000)
media_fetcher.allow_direct_port(True)
await register_llm(
model_input,
......
......@@ -13,19 +13,19 @@ readme.workspace = true
description = "Dynamo LLM Library"
[features]
default = []
default = ["media-nixl", "block-manager"]
# todo(ops): get this working in CI as a default.
# default = ["block-manager", "testing-full"]
testing-full = ["testing-cuda", "testing-nixl"]
testing-cuda = ["dep:cudarc"]
testing-nixl = ["dep:nixl-sys"]
testing-cuda = ["dep:cudarc", "dynamo-memory/testing-cuda"]
testing-nixl = ["dep:nixl-sys", "dynamo-memory/testing-nixl"]
testing-etcd = []
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec", "dep:dynamo-memory"]
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec"]
block-manager-bench = ["block-manager", "testing-full", "dep:clap", "dep:indicatif"]
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
media-nixl = ["dep:nixl-sys", "dep:dynamo-memory", "dep:flate2"]
media-nixl = ["dep:nixl-sys", "dep:flate2"]
media-ffmpeg = ["dep:video-rs", "dep:ffmpeg-next", "dep:memfile", "media-nixl"]
kv-router-stress = ["dep:clap", "dep:indicatif"]
......@@ -43,7 +43,7 @@ required-features = ["block-manager", "testing-cuda"]
dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] }
dynamo-memory = { path = "../memory", optional = true }
dynamo-memory = { workspace = true }
# workspace
aho-corasick = "1.1"
......
......@@ -63,10 +63,9 @@
//!
//! ```rust
//! use dynamo_llm::block_manager::layout::{
//! LayoutConfig, FullyContiguous, BlockLayout, BlockLayoutLookup, BlockLayoutConfig,
//! LayoutConfig, FullyContiguous, BlockLayout, GenericBlockLayout, BlockLayoutConfig,
//! };
//! use dynamo_llm::block_manager::storage::{SystemAllocator, StorageType};
//! use dynamo_llm::common::dtype::DType;
//!
//! // Define the layout configuration
//! let config = LayoutConfig::builder()
......@@ -75,22 +74,20 @@
//! .outer_dim(1)
//! .page_size(16)
//! .inner_dim(128)
//! .dtype(DType::FP16)
//! .build()
//! .unwrap();
//!
//!
//! // Allocate a FullyContiguous layout using a SystemAllocator
//! let allocator = SystemAllocator;
//! let layout = FullyContiguous::allocate(config, &allocator).unwrap();
//!
//! // Access layout properties
//! assert_eq!(layout.num_blocks(), 10);
//! assert_eq!(layout.storage_type(), StorageType::System);
//! assert_eq!(layout.storage_type(), &StorageType::System);
//!
//! // Get the address of a specific page
//! let addr = layout.memory_region_addr(0, 0).unwrap();
//! println!("Address of block 0, layer 0: {}", addr);
//! // Get the memory region of a specific block/layer
//! let region = layout.memory_region(0, 0, 0).unwrap();
//! println!("Address of block 0, layer 0: {}", region.addr());
//! ```
//!
//! ## NIXL Integration
......
......@@ -56,7 +56,7 @@
//! reconstruct an `Arc<dyn BlockLayout<StorageType = NixlStorage>>`. This reconstructed layout now
//! refers to the remote NIXL memory regions.
//!
//! ```rust
//! ```rust,ignore
//! use dynamo_llm::block_manager::layout::{LayoutConfig, LayoutType};
//! use dynamo_llm::block_manager::layout::nixl::{NixlLayout, ToSerializedNixlBlockLayout, SerializedNixlBlockLayout};
//! use dynamo_llm::block_manager::storage::nixl::NixlAgent;
......@@ -67,6 +67,7 @@
//! let config = LayoutConfig::builder()
//! .num_blocks(10)
//! .num_layers(2)
//! .outer_dim(1)
//! .page_size(4)
//! .inner_dim(13)
//! .build().unwrap();
......@@ -340,7 +341,7 @@ impl SerializedNixlBlockLayout {
}
}
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests {
use super::super::*;
use super::*;
......
......@@ -43,7 +43,7 @@
//! ```
//!
//! For registering with external libraries:
//! ```rust
//! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator,
//! nixl::NixlRegisterableStorage
......
......@@ -28,7 +28,7 @@
//! ## Usage
//!
//! ### Using Allocators
//! ```rust
//! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{DeviceAllocator, PinnedAllocator, StorageAllocator};
//!
//! // Create a pinned memory allocator
......@@ -41,7 +41,7 @@
//! ```
//!
//! ### Memory Operations
//! ```rust
//! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator, Storage, StorageMemset
//! };
......
......@@ -22,7 +22,7 @@
//!
//! ## Usage
//!
//! ```rust
//! ```rust,ignore
//! use dynamo_llm::block_manager::storage::{
//! PinnedAllocator, StorageAllocator,
//! nixl::NixlRegisterableStorage
......
......@@ -110,7 +110,7 @@ mod cuda_tests {
// Tests for NIXL registration would require a real NIXL agent,
// so we'll skip those for now. In practice, you'd mock the agent
// or use integration tests.
#[cfg(feature = "testing-nixl")]
#[cfg(all(feature = "testing-nixl", feature = "testing-cuda"))]
mod nixl_tests {
use super::super::registered::register_with_nixl;
use super::*;
......
......@@ -199,7 +199,7 @@ impl Layout for FullyContiguousLayout {
}
}
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests {
use super::super::tests::*;
use super::*;
......
......@@ -208,7 +208,7 @@ impl Layout for LayerSeparateLayout {
}
}
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests {
use super::super::tests::*;
use super::*;
......
......@@ -18,7 +18,7 @@ mod physical;
mod serialize;
mod validation;
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
pub(super) mod tests;
// #[cfg(test)]
......
......@@ -39,7 +39,7 @@ use crate::block_manager::v2::physical::{
/// # Example
///
/// ```
/// # use dynamo_kvbm::v2::physical::transfer::TransferCapabilities;
/// # use dynamo_llm::block_manager::v2::physical::transfer::TransferCapabilities;
/// // Default conservative policy
/// let caps = TransferCapabilities::default();
/// assert!(!caps.allow_gds);
......
......@@ -154,7 +154,7 @@ fn compute_single_block_checksum(
Ok(hasher.finalize().to_string())
}
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests {
use super::super::tests::*;
use super::*;
......
......@@ -191,7 +191,7 @@ fn fill_memory_region(
Ok(())
}
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests {
use super::super::tests::*;
use super::*;
......
......@@ -49,7 +49,7 @@ pub mod preferences;
pub mod strategy;
pub mod validation;
#[cfg(test)]
#[cfg(all(test, feature = "testing-nixl"))]
mod tests;
// Re-export StorageKind
......
......@@ -33,7 +33,7 @@ impl CompletionChecker for CudaEventChecker {
}
}
#[cfg(all(test, feature = "testing-cuda"))]
#[cfg(all(test, feature = "testing-cuda", feature = "testing-nixl"))]
mod tests {
use crate::block_manager::v2::physical::manager::TransportManager;
use crate::block_manager::v2::physical::transfer::nixl_agent::NixlAgent;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment