Unverified Commit c6d8f225 authored by Ryan Olson's avatar Ryan Olson Committed by GitHub
Browse files

feat(velo-events): trait-based event system for async coordination (#6315)


Signed-off-by: default avatarRyan Olson <rolson@nvidia.com>
parent 713c96d2
......@@ -3622,9 +3622,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.90"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"once_cell",
"wasm-bindgen",
......@@ -8563,6 +8563,22 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "velo-events"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"futures",
"parking_lot",
"serde",
"tokio",
"tokio-util",
"tracing",
"uuid",
"xxhash-rust",
]
[[package]]
name = "version-compare"
version = "0.2.1"
......@@ -8646,9 +8662,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.113"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if 1.0.4",
"once_cell",
......@@ -8659,9 +8675,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.63"
version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a89f4650b770e4521aa6573724e2aed4704372151bd0de9d16a3bbabb87441a"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if 1.0.4",
"futures-util",
......@@ -8673,9 +8689,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.113"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
......@@ -8683,9 +8699,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.113"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
......@@ -8696,9 +8712,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.113"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
......@@ -8752,9 +8768,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.90"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97"
checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
dependencies = [
"js-sys",
"wasm-bindgen",
......
......@@ -18,6 +18,7 @@ members = [
"lib/bindings/c",
"lib/bindings/python/codegen",
"lib/config",
"lib/velo-events",
]
resolver = "3"
......@@ -42,8 +43,14 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" }
dynamo-kv-router = { path = "lib/kv-router", version = "1.0.0", features = ["metrics"] }
dynamo-async-openai = { path = "lib/async-openai", version = "1.0.0", features = ["byot"] }
dynamo-parsers = { path = "lib/parsers", version = "1.0.0" }
# kvbm
kvbm-kernels = { path = "lib/kvbm-kernels", version = "1.0.0" }
kvbm-logical = { path = "lib/kvbm-logical", version = "1.0.0" }
# velo
velo-events = { path = "lib/velo-events", version = "1.0.0" }
# External dependencies
anyhow = { version = "1" }
async-nats = { version = "0.45.0", features = ["service"] }
......
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Build & Test
```bash
# Build
cargo build -p velo-events
# Run all tests
cargo test -p velo-events
# Run a single test
cargo test -p velo-events <test_name>
# Check (no codegen)
cargo check -p velo-events
```
## Architecture
`velo-events` is a generational event system for coordinating async awaiters with minimal overhead. Events can be triggered (success) or poisoned (error), and entries are recycled across generations.
### Core types (`event.rs`, `manager.rs`)
- **`Event`** — concrete RAII guard for a single event. Dropping without calling `trigger(self)` or `poison(self, ...)` auto-poisons the event. `into_handle(self)` disarms the guard and returns the bare handle. `trigger` and `poison` consume `self`, preventing double-completion at compile time.
- **`EventManager`** — concrete struct that manages a collection of events: `new_event`, `awaiter`, `poll`, `trigger`, `poison`, `merge_events`, `force_shutdown`. Create with `EventManager::local()` for local use or `EventManager::new(base, backend)` for distributed setups.
- **`EventBackend`** — public trait with 3 methods (`trigger`, `poison`, `awaiter`) that serves as the routing customization point. `EventSystemBase` implements this for the local path; distributed backends implement it to add network routing.
### Base implementation (`base/`)
- **`EventSystemBase`** — the core event storage, allocation, and recycling engine. Uses `DashMap` for concurrent event storage with a free-list for entry recycling. Implements `EventBackend` for local trigger/poison/awaiter routing. Constructors: `EventSystemBase::local()` (random system_id, local flag set) and `EventSystemBase::distributed(system_id)` (explicit id, no local flag). Public `_inner` methods (`trigger_inner`, `poison_inner`, `awaiter_inner`) allow distributed backends to delegate local operations.
### Handle encoding (`handle.rs`)
`EventHandle` packs identity into a single `u128`: `[system_id: 64][local_index: 32][generation: 32]`. Bit 31 of `local_index` distinguishes local (bit set) from distributed (bit clear) handles. Both local and distributed systems have unique non-zero `system_id` values. `EventSystemBase` validates that handles belong to the system that created them.
### Slot machinery (`slot/`)
Single-lock synchronization primitives. See [docs/slot-state-machine.md](docs/slot-state-machine.md)
for invariants. Any change to `slot/` must preserve all invariants (I1-I6)
and update the document.
Key types:
- **`EventEntry`** — per-index state machine with a single `ParkingMutex<EventState>` protecting generation tracking, waker registration, and poison history.
- **`EventAwaiter`** — `Future` impl that resolves to `Result<()>`. Supports both immediate (already-complete) and pending modes. Delegates poll to `EventEntry::poll_waiter`.
- **`CompletionKind`** — `Triggered` | `Poisoned(Arc<EventPoison>)`.
### Factory (`factory.rs`)
`DistributedEventFactory` creates an `EventManager` pre-configured with a `system_id` for distributed (Nova-managed) deployments.
## Key Design Decisions
- `Event` is an RAII guard by default — dropping without triggering auto-poisons. `into_handle()` is the explicit opt-out for manager-level operations. `Clone` is intentionally not implemented; each event is a unique ownership token.
- `EventManager` is a concrete `Clone` struct holding `Arc<EventSystemBase>` (lifecycle) + `Arc<dyn EventBackend>` (routing). `EventManager::local()` creates both from the same `EventSystemBase`. `EventManager::new(base, backend)` accepts a custom backend for distributed routing.
- `EventBackend` is the public routing trait (3 methods) that enables distributed routing without touching the core event lifecycle. Distributed backends call `EventSystemBase::trigger_inner` / `poison_inner` / `awaiter_inner` for local handles and route remote handles over the network.
- Slot entries track a `BTreeMap<Generation, PoisonArc>` for poison history, allowing past-generation poison queries.
- Generation overflow causes entry retirement and a new entry allocation (transparent retry loop in `new_event_with_backend`).
- `force_shutdown` poisons all pending events and rejects future allocations via an `AtomicBool` flag.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
[package]
name = "velo-events"
version.workspace = true
edition.workspace = true
description.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
[dependencies]
anyhow = { workspace = true }
dashmap = { workspace = true }
futures = { workspace = true }
parking_lot = { workspace = true }
serde = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
# velo-events
A generational event system for coordinating async tasks with [minimal overhead](https://drive.google.com/file/d/1s9M1I-dUbhqWLrMFB5ehPSM-qDQBGPZG).
Events can be created, awaited, merged into precondition graphs, and poisoned
on failure. The local implementation lives in this crate; a distributed event
system can be built on top via active messaging.
## Core concepts
| Operation | What it does |
|-----------|-------------|
| **Create** | `manager.new_event()` allocates a pending event and returns an `Event` — an RAII guard you can trigger or await. |
| **Await** | `manager.awaiter(handle)?.await` suspends the current task until the event completes (or is poisoned). |
| **Merge** | `manager.merge_events(vec![a, b, c])` creates a new event that completes only after **all** inputs complete — this is how you build precondition graphs. |
| **Poison** | Events can fail with a reason string. Dropping an `Event` without triggering it auto-poisons so events are never silently lost. |
## Usage
### Create, trigger, await
```rust,no_run
use velo_events::EventManager;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let manager = EventManager::local();
let event = manager.new_event()?;
let handle = event.handle();
// Spawn a task that waits for the event
let mgr = manager.clone();
let waiter = tokio::spawn(async move {
mgr.awaiter(handle)?.await
});
// Complete the event — consumes self, disarms the drop guard
event.trigger()?;
waiter.await??;
Ok(())
}
```
### RAII drop safety
`Event` is an RAII guard: dropping it without calling `trigger()` or `poison()`
automatically poisons the event so waiters are never silently abandoned. Both
`trigger` and `poison` consume `self`, preventing double-completion at compile
time.
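To opt out of auto-poisoning (e.g. when handing ownership to a manager-level
operation), call `into_handle()`. The example below shows the default guard
behaviour, where an early return or panic poisons the event automatically: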
```rust,no_run
use velo_events::EventManager;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let manager = EventManager::local();
let event = manager.new_event()?;
let handle = event.handle();
// If this function returns early or panics, the event
// drops and is automatically poisoned.
do_work()?;
event.trigger()?; // success — consumes the event
Ok(())
}
fn do_work() -> anyhow::Result<()> { Ok(()) }
```
### Merging events (precondition graphs)
`merge_events` lets you express "wait for all of these before proceeding":
```rust,no_run
use velo_events::EventManager;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let manager = EventManager::local();
let load_weights = manager.new_event()?;
let load_tokenizer = manager.new_event()?;
// merged event completes only after both inputs complete
let ready = manager.merge_events(vec![
load_weights.handle(),
load_tokenizer.handle(),
])?;
load_weights.trigger()?;
load_tokenizer.trigger()?;
manager.awaiter(ready)?.await?;
Ok(())
}
```
Because merged events are themselves events, you can merge merges to build
arbitrary DAGs of preconditions.
### Poison propagation
When an event is poisoned, all awaiters receive an error containing the
reason. Merged events accumulate poison reasons from their inputs:
```rust,no_run
use velo_events::{EventManager, EventPoison};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let manager = EventManager::local();
let a = manager.new_event()?;
let b = manager.new_event()?;
let merged = manager.merge_events(vec![a.handle(), b.handle()])?;
manager.poison(a.handle(), "a failed")?;
manager.poison(b.handle(), "b failed")?;
let err = manager.awaiter(merged)?.await.unwrap_err();
let poison = err.downcast::<EventPoison>()?;
assert!(poison.reason().contains("a failed"));
assert!(poison.reason().contains("b failed"));
Ok(())
}
```
### Application responsibility
In distributed systems, concurrent trigger/poison calls cannot be coordinated
through the type system alone. Application logic must carefully manage how
events are completed.
**Pattern: don't use trigger/poison as if/else on one event.** Poison reasons
are kept in a `BTreeMap` history per entry, so poison strings persist in memory.
Instead, create a separate event per outcome arm and use `tokio::select!` to
race them:
```rust,no_run
use velo_events::EventManager;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let manager = EventManager::local();
let success_event = manager.new_event()?;
let failure_event = manager.new_event()?;
let success_handle = success_event.handle();
let failure_handle = failure_event.handle();
// Producer decides which arm:
// success_event.trigger()? OR failure_event.trigger()?
// Consumer races:
let success_awaiter = manager.awaiter(success_handle)?;
let failure_awaiter = manager.awaiter(failure_handle)?;
tokio::select! {
ok = success_awaiter => { ok?; /* success path */ }
err = failure_awaiter => { err?; /* failure path */ }
}
Ok(())
}
```
## Distributed events
For distributed deployments, `EventBackend` and `EventSystemBase` are public
so you can implement custom routing. Create a base with an explicit system_id,
implement `EventBackend` to route local vs remote handles, and pass both to
`EventManager::new`:
```rust,no_run
use velo_events::{EventSystemBase, EventBackend, EventManager, EventHandle, EventAwaiter};
use anyhow::Result;
use std::sync::Arc;
struct MyDistributedBackend {
local: Arc<EventSystemBase>,
// router: MyRouter,
}
impl EventBackend for MyDistributedBackend {
fn trigger(&self, handle: EventHandle) -> Result<()> {
if handle.system_id() == self.local.system_id() {
self.local.trigger_inner(handle) // fast local path
} else {
todo!("route over network")
}
}
fn poison(&self, handle: EventHandle, reason: Arc<str>) -> Result<()> {
if handle.system_id() == self.local.system_id() {
self.local.poison_inner(handle, reason)
} else {
todo!("route over network")
}
}
fn awaiter(&self, handle: EventHandle) -> Result<EventAwaiter> {
if handle.system_id() == self.local.system_id() {
self.local.awaiter_inner(handle)
} else {
todo!("route over network")
}
}
}
let base = EventSystemBase::distributed(0x42);
let backend = Arc::new(MyDistributedBackend { local: base.clone() });
let manager = EventManager::new(base, backend);
// handles produced by this manager carry system_id = 0x42
```
For simpler cases where you just need handles stamped with a system_id (without
custom routing), `DistributedEventFactory` is a convenience wrapper:
```rust,no_run
use velo_events::DistributedEventFactory;
let factory = DistributedEventFactory::new(0x42.try_into().unwrap());
let manager = factory.event_manager();
// handles produced by this manager carry system_id = 0x42
```
# Slot State Machine Specification
## Overview
The slot module uses a single entry-level `ParkingMutex<EventState>` that holds
all per-entry state including waker registration. This design eliminates
stale-completion races by construction — there is no separate lock or atomic
guard whose ordering could allow stale results to leak across generations.
## State Variables
All fields are protected by a single `parking_lot::Mutex`:
```rust
struct EventState {
last_triggered: Generation, // highest completed generation
active_generation: Option<Generation>, // currently pending generation
wakers: Vec<Waker>, // registered waiter wakers
poisoned: BTreeMap<Generation, PoisonArc>, // poison history per generation
retired: bool, // permanently unusable
}
```
## Lifecycle Phases
```
         begin_generation()
  Idle ──────────────────> Active
   ^                         |
   |  finalize_completion()  |
   |                         v
   +──────────────────── Completing
```
All transitions happen under a single lock acquisition.
### Idle
- `active_generation = None`
- `wakers` is empty (drained by prior `finalize_completion`)
- Entry is available for reuse via the free list
### Active
- `active_generation = Some(gen)`
- Waiters may register wakers via `poll_waiter`
- Only one generation can be active at a time
### Completing
- `finalize_completion` sets `last_triggered`, clears `active_generation`,
stores poison (if applicable), drains wakers, then wakes them
- Transitions back to Idle
## Operations
### begin_generation
1. Acquire lock
2. Validate: no active generation, not retired, not overflowed
3. Compute `next = last_triggered + 1`
4. Drain stale wakers (`std::mem::take`)
5. Set `active_generation = Some(next)`
6. Release lock
7. Wake stale wakers (outside lock)
### finalize_completion(generation, completion)
1. Acquire lock
2. Validate: `active_generation == Some(generation)`
3. Set `last_triggered = generation`
4. Clear `active_generation`
5. Insert/remove from poison map
6. Drain wakers
7. Release lock
8. Wake all drained wakers (outside lock)
### register_local_waiter(generation)
1. Acquire lock
2. If `generation <= last_triggered`: return Ready or Poisoned
3. If `active_generation == Some(generation)`: return Pending
4. Otherwise: return InvalidGeneration error
### poll_waiter(observed_generation, cx)
1. Acquire lock
2. If `observed_generation <= last_triggered`: return completion result
3. If `active_generation.is_none()`: return "generation expired" error
4. Register waker with deduplication (`will_wake` check)
5. Return Pending
### try_to_poison(generation, poison)
1. Acquire lock
2. If `generation <= last_triggered`:
- If `poisoned.contains_key(generation)`: return `AlreadyPoisoned`
- Else: return `AlreadyCompleted` error
3. Validate: `active_generation == Some(generation)`
4. Set `last_triggered = generation`
5. Clear `active_generation`
6. Insert into poison map
7. Drain wakers
8. Release lock
9. Wake all drained wakers (outside lock)
10. Return `Poisoned`
This is equivalent to an atomic `status_for` + `finalize_completion(Poisoned)`,
eliminating the TOCTOU window when the two are called separately.
### retire
1. Acquire lock
2. Debug-assert: wakers list is empty (callers should ensure all waiters resolved before retirement)
3. Set `retired = true`, clear `active_generation`
4. Drain wakers (defensive, prevents silent hangs if invariant violated)
5. Release lock
6. Wake drained wakers (outside lock)
## Invariants
- **I1: Generation monotonicity** — `last_triggered` only increases. Each
`begin_generation` computes `last_triggered + 1`.
- **I2: Single completion per generation** — `active_generation` guard ensures
only one generation is active. `finalize_completion` validates
`active_generation == Some(generation)`.
- **I3: Completion visibility** — Waiter resolution is determined by
`observed_generation <= last_triggered` (success) plus the `poisoned`
BTreeMap (error). Both are set under the same lock that the waiter reads.
- **I4: No stale completion leakage** — There is no stored completion value
that could leak. Waiters resolve via generation comparison + poison map.
`begin_generation` unconditionally drains stale wakers.
- **I5: No lost wakeups** — `finalize_completion` sets `last_triggered` and
drains wakers in the same lock scope. Any waiter that registered before
the drain will be woken. Any waiter that polls after the drain will see
`observed_generation <= last_triggered` and resolve immediately.
- **I6: Stale waiter resolution** — Waiters from generation N check
`observed_generation <= last_triggered` on every poll. After generation N
completes, this check succeeds regardless of what generation is currently
active. `begin_generation` also flushes stale wakers defensively.
## Concurrency Rules
A single `parking_lot::Mutex` per entry serializes all state mutations. This
eliminates the need for:
- Atomic `waiter_count` (no conditional clearing)
- Atomic `completed` flag (redundant with lock-protected state)
- Separate slot-level `generation` counter (entry-level suffices)
- Manual waker deduplication across lock boundaries
The only concurrency pattern is: acquire lock, read/write state, release lock,
then wake drained wakers outside the lock (waker invocation only enqueues
tasks on the runtime, it does not poll them synchronously).
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Core event storage engine backed by a generational slot system.
pub(crate) mod system;
pub use system::EventSystemBase;
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::{Result, anyhow, bail};
use dashmap::DashMap;
use parking_lot::Mutex as ParkingMutex;
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use tokio_util::task::TaskTracker;
use tracing::{error, trace};
use crate::event::{Event, EventBackend};
use crate::handle::{EventHandle, LOCAL_FLAG};
use crate::slot::{
CompletionKind, EventAwaiter, EventEntry, EventKey, PoisonArc, PoisonOutcome, WaitRegistration,
};
use crate::status::{EventPoison, EventStatus};
/// Upper bound for the fresh-index counter: the low 31 bits of a `u32`
/// (2^31 - 1, roughly two billion entries).
const MAX_LOCAL_INDEX: u32 = u32::MAX >> 1;
/// Core event storage, allocation, and recycling engine.
///
/// Handles event storage, allocation, recycling, and generation tracking.
/// This is the implementation backing [`EventManager`](crate::EventManager).
/// Events created by an `EventSystemBase` are bound to that system. Passing
/// a handle from one system to another will return an error.
///
/// `EventSystemBase` also implements [`EventBackend`] for the local path,
/// so it can be used directly as both the base and the backend for local-only
/// setups. For distributed setups, implement [`EventBackend`] on your own type
/// and delegate local operations to the `_inner` methods on `EventSystemBase`.
pub struct EventSystemBase {
/// Identity stamped into every handle this system produces; incoming handles
/// are compared against it by `validate_handle`.
system_id: u64,
/// When `true`, `allocate_entry` ORs `LOCAL_FLAG` into each freshly allocated index.
is_local: bool,
/// Every entry ever allocated, keyed by its `EventKey`. Retired entries stay
/// in this map (see `retire_entry`).
events: DashMap<EventKey, Arc<EventEntry>>,
/// Queue of entries available for reuse: `recycle_entry` pushes to the back,
/// `try_reuse_entry` pops from the front, `force_shutdown_inner` clears it.
free_lists: ParkingMutex<VecDeque<Arc<EventEntry>>>,
/// Next never-used index value; advanced by `allocate_entry` and capped at
/// `MAX_LOCAL_INDEX`.
next_local_index: AtomicU32,
/// Tracker on which `merge_events_with` spawns its watcher tasks.
tasks: TaskTracker,
/// Set by `force_shutdown_inner`; once set, `new_event_with_backend` refuses
/// to hand out new events.
shutdown: AtomicBool,
}
impl EventSystemBase {
/// Build a local event system identified by a freshly generated random id.
///
/// The `system_id` is the `xxh3_64` hash of the bytes of a new v4 UUID, so
/// separate local systems get distinct identities. The system is created with
/// its local flag enabled, which makes `allocate_entry` OR `LOCAL_FLAG` into
/// every index it hands out.
///
/// Handles minted here carry this system's id; operations that go through
/// `validate_handle` reject handles minted by any other system.
pub fn local() -> Arc<Self> {
    let seed = uuid::Uuid::new_v4();
    let system_id = xxhash_rust::xxh3::xxh3_64(seed.as_bytes());
    Self::create(system_id, true)
}
/// Create a system pre-configured with a system_id for distributed use.
///
/// Handles produced by this system do **not** have the local flag set,
/// distinguishing them from local handles.
///
/// `system_id` is stored exactly as given; this constructor performs no
/// validation or uniqueness check on it.
pub fn distributed(system_id: u64) -> Arc<Self> {
// `false` = not local, so `allocate_entry` leaves `LOCAL_FLAG` out of new indices.
Self::create(system_id, false)
}
/// Shared constructor behind [`Self::local`] and [`Self::distributed`].
///
/// Starts with an empty event map, an empty free list, the index counter at
/// zero, a fresh task tracker, and the shutdown flag cleared.
fn create(system_id: u64, is_local: bool) -> Arc<Self> {
    let events = DashMap::new();
    let free_lists = ParkingMutex::new(VecDeque::new());
    let next_local_index = AtomicU32::new(0);
    let tasks = TaskTracker::new();
    let shutdown = AtomicBool::new(false);

    Arc::new(Self {
        system_id,
        is_local,
        events,
        free_lists,
        next_local_index,
        tasks,
        shutdown,
    })
}
/// The unique system identity stamped into every handle produced by this system.
///
/// This is the value `validate_handle` compares incoming handles against.
pub fn system_id(&self) -> u64 {
self.system_id
}
// ── Ownership validation ─────────────────────────────────────────
fn validate_handle(&self, handle: EventHandle) -> Result<()> {
if handle.system_id() != self.system_id {
bail!(
"Handle {} belongs to system {:#x}, not this system {:#x}",
handle,
handle.system_id(),
self.system_id,
);
}
Ok(())
}
// ── Backend-aware event creation ─────────────────────────────────
/// Allocate a pending event and wrap it in an [`Event`] guard that completes
/// through `backend`.
///
/// An entry is obtained from `allocate_entry` and a new generation is started
/// on it. If the entry has run out of generations it is retired and the loop
/// tries again with another entry.
///
/// # Errors
///
/// - The shutdown flag is set, either on entry or after the generation was
///   started. In the latter case the just-started generation is poisoned
///   (best effort) before the error is returned.
/// - `allocate_entry` fails.
/// - `begin_generation` fails for any reason other than generation overflow;
///   the entry is handed back to `recycle_entry` first.
pub(crate) fn new_event_with_backend(
    self: &Arc<Self>,
    backend: Arc<dyn EventBackend>,
) -> Result<Event> {
    if self.is_shutdown() {
        bail!("Event system shutdown in progress");
    }

    loop {
        let entry = self.allocate_entry()?;

        let generation = match entry.begin_generation() {
            Ok(generation) => generation,
            Err(crate::slot::entry::EventEntryError::GenerationOverflow { key }) => {
                trace!(
                    ?key,
                    "retiring event entry after exhausting generation space"
                );
                self.retire_entry(entry);
                continue;
            }
            Err(err) => {
                self.recycle_entry(entry);
                return Err(err.into());
            }
        };

        let handle = entry.key().handle(self.system_id, generation);

        // Re-check after the generation is live: a shutdown that started in the
        // meantime must not leave this event pending.
        if self.is_shutdown() {
            let poison = Arc::new(EventPoison::new(
                handle,
                "Event system shutdown in progress",
            ));
            let _ = self.poison_local_entry(entry, handle, poison);
            bail!("Event system shutdown in progress");
        }

        return Ok(Event::new(handle, backend));
    }
}
/// Merge events, using `backend` for the spawned task's completion routing.
pub(crate) fn merge_events_with(
self: &Arc<Self>,
inputs: Vec<EventHandle>,
backend: Arc<dyn EventBackend>,
) -> Result<EventHandle> {
if inputs.is_empty() {
bail!("Cannot merge empty event list");
}
for input in &inputs {
self.validate_handle(*input)?;
}
let merged = self.new_event_with_backend(backend.clone())?;
// Disarm the RAII guard — the spawned task owns completion via handle.
let handle = merged.into_handle();
let system = Arc::clone(self);
self.tasks.spawn(async move {
let mut failure_reasons: Option<Vec<Arc<str>>> = None;
for dependency in &inputs {
let wait_result = match backend.awaiter(*dependency) {
Ok(waiter) => waiter.await,
Err(err) => Err(err),
};
match wait_result {
Ok(()) => {}
Err(err) => {
let reason = match err.downcast::<EventPoison>() {
Ok(poison) => format!(
"Merge dependency {} poisoned: {}",
dependency,
poison.reason()
),
Err(other) => {
format!("Merge dependency {} failed: {}", dependency, other)
}
};
let reason_arc: Arc<str> = Arc::from(reason);
error!("{}", &*reason_arc);
failure_reasons
.get_or_insert_with(Vec::new)
.push(reason_arc);
}
}
}
let result = match failure_reasons {
None => backend.trigger(handle),
Some(reasons) => {
if reasons.len() == 1 {
backend.poison(handle, reasons[0].clone())
} else {
let mut message = String::from("Multiple merge dependencies failed:\n");
for (idx, reason) in reasons.iter().enumerate() {
if idx > 0 {
message.push('\n');
}
message.push_str(reason.as_ref());
}
backend.poison(handle, Arc::from(message))
}
}
};
if let Err(e) = result {
error!("Failed to complete merged event {}: {}", handle, e);
}
drop(system); // ensure system lives until the task completes
});
Ok(handle)
}
// ── Public inner methods (for distributed backends) ──────────────
/// Trigger a local event by handle. Validates that the handle belongs to this system.
///
/// Distributed backends should call this for handles that belong to the local system.
pub fn trigger_inner(&self, handle: EventHandle) -> Result<()> {
self.validate_handle(handle)?;
let entry = self
.events
.get(&EventKey::from_handle(handle))
.map(|guard| guard.clone())
.ok_or_else(|| anyhow!("Unknown event {}", handle))?;
self.trigger_local_entry(entry, handle)
}
/// Poison a local event by handle. Validates that the handle belongs to this system.
///
/// Distributed backends should call this for handles that belong to the local system.
pub fn poison_inner(&self, handle: EventHandle, reason: impl Into<Arc<str>>) -> Result<()> {
self.validate_handle(handle)?;
let reason: Arc<str> = reason.into();
let entry = self
.events
.get(&EventKey::from_handle(handle))
.map(|guard| guard.clone())
.ok_or_else(|| anyhow!("Unknown event {}", handle))?;
let poison = Arc::new(EventPoison::new(handle, reason));
self.poison_local_entry(entry, handle, poison)
}
/// Build an [`EventAwaiter`] for a handle owned by this system.
///
/// Intended for distributed backends to call once they have decided a handle
/// is local to this system.
///
/// # Errors
///
/// Fails when the handle belongs to another system, or when `wait_local`
/// cannot produce an awaiter for it.
pub fn awaiter_inner(&self, handle: EventHandle) -> Result<EventAwaiter> {
    self.validate_handle(handle)
        .and_then(|()| self.wait_local(handle))
}
/// Report the current status of a handle owned by this system without waiting.
///
/// # Errors
///
/// Fails when the handle belongs to another system or its entry is unknown.
pub(crate) fn poll_inner(&self, handle: EventHandle) -> Result<EventStatus> {
    self.validate_handle(handle)
        .and_then(|()| self.poll_local(handle))
}
/// Set the shutdown flag and poison every event that is still pending.
///
/// Only the first call does any work; later calls see the flag already set and
/// return immediately. Active handles are gathered from the event map first and
/// poisoned with `reason` afterwards. A failure to poison is logged and does
/// not stop the sweep. Finally the free list is emptied.
pub(crate) fn force_shutdown_inner(&self, reason: impl Into<Arc<str>>) {
    if self.shutdown.swap(true, Ordering::SeqCst) {
        return;
    }
    let reason: Arc<str> = reason.into();

    let pending: Vec<_> = self
        .events
        .iter()
        .filter_map(|slot| {
            let entry = slot.value();
            entry
                .active_handle(self.system_id)
                .map(|handle| (Arc::clone(entry), handle))
        })
        .collect();

    for (entry, handle) in pending {
        let poison = Arc::new(EventPoison::new(handle, Arc::clone(&reason)));
        if let Err(err) = self.poison_local_entry(entry, handle, poison) {
            error!("force_shutdown: failed to poison {}: {}", handle, err);
        }
    }

    self.free_lists.lock().clear();
}
// ── Low-level helpers ─────────────────────────────────────────────
/// Look up the poison reason recorded for the handle's generation.
///
/// Returns `None` when no entry exists for the handle's key or the entry has
/// no reason for that generation. Unlike the `_inner` methods, this does not
/// check that the handle belongs to this system.
#[allow(dead_code)]
pub(crate) fn poison_reason(&self, handle: EventHandle) -> Option<Arc<str>> {
    self.events
        .get(&EventKey::from_handle(handle))
        .and_then(|entry| entry.poison_reason(handle.generation()))
}
/// Complete `entry` for the handle's generation as `CompletionKind::Triggered`.
///
/// Thin wrapper over `complete_local_entry`; any error it reports is returned
/// unchanged.
pub(crate) fn trigger_local_entry(
&self,
entry: Arc<EventEntry>,
handle: EventHandle,
) -> Result<()> {
self.complete_local_entry(entry, handle, CompletionKind::Triggered)
}
pub(crate) fn poison_local_entry(
&self,
entry: Arc<EventEntry>,
handle: EventHandle,
poison: PoisonArc,
) -> Result<()> {
match entry
.try_to_poison(handle.generation(), poison)
.map_err(anyhow::Error::new)?
{
PoisonOutcome::Poisoned => {
self.recycle_entry(entry);
Ok(())
}
PoisonOutcome::AlreadyPoisoned => Ok(()),
}
}
fn complete_local_entry(
&self,
entry: Arc<EventEntry>,
handle: EventHandle,
completion: CompletionKind,
) -> Result<()> {
entry
.finalize_completion(handle.generation(), completion)
.map_err(anyhow::Error::new)?;
self.recycle_entry(entry);
Ok(())
}
fn wait_local(&self, handle: EventHandle) -> Result<EventAwaiter> {
let entry = self
.events
.get(&EventKey::from_handle(handle))
.map(|guard| guard.clone())
.ok_or_else(|| anyhow!("Unknown local event {}", handle))?;
match entry.register_local_waiter(handle.generation())? {
WaitRegistration::Ready => {
Ok(EventAwaiter::immediate(Arc::new(CompletionKind::Triggered)))
}
WaitRegistration::Poisoned(poison) => Ok(EventAwaiter::immediate(Arc::new(
CompletionKind::Poisoned(poison),
))),
WaitRegistration::Pending => Ok(EventAwaiter::pending(entry, handle.generation())),
}
}
fn poll_local(&self, handle: EventHandle) -> Result<EventStatus> {
let entry = self
.events
.get(&EventKey::from_handle(handle))
.map(|guard| guard.clone())
.ok_or_else(|| anyhow!("Unknown local event {}", handle))?;
Ok(entry.status_for(handle.generation()))
}
/// Obtain an entry for a new event.
///
/// A recycled entry from the free list is preferred. Otherwise the next index
/// is claimed from `next_local_index`, tagged with `LOCAL_FLAG` when this
/// system is local, and a fresh entry is registered in `self.events`.
///
/// # Errors
/// Fails once the counter has reached `MAX_LOCAL_INDEX`.
fn allocate_entry(self: &Arc<Self>) -> Result<Arc<EventEntry>> {
    if let Some(recycled) = self.try_reuse_entry() {
        return Ok(recycled);
    }

    // `fetch_update` yields the value *before* the increment, which is the
    // index this call owns.
    let counter = match self.next_local_index.fetch_update(
        Ordering::AcqRel,
        Ordering::Acquire,
        |current| (current < MAX_LOCAL_INDEX).then_some(current + 1),
    ) {
        Ok(previous) => previous,
        Err(_) => {
            return Err(anyhow!(
                "Local event index space exhausted ({} entries)",
                MAX_LOCAL_INDEX
            ));
        }
    };

    let origin_flag = if self.is_local { LOCAL_FLAG } else { 0 };
    let key = EventKey::new(counter | origin_flag);
    let entry = Arc::new(EventEntry::new(key));
    self.events.insert(key, Arc::clone(&entry));
    Ok(entry)
}
/// Pop the entry at the front of the free list, if there is one.
fn try_reuse_entry(&self) -> Option<Arc<EventEntry>> {
    self.free_lists.lock().pop_front()
}
/// Append `entry` to the back of the free list so it can be reused.
///
/// Entries for which `is_retired()` is true are dropped instead of queued.
fn recycle_entry(&self, entry: Arc<EventEntry>) {
    if !entry.is_retired() {
        self.free_lists.lock().push_back(entry);
    }
}
/// Mark an entry as permanently unusable but keep it in `self.events`.
///
/// This method only calls `EventEntry::retire` on the entry; it does not touch
/// `self.events` or the free list. `recycle_entry` refuses entries whose
/// `is_retired()` is true, so (assuming `retire()` is what sets that flag —
/// confirm in `EventEntry`) a retired entry is never queued for reuse.
///
/// Retired entries are intentionally **not** removed from the DashMap so that
/// callers holding stale handles to poisoned generations can still query
/// poison history via `poison_reason()` / `status_for()`. Removing the entry
/// would turn a diagnosable poison into an opaque "Unknown event" error.
///
/// Future optimisation: evict the full `EventEntry` from the DashMap and
/// migrate only the poisoned generation keys into a secondary
/// `HashSet<(EventKey, Generation)>` with a shared "entry retired" reason.
/// This trades per-generation `Arc<str>` detail for bounded memory on
/// long-running systems that exhaust many entries' generation spaces.
fn retire_entry(&self, entry: Arc<EventEntry>) {
entry.retire();
}
/// Report whether the `shutdown` flag is currently set.
///
/// The flag is read with `Ordering::Acquire`.
fn is_shutdown(&self) -> bool {
self.shutdown.load(Ordering::Acquire)
}
}
// ── EventBackend impl ────────────────────────────────────────────────
/// Local implementation of [`EventBackend`]: every trait method forwards its
/// arguments unchanged to the matching `*_inner` method on `EventSystemBase`.
impl EventBackend for EventSystemBase {
/// Forwards to `trigger_inner`.
fn trigger(&self, handle: EventHandle) -> Result<()> {
self.trigger_inner(handle)
}
/// Forwards to `poison_inner`.
fn poison(&self, handle: EventHandle, reason: Arc<str>) -> Result<()> {
self.poison_inner(handle, reason)
}
/// Forwards to `awaiter_inner`.
fn awaiter(&self, handle: EventHandle) -> Result<EventAwaiter> {
self.awaiter_inner(handle)
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Concrete [`Event`] RAII guard and [`EventBackend`] routing trait.
use anyhow::Result;
use std::sync::{Arc, LazyLock};
use crate::handle::EventHandle;
use crate::slot::EventAwaiter;
/// Static poison reason reused across all drop-triggered poisons.
///
/// The string is built lazily on first access. `Event`'s `Drop` impl clones
/// this `Arc<str>` for each auto-poison instead of allocating a new string.
static DROP_POISON_REASON: LazyLock<Arc<str>> =
LazyLock::new(|| Arc::from("event dropped without being triggered"));
// ── Backend trait: routing customization point ──────────────────────
/// Routing layer for event completion operations.
///
/// Only three methods — just the operations that need local-vs-remote routing
/// in a distributed setup. [`EventSystemBase`](crate::EventSystemBase) implements
/// this for the local path; a distributed backend would add network routing.
///
/// The `Send + Sync` bound lets a backend be shared as `Arc<dyn EventBackend>`,
/// which is how `Event` and `EventManager` hold it.
pub trait EventBackend: Send + Sync {
/// Mark the event as successfully completed, waking all waiters.
///
/// Returns whatever error the implementation reports for `handle`.
fn trigger(&self, handle: EventHandle) -> Result<()>;
/// Poison the event with the given reason, waking all waiters with an error.
///
/// Returns whatever error the implementation reports for `handle`.
fn poison(&self, handle: EventHandle, reason: Arc<str>) -> Result<()>;
/// Create a future that resolves when the event completes.
///
/// Returns whatever error the implementation reports for `handle`.
fn awaiter(&self, handle: EventHandle) -> Result<EventAwaiter>;
}
// ── Concrete Event ─────────────────────────────────────────────────
/// A single event that can be triggered or poisoned exactly once.
///
/// `Event` is an RAII guard: dropping it without calling [`trigger`](Event::trigger)
/// or [`poison`](Event::poison) automatically poisons the event so waiters are
/// never silently abandoned. To opt out of drop-poisoning (e.g. when handing
/// ownership to a manager-level operation), call [`into_handle`](Event::into_handle).
///
/// `trigger` and `poison` consume `self`, preventing double-completion at
/// compile time.
pub struct Event {
// `Some` while the guard is armed. `trigger`, `poison`, `into_handle` and
// `Drop` take it, leaving `None`, so the drop-poison runs at most once.
inner: Option<EventInner>,
}
/// State held by an armed [`Event`]: the handle it identifies and the backend
/// its completion calls go through.
struct EventInner {
// Handle returned by `Event::handle` / `Event::into_handle`.
handle: EventHandle,
// Receiver of the `trigger` / `poison` / `awaiter` calls for `handle`.
backend: Arc<dyn EventBackend>,
}
impl Event {
    /// Wrap `handle` in an armed guard that completes through `backend`.
    pub(crate) fn new(handle: EventHandle, backend: Arc<dyn EventBackend>) -> Self {
        let armed = EventInner { handle, backend };
        Self { inner: Some(armed) }
    }

    /// Move the inner state out, leaving `None` so `Drop` becomes a no-op.
    ///
    /// # Panics
    /// Panics if the inner state has already been taken.
    fn take_inner(&mut self) -> EventInner {
        let taken = self.inner.take();
        taken.expect("event already consumed")
    }

    /// The handle identifying this event.
    ///
    /// # Panics
    /// Panics if the inner state has already been taken.
    pub fn handle(&self) -> EventHandle {
        let armed = self.inner.as_ref().expect("event already consumed");
        armed.handle
    }

    /// Complete the event successfully through the backend.
    ///
    /// Consumes the guard, so the drop-poison no longer fires.
    pub fn trigger(mut self) -> Result<()> {
        let EventInner { handle, backend } = self.take_inner();
        backend.trigger(handle)
    }

    /// Poison the event with `reason` through the backend.
    ///
    /// Consumes the guard, so the drop-poison no longer fires.
    pub fn poison(mut self, reason: impl Into<Arc<str>>) -> Result<()> {
        let EventInner { handle, backend } = self.take_inner();
        backend.poison(handle, reason.into())
    }

    /// Ask the backend for a future that resolves when this event completes.
    ///
    /// # Panics
    /// Panics if the inner state has already been taken.
    pub fn awaiter(&self) -> Result<EventAwaiter> {
        let EventInner { handle, backend } =
            self.inner.as_ref().expect("event already consumed");
        backend.awaiter(*handle)
    }

    /// Disarm the drop guard and return the bare handle.
    ///
    /// After this call the event will **not** be auto-poisoned on drop.
    /// Use the returned handle with [`EventManager`](crate::EventManager)
    /// methods to complete the event manually.
    pub fn into_handle(mut self) -> EventHandle {
        let EventInner { handle, .. } = self.take_inner();
        handle
    }
}
impl Drop for Event {
fn drop(&mut self) {
if let Some(inner) = self.inner.take() {
let _ = inner
.backend
.poison(inner.handle, Arc::clone(&*DROP_POISON_REASON));
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Factory for creating distributed event systems with a system identity.
use std::{num::NonZero, sync::Arc};
use crate::base::EventSystemBase;
use crate::manager::EventManager;
/// Factory that creates an [`EventManager`] pre-configured with a system_id.
///
/// Use this when events need globally-unique handles that embed a non-zero
/// system identifier (e.g. in a Nova-managed distributed system).
///
/// For purely local use, call [`EventManager::local()`] directly instead.
pub struct DistributedEventFactory {
// Plain `u64` copy of the `NonZero<u64>` passed to `new`.
system_id: u64,
// Event system created by `EventSystemBase::distributed(system_id)` in `new`;
// shared by every manager this factory hands out.
base: Arc<EventSystemBase>,
}
impl DistributedEventFactory {
    /// Build a factory, and the event system behind it, for `system_id`.
    pub fn new(system_id: NonZero<u64>) -> Self {
        let id = system_id.get();
        Self {
            system_id: id,
            base: EventSystemBase::distributed(id),
        }
    }

    /// The system identity this factory was created with.
    pub fn system_id(&self) -> u64 {
        self.system_id
    }

    /// Borrow the underlying event system base.
    pub fn system(&self) -> &Arc<EventSystemBase> {
        &self.base
    }

    /// Create an [`EventManager`] backed by this factory's system.
    ///
    /// The same base serves as both lifecycle store and completion backend,
    /// i.e. the local backend. A future distributed backend will route remote
    /// handles over the network.
    pub fn event_manager(&self) -> EventManager {
        let lifecycle = Arc::clone(&self.base);
        let routing = Arc::clone(&self.base);
        EventManager::new(lifecycle, routing as _)
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Unified event handle encoded in a single `u128` value.
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter};
use crate::status::Generation;
/// Width in bits of the `system_id` field (most significant).
const SYSTEM_BITS: u32 = 64;
/// Width in bits of the `local_index` field (middle).
const LOCAL_BITS: u32 = 32;
/// Width in bits of the `generation` field (least significant).
const GENERATION_BITS: u32 = 32;

/// Bit offset of the `local_index` field: it sits directly above `generation`.
const LOCAL_SHIFT: u32 = GENERATION_BITS;
/// Bit offset of the `system_id` field: it sits directly above `local_index`.
const SYSTEM_SHIFT: u32 = LOCAL_SHIFT + LOCAL_BITS;

/// Mask of `bits` consecutive ones whose lowest set bit is at position `shift`.
const fn field_mask(bits: u32, shift: u32) -> u128 {
    ((1u128 << bits) - 1) << shift
}

const SYSTEM_MASK: u128 = field_mask(SYSTEM_BITS, SYSTEM_SHIFT);
const LOCAL_MASK: u128 = field_mask(LOCAL_BITS, LOCAL_SHIFT);
const GENERATION_MASK: u128 = field_mask(GENERATION_BITS, 0);

/// Bit 31 of `local_index` marks handles as local vs distributed.
pub(crate) const LOCAL_FLAG: u32 = 1 << 31;
/// Mask for the counter portion of `local_index` (strips the local flag bit).
pub(crate) const INDEX_COUNTER_MASK: u32 = LOCAL_FLAG - 1;
/// Public event handle encoded in a single u128 value.
///
/// Layout (MSB to LSB): `[system_id: 64 bits][local_index: 32 bits][generation: 32 bits]`
///
/// The `local_index` field uses bit 31 as a local/distributed flag:
/// - Bit 31 = 1: local event (created by `LocalEventSystem::new()`)
/// - Bit 31 = 0: distributed event (created via `DistributedEventFactory`)
///
/// Both local and distributed systems have unique non-zero `system_id` values.
/// Use `is_local()` / `is_distributed()` to check origin type.
// NOTE(review): no `LocalEventSystem` type is visible next to this definition.
// The flag bit is OR-ed in by `EventSystemBase::allocate_entry` when its
// `is_local` field is true — confirm which constructor the bullet above means.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EventHandle(u128);
impl EventHandle {
/// Create a handle with an explicit system id.
pub(crate) fn new(system_id: u64, local_index: u32, generation: Generation) -> Self {
let raw = ((system_id as u128) << SYSTEM_SHIFT)
| ((local_index as u128) << LOCAL_SHIFT)
| (generation as u128);
Self(raw)
}
/// Reconstruct a handle from its raw u128 representation.
pub fn from_raw(raw: u128) -> Self {
Self(raw)
}
/// Return the raw u128 representation.
pub fn raw(&self) -> u128 {
self.0
}
/// Extract the system id (upper 64 bits).
pub fn system_id(&self) -> u64 {
((self.0 & SYSTEM_MASK) >> SYSTEM_SHIFT) as u64
}
/// Extract the local index (middle 32 bits), including the local flag bit.
pub fn local_index(&self) -> u32 {
((self.0 & LOCAL_MASK) >> LOCAL_SHIFT) as u32
}
/// Extract the generation counter (lower 32 bits).
pub fn generation(&self) -> Generation {
(self.0 & GENERATION_MASK) as Generation
}
/// Returns `true` when the handle was created by a local event system.
pub fn is_local(&self) -> bool {
(self.local_index() & LOCAL_FLAG) != 0
}
/// Returns `true` when the handle was created by a distributed event system.
pub fn is_distributed(&self) -> bool {
!self.is_local()
}
/// Extract the counter portion of the local index (strips the flag bit).
pub(crate) fn index_counter(&self) -> u32 {
self.local_index() & INDEX_COUNTER_MASK
}
/// Return a copy of this handle with a different generation.
pub fn with_generation(&self, generation: Generation) -> Self {
Self::new(self.system_id(), self.local_index(), generation)
}
}
impl Display for EventHandle {
    /// Render the decoded fields: system id, index counter (flag bit stripped),
    /// generation, and the origin as `local` or `distributed`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        let origin = if self.is_local() {
            "local"
        } else {
            "distributed"
        };
        write!(
            f,
            "EventHandle {{ system={}, index={}, generation={}, {} }}",
            self.system_id(),
            self.index_counter(),
            self.generation(),
            origin
        )
    }
}
This diff is collapsed.
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Concrete [`EventManager`] that ties lifecycle and routing together.
use anyhow::Result;
use std::sync::Arc;
use crate::base::EventSystemBase;
use crate::event::{Event, EventBackend};
use crate::handle::EventHandle;
use crate::slot::EventAwaiter;
use crate::status::EventStatus;
/// Manages a collection of events — creating, triggering, poisoning, and
/// merging them.
///
/// `EventManager` is `Clone` and `Send + Sync`, so it can be cheaply shared
/// across async tasks.
///
/// # Local vs distributed
///
/// [`EventManager::local()`] creates a purely local manager backed by
/// [`EventSystemBase`]. For distributed setups, construct a manager with
/// [`EventManager::new()`] providing a custom [`EventBackend`] that routes
/// remote handles over the network.
#[derive(Clone)]
pub struct EventManager {
// Lifecycle store: `new_event`, `poll`, `merge_events`, `force_shutdown`,
// `poison_reason` and `system_id` all go through it.
base: Arc<EventSystemBase>,
// Completion routing: `trigger`, `poison` and `awaiter` go through it, and it
// is handed to every event created or merged by this manager.
backend: Arc<dyn EventBackend>,
}
impl EventManager {
    /// Create a purely local event manager.
    ///
    /// One [`EventSystemBase`] serves as both the lifecycle store and the
    /// completion backend.
    pub fn local() -> Self {
        let base = EventSystemBase::local();
        let backend: Arc<dyn EventBackend> = base.clone();
        Self { base, backend }
    }

    /// Create an event manager with a custom backend for routing.
    ///
    /// Used for distributed setups where trigger/poison/awaiter may be routed
    /// over the network.
    pub fn new(base: Arc<EventSystemBase>, backend: Arc<dyn EventBackend>) -> Self {
        Self { base, backend }
    }

    /// The system identity reported by the underlying base.
    pub fn system_id(&self) -> u64 {
        self.base.system_id()
    }

    /// Borrow the underlying event system base.
    pub fn base(&self) -> &Arc<EventSystemBase> {
        &self.base
    }

    /// Allocate a new pending event that completes through this manager's backend.
    pub fn new_event(&self) -> Result<Event> {
        let backend = Arc::clone(&self.backend);
        self.base.new_event_with_backend(backend)
    }

    /// Create a future that resolves when the given event completes.
    pub fn awaiter(&self, handle: EventHandle) -> Result<EventAwaiter> {
        self.backend.awaiter(handle)
    }

    /// Non-blocking status check, answered by the base rather than the backend.
    pub fn poll(&self, handle: EventHandle) -> Result<EventStatus> {
        self.base.poll_inner(handle)
    }

    /// Trigger the event identified by `handle`.
    pub fn trigger(&self, handle: EventHandle) -> Result<()> {
        self.backend.trigger(handle)
    }

    /// Poison the event identified by `handle` with the given reason.
    pub fn poison(&self, handle: EventHandle, reason: impl Into<Arc<str>>) -> Result<()> {
        let reason: Arc<str> = reason.into();
        self.backend.poison(handle, reason)
    }

    /// Create a new event that completes when **all** `inputs` complete.
    ///
    /// If any input is poisoned the merged event is poisoned with the
    /// accumulated reasons.
    pub fn merge_events(&self, inputs: Vec<EventHandle>) -> Result<EventHandle> {
        let backend = Arc::clone(&self.backend);
        self.base.merge_events_with(inputs, backend)
    }

    /// Poison every pending event and reject future allocations.
    pub fn force_shutdown(&self, reason: impl Into<Arc<str>>) {
        self.base.force_shutdown_inner(reason)
    }

    /// Return the poison reason for a completed generation, if any.
    // NOTE(review): `dead_code` is allowed here presumably because the only
    // callers are tests — confirm, or drop the attribute.
    #[allow(dead_code)]
    pub(crate) fn poison_reason(&self, handle: EventHandle) -> Option<Arc<str>> {
        self.base.poison_reason(handle)
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::sync::Arc;
use crate::status::EventPoison;
/// Reference-counted [`EventPoison`], as carried by `CompletionKind::Poisoned`
/// and `WaitRegistration::Poisoned`.
pub(crate) type PoisonArc = Arc<EventPoison>;
/// How an event generation completed.
#[derive(Clone, Debug)]
pub(crate) enum CompletionKind {
/// Completed successfully; `as_result` maps this to `Ok(())`.
Triggered,
/// Completed with a poison; `as_result` maps this to `Err` with a clone of
/// the carried [`EventPoison`].
Poisoned(PoisonArc),
}
impl CompletionKind {
pub(crate) fn as_result(&self) -> Result<(), EventPoison> {
match self {
Self::Triggered => Ok(()),
Self::Poisoned(poison) => Err((**poison).clone()),
}
}
}
/// Result of registering a waiter on an entry, as matched in
/// `EventSystemBase::wait_local`.
pub(crate) enum WaitRegistration {
/// `wait_local` answers with an awaiter already resolved as triggered.
Ready,
/// `wait_local` answers with an awaiter that polls the entry.
Pending,
/// `wait_local` answers with an awaiter already resolved with this poison.
Poisoned(PoisonArc),
}
This diff is collapsed.
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Single-lock synchronization primitives for the event system.
//!
//! All per-entry state — generation tracking, completion status, and waker
//! registration — is consolidated under a single `parking_lot::Mutex`,
//! eliminating stale-completion races by construction.
//!
//! See `docs/slot-state-machine.md` for the formal state machine specification.
mod completion;
pub(crate) mod entry;
mod waiter;
pub(crate) use completion::{CompletionKind, PoisonArc, WaitRegistration};
pub(crate) use entry::{EventEntry, EventKey, PoisonOutcome};
pub use waiter::EventAwaiter;
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use super::completion::CompletionKind;
use super::entry::EventEntry;
use crate::status::Generation;
/// Future that waits for an event to complete.
///
/// This can be used in `tokio::select!` and polled multiple times efficiently.
/// Waker deduplication inside the entry lock prevents unbounded growth.
pub struct EventAwaiter {
// Entry polled for completion; `None` for awaiters built by `immediate`.
entry: Option<Arc<EventEntry>>,
// Generation passed to `EventEntry::poll_waiter`; set to 0 by `immediate`,
// where `poll` never reads it.
observed_generation: Generation,
// Pre-resolved completion; when `Some`, `poll` returns it without touching
// `entry`.
immediate_result: Option<Arc<CompletionKind>>,
}
impl EventAwaiter {
/// Creates a waiter that immediately resolves with the given result.
///
/// No entry is stored, so polling never touches the event system.
// NOTE(review): `private_interfaces` is presumably allowed because this
// signature names the crate-private `CompletionKind` on a `pub` type — confirm.
#[allow(private_interfaces)]
pub(crate) fn immediate(result: Arc<CompletionKind>) -> Self {
Self {
entry: None,
observed_generation: 0,
immediate_result: Some(result),
}
}
/// Creates a waiter that will poll the entry for completion.
///
/// `generation` is the generation later passed to `poll_waiter`.
// NOTE(review): `private_interfaces` is presumably allowed because this
// signature names the crate-private `EventEntry` on a `pub` type — confirm.
#[allow(private_interfaces)]
pub(crate) fn pending(entry: Arc<EventEntry>, generation: Generation) -> Self {
Self {
entry: Some(entry),
observed_generation: generation,
immediate_result: None,
}
}
}
impl Future for EventAwaiter {
type Output = anyhow::Result<()>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = self.get_mut();
// Fast path: immediate result (already-completed event)
if let Some(result) = &this.immediate_result {
return Poll::Ready(result.as_ref().as_result().map_err(anyhow::Error::new));
}
let entry = this
.entry
.as_ref()
.expect("EventAwaiter with no entry or immediate_result");
entry.poll_waiter(this.observed_generation, cx)
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Event status types shared across local and distributed implementations.
use std::fmt::{self, Display, Formatter};
use std::sync::Arc;
use crate::handle::EventHandle;
/// Alias for event generation counters.
///
/// An `EventHandle` stores this value in its lowest 32 bits.
pub type Generation = u32;
/// Status returned from non-blocking event queries.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum EventStatus {
    /// The queried generation has not completed yet.
    Pending,
    /// The queried generation completed successfully.
    Ready,
    /// The queried generation completed by being poisoned.
    Poisoned,
}
/// Describes a poisoned event generation.
///
/// Implements `Display` and `std::error::Error`, so it can be used as an
/// error value.
#[derive(Clone, Debug)]
pub struct EventPoison {
// Handle of the poisoned event, as passed to `new`.
handle: EventHandle,
// Reason text, shared as `Arc<str>` so clones do not copy the string.
reason: Arc<str>,
}
impl EventPoison {
    /// Record that `handle` was poisoned for `reason`.
    pub fn new(handle: EventHandle, reason: impl Into<Arc<str>>) -> Self {
        let reason = reason.into();
        Self { handle, reason }
    }

    /// The handle of the poisoned event.
    pub fn handle(&self) -> EventHandle {
        self.handle
    }

    /// The poison reason as a string slice.
    pub fn reason(&self) -> &str {
        self.reason.as_ref()
    }

    /// The poison reason as the shared `Arc<str>`, for cloning without copying.
    pub fn reason_arc(&self) -> &Arc<str> {
        let Self { reason, .. } = self;
        reason
    }
}
impl Display for EventPoison {
    /// Render as `Event <handle> poisoned: <reason>`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self { handle, reason } = self;
        write!(f, "Event {} poisoned: {}", handle, reason)
    }
}
// Marker impl with no overridden methods. It is what lets an `EventPoison`
// be wrapped with `anyhow::Error::new`, as `EventAwaiter::poll` does.
impl std::error::Error for EventPoison {}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment