// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

4
5
6
7
8
9
use super::KvRouterConfig;
use super::RouterConfigOverride;
use super::WorkerSelector;
use super::protocols::{DpRank, OverlapScores, WorkerId, WorkerSelectionResult, WorkerWithDpRank};
use super::queue::SchedulerQueue;
use super::sequence::{ActiveSequencesMultiWorker, SequenceError, SequenceRequest};
10
use crate::discovery::RuntimeConfigWatch;
11
use crate::local_model::runtime_config::ModelRuntimeConfig;
12
use anyhow::Result;
13
use dynamo_runtime::component::Component;
Yan Ru Pei's avatar
Yan Ru Pei committed
14
use dynamo_runtime::traits::DistributedRuntimeProvider;
15
use rand::Rng;
16
use serde::{Deserialize, Serialize};
17
use std::collections::{HashMap, HashSet};
18
19
use std::sync::Arc;
use std::time::Duration;
20
21
#[cfg(feature = "bench")]
use std::time::Instant;
22

23
use dynamo_tokens::SequenceHash;
24

25
26
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PotentialLoad {
Yan Ru Pei's avatar
Yan Ru Pei committed
27
28
    pub worker_id: WorkerId,
    pub dp_rank: DpRank,
29
30
31
32
    pub potential_prefill_tokens: usize,
    pub potential_decode_blocks: usize,
}

33
34
#[derive(Debug, thiserror::Error)]
pub enum KvSchedulerError {
35
    #[error("no endpoints available to route work")]
36
37
38
39
    NoEndpoints,

    #[error("endpoint subscriber shutdown")]
    SubscriberShutdown,
40
41
42

    #[error("failed to initialize event publisher: {0}")]
    InitFailed(String),
43
44
}

45
46
#[derive(Debug)]
pub struct SchedulingResponse {
Yan Ru Pei's avatar
Yan Ru Pei committed
47
    pub best_worker: WorkerWithDpRank,
48
    pub overlap_blocks: u32,
49
50
}

51
pub struct SchedulingRequest {
Yan Ru Pei's avatar
Yan Ru Pei committed
52
    pub maybe_request_id: Option<String>,
53
    pub token_seq: Option<Vec<SequenceHash>>,
54
    pub isl_tokens: usize,
55
    pub overlaps: OverlapScores,
Yan Ru Pei's avatar
Yan Ru Pei committed
56
57
    pub decode_blocks: HashMap<WorkerWithDpRank, usize>,
    pub prefill_tokens: HashMap<WorkerWithDpRank, usize>,
58
59
    // Router config overrides for this specific request
    pub router_config_override: Option<RouterConfigOverride>,
60
61
    // Whether to update scheduler states (false for query_instance_id requests)
    pub update_states: bool,
62
63
    // LORA adapter name extracted from request.model field
    pub lora_name: Option<String>,
64
65
    /// Priority jump in seconds; decreases effective arrival time in the queue.
    pub priority_jump: f64,
66
    resp_tx: Option<tokio::sync::oneshot::Sender<Result<SchedulingResponse, KvSchedulerError>>>,
67
68
69
}

impl SchedulingRequest {
70
71
    pub fn respond(&mut self, result: Result<SchedulingResponse, KvSchedulerError>) {
        let Some(tx) = self.resp_tx.take() else {
72
            tracing::error!("respond called multiple times on same request");
73
74
75
76
            return;
        };
        if tx.send(result).is_err() {
            tracing::error!("failed to send response to requestor");
77
78
79
80
81
82
        }
    }
}

pub struct KvScheduler {
    request_tx: tokio::sync::mpsc::Sender<SchedulingRequest>,
83
    slots: Arc<ActiveSequencesMultiWorker>,
84
    queue: Arc<SchedulerQueue>,
85
86
87
88
}

impl KvScheduler {
    pub async fn start(
89
        component: Component,
90
        block_size: u32,
91
        workers_with_configs: RuntimeConfigWatch,
92
        selector: Option<Box<dyn WorkerSelector + Send + Sync>>,
93
        kv_router_config: &KvRouterConfig,
94
        worker_type: &'static str,
95
    ) -> Result<Self, KvSchedulerError> {
96
        let selector = selector.unwrap_or(Box::new(DefaultWorkerSelector::default()));
97

98
99
100
101
        // Get initial workers from watch receiver.
        // Caller must ensure at least one worker is present (via wait_for).
        let initial_workers: HashMap<WorkerId, ModelRuntimeConfig> =
            workers_with_configs.borrow().clone();
102

103
        let router_id = component.drt().discovery().instance_id();
104
105
106
107
108
        let slots = Arc::new(
            ActiveSequencesMultiWorker::new(
                component.clone(),
                block_size as usize,
                initial_workers,
109
                kv_router_config.router_replica_sync,
110
                router_id,
111
                worker_type,
112
113
114
115
            )
            .await
            .map_err(|e| KvSchedulerError::InitFailed(e.to_string()))?,
        );
116

117
        // Spawn background task to sync slots when the watch value changes.
Yan Ru Pei's avatar
Yan Ru Pei committed
118
        let slots_monitor = slots.clone();
119
        let mut monitor_rx = workers_with_configs.clone();
120
        let monitor_cancel_token = component.drt().child_token();
121
        tokio::spawn(async move {
122
            tracing::trace!("KvScheduler workers monitoring task started");
123
            let mut last_workers: HashMap<WorkerId, ModelRuntimeConfig> = HashMap::new();
124

125
            loop {
Yan Ru Pei's avatar
Yan Ru Pei committed
126
127
                tokio::select! {
                    _ = monitor_cancel_token.cancelled() => {
128
                        tracing::trace!("KvScheduler workers monitoring task shutting down");
129
130
                        break;
                    }
131
                    result = monitor_rx.changed() => {
132
133
134
135
136
                        if result.is_err() {
                            tracing::warn!("KvScheduler: config watch sender dropped, shutting down");
                            break;
                        }
                    }
137
138
                }

139
140
141
142
143
                let current_workers = monitor_rx.borrow_and_update().clone();

                if current_workers != last_workers {
                    slots_monitor.update_workers(current_workers.clone());
                    last_workers = current_workers;
Yan Ru Pei's avatar
Yan Ru Pei committed
144
145
146
147
148
149
150
                }
            }
        });

        let (request_tx, request_rx) = tokio::sync::mpsc::channel::<SchedulingRequest>(1024);
        let scheduler_cancel_token = component.drt().primary_token();

151
152
153
        let queue = Arc::new(SchedulerQueue::new(
            slots.clone(),
            workers_with_configs.clone(),
154
            kv_router_config.router_queue_threshold,
155
156
            block_size,
            selector,
157
158
159
        ));
        let queue_clone = queue.clone();

160
        // Background task: receive requests and periodically recheck pending
Yan Ru Pei's avatar
Yan Ru Pei committed
161
162
        tokio::spawn(async move {
            let mut request_rx = request_rx;
163
            let mut recheck_interval = tokio::time::interval(Duration::from_secs(60));
Yan Ru Pei's avatar
Yan Ru Pei committed
164
165
166
            tracing::trace!("scheduler background task started");

            loop {
167
168
169
170
171
172
173
174
175
                tokio::select! {
                    _ = scheduler_cancel_token.cancelled() => {
                        tracing::trace!("scheduler background task shutting down");
                        break;
                    }
                    request = request_rx.recv() => {
                        let Some(request) = request else {
                            tracing::warn!("scheduler shutdown");
                            break;
176
                        };
177
178
179
180
181
182
183
                        tracing::trace!("received request to be scheduled");
                        queue_clone.enqueue(request).await;
                    }
                    _ = recheck_interval.tick() => {
                        queue_clone.update().await;
                    }
                }
184
185
            }

186
            tracing::trace!("background endpoint subscriber shutting down");
187
188
        });

189
190
191
192
193
        Ok(KvScheduler {
            request_tx,
            slots,
            queue,
        })
194
195
    }

196
    #[allow(clippy::too_many_arguments)]
197
198
    pub async fn schedule(
        &self,
Yan Ru Pei's avatar
Yan Ru Pei committed
199
        maybe_request_id: Option<String>,
200
        isl_tokens: usize,
201
        token_seq: Option<Vec<SequenceHash>>,
202
        overlaps: OverlapScores,
203
        router_config_override: Option<&RouterConfigOverride>,
204
        update_states: bool,
205
        lora_name: Option<String>,
206
        priority_jump: f64,
Yan Ru Pei's avatar
Yan Ru Pei committed
207
    ) -> Result<WorkerWithDpRank, KvSchedulerError> {
208
209
210
        #[cfg(feature = "bench")]
        let start = Instant::now();

211
212
        let (resp_tx, resp_rx) = tokio::sync::oneshot::channel();
        let request = SchedulingRequest {
Yan Ru Pei's avatar
Yan Ru Pei committed
213
            maybe_request_id,
214
            token_seq,
215
            isl_tokens,
216
            overlaps,
217
218
            decode_blocks: HashMap::new(),
            prefill_tokens: HashMap::new(),
219
            router_config_override: router_config_override.cloned(),
220
            update_states,
221
            lora_name,
222
223
            priority_jump,
            resp_tx: Some(resp_tx),
224
        };
225

226
227
228
229
        self.request_tx
            .send(request)
            .await
            .map_err(|_| KvSchedulerError::SubscriberShutdown)?;
230
231
232
233

        #[cfg(feature = "bench")]
        let send_elapsed = start.elapsed();

234
        let response = resp_rx
235
            .await
236
            .map_err(|_| KvSchedulerError::SubscriberShutdown)??;
237

238
239
240
241
242
243
244
245
246
247
        #[cfg(feature = "bench")]
        let total_elapsed = start.elapsed();
        #[cfg(feature = "bench")]
        tracing::info!(
            isl_tokens,
            send_us = send_elapsed.as_micros() as u64,
            total_us = total_elapsed.as_micros() as u64,
            "scheduler.schedule completed"
        );

Yan Ru Pei's avatar
Yan Ru Pei committed
248
        Ok(response.best_worker)
249
250
    }

251
252
    /// Registers `req` with the slot tracker.
    ///
    /// # Errors
    ///
    /// Propagates any `SequenceError` returned by the slot tracker.
    pub async fn add_request(&self, req: SequenceRequest) -> Result<(), SequenceError> {
        self.slots.add_request(req).await
    }

255
    pub async fn mark_prefill_completed(&self, request_id: &str) -> Result<(), SequenceError> {
256
        self.slots
257
            .mark_prefill_completed(&request_id.to_string())
258
259
260
            .await?;
        self.queue.update().await;
        Ok(())
261
262
    }

263
    /// Frees `request_id` in the slot tracker, then asks the queue to
    /// re-evaluate via `update()`.
    ///
    /// # Errors
    ///
    /// Propagates any `SequenceError` from the slot tracker; in that case the
    /// queue is not updated.
    pub async fn free(&self, request_id: &str) -> Result<(), SequenceError> {
        self.slots.free(&request_id.to_string()).await?;
        self.queue.update().await;
        Ok(())
    }
268

269
270
271
272
273
274
    /// Get the worker type for this scheduler ("prefill" or "decode").
    /// Used for Prometheus metric labeling.
    ///
    /// The value is read back from the slot tracker rather than stored on the
    /// scheduler itself.
    pub fn worker_type(&self) -> &'static str {
        self.slots.worker_type()
    }

275
276
277
278
279
280
281
282
283
284
    /// Forwards an output-block notification for `request_id` to the slot tracker.
    ///
    /// `decay_fraction` is passed through unchanged; its interpretation is
    /// defined by the slot tracker.
    ///
    /// # Errors
    ///
    /// Propagates any `SequenceError` returned by the slot tracker.
    pub async fn add_output_block(
        &self,
        request_id: &str,
        decay_fraction: Option<f64>,
    ) -> Result<(), SequenceError> {
        self.slots
            .add_output_block(&request_id.to_string(), decay_fraction)
            .await
    }

285
286
    pub async fn get_potential_loads(
        &self,
287
        token_seq: Option<Vec<SequenceHash>>,
288
289
290
291
292
293
294
295
        isl_tokens: usize,
        overlaps: OverlapScores,
    ) -> Vec<PotentialLoad> {
        let (decode_blocks, prefill_tokens) = self
            .slots
            .potential_blocks_and_tokens(token_seq, isl_tokens, overlaps)
            .await;

Yan Ru Pei's avatar
Yan Ru Pei committed
296
297
298
299
        // Get all unique WorkerWithDpRank from both hashmaps
        let mut workers: HashSet<WorkerWithDpRank> = HashSet::new();
        workers.extend(decode_blocks.keys().copied());
        workers.extend(prefill_tokens.keys().copied());
300
301
302

        // Create PotentialLoad for each worker
        let mut loads = Vec::new();
Yan Ru Pei's avatar
Yan Ru Pei committed
303
        for worker in workers {
304
            loads.push(PotentialLoad {
Yan Ru Pei's avatar
Yan Ru Pei committed
305
306
                worker_id: worker.worker_id,
                dp_rank: worker.dp_rank,
307
                potential_prefill_tokens: prefill_tokens
Yan Ru Pei's avatar
Yan Ru Pei committed
308
                    .get(&worker)
309
310
                    .copied()
                    .unwrap_or(isl_tokens),
Yan Ru Pei's avatar
Yan Ru Pei committed
311
                potential_decode_blocks: decode_blocks.get(&worker).copied().unwrap_or(0),
312
313
314
315
316
            });
        }

        loads
    }
317
318
319
320
321

    /// Get active request counts grouped by LORA name.
    ///
    /// Delegates to the slot tracker and returns its map unchanged.
    pub fn get_active_lora_counts(&self) -> HashMap<String, usize> {
        self.slots.get_active_lora_counts()
    }
322
323
}

324
// Helper function for softmax sampling
325
326
327
328
329
// Returns a vec of workers: multiple if tied, single if sampled
fn softmax_sample(
    logits: &HashMap<WorkerWithDpRank, f64>,
    temperature: f64,
) -> Vec<WorkerWithDpRank> {
330
331
332
333
    if logits.is_empty() {
        panic!("Empty logits for softmax sampling");
    }

334
    // Guard: if temperature is 0, return all keys with the smallest logit value (ties)
335
336
337
338
339
340
341
    if temperature == 0.0 {
        // Find the minimum logit value
        let min_logit = logits.values().fold(f64::INFINITY, |a, &b| a.min(b));

        // Collect all keys with the minimum logit value (to handle ties)
        let min_keys: Vec<_> = logits
            .iter()
342
            .filter(|&(_, &v)| v == min_logit)
343
344
345
            .map(|(k, _)| *k)
            .collect();

346
        return min_keys;
347
348
    }

349
350
351
352
353
354
355
356
357
358
359
    let keys: Vec<_> = logits.keys().copied().collect();
    let values: Vec<_> = logits.values().copied().collect();

    // Find min and max for normalization
    let min_val = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
    let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));

    let probabilities = if min_val == max_val {
        // All values are the same, uniform probability
        vec![1.0 / keys.len() as f64; keys.len()]
    } else {
360
361
362
        // Fused normalize → negate → scale → exp, then normalize probabilities
        let range = max_val - min_val;
        let scaled: Vec<f64> = values.iter().map(|&v| -(v / range) / temperature).collect();
363
        let max_scaled = scaled.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
364
365
366
367
        let mut probs: Vec<f64> = scaled.iter().map(|&v| (v - max_scaled).exp()).collect();
        let sum: f64 = probs.iter().sum();
        probs.iter_mut().for_each(|p| *p /= sum);
        probs
368
369
370
371
372
373
374
375
376
377
    };

    // Sample from the probability distribution
    let mut rng = rand::rng();
    let sample: f64 = rng.random();

    let mut cumsum = 0.0;
    for (i, &prob) in probabilities.iter().enumerate() {
        cumsum += prob;
        if sample <= cumsum {
378
            return vec![keys[i]];
379
380
381
382
        }
    }

    // Fallback to last key (shouldn't normally reach here)
383
    vec![keys[keys.len() - 1]]
384
385
}

386
// Default implementation matching the Python _cost_function
387
388
389
390
391
392
393
394
395
396
397
398
/// Built-in `WorkerSelector` that scores workers from the request's prefill
/// and decode figures and picks one through `softmax_sample`.
#[derive(Debug, Clone, Default)]
pub struct DefaultWorkerSelector {
    /// Router configuration supplying the default `overlap_score_weight` and
    /// `router_temperature`; both can be overridden per request.
    pub kv_router_config: KvRouterConfig,
}

impl DefaultWorkerSelector {
    /// Builds a selector from an optional router configuration.
    ///
    /// Passing `None` is equivalent to using `KvRouterConfig::default()`.
    pub fn new(kv_router_config: Option<KvRouterConfig>) -> Self {
        let kv_router_config = kv_router_config.unwrap_or_default();
        Self { kv_router_config }
    }
}
399
400
401
402

impl WorkerSelector for DefaultWorkerSelector {
    fn select_worker(
        &self,
403
        workers: &HashMap<WorkerId, ModelRuntimeConfig>,
404
        request: &SchedulingRequest,
405
        block_size: u32,
406
407
408
    ) -> Result<WorkerSelectionResult, KvSchedulerError> {
        assert!(request.isl_tokens > 0);

409
        if workers.is_empty() {
410
411
412
            return Err(KvSchedulerError::NoEndpoints);
        }

413
414
415
416
        let isl = request.isl_tokens;
        let request_blocks = isl.div_ceil(block_size as usize);
        let overlaps = &request.overlaps.scores;

417
418
        let decode_blocks = &request.decode_blocks;
        let prefill_tokens = &request.prefill_tokens;
419

420
        let mut worker_logits = HashMap::new();
421

422
423
424
425
426
427
428
        // Use override if provided, otherwise use default config
        let overlap_weight = request
            .router_config_override
            .as_ref()
            .and_then(|cfg| cfg.overlap_score_weight)
            .unwrap_or(self.kv_router_config.overlap_score_weight);

Yan Ru Pei's avatar
Yan Ru Pei committed
429
430
431
432
        // Calculate logits for each worker with dp_rank
        // Outer loop: iterate over all workers from runtime config
        // Inner loop: iterate over all dp_ranks for each worker
        for (worker_id, config) in workers.iter() {
433
            let data_parallel_size = config.data_parallel_size;
Yan Ru Pei's avatar
Yan Ru Pei committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463

            for dp_rank in 0..data_parallel_size {
                let worker = WorkerWithDpRank::new(*worker_id, dp_rank);

                // Get overlap for this worker (defaults to 0 if not in overlaps)
                let overlap = *overlaps.get(&worker).unwrap_or(&0);

                // this is the number of prefill tokens the worker would have if the request were scheduled there
                let prefill_token = *prefill_tokens.get(&worker).unwrap_or(&isl);
                let potential_prefill_block = (prefill_token as f64) / (block_size as f64);

                // this is the number of decode blocks the worker would have if the request were scheduled there
                let decode_block = *decode_blocks
                    .get(&worker)
                    .unwrap_or(&(potential_prefill_block.floor() as usize))
                    as f64;

                // Calculate logit (lower is better)
                let logit = overlap_weight * potential_prefill_block + decode_block;

                worker_logits.insert(worker, logit);

                tracing::info!(
                    "Formula for worker_id={} dp_rank={:?} with {overlap} cached blocks: {logit:.3} \
                     = {overlap_weight:.1} * prefill_blocks + decode_blocks \
                     = {overlap_weight:.1} * {potential_prefill_block:.3} + {decode_block:.3}",
                    worker.worker_id,
                    worker.dp_rank
                );
            }
464
465
        }

466
        // Use softmax sampling to select worker(s)
467
468
469
470
471
472
        // Use override if provided, otherwise use default config
        let temperature = request
            .router_config_override
            .as_ref()
            .and_then(|cfg| cfg.router_temperature)
            .unwrap_or(self.kv_router_config.router_temperature);
473
474
475
        let candidates = softmax_sample(&worker_logits, temperature);

        // If multiple candidates (tied), use tree size as tie-breaker
476
        // If tree sizes are also equal, use random selection to avoid bias
477
478
        let best_worker = if candidates.len() > 1 {
            tracing::info!("Multiple workers tied with same logit, using tree size as tie-breaker");
479
            let tree_sizes: Vec<(usize, &WorkerWithDpRank)> = candidates
480
                .iter()
481
482
483
484
485
486
487
488
489
                .map(|w| (request.overlaps.tree_sizes.get(w).copied().unwrap_or(0), w))
                .collect();

            if tree_sizes.iter().all(|(s, _)| *s == tree_sizes[0].0) {
                let idx = rand::rng().random_range(0..candidates.len());
                candidates[idx]
            } else {
                *tree_sizes.iter().min_by_key(|(s, _)| *s).unwrap().1
            }
490
491
492
493
        } else {
            candidates[0]
        };

Yan Ru Pei's avatar
Yan Ru Pei committed
494
495
496
        let best_logit = worker_logits[&best_worker];

        let best_overlap = *overlaps.get(&best_worker).unwrap_or(&0);
497

Yan Ru Pei's avatar
Yan Ru Pei committed
498
        // this is a runtime config set on a per worker basis, not per dp-rank
499
        let total_blocks_info = workers
Yan Ru Pei's avatar
Yan Ru Pei committed
500
            .get(&best_worker.worker_id)
501
502
503
504
            .and_then(|cfg| cfg.total_kv_blocks)
            .map(|blocks| format!(", total blocks: {}", blocks))
            .unwrap_or_default();

505
506
507
508
509
510
511
        let tree_size = request
            .overlaps
            .tree_sizes
            .get(&best_worker)
            .copied()
            .unwrap_or(0);

512
        tracing::info!(
513
            "Selected worker: worker_id={} dp_rank={:?}, logit: {:.3}, cached blocks: {}, tree size: {}{}",
Yan Ru Pei's avatar
Yan Ru Pei committed
514
515
            best_worker.worker_id,
            best_worker.dp_rank,
516
517
            best_logit,
            best_overlap,
518
            tree_size,
519
            total_blocks_info
520
        );
521
522

        Ok(WorkerSelectionResult {
Yan Ru Pei's avatar
Yan Ru Pei committed
523
            worker: best_worker,
524
            required_blocks: request_blocks as u64,
Yan Ru Pei's avatar
Yan Ru Pei committed
525
            overlap_blocks: overlaps.get(&best_worker).copied().unwrap_or(0),
526
        })
527
528
    }
}
529
530
531
532
533

#[cfg(test)]
mod tests {
    use super::*;

    /// A single candidate must be returned regardless of temperature or logit value.
    #[test]
    fn test_softmax_sample_single_key() {
        // Test that with a single key, softmax_sample always returns that key
        let mut logits = HashMap::new();
        let worker = WorkerWithDpRank::from_worker_id(42);
        logits.insert(worker, 0.5); // The value doesn't matter

        // Test with different temperatures
        for temperature in &[0.1, 1.0, 10.0] {
            let result = softmax_sample(&logits, *temperature);
            assert_eq!(result.len(), 1, "Should return exactly one worker");
            assert_eq!(result[0], worker, "Should return the only available worker");
        }

        // Test with different logit values
        logits.clear();
        logits.insert(worker, -100.0); // Very negative value
        let result = softmax_sample(&logits, 1.0);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], worker);

        logits.clear();
        logits.insert(worker, 100.0); // Very positive value
        let result = softmax_sample(&logits, 1.0);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], worker);

        logits.clear();
        logits.insert(worker, 0.0); // Zero value
        let result = softmax_sample(&logits, 1.0);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], worker);
    }

    /// Temperature zero is greedy: all workers sharing the minimum logit are returned.
    #[test]
    fn test_softmax_sample_zero_temperature() {
        // Test that with temperature 0, softmax_sample returns all keys with smallest logit
        let mut logits = HashMap::new();
        let worker1 = WorkerWithDpRank::from_worker_id(1);
        let worker2 = WorkerWithDpRank::from_worker_id(2);
        let worker3 = WorkerWithDpRank::from_worker_id(3);
        let worker4 = WorkerWithDpRank::from_worker_id(4);
        logits.insert(worker1, 5.0);
        logits.insert(worker2, 3.0); // This has the smallest logit
        logits.insert(worker3, 7.0);
        logits.insert(worker4, 3.5);

        // With temperature 0, should always return only worker2 (smallest logit)
        let result = softmax_sample(&logits, 0.0);
        assert_eq!(
            result.len(),
            1,
            "Should return one worker when there's no tie"
        );
        assert_eq!(
            result[0], worker2,
            "Should return worker with smallest logit when temperature is 0"
        );

        // Test with tied minimum logits
        logits.clear();
        let worker5 = WorkerWithDpRank::from_worker_id(5);
        let worker6 = WorkerWithDpRank::from_worker_id(6);
        logits.insert(worker1, 5.0);
        logits.insert(worker2, 3.0); // Tied for smallest
        logits.insert(worker5, 3.0); // Tied for smallest
        logits.insert(worker6, 7.0);

        let result = softmax_sample(&logits, 0.0);
        assert_eq!(
            result.len(),
            2,
            "Should return all workers with smallest logit when tied"
        );
        assert!(
            result.contains(&worker2) && result.contains(&worker5),
            "Should contain both tied workers"
        );

        // Test with negative values
        logits.clear();
        let worker10 = WorkerWithDpRank::from_worker_id(10);
        let worker20 = WorkerWithDpRank::from_worker_id(20);
        let worker30 = WorkerWithDpRank::from_worker_id(30);
        logits.insert(worker10, -1.0);
        logits.insert(worker20, -5.0); // This has the smallest logit
        logits.insert(worker30, 0.0);

        let result = softmax_sample(&logits, 0.0);
        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0], worker20,
            "Should handle negative logits correctly"
        );
    }

    /// With a positive temperature exactly one of the supplied workers is returned.
    #[test]
    fn test_softmax_sample_positive_temperature_returns_known_worker() {
        let mut logits = HashMap::new();
        let worker1 = WorkerWithDpRank::from_worker_id(1);
        let worker2 = WorkerWithDpRank::from_worker_id(2);
        logits.insert(worker1, 1.0);
        logits.insert(worker2, 4.0);

        // Sampling is random, so repeat a few times to exercise both outcomes.
        for _ in 0..32 {
            let result = softmax_sample(&logits, 1.0);
            assert_eq!(result.len(), 1, "Sampling must yield exactly one worker");
            assert!(
                result[0] == worker1 || result[0] == worker2,
                "Sampled worker must come from the input map"
            );
        }
    }
}