Unverified commit e361d6fa, authored by Yan Ru Pei, committed by GitHub
Browse files

fix: local block hash consistency in mooncake bench (#6310)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 25b769e5
...@@ -50,6 +50,7 @@ rstest_reuse = "0.7.0" ...@@ -50,6 +50,7 @@ rstest_reuse = "0.7.0"
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "time"] } tokio = { workspace = true, features = ["rt", "macros", "time"] }
dynamo-mocker = { workspace = true } dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true }
minstant = "0.1.7" minstant = "0.1.7"
futures = "0.3" futures = "0.3"
......
...@@ -7,7 +7,9 @@ use dynamo_kv_router::indexer::{ ...@@ -7,7 +7,9 @@ use dynamo_kv_router::indexer::{
KvIndexer, KvIndexerInterface, KvIndexerMetrics, KvIndexerSharded, KvIndexer, KvIndexerInterface, KvIndexerMetrics, KvIndexerSharded,
}; };
use dynamo_kv_router::protocols::RouterEvent; use dynamo_kv_router::protocols::RouterEvent;
use dynamo_kv_router::protocols::XXH3_SEED;
use dynamo_kv_router::{ConcurrentRadixTree, PositionalIndexer, ThreadPoolIndexer}; use dynamo_kv_router::{ConcurrentRadixTree, PositionalIndexer, ThreadPoolIndexer};
use dynamo_tokens::compute_hash_v2;
use rand::prelude::*; use rand::prelude::*;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
...@@ -254,6 +256,15 @@ fn tokens_from_request(request: &MooncakeRequest, block_size: u32) -> Vec<u32> { ...@@ -254,6 +256,15 @@ fn tokens_from_request(request: &MooncakeRequest, block_size: u32) -> Vec<u32> {
.collect() .collect()
} }
/// Compute the `LocalBlockHash` for a block-level `hash_id` the same way the mock
/// engine does: expand the id into `block_size` repeated `u32` tokens, then hash
/// the tokens' raw bytes with XXH3 seeded by `XXH3_SEED`.
///
/// `hash_id` is narrowed to `u32` with an `as` cast, so only its low 32 bits take
/// part in the hash. A `block_size` of 0 hashes an empty byte slice.
fn local_block_hash_from_id(hash_id: u64, block_size: u32) -> LocalBlockHash {
    // NOTE(review): the narrowing cast drops the upper 32 bits of `hash_id`;
    // presumably ids fit in a token-sized integer -- confirm against the trace format.
    let token = hash_id as u32;
    // Native-endian bytes reproduce the in-memory layout of a `[u32]` buffer, so the
    // hash input is identical to reinterpreting the token slice as bytes, but no
    // `unsafe` pointer cast is needed.
    let bytes: Vec<u8> = token.to_ne_bytes().repeat(block_size as usize);
    LocalBlockHash(compute_hash_v2(bytes.as_slice(), XXH3_SEED))
}
/// Create a styled progress bar, optionally with a known total length. /// Create a styled progress bar, optionally with a known total length.
fn make_progress_bar(total: Option<u64>) -> ProgressBar { fn make_progress_bar(total: Option<u64>) -> ProgressBar {
let progress = match total { let progress = match total {
...@@ -418,7 +429,7 @@ fn prepare_worker_traces( ...@@ -418,7 +429,7 @@ fn prepare_worker_traces(
request request
.hash_ids .hash_ids
.iter() .iter()
.map(|id| LocalBlockHash(*id)) .map(|id| local_block_hash_from_id(*id, args.block_size))
.collect(), .collect(),
), ),
}) })
......
...@@ -17,15 +17,14 @@ ...@@ -17,15 +17,14 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use dynamo_kv_router::protocols::XXH3_SEED;
/// LocalBlockHash type (content hash from tokens only) /// LocalBlockHash type (content hash from tokens only)
type LocalBlockHash = u64; type LocalBlockHash = u64;
/// SequenceHash type (position-aware hash, includes parent context) /// SequenceHash type (position-aware hash, includes parent context)
type SequenceHash = u64; type SequenceHash = u64;
/// Seed for xxHash3 computation (must match the indexer's seed)
const XXH3_SEED: u64 = 1337;
/// Compute a LocalBlockHash from token IDs (content only) /// Compute a LocalBlockHash from token IDs (content only)
fn compute_local_block_hash(token_ids: &[u32]) -> LocalBlockHash { fn compute_local_block_hash(token_ids: &[u32]) -> LocalBlockHash {
let bytes: Vec<u8> = token_ids let bytes: Vec<u8> = token_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment