Unverified Commit e361d6fa authored by Yan Ru Pei's avatar Yan Ru Pei Committed by GitHub
Browse files

fix: local block hash consistency in mooncake bench (#6310)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 25b769e5
......@@ -50,6 +50,7 @@ rstest_reuse = "0.7.0"
serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "time"] }
dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true }
minstant = "0.1.7"
futures = "0.3"
......
......@@ -7,7 +7,9 @@ use dynamo_kv_router::indexer::{
KvIndexer, KvIndexerInterface, KvIndexerMetrics, KvIndexerSharded,
};
use dynamo_kv_router::protocols::RouterEvent;
use dynamo_kv_router::protocols::XXH3_SEED;
use dynamo_kv_router::{ConcurrentRadixTree, PositionalIndexer, ThreadPoolIndexer};
use dynamo_tokens::compute_hash_v2;
use rand::prelude::*;
use std::fs::File;
use std::io::{BufRead, BufReader};
......@@ -254,6 +256,15 @@ fn tokens_from_request(request: &MooncakeRequest, block_size: u32) -> Vec<u32> {
.collect()
}
/// Compute the `LocalBlockHash` for a block-level `hash_id` the same way the mock
/// engine does: expand to `block_size` repeated u32 tokens, then XXH3 hash.
///
/// The token value is `hash_id` cast to `u32`, so only its low 32 bits take part
/// in the hash. The hashed input is the in-memory (native-endian) byte image of
/// `block_size` copies of that token, i.e. `4 * block_size` bytes, hashed with
/// `XXH3_SEED`.
fn local_block_hash_from_id(hash_id: u64, block_size: u32) -> LocalBlockHash {
    // Narrowing cast: keeps only the low 32 bits of the id, since tokens are `u32`.
    let token = hash_id as u32;

    // `to_ne_bytes` yields exactly the bytes a `[u32]` buffer holds in memory, so
    // repeating them `block_size` times reproduces the byte view of a token
    // vector without needing an `unsafe` pointer reinterpretation.
    let bytes: Vec<u8> = token.to_ne_bytes().repeat(block_size as usize);

    LocalBlockHash(compute_hash_v2(bytes.as_slice(), XXH3_SEED))
}
/// Create a styled progress bar, optionally with a known total length.
fn make_progress_bar(total: Option<u64>) -> ProgressBar {
let progress = match total {
......@@ -418,7 +429,7 @@ fn prepare_worker_traces(
request
.hash_ids
.iter()
.map(|id| LocalBlockHash(*id))
.map(|id| local_block_hash_from_id(*id, args.block_size))
.collect(),
),
})
......
......@@ -17,15 +17,14 @@
use std::collections::{HashMap, HashSet};
use dynamo_kv_router::protocols::XXH3_SEED;
/// LocalBlockHash type (content hash from tokens only)
type LocalBlockHash = u64;
/// SequenceHash type (position-aware hash, includes parent context)
type SequenceHash = u64;
/// Seed for xxHash3 computation (must match the indexer's seed)
const XXH3_SEED: u64 = 1337;
/// Compute a LocalBlockHash from token IDs (content only)
fn compute_local_block_hash(token_ids: &[u32]) -> LocalBlockHash {
let bytes: Vec<u8> = token_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment