Unverified commit f712653e, authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: use rolling hasher for sin_synth.py (#3514)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
parent db192a35
...@@ -17,8 +17,10 @@ import argparse ...@@ -17,8 +17,10 @@ import argparse
import json import json
import logging import logging
import math import math
import random
import numpy as np import numpy as np
from prefix_data_generator.hasher import RollingHasher
from tqdm import tqdm from tqdm import tqdm
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
...@@ -38,7 +40,8 @@ def main(args): ...@@ -38,7 +40,8 @@ def main(args):
else: else:
return (args.isl2, args.osl2) return (args.isl2, args.osl2)
total_hash_ids = np.arange(args.total_blocks) rolling_hasher = RollingHasher()
for t in tqdm(range(0, args.time_duration, args.process_interval)): for t in tqdm(range(0, args.time_duration, args.process_interval)):
t_e = min(t + args.process_interval, args.time_duration) t_e = min(t + args.process_interval, args.time_duration)
request_rate = (args.request_rate_min + args.request_rate_max) / 2 + ( request_rate = (args.request_rate_min + args.request_rate_max) / 2 + (
...@@ -49,14 +52,17 @@ def main(args): ...@@ -49,14 +52,17 @@ def main(args):
for req_idx in range(num_requests): for req_idx in range(num_requests):
t_req = t + (t_e - t) * req_idx / num_requests t_req = t + (t_e - t) * req_idx / num_requests
isl, osl = get_isl_osl(t_req) isl, osl = get_isl_osl(t_req)
hash_ids = [
(random.randrange(args.total_blocks),)
for _ in range(math.ceil(isl / args.block_size))
]
rolling_hash_ids = rolling_hasher(hash_ids)
output_data.append( output_data.append(
{ {
"timestamp": int(t_req * 1000), # in ms, integer "timestamp": int(t_req * 1000), # in ms, integer
"input_length": isl, "input_length": isl,
"output_length": osl, "output_length": osl,
"hash_ids": np.random.choice( "hash_ids": rolling_hash_ids,
total_hash_ids, size=math.ceil(isl / args.block_size)
).tolist(),
} }
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment