Unverified commit f8213242, authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: update profile script (#1336)

parent 6f0ee60d
...@@ -60,6 +60,7 @@ logger.addHandler(console_handler) ...@@ -60,6 +60,7 @@ logger.addHandler(console_handler)
def get_dynamo_serve_cmd(config_file_path): def get_dynamo_serve_cmd(config_file_path):
config_file_path = os.path.abspath(config_file_path)
return [ return [
"dynamo", "dynamo",
"serve", "serve",
...@@ -82,8 +83,6 @@ def _get_common_genai_perf_cmd( ...@@ -82,8 +83,6 @@ def _get_common_genai_perf_cmd(
model, model,
"--tokenizer", "--tokenizer",
model, model,
"--service-kind",
"openai",
"--endpoint-type", "--endpoint-type",
"chat", "chat",
"--endpoint", "--endpoint",
...@@ -176,12 +175,6 @@ def get_decode_genai_perf_cmd( ...@@ -176,12 +175,6 @@ def get_decode_genai_perf_cmd(
def convert_config(config: dict, target: Literal["prefill", "decode"]) -> dict: def convert_config(config: dict, target: Literal["prefill", "decode"]) -> dict:
config = config.copy() config = config.copy()
# all profiles runs with a single prefill/decode worker, hence router doesn't matter
if "Common" in config and "router" in config["Common"]:
config["Common"]["router"] = "round-robin"
else:
config["Processor"]["router"] = "round-robin"
# disable planner # disable planner
if "Planner" in config: if "Planner" in config:
config["Planner"]["no-operation"] = True config["Planner"]["no-operation"] = True
...@@ -353,7 +346,16 @@ def get_kv_cache_size_from_dynamo_log(dynamo_log_fn: str) -> int: ...@@ -353,7 +346,16 @@ def get_kv_cache_size_from_dynamo_log(dynamo_log_fn: str) -> int:
def get_gap_result(artifact_dir: str) -> dict:
    """Load the ``profile_export_genai_perf.json`` found under *artifact_dir*.

    The directory tree rooted at ``artifact_dir`` is searched recursively and
    the first file named ``profile_export_genai_perf.json`` is parsed as JSON.

    Args:
        artifact_dir: Root directory to search.

    Returns:
        The decoded JSON content of the export file.

    Raises:
        FileNotFoundError: If no ``profile_export_genai_perf.json`` exists
            anywhere under ``artifact_dir``.
    """
    target_name = "profile_export_genai_perf.json"
    json_file_path = None
    for root, dirs, files in os.walk(artifact_dir):
        # os.walk yields entries in arbitrary (filesystem) order; sorting the
        # directory list in place makes the traversal, and therefore which
        # file wins when several exist, deterministic.
        dirs.sort()
        if target_name in files:
            json_file_path = os.path.join(root, target_name)
            break
    if json_file_path is None:
        raise FileNotFoundError(
            f"profile_export_genai_perf.json not found in {artifact_dir}"
        )
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)
...@@ -432,9 +434,22 @@ def benchmark_decode(isl, osl, num_request, genai_perf_artifact_dir, model_name, ...@@ -432,9 +434,22 @@ def benchmark_decode(isl, osl, num_request, genai_perf_artifact_dir, model_name,
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument(
"--backend",
type=str,
default="vllm_v0",
choices=["vllm_v0"],
help="backend type (currently only vllm is supported)",
)
parser.add_argument( parser.add_argument(
"--config", type=str, required=True, help="Path to the dynamo config file" "--config", type=str, required=True, help="Path to the dynamo config file"
) )
parser.add_argument(
"--example-dir",
type=str,
default=None,
help="path to the example directory, if not provided, will try to infer from config file location",
)
parser.add_argument( parser.add_argument(
"--output-dir", "--output-dir",
type=str, type=str,
...@@ -451,7 +466,7 @@ if __name__ == "__main__": ...@@ -451,7 +466,7 @@ if __name__ == "__main__":
"--ttft", type=int, default=50, help="target Time To First Token in ms" "--ttft", type=int, default=50, help="target Time To First Token in ms"
) )
parser.add_argument( parser.add_argument(
"--itl", type=int, default=5, help="target Inter Token Latency in ms" "--itl", type=int, default=10, help="target Inter Token Latency in ms"
) )
# below are arguments used for interpolating TTFT and ITL under different ISL/OSL # below are arguments used for interpolating TTFT and ITL under different ISL/OSL
parser.add_argument( parser.add_argument(
...@@ -474,6 +489,18 @@ if __name__ == "__main__": ...@@ -474,6 +489,18 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
if args.example_dir is None:
logger.info(
"Example directory not provided, inferring from config file location..."
)
try:
args.example_dir = os.path.dirname(os.path.dirname(args.config))
except Exception:
logger.error(
"Failed to infer example directory, please provide explicitly using --example-dir <path-to-example-dir>"
)
exit(1)
with open(args.config, "r") as f: with open(args.config, "r") as f:
config = yaml.safe_load(f) config = yaml.safe_load(f)
...@@ -516,6 +543,7 @@ if __name__ == "__main__": ...@@ -516,6 +543,7 @@ if __name__ == "__main__":
stdout=dynamo_log_f, stdout=dynamo_log_f,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
text=True, text=True,
cwd=args.example_dir,
preexec_fn=os.setsid, # Use process group for clean termination preexec_fn=os.setsid, # Use process group for clean termination
) )
...@@ -595,6 +623,7 @@ if __name__ == "__main__": ...@@ -595,6 +623,7 @@ if __name__ == "__main__":
stdout=dynamo_log_f, stdout=dynamo_log_f,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
text=True, text=True,
cwd=args.example_dir,
preexec_fn=os.setsid, # Use process group for clean termination preexec_fn=os.setsid, # Use process group for clean termination
) )
...@@ -721,10 +750,11 @@ if __name__ == "__main__": ...@@ -721,10 +750,11 @@ if __name__ == "__main__":
prefill_config = set_config_tp_size(prefill_config, tp_size) prefill_config = set_config_tp_size(prefill_config, tp_size)
logger.info(f"Dynamo config: {prefill_config}") logger.info(f"Dynamo config: {prefill_config}")
work_dir = f"{args.output_dir}/prefill_tp{tp_size}_interpolation" work_dir = f"{args.output_dir}/selected_prefill_interpolation"
os.makedirs(work_dir, exist_ok=True) os.makedirs(work_dir, exist_ok=True)
prefill_config_fn = f"{work_dir}/config.yaml" prefill_config_fn = f"{work_dir}/config.yaml"
dynamo_log_fn = f"{work_dir}/dynamo.log" dynamo_log_fn = f"{work_dir}/dynamo.log"
with open(prefill_config_fn, "w") as f: with open(prefill_config_fn, "w") as f:
yaml.dump(prefill_config, f) yaml.dump(prefill_config, f)
...@@ -738,6 +768,7 @@ if __name__ == "__main__": ...@@ -738,6 +768,7 @@ if __name__ == "__main__":
stdout=dynamo_log_f, stdout=dynamo_log_f,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
text=True, text=True,
cwd=args.example_dir,
preexec_fn=os.setsid, # Use process group for clean termination preexec_fn=os.setsid, # Use process group for clean termination
) )
...@@ -771,6 +802,14 @@ if __name__ == "__main__": ...@@ -771,6 +802,14 @@ if __name__ == "__main__":
prefill_ttft_np = np.array(prefill_ttft) prefill_ttft_np = np.array(prefill_ttft)
prefill_thpt_per_gpu_np = np.array(prefill_thpt_per_gpu) prefill_thpt_per_gpu_np = np.array(prefill_thpt_per_gpu)
save_path = f"{work_dir}/raw_data.npz"
np.savez(
save_path,
prefill_isl=prefill_isl_np,
prefill_ttft=prefill_ttft_np,
prefill_thpt_per_gpu=prefill_thpt_per_gpu_np,
)
# Fit quadratic functions # Fit quadratic functions
ttft_coeffs = np.polyfit(prefill_isl_np, prefill_ttft_np, 2) ttft_coeffs = np.polyfit(prefill_isl_np, prefill_ttft_np, 2)
thpt_coeffs = np.polyfit(prefill_isl_np, prefill_thpt_per_gpu_np, 2) thpt_coeffs = np.polyfit(prefill_isl_np, prefill_thpt_per_gpu_np, 2)
...@@ -844,7 +883,7 @@ if __name__ == "__main__": ...@@ -844,7 +883,7 @@ if __name__ == "__main__":
decode_config = set_config_tp_size(decode_config, best_decode_tp) decode_config = set_config_tp_size(decode_config, best_decode_tp)
logger.info(f"Dynamo config: {decode_config}") logger.info(f"Dynamo config: {decode_config}")
work_dir = f"{args.output_dir}/decode_tp{best_decode_tp}_interpolation" work_dir = f"{args.output_dir}/selected_decode_interpolation"
os.makedirs(work_dir, exist_ok=True) os.makedirs(work_dir, exist_ok=True)
decode_config_fn = f"{work_dir}/config.yaml" decode_config_fn = f"{work_dir}/config.yaml"
...@@ -861,6 +900,7 @@ if __name__ == "__main__": ...@@ -861,6 +900,7 @@ if __name__ == "__main__":
stdout=dynamo_log_f, stdout=dynamo_log_f,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
text=True, text=True,
cwd=args.example_dir,
preexec_fn=os.setsid, # Use process group for clean termination preexec_fn=os.setsid, # Use process group for clean termination
) )
...@@ -902,7 +942,7 @@ if __name__ == "__main__": ...@@ -902,7 +942,7 @@ if __name__ == "__main__":
shutdown_deployment(dynamo_process) shutdown_deployment(dynamo_process)
# Save the data points to a .npz file # Save the data points to a .npz file
save_path = f"{work_dir}/decode_tp{tp_size}_data.npz" save_path = f"{work_dir}/raw_data.npz"
np.savez( np.savez(
save_path, save_path,
x_kv_usage=np.array(x_kv_usage), x_kv_usage=np.array(x_kv_usage),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment