Unverified commit 4af0f92e, authored by Tanmay Verma, committed by GitHub
Browse files

fix: Use deepgemm backend for qwen3-235b recipe (#7204)

parent 819661b3
...@@ -24,6 +24,9 @@ data: ...@@ -24,6 +24,9 @@ data:
max_batch_size: 128 max_batch_size: 128
disable_overlap_scheduler: false disable_overlap_scheduler: false
print_iter_log: false print_iter_log: false
moe_config:
backend: DEEPGEMM
max_num_tokens: 8192
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment