Unverified Commit f30c6482 authored by Azure's avatar Azure Committed by GitHub
Browse files

Merge pull request #151 from kvcache-ai/update-yaml

[update] Update marlin expert yaml example to fully use gpu.
parents a2fc2a86 0564ac84
...@@ -79,6 +79,24 @@ ...@@ -79,6 +79,24 @@
generate_device: "cuda:1" generate_device: "cuda:1"
prefill_device: "cuda:1" prefill_device: "cuda:1"
# Marlin experts on GPU 0: select the MoE experts module of decoder layers
# 0-4 (the alternation matches "0" or a single digit 1-4) and replace it
# with KTransformersExperts using the KExpertsMarlin generate op on cuda:0.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# `recursive` is assumed to be a rule-level key beside match/replace - confirm.
- match:
    name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$"
  replace:
    class: "ktransformers.operators.experts.KTransformersExperts"
    kwargs:
      generate_op: "KExpertsMarlin"
      generate_device: "cuda:0"
  recursive: false
# Marlin experts on GPU 1: select the MoE experts module of decoder layer 30
# and replace it with KTransformersExperts using the KExpertsMarlin generate
# op on cuda:1.
# The pattern ([3][0]) matches the literal "30" only. The earlier comment
# advertised "layer 30~31", which this regex does not cover.
# NOTE(review): if layer 31 is also meant to use Marlin, widen the pattern to
# ([3][0-1]) - confirm there is enough GPU memory on cuda:1 first.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# `recursive` is assumed to be a rule-level key beside match/replace - confirm.
- match:
    name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$"
  replace:
    class: "ktransformers.operators.experts.KTransformersExperts"
    kwargs:
      generate_device: "cuda:1"
      generate_op: "KExpertsMarlin"
  recursive: false
- match: - match:
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$" name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
replace: replace:
...@@ -139,5 +157,5 @@ ...@@ -139,5 +157,5 @@
replace: replace:
class: "default" class: "default"
kwargs: kwargs:
generate_device: "cuda:1" generate_device: "cuda:0"
prefill_device: "cuda:1" prefill_device: "cuda:0"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment