Commit 8747c099 authored by TangJingqi

update yaml example; update version idx; update docker file

parent 6735beb5
@@ -25,7 +25,7 @@ rm -rf /var/lib/apt/lists/* &&
 cd ktransformers &&
 git submodule init &&
 git submodule update &&
-pip install ninja pyproject numpy &&
+pip install ninja pyproject numpy cpufeature &&
 pip install flash-attn &&
 CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
 pip cache purge

@@ -5,7 +5,7 @@ Description :
 Author : kkk1nak0
 Date : 2024-08-15 07:34:46
 Version : 1.0.0
-LastEditors : chenxl
-LastEditTime : 2024-08-28 15:19:03
+LastEditors : Azure-Tang
+LastEditTime : 2024-08-29 22:35:51
 '''
-__version__ = "0.1.3"
+__version__ = "0.1.4"
\ No newline at end of file

@@ -7,7 +7,7 @@
       prefill_device: "cpu"
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -15,7 +15,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\."
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -23,7 +23,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\."
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -31,7 +31,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\."
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -40,7 +40,7 @@
       prefill_device: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -50,7 +50,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -60,7 +60,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -70,7 +70,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -81,7 +81,7 @@
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE  # mlp module with custom forward function
@@ -89,7 +89,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE  # mlp module with custom forward function
@@ -97,7 +97,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE  # mlp module with custom forward function
@@ -105,7 +105,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE  # mlp module with custom forward function
@@ -114,7 +114,7 @@
       prefill_device: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts  # custom MoE Kernel with expert paralleism
     kwargs:
@@ -125,7 +125,7 @@
       out_device: "cuda:0"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts  # custom MoE Kernel with expert paralleism
     kwargs:
@@ -136,7 +136,7 @@
       out_device: "cuda:1"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts  # custom MoE Kernel with expert paralleism
     kwargs:
@@ -147,7 +147,7 @@
       out_device: "cuda:2"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts  # custom MoE Kernel with expert paralleism
     kwargs:
@@ -159,28 +159,28 @@
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention  # optimized MLA implementation
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention  # optimized MLA implementation
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention  # optimized MLA implementation
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention  # optimized MLA implementation
     kwargs:
@@ -194,35 +194,35 @@
     kwargs:
       per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
       transfer_map:
-        10: "cuda:1"
-        20: "cuda:2"
-        30: "cuda:3"
+        15: "cuda:1"
+        30: "cuda:2"
+        45: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "(^model\\.layers\\.([1][0-9])\\.)"
+    name: "(^model\\.layers\\.([2][0-9]|[1][5-9])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "(^model\\.layers\\.([2][0-9])\\.)"
+    name: "(^model\\.layers\\.([3][0-9]|[4][0-4])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "(^model\\.layers\\.([345][0-9])\\.)|(^model.norm)|(^lm_head)"
+    name: "(^model\\.layers\\.([5][0-9]|[4][5-9])\\.)|(^model.norm)|(^lm_head)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:3"
       prefill_device: "cuda:3"
\ No newline at end of file
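
The regex changes in this rule file rebalance DeepSeek-V2's 60 decoder layers from the previous split of 0-9, 10-19, 20-29, and 30-59 (which left cuda:3 carrying half the model) to an even 15 layers per GPU, and the transfer_map boundaries move to 15/30/45 to match. As a quick sanity check, not part of the commit, the following sketch enumerates which layer indices each new pattern captures, assuming module names of the form model.layers.<idx>.:

    import re

    # Per-device layer patterns from the updated 4-GPU rule file above.
    patterns = {
        "cuda:0": r"^model\.layers\.([0-9]|[1][0-4])\.",
        "cuda:1": r"^model\.layers\.([2][0-9]|[1][5-9])\.",
        "cuda:2": r"^model\.layers\.([3][0-9]|[4][0-4])\.",
        "cuda:3": r"^model\.layers\.([5][0-9]|[4][5-9])\.",
    }

    for device, pattern in patterns.items():
        layers = [i for i in range(60) if re.match(pattern, f"model.layers.{i}.")]
        print(device, f"layers {layers[0]}-{layers[-1]} ({len(layers)} layers)")

    # Prints an even partition:
    #   cuda:0 layers 0-14 (15 layers)
    #   cuda:1 layers 15-29 (15 layers)
    #   cuda:2 layers 30-44 (15 layers)
    #   cuda:3 layers 45-59 (15 layers)
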
@@ -24,7 +24,7 @@
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -35,7 +35,7 @@
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$"  # regular expression
+    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn\\.kv_b_proj).*$"  # regular expression
     class: torch.nn.Linear  # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
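
Both rule files also narrow the negative lookahead from (?!self_attn) to (?!self_attn\.kv_b_proj): the per-layer Linear rule now reaches the other attention projection matrices and excludes only kv_b_proj, presumably because the optimized MLA attention reads that weight directly and needs it left as a stock torch.nn.Linear. A small illustration of the difference, using hypothetical submodule names that are not part of the commit:

    import re

    # Old vs. new pattern from the 2-GPU rule file (layer group 0-29 shown).
    old = r"^model\.layers\.(0|[1-9]|[12][0-9])\.(?!self_attn).*$"
    new = r"^model\.layers\.(0|[1-9]|[12][0-9])\.(?!self_attn\.kv_b_proj).*$"

    # Hypothetical submodule names, used purely for illustration.
    names = [
        "model.layers.3.self_attn.q_a_proj",   # attention linear
        "model.layers.3.self_attn.kv_b_proj",  # stays a stock torch.nn.Linear
        "model.layers.3.mlp.gate_proj",        # MLP linear
    ]

    for name in names:
        print(f"{name}: old={bool(re.match(old, name))} new={bool(re.match(new, name))}")

    # old=False new=True   -> q_a_proj is now eligible for the KTransformersLinear replacement
    # old=False new=False  -> kv_b_proj remains excluded
    # old=True  new=True   -> MLP linears were and are replaced
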