"git@developer.sourcefind.cn:orangecat/ollama.git" did not exist on "4dcceeffb7ad9bd656c11c43c8112195f78e6bf8"
Commit 7b12d9b7 authored by danyao12's avatar danyao12
Browse files

some kernels and related api update

parent d4de8495
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -11,13 +11,17 @@ set -x ...@@ -11,13 +11,17 @@ set -x
for prec in "fp16" "bf16" ; do for prec in "fp16" "bf16" ; do
for perm in 0 1 ; do for perm in 0 1 ; do
for hdim in 128 ; do for hdim in 128 ; do
for v3_atomic_fp32 in 0 1 ; do
for v3_rtz_cvt in 0 1 ; do
for mask in 0 1 ; do for mask in 0 1 ; do
$EXE -prec=$prec -b=2 -h=4 -h_k=2 -d=$hdim -s=512 -iperm=$perm -operm=$perm -mask=$mask -bwd_v3=1 -v3_atomic_fp32=0 -v3_rtz_cvt=1 -mode=0 -kname=$KNAME $COMMON_ARGS $EXE -prec=$prec -b=2 -h=4 -h_k=2 -d=$hdim -s=512 -iperm=$perm -operm=$perm -mask=$mask -bwd_v3=1 -v3_atomic_fp32=$v3_atomic_fp32 -v3_rtz_cvt=$v3_rtz_cvt -mode=0 -kname=$KNAME $COMMON_ARGS
$EXE -prec=$prec -b=1 -h=3 -h_k=1 -d=$hdim -s=768 -iperm=$perm -operm=$perm -mask=$mask -bwd_v3=1 -v3_atomic_fp32=0 -v3_rtz_cvt=1 -mode=0 -kname=$KNAME $COMMON_ARGS $EXE -prec=$prec -b=1 -h=3 -h_k=1 -d=$hdim -s=768 -iperm=$perm -operm=$perm -mask=$mask -bwd_v3=1 -v3_atomic_fp32=$v3_atomic_fp32 -v3_rtz_cvt=$v3_rtz_cvt -mode=0 -kname=$KNAME $COMMON_ARGS
done done
done done
done done
done done
done
done
set +x set +x
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment