Commit 768ab4df authored by xuxzh1's avatar xuxzh1 🎱
Browse files

update

parent 44f79c7e
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -1545,27 +1545,23 @@ static enum ggml_status ggml_metal_graph_compute(
// to the matrix-vector kernel
int ne11_mm_min = 1;
#if 0
// the numbers below are measured on M2 Ultra for 7B and 13B models
// these numbers do not translate to other devices or model sizes
// TODO: need to find a better approach
if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
switch (src0t) {
case GGML_TYPE_F16: ne11_mm_min = 2; break;
case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
case GGML_TYPE_Q5_0: // not tested yet
case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
default: ne11_mm_min = 1; break;
}
switch (src0t) {
case GGML_TYPE_F16: ne11_mm_min = 2; break;
case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
case GGML_TYPE_Q5_0: // not tested yet
case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
default: ne11_mm_min = 1; break;
}
#endif
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment