Unverified commit 877aec85, authored by Yuhao Tsui, committed by GitHub
Browse files

Merge branch 'kvcache-ai:main' into main

parents 84164f58 9037bf30
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
**/ **/
// Python bindings // Python bindings
#include "cpu_backend/cpuinfer.h" #include "cpu_backend/cpuinfer.h"
#include "device_launch_parameters.h"
#include "llamafile/flags.h" #include "llamafile/flags.h"
#include "operators/kvcache/kvcache.h" #include "operators/kvcache/kvcache.h"
#include "operators/llamafile/linear.h" #include "operators/llamafile/linear.h"
...@@ -535,16 +536,17 @@ class MOEBindings { ...@@ -535,16 +536,17 @@ class MOEBindings {
const float *weights; const float *weights;
const void *input; const void *input;
void *output; void *output;
int *batch_size_tensor;
}; };
// inner: casts the opaque `args` pointer back to Args and calls
// args_->cpuinfer->enqueue with &MOE::forward plus the fields stored in Args
// (moe, qlen, k, expert_ids, weights, input, output).
// This is a side-by-side diff: the left column is the pre-change call, the
// right column additionally forwards args_->batch_size_tensor to the enqueue call.
// NOTE(review): cpuinfer_interface builds Args with a nullptr first field;
// presumably the cpuinfer pointer is filled in before inner runs — confirm against caller.
static void inner(void *args) { static void inner(void *args) {
Args *args_ = (Args *)args; Args *args_ = (Args *)args;
args_->cpuinfer->enqueue( args_->cpuinfer->enqueue(
&MOE::forward, args_->moe, args_->qlen, args_->k, &MOE::forward, args_->moe, args_->qlen, args_->k,
args_->expert_ids, args_->weights, args_->input, args_->output); args_->expert_ids, args_->weights, args_->input, args_->output, args_->batch_size_tensor);
} }
static std::pair<intptr_t, intptr_t> static std::pair<intptr_t, intptr_t>
cpuinfer_interface(MOE &moe, int qlen, int k, intptr_t expert_ids, cpuinfer_interface(MOE &moe, int qlen, int k, intptr_t expert_ids,
intptr_t weights, intptr_t input, intptr_t output) { intptr_t weights, intptr_t input, intptr_t output, intptr_t batch_size_tensor) {
Args *args = new Args{nullptr, Args *args = new Args{nullptr,
&moe, &moe,
qlen, qlen,
...@@ -552,7 +554,8 @@ class MOEBindings { ...@@ -552,7 +554,8 @@ class MOEBindings {
(const uint64_t *)expert_ids, (const uint64_t *)expert_ids,
(const float *)weights, (const float *)weights,
(const void *)input, (const void *)input,
(void *)output}; (void *)output,
(int *)batch_size_tensor};
return std::make_pair((intptr_t)&inner, (intptr_t)args); return std::make_pair((intptr_t)&inner, (intptr_t)args);
} }
}; };
...@@ -679,4 +682,4 @@ PYBIND11_MODULE(cpuinfer_ext, m) { ...@@ -679,4 +682,4 @@ PYBIND11_MODULE(cpuinfer_ext, m) {
cpuinfer_interface) cpuinfer_interface)
.def("calc_anchor_all_layers", .def("calc_anchor_all_layers",
&KVCacheBindings::CalcAnchorAllLayersBindinds::cpuinfer_interface); &KVCacheBindings::CalcAnchorAllLayersBindinds::cpuinfer_interface);
} }
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment