Unverified Commit a41d2163 authored by wang jiahao's avatar wang jiahao Committed by GitHub
Browse files

Merge pull request #1013 from kvcache-ai/work-concurrent

In version 0.2.4, we’ve added the highly requested multi-concurrency support for the community through a major refactor of the whole architecture.
parents f142f4df 4ed9744e
......@@ -151,4 +151,4 @@ void Backend::worker_thread(int thread_id) {
return;
}
}
}
}
\ No newline at end of file
......@@ -28,7 +28,7 @@
#include "backend.h"
#include "task_queue.h"
#include "../vendors/vendor.h"
#include "./vendors/vendor.h"
#include "llama.cpp/ggml-impl.h"
......
......@@ -68,4 +68,4 @@ PYBIND11_MODULE(KTransformersOps, m) {
py::arg("perm"), py::arg("workspace"), py::arg("num_bits"), py::arg("size_m"),
py::arg("size_n"), py::arg("size_k"), py::arg("is_k_full"));
#endif
}
}
\ No newline at end of file
......@@ -879,4 +879,4 @@ torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const i
}
cudaDeviceSynchronize();
return output;
}
}
\ No newline at end of file
......@@ -19,4 +19,4 @@ torch::Tensor dequantize_q5_k(const int8_t* data, const int num_bytes, const int
torch::Tensor dequantize_q4_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
torch::Tensor dequantize_q3_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
torch::Tensor dequantize_q2_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment