Unverified commit a41d2163, authored by wang jiahao, committed by GitHub

Merge pull request #1013 from kvcache-ai/work-concurrent

In v0.2.4, we've added the highly desired multi-concurrency support through a major refactor of the whole architecture.
parents f142f4df 4ed9744e
@@ -151,4 +151,4 @@ void Backend::worker_thread(int thread_id) {
       return;
     }
   }
 }
\ No newline at end of file
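The commit message describes multi-concurrency support built around the CPU backend's worker pool, and the hunk above ends inside Backend::worker_thread, the per-thread entry point of that pool. Purely as an illustration of the usual pattern, not the actual KTransformers implementation, a condition-variable-driven worker loop can be sketched as follows; the mutex_, cv_, tasks_, and stop_ members are hypothetical names invented for the sketch:

```cpp
// Minimal sketch of a worker-pool loop, assuming a simple locked queue.
// Member names (mutex_, cv_, tasks_, stop_) are hypothetical, not from this repo.
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>

class Backend {
public:
    void worker_thread(int thread_id) {
        while (true) {
            std::function<void()> task;
            {
                std::unique_lock<std::mutex> lock(mutex_);
                // Sleep until there is work or the pool is shutting down.
                cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
                if (stop_ && tasks_.empty()) {
                    return;  // exit the worker when the pool shuts down
                }
                task = std::move(tasks_.front());
                tasks_.pop();
            }
            task();  // run outside the lock so other workers can keep dequeuing
        }
    }

private:
    std::mutex mutex_;
    std::condition_variable cv_;
    std::queue<std::function<void()>> tasks_;
    bool stop_ = false;
};
```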
@@ -28,7 +28,7 @@
 #include "backend.h"
 #include "task_queue.h"
-#include "../vendors/vendor.h"
+#include "./vendors/vendor.h"
 #include "llama.cpp/ggml-impl.h"
...
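backend.h and task_queue.h are the two headers behind the concurrent execution path this PR introduces. The real task_queue.h is not shown in this diff, so the sketch below is entirely hypothetical; it only illustrates what a submit-then-sync style task queue interface can look like, with a single-threaded stand-in where the real implementation would hand work to the Backend's worker threads:

```cpp
// Hypothetical sketch of a submit/sync task queue API; not the actual task_queue.h.
#include <functional>
#include <future>
#include <memory>
#include <vector>

class TaskQueue {
public:
    // Hand a unit of work to the pool and get a handle to wait on.
    std::future<void> enqueue(std::function<void()> task) {
        auto pkg = std::make_shared<std::packaged_task<void()>>(std::move(task));
        std::future<void> fut = pkg->get_future();
        pending_.emplace_back([pkg] { (*pkg)(); });
        return fut;
    }

    // Drain everything that has been enqueued (single-threaded stand-in for
    // a real worker pool, which would run these on the backend's threads).
    void sync() {
        for (auto& t : pending_) t();
        pending_.clear();
    }

private:
    std::vector<std::function<void()>> pending_;
};
```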
@@ -68,4 +68,4 @@ PYBIND11_MODULE(KTransformersOps, m) {
         py::arg("perm"), py::arg("workspace"), py::arg("num_bits"), py::arg("size_m"),
         py::arg("size_n"), py::arg("size_k"), py::arg("is_k_full"));
 #endif
 }
\ No newline at end of file
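The hunk above sits at the end of the pybind11 module that exposes the native ops to Python, with py::arg giving each parameter a keyword name. As a hedged illustration of that binding shape only: the module name, the repack_stub function, and its body below are placeholders; just the argument names mirror the ones visible in the diff.

```cpp
// Illustrative pybind11 binding using py::arg keyword names; the function body
// is a placeholder, not the real KTransformersOps kernel.
#include <pybind11/pybind11.h>
#include <torch/extension.h>

namespace py = pybind11;

torch::Tensor repack_stub(torch::Tensor weight, torch::Tensor perm,
                          torch::Tensor workspace, int64_t num_bits,
                          int64_t size_m, int64_t size_n, int64_t size_k,
                          bool is_k_full) {
    // Placeholder: a real kernel would repack `weight` into the target layout.
    (void)perm; (void)workspace; (void)num_bits;
    (void)size_m; (void)size_n; (void)size_k; (void)is_k_full;
    return weight;
}

PYBIND11_MODULE(example_ops, m) {
    m.def("repack_stub", &repack_stub, "Placeholder repack op",
          py::arg("weight"), py::arg("perm"), py::arg("workspace"),
          py::arg("num_bits"), py::arg("size_m"), py::arg("size_n"),
          py::arg("size_k"), py::arg("is_k_full"));
}
```

From Python, such a binding is then callable with keywords, e.g. example_ops.repack_stub(weight=w, perm=p, workspace=ws, num_bits=4, size_m=m, size_n=n, size_k=k, is_k_full=True).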
@@ -879,4 +879,4 @@ torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const i
     }
     cudaDeviceSynchronize();
     return output;
 }
\ No newline at end of file
@@ -19,4 +19,4 @@ torch::Tensor dequantize_q5_k(const int8_t* data, const int num_bytes, const int
 torch::Tensor dequantize_q4_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
 torch::Tensor dequantize_q3_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
 torch::Tensor dequantize_q2_k(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
 torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const int blk_size, const int ele_per_blk, const torch::Device device, const torch::Dtype target_dtype);
\ No newline at end of file
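This header declares one dequantize entry point per GGUF quant type, all with the same signature; the cudaDeviceSynchronize() in the earlier hunk suggests these wrappers block until the kernel has finished before handing back the output tensor. A hedged sketch of how a caller might invoke one of them is below; the wrapper function and the Q4_K block geometry (144 bytes and 256 elements per block, the usual GGUF layout) are assumptions for illustration, not taken from the repo:

```cpp
// Illustrative caller for the dequantize entry points declared above.
// dequantize_q4_k_tensor and the block constants are hypothetical.
#include <cstdint>
#include <torch/torch.h>

torch::Tensor dequantize_q4_k(const int8_t* data, const int num_bytes,
                              const int blk_size, const int ele_per_blk,
                              const torch::Device device,
                              const torch::Dtype target_dtype);

torch::Tensor dequantize_q4_k_tensor(const torch::Tensor& raw_blocks) {
    // raw_blocks: a 1-D int8 CPU tensor holding packed Q4_K blocks.
    const int num_bytes = static_cast<int>(raw_blocks.numel());
    const int blk_size = 144;     // assumed bytes per Q4_K block
    const int ele_per_blk = 256;  // assumed elements per Q4_K block
    return dequantize_q4_k(raw_blocks.data_ptr<int8_t>(), num_bytes,
                           blk_size, ele_per_blk,
                           torch::Device(torch::kCUDA, 0), torch::kHalf);
}
```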