Unverified Commit 41ce92bb authored by wang jiahao's avatar wang jiahao Committed by GitHub
Browse files

Merge pull request #1084 from kvcache-ai/fix-config

format kvc2, delete quant_configs, move model_configs to ~/.ktransfor…
parents 10fd2e28 64de7843
#include <atomic>
template <typename T>
struct AtomicPtrWithFlag {
template <typename T> struct AtomicPtrWithFlag {
constexpr static uint64_t mask = 1ull << 63;
std::atomic_uint64_t ptr = 0;
std::pair<T*, bool> load(std::memory_order order = std::memory_order_seq_cst) {
std::pair<T *, bool>
load(std::memory_order order = std::memory_order_seq_cst) {
uint64_t val = ptr.load(order);
return {reinterpret_cast<T*>(val & (~mask)), val & mask};
return {reinterpret_cast<T *>(val & (~mask)), val & mask};
}
void store(T* p, bool flag, std::memory_order order = std::memory_order_seq_cst) {
void store(T *p, bool flag,
std::memory_order order = std::memory_order_seq_cst) {
ptr.store(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order);
}
std::pair<T*, bool> exchange(T* p, bool flag, std::memory_order order = std::memory_order_seq_cst) {
uint64_t val = ptr.exchange(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order);
return {reinterpret_cast<T*>(val & (~mask)), val & mask};
std::pair<T *, bool>
exchange(T *p, bool flag,
std::memory_order order = std::memory_order_seq_cst) {
uint64_t val =
ptr.exchange(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order);
return {reinterpret_cast<T *>(val & (~mask)), val & mask};
}
std::pair<T*, bool> touch_load(std::memory_order order = std::memory_order_seq_cst) {
std::pair<T *, bool>
touch_load(std::memory_order order = std::memory_order_seq_cst) {
uint64_t val = ptr.fetch_and(~mask, order);
return {reinterpret_cast<T*>(val & (~mask)), val & mask};
return {reinterpret_cast<T *>(val & (~mask)), val & mask};
}
bool check_flag(std::memory_order order = std::memory_order_seq_cst) { return ptr.load(order) & mask; }
bool check_flag(std::memory_order order = std::memory_order_seq_cst) {
return ptr.load(order) & mask;
}
};
......@@ -19,7 +19,7 @@ namespace csv {
* @param line The CSV line to parse.
* @return A vector of strings, each representing a field in the CSV line.
*/
inline std::vector<std::string> parse_csv_line(const std::string& line) {
inline std::vector<std::string> parse_csv_line(const std::string &line) {
std::vector<std::string> result;
std::string field;
bool in_quotes = false;
......@@ -57,7 +57,8 @@ inline std::vector<std::string> parse_csv_line(const std::string& line) {
* @return A vector of pairs, each containing a column name and a vector of data
* for that column.
*/
inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(const std::string& filename) {
inline std::vector<std::pair<std::string, std::vector<std::string>>>
read_csv(const std::string &filename) {
std::cout << "Reading CSV file: " << filename << std::endl;
// Open the file
std::ifstream file(filename);
......@@ -72,7 +73,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
// Prepare the result vector with column names
std::vector<std::pair<std::string, std::vector<std::string>>> result;
for (const auto& name : column_names) {
for (const auto &name : column_names) {
result.emplace_back(name, std::vector<std::string>());
}
......@@ -84,7 +85,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
// Determine the number of threads to use
unsigned int num_threads = std::thread::hardware_concurrency();
if (num_threads == 0)
num_threads = 4; // Default to 4 threads if hardware_concurrency returns 0
num_threads = 4; // Default to 4 threads if hardware_concurrency returns 0
// Calculate chunk start positions based on content size
std::vector<size_t> chunk_starts;
......@@ -100,14 +101,15 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
++pos;
}
if (pos < content_size) {
++pos; // Skip the newline character
++pos; // Skip the newline character
}
chunk_starts.push_back(pos);
}
chunk_starts.push_back(content_size);
// Create threads to parse each chunk
std::vector<std::vector<std::vector<std::string>>> thread_results(num_threads);
std::vector<std::vector<std::vector<std::string>>> thread_results(
num_threads);
std::vector<std::thread> threads;
for (unsigned int i = 0; i < num_threads; ++i) {
......@@ -133,13 +135,13 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
}
// Wait for all threads to finish
for (auto& t : threads) {
for (auto &t : threads) {
t.join();
}
// Combine the results from all threads into the final result
for (const auto& local_result : thread_results) {
for (const auto& row : local_result) {
for (const auto &local_result : thread_results) {
for (const auto &row : local_result) {
for (size_t i = 0; i < row.size(); ++i) {
if (i < result.size()) {
result[i].second.push_back(row[i]);
......@@ -158,8 +160,9 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
* @param data A vector of pairs, each containing a column name and a vector of
* data for that column.
*/
inline void write_csv(const std::string& filename,
const std::vector<std::pair<std::string, std::vector<std::string>>>& data) {
inline void write_csv(
const std::string &filename,
const std::vector<std::pair<std::string, std::vector<std::string>>> &data) {
std::cout << "Writing CSV file: " << filename << std::endl;
// Open the file for writing
......@@ -170,10 +173,10 @@ inline void write_csv(const std::string& filename,
// Check that all columns have the same number of rows
if (data.empty()) {
return; // Nothing to write
return; // Nothing to write
}
size_t num_rows = data[0].second.size();
for (const auto& column : data) {
for (const auto &column : data) {
if (column.second.size() != num_rows) {
throw std::runtime_error("All columns must have the same number of rows");
}
......@@ -191,7 +194,7 @@ inline void write_csv(const std::string& filename,
// Write the data rows
for (size_t row = 0; row < num_rows; ++row) {
for (size_t col = 0; col < data.size(); ++col) {
const std::string& field = data[col].second[row];
const std::string &field = data[col].second[row];
// Handle CSV escaping
std::string escaped_field = field;
bool needs_quotes = false;
......@@ -204,7 +207,8 @@ inline void write_csv(const std::string& filename,
pos += 2;
}
}
if (escaped_field.find(',') != std::string::npos || escaped_field.find('\n') != std::string::npos) {
if (escaped_field.find(',') != std::string::npos ||
escaped_field.find('\n') != std::string::npos) {
needs_quotes = true;
}
if (needs_quotes) {
......@@ -220,6 +224,6 @@ inline void write_csv(const std::string& filename,
}
}
} // namespace csv
} // namespace csv
#endif // CSV_READER_HPP
#endif // CSV_READER_HPP
......@@ -2,15 +2,14 @@
#include <string>
#include <vector>
template <typename T>
std::string format_vector(const std::vector<T>& v) {
template <typename T> std::string format_vector(const std::vector<T> &v) {
std::ostringstream oss;
if (v.empty())
return "[]";
for (size_t i = 0; i < v.size(); ++i) {
oss << v[i];
if (i < v.size() - 1)
oss << ", "; // 逗号分隔
oss << ", "; // 逗号分隔
}
return oss.str();
}
......@@ -4,32 +4,31 @@
#include <optional>
#include <semaphore>
template <typename T>
class MPSCQueue {
template <typename T> class MPSCQueue {
struct Node {
T data;
std::atomic<Node*> next;
std::atomic<Node *> next;
Node() : next(nullptr) {}
Node(T data_) : data(std::move(data_)), next(nullptr) {}
};
std::atomic<Node*> head;
Node* tail;
std::atomic<Node *> head;
Node *tail;
public:
public:
std::atomic_size_t enqueue_count = 0;
size_t dequeue_count = 0;
MPSCQueue() {
Node* dummy = new Node();
Node *dummy = new Node();
head.store(dummy, std::memory_order_seq_cst);
tail = dummy;
}
~MPSCQueue() {
Node* node = tail;
Node *node = tail;
while (node) {
Node* next = node->next.load(std::memory_order_seq_cst);
Node *next = node->next.load(std::memory_order_seq_cst);
delete node;
node = next;
}
......@@ -38,14 +37,14 @@ class MPSCQueue {
// 生产者调用
void enqueue(T data) {
enqueue_count.fetch_add(1);
Node* node = new Node(std::move(data));
Node* prev_head = head.exchange(node, std::memory_order_seq_cst);
Node *node = new Node(std::move(data));
Node *prev_head = head.exchange(node, std::memory_order_seq_cst);
prev_head->next.store(node, std::memory_order_seq_cst);
}
// 消费者调用
std::optional<T> dequeue() {
Node* next = tail->next.load(std::memory_order_seq_cst);
Node *next = tail->next.load(std::memory_order_seq_cst);
if (next) {
T res = std::move(next->data);
delete tail;
......@@ -59,16 +58,16 @@ class MPSCQueue {
size_t size() { return enqueue_count.load() - dequeue_count; }
};
template <typename T>
class MPSCQueueConsumerLock {
template <typename T> class MPSCQueueConsumerLock {
MPSCQueue<T> queue;
std::counting_semaphore<> sema{0};
public:
public:
void enqueue(T data) {
queue.enqueue(std::move(data));
// std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this because the memory order might be wrong, I
// am also not that sure about this.
// std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this
// because the memory order might be wrong, I am also not that sure about
// this.
sema.release();
}
......@@ -76,8 +75,10 @@ class MPSCQueueConsumerLock {
auto re = queue.dequeue();
if (re.has_value()) {
while (sema.try_acquire() == false) {
std::cerr << __FILE__ << ":" << __FUNCTION__ << " sema try acquire should be success, retrying, please check"
<< std::endl;
std::cerr
<< __FILE__ << ":" << __FUNCTION__
<< " sema try acquire should be success, retrying, please check"
<< std::endl;
// assert(false);
}
return re.value();
......@@ -91,8 +92,10 @@ class MPSCQueueConsumerLock {
auto re = queue.dequeue();
if (re.has_value()) {
while (sema.try_acquire() == false) {
std::cerr << __FILE__ << ":" << __FUNCTION__ << " sema try acquire should be success, retrying, please check"
<< std::endl;
std::cerr
<< __FILE__ << ":" << __FUNCTION__
<< " sema try acquire should be success, retrying, please check"
<< std::endl;
// assert(false);
}
return re.value();
......
......@@ -7,59 +7,71 @@
#include <unordered_map>
class Statistics {
public:
public:
// Increment the counter for a given key by a specified value (default is 1)
void increment_counter(const std::string& key, int64_t value = 1) { counters_[key] += value; }
void increment_counter(const std::string &key, int64_t value = 1) {
counters_[key] += value;
}
int64_t& get_counter(const std::string& key) { return counters_[key]; }
int64_t &get_counter(const std::string &key) { return counters_[key]; }
// Start the timer for a given key
void start_timer(const std::string& key) { active_timers_[key] = std::chrono::high_resolution_clock::now(); }
void start_timer(const std::string &key) {
active_timers_[key] = std::chrono::high_resolution_clock::now();
}
// Stop the timer for a given key and update the total time and count
void stop_timer(const std::string& key) {
void stop_timer(const std::string &key) {
auto start_it = active_timers_.find(key);
if (start_it != active_timers_.end()) {
auto duration = std::chrono::high_resolution_clock::now() - start_it->second;
auto duration =
std::chrono::high_resolution_clock::now() - start_it->second;
timings_[key].total_time += duration;
timings_[key].count += 1;
active_timers_.erase(start_it);
} else {
// Handle error: stop_timer called without a matching start_timer
std::cerr << "Warning: stop_timer called for key '" << key << "' without a matching start_timer.\n";
std::cerr << "Warning: stop_timer called for key '" << key
<< "' without a matching start_timer.\n";
}
}
// Print out the collected statistical information
void report() const {
std::cout << "Counters:\n";
for (const auto& kv : counters_) {
for (const auto &kv : counters_) {
std::cout << " " << kv.first << ": " << kv.second << "\n";
}
std::cout << "\nTimers:\n";
for (const auto& kv : timings_) {
for (const auto &kv : timings_) {
std::cout << " " << kv.first << ": count = " << kv.second.count
<< ", total_time = " << kv.second.total_time.count() << "s"
<< ", average_time = " << (kv.second.count > 0 ? kv.second.total_time.count() / kv.second.count : 0)
<< ", average_time = "
<< (kv.second.count > 0
? kv.second.total_time.count() / kv.second.count
: 0)
<< "s\n";
}
}
private:
private:
// Mapping from key to counter
std::unordered_map<std::string, int64_t> counters_;
// Struct to hold timing information for a key
struct TimingInfo {
int64_t count = 0;
std::chrono::duration<double> total_time = std::chrono::duration<double>::zero();
std::chrono::duration<double> total_time =
std::chrono::duration<double>::zero();
};
// Mapping from key to timing information
std::unordered_map<std::string, TimingInfo> timings_;
// Mapping from key to the start time of active timers
std::unordered_map<std::string, std::chrono::high_resolution_clock::time_point> active_timers_;
std::unordered_map<std::string,
std::chrono::high_resolution_clock::time_point>
active_timers_;
};
#endif // STATISTICS_HPP
#endif // STATISTICS_HPP
#pragma once
#include "readable_number.hpp"
#include <cassert>
#include <chrono>
#include <iomanip>
......@@ -6,7 +7,6 @@
#include <map>
#include <sstream>
#include <string>
#include "readable_number.hpp"
inline std::string doubleToStringR2(double value) {
std::stringstream stream;
......@@ -15,7 +15,7 @@ inline std::string doubleToStringR2(double value) {
}
class Timer {
public:
public:
std::string name;
bool tmp_timer = false;
......@@ -49,10 +49,14 @@ class Timer {
endTime = m_endTime;
}
return std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - m_startTime).count();
return std::chrono::duration_cast<std::chrono::nanoseconds>(endTime -
m_startTime)
.count();
}
void printElapsedMilliseconds() { std::cout << elapsedNs() / 1e6 << " ms" << std::endl; }
void printElapsedMilliseconds() {
std::cout << elapsedNs() / 1e6 << " ms" << std::endl;
}
static std::string ns_to_string(double duration) {
auto nano_sec = duration;
......@@ -100,13 +104,13 @@ class Timer {
return readable_number(ops) + "op/s";
}
void merge(Timer& other) {
void merge(Timer &other) {
assert(m_isRunning == false);
assert(other.m_isRunning == false);
m_runningNs += other.runningTimeNs();
}
private:
private:
std::chrono::time_point<std::chrono::high_resolution_clock> m_startTime;
std::chrono::time_point<std::chrono::high_resolution_clock> m_endTime;
bool m_isRunning = false;
......@@ -114,14 +118,14 @@ class Timer {
};
class Counter {
public:
public:
Counter() {}
std::map<std::string, size_t> counters;
void inc(const char* name, size_t num) { counters[name] += num; };
void inc(const char *name, size_t num) { counters[name] += num; };
void print() {
for (auto& p : counters) {
for (auto &p : counters) {
std::cout << p.first << " : " << p.second << std::endl;
}
};
......
{
"DeepSeek-Coder-V2-Instruct": {
"hidden_size": 5120,
"intermediate_size": 12288,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 128,
"num_hidden_layers": 60,
"num_key_value_heads": 128,
"vocab_size": 102400
},
"DeepSeek-R1": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V2-Lite-Chat": {
"hidden_size": 2048,
"intermediate_size": 10944,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 102400
},
"DeepSeek-V3": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 3,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V3-bf16": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"LLaMA-2-7B-32K": {
"hidden_size": 4096,
"intermediate_size": 11008,
"max_position_embeddings": 32768,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"vocab_size": 32000
},
"Moonlight-16B-A3B-Instruct": {
"hidden_size": 2048,
"intermediate_size": 11264,
"max_position_embeddings": 8192,
"model_type": "deepseek_v3",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 163840
},
"Qwen2.5-32B-Instruct": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-32B-Instruct-GPTQ-Int4": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct-GPTQ-Int4": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"qwen2-72b-instruct": {
"hidden_size": 8192,
"intermediate_size": 29568,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 64,
"num_hidden_layers": 80,
"num_key_value_heads": 8,
"vocab_size": 152064
}
}
\ No newline at end of file
{
"BF16": {
"block_element_count": 1,
"block_element_size": 2,
"bytes_per_element": 2.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "BF16",
"reference": "",
"type_of_dot_vector": "BF16"
},
"FP16": {
"block_element_count": 1,
"block_element_size": 2,
"bytes_per_element": 2.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "FP16",
"reference": "",
"type_of_dot_vector": "FP16"
},
"FP32": {
"block_element_count": 1,
"block_element_size": 4,
"bytes_per_element": 4.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "FP32",
"reference": "",
"type_of_dot_vector": "FP32"
},
"Q4_0": {
"block_element_count": 32,
"block_element_size": 18,
"bytes_per_element": 0.5625,
"can_be_used_as_vector": false,
"has_min": false,
"has_scale": true,
"name": "Q4_0",
"reference": "https://huggingface.co/docs/hub/gguf",
"type_of_dot_vector": "Q8_0"
},
"Q8_0": {
"block_element_count": 32,
"block_element_size": 34,
"bytes_per_element": 1.0625,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": true,
"name": "Q8_0",
"reference": "https://huggingface.co/docs/hub/gguf",
"type_of_dot_vector": "Q8_0"
}
}
\ No newline at end of file
......@@ -70,6 +70,9 @@ class ArgumentParser:
parser.add_argument("--batch_size", type=int, default=self.cfg.batch_size)
parser.add_argument("--cache_lens", type=int, default=self.cfg.cache_lens)
# kvc2 config
parser.add_argument("--kvc2_config_dir", type=str, default=self.cfg.kvc2_config_dir)
# log configs
# log level: debug, info, warn, error, crit
parser.add_argument("--log_dir", type=str, default=self.cfg.log_dir)
......
......@@ -7,9 +7,7 @@ import sys, os
import yaml, json
from time import sleep
current_dir = os.path.dirname(__file__)
# sched_path = os.path.abspath(os.path.join(current_dir, '../../../build/balance_serve/sched'))
# sys.path.insert(0, sched_path)
import sched_ext
from transformers import AutoConfig
......@@ -52,8 +50,7 @@ def create_sched_settings(args):
settings.v_cache_on = False
settings.kvc2_root_path = '/mnt/data/persist-kvc'
settings.kvc2_config_path = os.path.join(current_dir, "..", "..", "configs")
print(os.path.join(current_dir, "..", "..", "configs"))
settings.kvc2_config_path = args.kvc2_config_dir
settings.memory_pool_size_GB = args.cpu_memory_size_GB
settings.evict_count = 40
settings.kvc2_metrics_port = args.kvc2_metrics_port
......
......@@ -34,12 +34,15 @@ class Config(metaclass=Singleton):
user_path: str = os.path.expanduser("~")
localstore_path: str = os.path.join(user_path, ".ktransformers")
kvc2_config_dir = os.path.join(localstore_path, "kvc2")
config_path: str = os.path.join(localstore_path, Config.CONFIG_FILE_NAME)
if not os.path.exists(config_yaml):
print(f"Can't find config file, {config_yaml}")
exit(-1)
if not os.path.exists(localstore_path):
os.mkdir(localstore_path)
if not os.path.exists(kvc2_config_dir):
os.mkdir(kvc2_config_dir)
if not os.path.exists(config_path):
shutil.copyfile(config_yaml, config_path)
with open(config_path, "r", encoding="utf-8") as fp:
......@@ -62,10 +65,13 @@ class Config(metaclass=Singleton):
self.localstore_path: str = os.path.join(self.user_path, ".ktransformers")
# log configs
self.log_dir = os.path.join(self.localstore_path, cfg["log"]["dir"])
if not os.path.exists(self.log_dir):
os.mkdir(self.log_dir)
self.log_file = cfg["log"]["file"]
self.log_level = cfg["log"]["level"]
self.backup_count = cfg["log"]["backup_count"]
self.kvc2_config_dir = os.path.join(self.localstore_path, "kvc2")
# server configs
self.server: dict = cfg.get("server", {})
self.server_ip = self.server.get("ip", "0.0.0.0")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment