Unverified Commit 41ce92bb authored by wang jiahao's avatar wang jiahao Committed by GitHub
Browse files

Merge pull request #1084 from kvcache-ai/fix-config

format kvc2, delete quant_configs, move model_configs to ~/.ktransfor…
parents 10fd2e28 64de7843
#include <atomic> #include <atomic>
template <typename T> template <typename T> struct AtomicPtrWithFlag {
struct AtomicPtrWithFlag {
constexpr static uint64_t mask = 1ull << 63; constexpr static uint64_t mask = 1ull << 63;
std::atomic_uint64_t ptr = 0; std::atomic_uint64_t ptr = 0;
std::pair<T*, bool> load(std::memory_order order = std::memory_order_seq_cst) { std::pair<T *, bool>
load(std::memory_order order = std::memory_order_seq_cst) {
uint64_t val = ptr.load(order); uint64_t val = ptr.load(order);
return {reinterpret_cast<T*>(val & (~mask)), val & mask}; return {reinterpret_cast<T *>(val & (~mask)), val & mask};
} }
void store(T* p, bool flag, std::memory_order order = std::memory_order_seq_cst) { void store(T *p, bool flag,
std::memory_order order = std::memory_order_seq_cst) {
ptr.store(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order); ptr.store(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order);
} }
std::pair<T*, bool> exchange(T* p, bool flag, std::memory_order order = std::memory_order_seq_cst) { std::pair<T *, bool>
uint64_t val = ptr.exchange(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order); exchange(T *p, bool flag,
return {reinterpret_cast<T*>(val & (~mask)), val & mask}; std::memory_order order = std::memory_order_seq_cst) {
uint64_t val =
ptr.exchange(reinterpret_cast<uint64_t>(p) | (flag ? mask : 0), order);
return {reinterpret_cast<T *>(val & (~mask)), val & mask};
} }
std::pair<T*, bool> touch_load(std::memory_order order = std::memory_order_seq_cst) { std::pair<T *, bool>
touch_load(std::memory_order order = std::memory_order_seq_cst) {
uint64_t val = ptr.fetch_and(~mask, order); uint64_t val = ptr.fetch_and(~mask, order);
return {reinterpret_cast<T*>(val & (~mask)), val & mask}; return {reinterpret_cast<T *>(val & (~mask)), val & mask};
} }
bool check_flag(std::memory_order order = std::memory_order_seq_cst) { return ptr.load(order) & mask; } bool check_flag(std::memory_order order = std::memory_order_seq_cst) {
return ptr.load(order) & mask;
}
}; };
...@@ -19,7 +19,7 @@ namespace csv { ...@@ -19,7 +19,7 @@ namespace csv {
* @param line The CSV line to parse. * @param line The CSV line to parse.
* @return A vector of strings, each representing a field in the CSV line. * @return A vector of strings, each representing a field in the CSV line.
*/ */
inline std::vector<std::string> parse_csv_line(const std::string& line) { inline std::vector<std::string> parse_csv_line(const std::string &line) {
std::vector<std::string> result; std::vector<std::string> result;
std::string field; std::string field;
bool in_quotes = false; bool in_quotes = false;
...@@ -57,7 +57,8 @@ inline std::vector<std::string> parse_csv_line(const std::string& line) { ...@@ -57,7 +57,8 @@ inline std::vector<std::string> parse_csv_line(const std::string& line) {
* @return A vector of pairs, each containing a column name and a vector of data * @return A vector of pairs, each containing a column name and a vector of data
* for that column. * for that column.
*/ */
inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(const std::string& filename) { inline std::vector<std::pair<std::string, std::vector<std::string>>>
read_csv(const std::string &filename) {
std::cout << "Reading CSV file: " << filename << std::endl; std::cout << "Reading CSV file: " << filename << std::endl;
// Open the file // Open the file
std::ifstream file(filename); std::ifstream file(filename);
...@@ -72,7 +73,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co ...@@ -72,7 +73,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
// Prepare the result vector with column names // Prepare the result vector with column names
std::vector<std::pair<std::string, std::vector<std::string>>> result; std::vector<std::pair<std::string, std::vector<std::string>>> result;
for (const auto& name : column_names) { for (const auto &name : column_names) {
result.emplace_back(name, std::vector<std::string>()); result.emplace_back(name, std::vector<std::string>());
} }
...@@ -107,7 +108,8 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co ...@@ -107,7 +108,8 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
chunk_starts.push_back(content_size); chunk_starts.push_back(content_size);
// Create threads to parse each chunk // Create threads to parse each chunk
std::vector<std::vector<std::vector<std::string>>> thread_results(num_threads); std::vector<std::vector<std::vector<std::string>>> thread_results(
num_threads);
std::vector<std::thread> threads; std::vector<std::thread> threads;
for (unsigned int i = 0; i < num_threads; ++i) { for (unsigned int i = 0; i < num_threads; ++i) {
...@@ -133,13 +135,13 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co ...@@ -133,13 +135,13 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
} }
// Wait for all threads to finish // Wait for all threads to finish
for (auto& t : threads) { for (auto &t : threads) {
t.join(); t.join();
} }
// Combine the results from all threads into the final result // Combine the results from all threads into the final result
for (const auto& local_result : thread_results) { for (const auto &local_result : thread_results) {
for (const auto& row : local_result) { for (const auto &row : local_result) {
for (size_t i = 0; i < row.size(); ++i) { for (size_t i = 0; i < row.size(); ++i) {
if (i < result.size()) { if (i < result.size()) {
result[i].second.push_back(row[i]); result[i].second.push_back(row[i]);
...@@ -158,8 +160,9 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co ...@@ -158,8 +160,9 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
* @param data A vector of pairs, each containing a column name and a vector of * @param data A vector of pairs, each containing a column name and a vector of
* data for that column. * data for that column.
*/ */
inline void write_csv(const std::string& filename, inline void write_csv(
const std::vector<std::pair<std::string, std::vector<std::string>>>& data) { const std::string &filename,
const std::vector<std::pair<std::string, std::vector<std::string>>> &data) {
std::cout << "Writing CSV file: " << filename << std::endl; std::cout << "Writing CSV file: " << filename << std::endl;
// Open the file for writing // Open the file for writing
...@@ -173,7 +176,7 @@ inline void write_csv(const std::string& filename, ...@@ -173,7 +176,7 @@ inline void write_csv(const std::string& filename,
return; // Nothing to write return; // Nothing to write
} }
size_t num_rows = data[0].second.size(); size_t num_rows = data[0].second.size();
for (const auto& column : data) { for (const auto &column : data) {
if (column.second.size() != num_rows) { if (column.second.size() != num_rows) {
throw std::runtime_error("All columns must have the same number of rows"); throw std::runtime_error("All columns must have the same number of rows");
} }
...@@ -191,7 +194,7 @@ inline void write_csv(const std::string& filename, ...@@ -191,7 +194,7 @@ inline void write_csv(const std::string& filename,
// Write the data rows // Write the data rows
for (size_t row = 0; row < num_rows; ++row) { for (size_t row = 0; row < num_rows; ++row) {
for (size_t col = 0; col < data.size(); ++col) { for (size_t col = 0; col < data.size(); ++col) {
const std::string& field = data[col].second[row]; const std::string &field = data[col].second[row];
// Handle CSV escaping // Handle CSV escaping
std::string escaped_field = field; std::string escaped_field = field;
bool needs_quotes = false; bool needs_quotes = false;
...@@ -204,7 +207,8 @@ inline void write_csv(const std::string& filename, ...@@ -204,7 +207,8 @@ inline void write_csv(const std::string& filename,
pos += 2; pos += 2;
} }
} }
if (escaped_field.find(',') != std::string::npos || escaped_field.find('\n') != std::string::npos) { if (escaped_field.find(',') != std::string::npos ||
escaped_field.find('\n') != std::string::npos) {
needs_quotes = true; needs_quotes = true;
} }
if (needs_quotes) { if (needs_quotes) {
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
template <typename T> template <typename T> std::string format_vector(const std::vector<T> &v) {
std::string format_vector(const std::vector<T>& v) {
std::ostringstream oss; std::ostringstream oss;
if (v.empty()) if (v.empty())
return "[]"; return "[]";
......
...@@ -4,32 +4,31 @@ ...@@ -4,32 +4,31 @@
#include <optional> #include <optional>
#include <semaphore> #include <semaphore>
template <typename T> template <typename T> class MPSCQueue {
class MPSCQueue {
struct Node { struct Node {
T data; T data;
std::atomic<Node*> next; std::atomic<Node *> next;
Node() : next(nullptr) {} Node() : next(nullptr) {}
Node(T data_) : data(std::move(data_)), next(nullptr) {} Node(T data_) : data(std::move(data_)), next(nullptr) {}
}; };
std::atomic<Node*> head; std::atomic<Node *> head;
Node* tail; Node *tail;
public: public:
std::atomic_size_t enqueue_count = 0; std::atomic_size_t enqueue_count = 0;
size_t dequeue_count = 0; size_t dequeue_count = 0;
MPSCQueue() { MPSCQueue() {
Node* dummy = new Node(); Node *dummy = new Node();
head.store(dummy, std::memory_order_seq_cst); head.store(dummy, std::memory_order_seq_cst);
tail = dummy; tail = dummy;
} }
~MPSCQueue() { ~MPSCQueue() {
Node* node = tail; Node *node = tail;
while (node) { while (node) {
Node* next = node->next.load(std::memory_order_seq_cst); Node *next = node->next.load(std::memory_order_seq_cst);
delete node; delete node;
node = next; node = next;
} }
...@@ -38,14 +37,14 @@ class MPSCQueue { ...@@ -38,14 +37,14 @@ class MPSCQueue {
// 生产者调用 // 生产者调用
void enqueue(T data) { void enqueue(T data) {
enqueue_count.fetch_add(1); enqueue_count.fetch_add(1);
Node* node = new Node(std::move(data)); Node *node = new Node(std::move(data));
Node* prev_head = head.exchange(node, std::memory_order_seq_cst); Node *prev_head = head.exchange(node, std::memory_order_seq_cst);
prev_head->next.store(node, std::memory_order_seq_cst); prev_head->next.store(node, std::memory_order_seq_cst);
} }
// 消费者调用 // 消费者调用
std::optional<T> dequeue() { std::optional<T> dequeue() {
Node* next = tail->next.load(std::memory_order_seq_cst); Node *next = tail->next.load(std::memory_order_seq_cst);
if (next) { if (next) {
T res = std::move(next->data); T res = std::move(next->data);
delete tail; delete tail;
...@@ -59,16 +58,16 @@ class MPSCQueue { ...@@ -59,16 +58,16 @@ class MPSCQueue {
size_t size() { return enqueue_count.load() - dequeue_count; } size_t size() { return enqueue_count.load() - dequeue_count; }
}; };
template <typename T> template <typename T> class MPSCQueueConsumerLock {
class MPSCQueueConsumerLock {
MPSCQueue<T> queue; MPSCQueue<T> queue;
std::counting_semaphore<> sema{0}; std::counting_semaphore<> sema{0};
public: public:
void enqueue(T data) { void enqueue(T data) {
queue.enqueue(std::move(data)); queue.enqueue(std::move(data));
// std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this because the memory order might be wrong, I // std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this
// am also not that sure about this. // because the memory order might be wrong, I am also not that sure about
// this.
sema.release(); sema.release();
} }
...@@ -76,7 +75,9 @@ class MPSCQueueConsumerLock { ...@@ -76,7 +75,9 @@ class MPSCQueueConsumerLock {
auto re = queue.dequeue(); auto re = queue.dequeue();
if (re.has_value()) { if (re.has_value()) {
while (sema.try_acquire() == false) { while (sema.try_acquire() == false) {
std::cerr << __FILE__ << ":" << __FUNCTION__ << " sema try acquire should be success, retrying, please check" std::cerr
<< __FILE__ << ":" << __FUNCTION__
<< " sema try acquire should be success, retrying, please check"
<< std::endl; << std::endl;
// assert(false); // assert(false);
} }
...@@ -91,7 +92,9 @@ class MPSCQueueConsumerLock { ...@@ -91,7 +92,9 @@ class MPSCQueueConsumerLock {
auto re = queue.dequeue(); auto re = queue.dequeue();
if (re.has_value()) { if (re.has_value()) {
while (sema.try_acquire() == false) { while (sema.try_acquire() == false) {
std::cerr << __FILE__ << ":" << __FUNCTION__ << " sema try acquire should be success, retrying, please check" std::cerr
<< __FILE__ << ":" << __FUNCTION__
<< " sema try acquire should be success, retrying, please check"
<< std::endl; << std::endl;
// assert(false); // assert(false);
} }
......
...@@ -7,59 +7,71 @@ ...@@ -7,59 +7,71 @@
#include <unordered_map> #include <unordered_map>
class Statistics { class Statistics {
public: public:
// Increment the counter for a given key by a specified value (default is 1) // Increment the counter for a given key by a specified value (default is 1)
void increment_counter(const std::string& key, int64_t value = 1) { counters_[key] += value; } void increment_counter(const std::string &key, int64_t value = 1) {
counters_[key] += value;
}
int64_t& get_counter(const std::string& key) { return counters_[key]; } int64_t &get_counter(const std::string &key) { return counters_[key]; }
// Start the timer for a given key // Start the timer for a given key
void start_timer(const std::string& key) { active_timers_[key] = std::chrono::high_resolution_clock::now(); } void start_timer(const std::string &key) {
active_timers_[key] = std::chrono::high_resolution_clock::now();
}
// Stop the timer for a given key and update the total time and count // Stop the timer for a given key and update the total time and count
void stop_timer(const std::string& key) { void stop_timer(const std::string &key) {
auto start_it = active_timers_.find(key); auto start_it = active_timers_.find(key);
if (start_it != active_timers_.end()) { if (start_it != active_timers_.end()) {
auto duration = std::chrono::high_resolution_clock::now() - start_it->second; auto duration =
std::chrono::high_resolution_clock::now() - start_it->second;
timings_[key].total_time += duration; timings_[key].total_time += duration;
timings_[key].count += 1; timings_[key].count += 1;
active_timers_.erase(start_it); active_timers_.erase(start_it);
} else { } else {
// Handle error: stop_timer called without a matching start_timer // Handle error: stop_timer called without a matching start_timer
std::cerr << "Warning: stop_timer called for key '" << key << "' without a matching start_timer.\n"; std::cerr << "Warning: stop_timer called for key '" << key
<< "' without a matching start_timer.\n";
} }
} }
// Print out the collected statistical information // Print out the collected statistical information
void report() const { void report() const {
std::cout << "Counters:\n"; std::cout << "Counters:\n";
for (const auto& kv : counters_) { for (const auto &kv : counters_) {
std::cout << " " << kv.first << ": " << kv.second << "\n"; std::cout << " " << kv.first << ": " << kv.second << "\n";
} }
std::cout << "\nTimers:\n"; std::cout << "\nTimers:\n";
for (const auto& kv : timings_) { for (const auto &kv : timings_) {
std::cout << " " << kv.first << ": count = " << kv.second.count std::cout << " " << kv.first << ": count = " << kv.second.count
<< ", total_time = " << kv.second.total_time.count() << "s" << ", total_time = " << kv.second.total_time.count() << "s"
<< ", average_time = " << (kv.second.count > 0 ? kv.second.total_time.count() / kv.second.count : 0) << ", average_time = "
<< (kv.second.count > 0
? kv.second.total_time.count() / kv.second.count
: 0)
<< "s\n"; << "s\n";
} }
} }
private: private:
// Mapping from key to counter // Mapping from key to counter
std::unordered_map<std::string, int64_t> counters_; std::unordered_map<std::string, int64_t> counters_;
// Struct to hold timing information for a key // Struct to hold timing information for a key
struct TimingInfo { struct TimingInfo {
int64_t count = 0; int64_t count = 0;
std::chrono::duration<double> total_time = std::chrono::duration<double>::zero(); std::chrono::duration<double> total_time =
std::chrono::duration<double>::zero();
}; };
// Mapping from key to timing information // Mapping from key to timing information
std::unordered_map<std::string, TimingInfo> timings_; std::unordered_map<std::string, TimingInfo> timings_;
// Mapping from key to the start time of active timers // Mapping from key to the start time of active timers
std::unordered_map<std::string, std::chrono::high_resolution_clock::time_point> active_timers_; std::unordered_map<std::string,
std::chrono::high_resolution_clock::time_point>
active_timers_;
}; };
#endif // STATISTICS_HPP #endif // STATISTICS_HPP
#pragma once #pragma once
#include "readable_number.hpp"
#include <cassert> #include <cassert>
#include <chrono> #include <chrono>
#include <iomanip> #include <iomanip>
...@@ -6,7 +7,6 @@ ...@@ -6,7 +7,6 @@
#include <map> #include <map>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "readable_number.hpp"
inline std::string doubleToStringR2(double value) { inline std::string doubleToStringR2(double value) {
std::stringstream stream; std::stringstream stream;
...@@ -15,7 +15,7 @@ inline std::string doubleToStringR2(double value) { ...@@ -15,7 +15,7 @@ inline std::string doubleToStringR2(double value) {
} }
class Timer { class Timer {
public: public:
std::string name; std::string name;
bool tmp_timer = false; bool tmp_timer = false;
...@@ -49,10 +49,14 @@ class Timer { ...@@ -49,10 +49,14 @@ class Timer {
endTime = m_endTime; endTime = m_endTime;
} }
return std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - m_startTime).count(); return std::chrono::duration_cast<std::chrono::nanoseconds>(endTime -
m_startTime)
.count();
} }
void printElapsedMilliseconds() { std::cout << elapsedNs() / 1e6 << " ms" << std::endl; } void printElapsedMilliseconds() {
std::cout << elapsedNs() / 1e6 << " ms" << std::endl;
}
static std::string ns_to_string(double duration) { static std::string ns_to_string(double duration) {
auto nano_sec = duration; auto nano_sec = duration;
...@@ -100,13 +104,13 @@ class Timer { ...@@ -100,13 +104,13 @@ class Timer {
return readable_number(ops) + "op/s"; return readable_number(ops) + "op/s";
} }
void merge(Timer& other) { void merge(Timer &other) {
assert(m_isRunning == false); assert(m_isRunning == false);
assert(other.m_isRunning == false); assert(other.m_isRunning == false);
m_runningNs += other.runningTimeNs(); m_runningNs += other.runningTimeNs();
} }
private: private:
std::chrono::time_point<std::chrono::high_resolution_clock> m_startTime; std::chrono::time_point<std::chrono::high_resolution_clock> m_startTime;
std::chrono::time_point<std::chrono::high_resolution_clock> m_endTime; std::chrono::time_point<std::chrono::high_resolution_clock> m_endTime;
bool m_isRunning = false; bool m_isRunning = false;
...@@ -114,14 +118,14 @@ class Timer { ...@@ -114,14 +118,14 @@ class Timer {
}; };
class Counter { class Counter {
public: public:
Counter() {} Counter() {}
std::map<std::string, size_t> counters; std::map<std::string, size_t> counters;
void inc(const char* name, size_t num) { counters[name] += num; }; void inc(const char *name, size_t num) { counters[name] += num; };
void print() { void print() {
for (auto& p : counters) { for (auto &p : counters) {
std::cout << p.first << " : " << p.second << std::endl; std::cout << p.first << " : " << p.second << std::endl;
} }
}; };
......
{
"DeepSeek-Coder-V2-Instruct": {
"hidden_size": 5120,
"intermediate_size": 12288,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 128,
"num_hidden_layers": 60,
"num_key_value_heads": 128,
"vocab_size": 102400
},
"DeepSeek-R1": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V2-Lite-Chat": {
"hidden_size": 2048,
"intermediate_size": 10944,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 102400
},
"DeepSeek-V3": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 3,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V3-bf16": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"LLaMA-2-7B-32K": {
"hidden_size": 4096,
"intermediate_size": 11008,
"max_position_embeddings": 32768,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"vocab_size": 32000
},
"Moonlight-16B-A3B-Instruct": {
"hidden_size": 2048,
"intermediate_size": 11264,
"max_position_embeddings": 8192,
"model_type": "deepseek_v3",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 163840
},
"Qwen2.5-32B-Instruct": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-32B-Instruct-GPTQ-Int4": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct-GPTQ-Int4": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"qwen2-72b-instruct": {
"hidden_size": 8192,
"intermediate_size": 29568,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 64,
"num_hidden_layers": 80,
"num_key_value_heads": 8,
"vocab_size": 152064
}
}
\ No newline at end of file
{
"BF16": {
"block_element_count": 1,
"block_element_size": 2,
"bytes_per_element": 2.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "BF16",
"reference": "",
"type_of_dot_vector": "BF16"
},
"FP16": {
"block_element_count": 1,
"block_element_size": 2,
"bytes_per_element": 2.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "FP16",
"reference": "",
"type_of_dot_vector": "FP16"
},
"FP32": {
"block_element_count": 1,
"block_element_size": 4,
"bytes_per_element": 4.0,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": false,
"name": "FP32",
"reference": "",
"type_of_dot_vector": "FP32"
},
"Q4_0": {
"block_element_count": 32,
"block_element_size": 18,
"bytes_per_element": 0.5625,
"can_be_used_as_vector": false,
"has_min": false,
"has_scale": true,
"name": "Q4_0",
"reference": "https://huggingface.co/docs/hub/gguf",
"type_of_dot_vector": "Q8_0"
},
"Q8_0": {
"block_element_count": 32,
"block_element_size": 34,
"bytes_per_element": 1.0625,
"can_be_used_as_vector": true,
"has_min": false,
"has_scale": true,
"name": "Q8_0",
"reference": "https://huggingface.co/docs/hub/gguf",
"type_of_dot_vector": "Q8_0"
}
}
\ No newline at end of file
...@@ -70,6 +70,9 @@ class ArgumentParser: ...@@ -70,6 +70,9 @@ class ArgumentParser:
parser.add_argument("--batch_size", type=int, default=self.cfg.batch_size) parser.add_argument("--batch_size", type=int, default=self.cfg.batch_size)
parser.add_argument("--cache_lens", type=int, default=self.cfg.cache_lens) parser.add_argument("--cache_lens", type=int, default=self.cfg.cache_lens)
# kvc2 config
parser.add_argument("--kvc2_config_dir", type=str, default=self.cfg.kvc2_config_dir)
# log configs # log configs
# log level: debug, info, warn, error, crit # log level: debug, info, warn, error, crit
parser.add_argument("--log_dir", type=str, default=self.cfg.log_dir) parser.add_argument("--log_dir", type=str, default=self.cfg.log_dir)
......
...@@ -7,9 +7,7 @@ import sys, os ...@@ -7,9 +7,7 @@ import sys, os
import yaml, json import yaml, json
from time import sleep from time import sleep
current_dir = os.path.dirname(__file__)
# sched_path = os.path.abspath(os.path.join(current_dir, '../../../build/balance_serve/sched'))
# sys.path.insert(0, sched_path)
import sched_ext import sched_ext
from transformers import AutoConfig from transformers import AutoConfig
...@@ -52,8 +50,7 @@ def create_sched_settings(args): ...@@ -52,8 +50,7 @@ def create_sched_settings(args):
settings.v_cache_on = False settings.v_cache_on = False
settings.kvc2_root_path = '/mnt/data/persist-kvc' settings.kvc2_root_path = '/mnt/data/persist-kvc'
settings.kvc2_config_path = os.path.join(current_dir, "..", "..", "configs") settings.kvc2_config_path = args.kvc2_config_dir
print(os.path.join(current_dir, "..", "..", "configs"))
settings.memory_pool_size_GB = args.cpu_memory_size_GB settings.memory_pool_size_GB = args.cpu_memory_size_GB
settings.evict_count = 40 settings.evict_count = 40
settings.kvc2_metrics_port = args.kvc2_metrics_port settings.kvc2_metrics_port = args.kvc2_metrics_port
......
...@@ -34,12 +34,15 @@ class Config(metaclass=Singleton): ...@@ -34,12 +34,15 @@ class Config(metaclass=Singleton):
user_path: str = os.path.expanduser("~") user_path: str = os.path.expanduser("~")
localstore_path: str = os.path.join(user_path, ".ktransformers") localstore_path: str = os.path.join(user_path, ".ktransformers")
kvc2_config_dir = os.path.join(localstore_path, "kvc2")
config_path: str = os.path.join(localstore_path, Config.CONFIG_FILE_NAME) config_path: str = os.path.join(localstore_path, Config.CONFIG_FILE_NAME)
if not os.path.exists(config_yaml): if not os.path.exists(config_yaml):
print(f"Can't find config file, {config_yaml}") print(f"Can't find config file, {config_yaml}")
exit(-1) exit(-1)
if not os.path.exists(localstore_path): if not os.path.exists(localstore_path):
os.mkdir(localstore_path) os.mkdir(localstore_path)
if not os.path.exists(kvc2_config_dir):
os.mkdir(kvc2_config_dir)
if not os.path.exists(config_path): if not os.path.exists(config_path):
shutil.copyfile(config_yaml, config_path) shutil.copyfile(config_yaml, config_path)
with open(config_path, "r", encoding="utf-8") as fp: with open(config_path, "r", encoding="utf-8") as fp:
...@@ -62,10 +65,13 @@ class Config(metaclass=Singleton): ...@@ -62,10 +65,13 @@ class Config(metaclass=Singleton):
self.localstore_path: str = os.path.join(self.user_path, ".ktransformers") self.localstore_path: str = os.path.join(self.user_path, ".ktransformers")
# log configs # log configs
self.log_dir = os.path.join(self.localstore_path, cfg["log"]["dir"]) self.log_dir = os.path.join(self.localstore_path, cfg["log"]["dir"])
if not os.path.exists(self.log_dir):
os.mkdir(self.log_dir)
self.log_file = cfg["log"]["file"] self.log_file = cfg["log"]["file"]
self.log_level = cfg["log"]["level"] self.log_level = cfg["log"]["level"]
self.backup_count = cfg["log"]["backup_count"] self.backup_count = cfg["log"]["backup_count"]
self.kvc2_config_dir = os.path.join(self.localstore_path, "kvc2")
# server configs # server configs
self.server: dict = cfg.get("server", {}) self.server: dict = cfg.get("server", {})
self.server_ip = self.server.get("ip", "0.0.0.0") self.server_ip = self.server.get("ip", "0.0.0.0")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment