Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ox696c
ktransformers
Commits
64de7843
Commit
64de7843
authored
Apr 08, 2025
by
qiyuxinlin
Browse files
format kvc2, delete quant_configs, move model_configs to ~/.ktransformers
parent
9dd24ecd
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
115 additions
and
259 deletions
+115
-259
csrc/balance_serve/sched/utils/atomic_ptr_with_flags.hpp
csrc/balance_serve/sched/utils/atomic_ptr_with_flags.hpp
+18
-11
csrc/balance_serve/sched/utils/csv.hpp
csrc/balance_serve/sched/utils/csv.hpp
+21
-17
csrc/balance_serve/sched/utils/easy_format.hpp
csrc/balance_serve/sched/utils/easy_format.hpp
+2
-3
csrc/balance_serve/sched/utils/mpsc.hpp
csrc/balance_serve/sched/utils/mpsc.hpp
+24
-21
csrc/balance_serve/sched/utils/statistics.hpp
csrc/balance_serve/sched/utils/statistics.hpp
+26
-14
csrc/balance_serve/sched/utils/timer.hpp
csrc/balance_serve/sched/utils/timer.hpp
+13
-9
ktransformers/configs/model_configs.json
ktransformers/configs/model_configs.json
+0
-122
ktransformers/configs/quant_configs.json
ktransformers/configs/quant_configs.json
+0
-57
ktransformers/server/args.py
ktransformers/server/args.py
+3
-0
ktransformers/server/balance_serve/settings.py
ktransformers/server/balance_serve/settings.py
+2
-5
ktransformers/server/config/config.py
ktransformers/server/config/config.py
+6
-0
No files found.
csrc/balance_serve/sched/utils/atomic_ptr_with_flags.hpp
View file @
64de7843
#include <atomic>
#include <atomic>
template
<
typename
T
>
template
<
typename
T
>
struct
AtomicPtrWithFlag
{
struct
AtomicPtrWithFlag
{
constexpr
static
uint64_t
mask
=
1ull
<<
63
;
constexpr
static
uint64_t
mask
=
1ull
<<
63
;
std
::
atomic_uint64_t
ptr
=
0
;
std
::
atomic_uint64_t
ptr
=
0
;
std
::
pair
<
T
*
,
bool
>
load
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
std
::
pair
<
T
*
,
bool
>
load
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
uint64_t
val
=
ptr
.
load
(
order
);
uint64_t
val
=
ptr
.
load
(
order
);
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
}
}
void
store
(
T
*
p
,
bool
flag
,
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
void
store
(
T
*
p
,
bool
flag
,
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
ptr
.
store
(
reinterpret_cast
<
uint64_t
>
(
p
)
|
(
flag
?
mask
:
0
),
order
);
ptr
.
store
(
reinterpret_cast
<
uint64_t
>
(
p
)
|
(
flag
?
mask
:
0
),
order
);
}
}
std
::
pair
<
T
*
,
bool
>
exchange
(
T
*
p
,
bool
flag
,
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
std
::
pair
<
T
*
,
bool
>
uint64_t
val
=
ptr
.
exchange
(
reinterpret_cast
<
uint64_t
>
(
p
)
|
(
flag
?
mask
:
0
),
order
);
exchange
(
T
*
p
,
bool
flag
,
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
uint64_t
val
=
ptr
.
exchange
(
reinterpret_cast
<
uint64_t
>
(
p
)
|
(
flag
?
mask
:
0
),
order
);
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
}
}
std
::
pair
<
T
*
,
bool
>
touch_load
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
std
::
pair
<
T
*
,
bool
>
touch_load
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
uint64_t
val
=
ptr
.
fetch_and
(
~
mask
,
order
);
uint64_t
val
=
ptr
.
fetch_and
(
~
mask
,
order
);
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
return
{
reinterpret_cast
<
T
*>
(
val
&
(
~
mask
)),
val
&
mask
};
}
}
bool
check_flag
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
return
ptr
.
load
(
order
)
&
mask
;
}
bool
check_flag
(
std
::
memory_order
order
=
std
::
memory_order_seq_cst
)
{
return
ptr
.
load
(
order
)
&
mask
;
}
};
};
csrc/balance_serve/sched/utils/csv.hpp
View file @
64de7843
...
@@ -19,7 +19,7 @@ namespace csv {
...
@@ -19,7 +19,7 @@ namespace csv {
* @param line The CSV line to parse.
* @param line The CSV line to parse.
* @return A vector of strings, each representing a field in the CSV line.
* @return A vector of strings, each representing a field in the CSV line.
*/
*/
inline
std
::
vector
<
std
::
string
>
parse_csv_line
(
const
std
::
string
&
line
)
{
inline
std
::
vector
<
std
::
string
>
parse_csv_line
(
const
std
::
string
&
line
)
{
std
::
vector
<
std
::
string
>
result
;
std
::
vector
<
std
::
string
>
result
;
std
::
string
field
;
std
::
string
field
;
bool
in_quotes
=
false
;
bool
in_quotes
=
false
;
...
@@ -57,7 +57,8 @@ inline std::vector<std::string> parse_csv_line(const std::string& line) {
...
@@ -57,7 +57,8 @@ inline std::vector<std::string> parse_csv_line(const std::string& line) {
* @return A vector of pairs, each containing a column name and a vector of data
* @return A vector of pairs, each containing a column name and a vector of data
* for that column.
* for that column.
*/
*/
inline
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
read_csv
(
const
std
::
string
&
filename
)
{
inline
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
read_csv
(
const
std
::
string
&
filename
)
{
std
::
cout
<<
"Reading CSV file: "
<<
filename
<<
std
::
endl
;
std
::
cout
<<
"Reading CSV file: "
<<
filename
<<
std
::
endl
;
// Open the file
// Open the file
std
::
ifstream
file
(
filename
);
std
::
ifstream
file
(
filename
);
...
@@ -72,7 +73,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
...
@@ -72,7 +73,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
// Prepare the result vector with column names
// Prepare the result vector with column names
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
result
;
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
result
;
for
(
const
auto
&
name
:
column_names
)
{
for
(
const
auto
&
name
:
column_names
)
{
result
.
emplace_back
(
name
,
std
::
vector
<
std
::
string
>
());
result
.
emplace_back
(
name
,
std
::
vector
<
std
::
string
>
());
}
}
...
@@ -84,7 +85,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
...
@@ -84,7 +85,7 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
// Determine the number of threads to use
// Determine the number of threads to use
unsigned
int
num_threads
=
std
::
thread
::
hardware_concurrency
();
unsigned
int
num_threads
=
std
::
thread
::
hardware_concurrency
();
if
(
num_threads
==
0
)
if
(
num_threads
==
0
)
num_threads
=
4
;
// Default to 4 threads if hardware_concurrency returns 0
num_threads
=
4
;
// Default to 4 threads if hardware_concurrency returns 0
// Calculate chunk start positions based on content size
// Calculate chunk start positions based on content size
std
::
vector
<
size_t
>
chunk_starts
;
std
::
vector
<
size_t
>
chunk_starts
;
...
@@ -100,14 +101,15 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
...
@@ -100,14 +101,15 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
++
pos
;
++
pos
;
}
}
if
(
pos
<
content_size
)
{
if
(
pos
<
content_size
)
{
++
pos
;
// Skip the newline character
++
pos
;
// Skip the newline character
}
}
chunk_starts
.
push_back
(
pos
);
chunk_starts
.
push_back
(
pos
);
}
}
chunk_starts
.
push_back
(
content_size
);
chunk_starts
.
push_back
(
content_size
);
// Create threads to parse each chunk
// Create threads to parse each chunk
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
string
>>>
thread_results
(
num_threads
);
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
string
>>>
thread_results
(
num_threads
);
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
for
(
unsigned
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
for
(
unsigned
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
...
@@ -133,13 +135,13 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
...
@@ -133,13 +135,13 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
}
}
// Wait for all threads to finish
// Wait for all threads to finish
for
(
auto
&
t
:
threads
)
{
for
(
auto
&
t
:
threads
)
{
t
.
join
();
t
.
join
();
}
}
// Combine the results from all threads into the final result
// Combine the results from all threads into the final result
for
(
const
auto
&
local_result
:
thread_results
)
{
for
(
const
auto
&
local_result
:
thread_results
)
{
for
(
const
auto
&
row
:
local_result
)
{
for
(
const
auto
&
row
:
local_result
)
{
for
(
size_t
i
=
0
;
i
<
row
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
row
.
size
();
++
i
)
{
if
(
i
<
result
.
size
())
{
if
(
i
<
result
.
size
())
{
result
[
i
].
second
.
push_back
(
row
[
i
]);
result
[
i
].
second
.
push_back
(
row
[
i
]);
...
@@ -158,8 +160,9 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
...
@@ -158,8 +160,9 @@ inline std::vector<std::pair<std::string, std::vector<std::string>>> read_csv(co
* @param data A vector of pairs, each containing a column name and a vector of
* @param data A vector of pairs, each containing a column name and a vector of
* data for that column.
* data for that column.
*/
*/
inline
void
write_csv
(
const
std
::
string
&
filename
,
inline
void
write_csv
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>&
data
)
{
const
std
::
string
&
filename
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
&
data
)
{
std
::
cout
<<
"Writing CSV file: "
<<
filename
<<
std
::
endl
;
std
::
cout
<<
"Writing CSV file: "
<<
filename
<<
std
::
endl
;
// Open the file for writing
// Open the file for writing
...
@@ -170,10 +173,10 @@ inline void write_csv(const std::string& filename,
...
@@ -170,10 +173,10 @@ inline void write_csv(const std::string& filename,
// Check that all columns have the same number of rows
// Check that all columns have the same number of rows
if
(
data
.
empty
())
{
if
(
data
.
empty
())
{
return
;
// Nothing to write
return
;
// Nothing to write
}
}
size_t
num_rows
=
data
[
0
].
second
.
size
();
size_t
num_rows
=
data
[
0
].
second
.
size
();
for
(
const
auto
&
column
:
data
)
{
for
(
const
auto
&
column
:
data
)
{
if
(
column
.
second
.
size
()
!=
num_rows
)
{
if
(
column
.
second
.
size
()
!=
num_rows
)
{
throw
std
::
runtime_error
(
"All columns must have the same number of rows"
);
throw
std
::
runtime_error
(
"All columns must have the same number of rows"
);
}
}
...
@@ -191,7 +194,7 @@ inline void write_csv(const std::string& filename,
...
@@ -191,7 +194,7 @@ inline void write_csv(const std::string& filename,
// Write the data rows
// Write the data rows
for
(
size_t
row
=
0
;
row
<
num_rows
;
++
row
)
{
for
(
size_t
row
=
0
;
row
<
num_rows
;
++
row
)
{
for
(
size_t
col
=
0
;
col
<
data
.
size
();
++
col
)
{
for
(
size_t
col
=
0
;
col
<
data
.
size
();
++
col
)
{
const
std
::
string
&
field
=
data
[
col
].
second
[
row
];
const
std
::
string
&
field
=
data
[
col
].
second
[
row
];
// Handle CSV escaping
// Handle CSV escaping
std
::
string
escaped_field
=
field
;
std
::
string
escaped_field
=
field
;
bool
needs_quotes
=
false
;
bool
needs_quotes
=
false
;
...
@@ -204,7 +207,8 @@ inline void write_csv(const std::string& filename,
...
@@ -204,7 +207,8 @@ inline void write_csv(const std::string& filename,
pos
+=
2
;
pos
+=
2
;
}
}
}
}
if
(
escaped_field
.
find
(
','
)
!=
std
::
string
::
npos
||
escaped_field
.
find
(
'\n'
)
!=
std
::
string
::
npos
)
{
if
(
escaped_field
.
find
(
','
)
!=
std
::
string
::
npos
||
escaped_field
.
find
(
'\n'
)
!=
std
::
string
::
npos
)
{
needs_quotes
=
true
;
needs_quotes
=
true
;
}
}
if
(
needs_quotes
)
{
if
(
needs_quotes
)
{
...
@@ -220,6 +224,6 @@ inline void write_csv(const std::string& filename,
...
@@ -220,6 +224,6 @@ inline void write_csv(const std::string& filename,
}
}
}
}
}
// namespace csv
}
// namespace csv
#endif
// CSV_READER_HPP
#endif // CSV_READER_HPP
csrc/balance_serve/sched/utils/easy_format.hpp
View file @
64de7843
...
@@ -2,15 +2,14 @@
...
@@ -2,15 +2,14 @@
#include <string>
#include <string>
#include <vector>
#include <vector>
template
<
typename
T
>
template
<
typename
T
>
std
::
string
format_vector
(
const
std
::
vector
<
T
>
&
v
)
{
std
::
string
format_vector
(
const
std
::
vector
<
T
>&
v
)
{
std
::
ostringstream
oss
;
std
::
ostringstream
oss
;
if
(
v
.
empty
())
if
(
v
.
empty
())
return
"[]"
;
return
"[]"
;
for
(
size_t
i
=
0
;
i
<
v
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
v
.
size
();
++
i
)
{
oss
<<
v
[
i
];
oss
<<
v
[
i
];
if
(
i
<
v
.
size
()
-
1
)
if
(
i
<
v
.
size
()
-
1
)
oss
<<
", "
;
// 逗号分隔
oss
<<
", "
;
// 逗号分隔
}
}
return
oss
.
str
();
return
oss
.
str
();
}
}
csrc/balance_serve/sched/utils/mpsc.hpp
View file @
64de7843
...
@@ -4,32 +4,31 @@
...
@@ -4,32 +4,31 @@
#include <optional>
#include <optional>
#include <semaphore>
#include <semaphore>
template
<
typename
T
>
template
<
typename
T
>
class
MPSCQueue
{
class
MPSCQueue
{
struct
Node
{
struct
Node
{
T
data
;
T
data
;
std
::
atomic
<
Node
*>
next
;
std
::
atomic
<
Node
*>
next
;
Node
()
:
next
(
nullptr
)
{}
Node
()
:
next
(
nullptr
)
{}
Node
(
T
data_
)
:
data
(
std
::
move
(
data_
)),
next
(
nullptr
)
{}
Node
(
T
data_
)
:
data
(
std
::
move
(
data_
)),
next
(
nullptr
)
{}
};
};
std
::
atomic
<
Node
*>
head
;
std
::
atomic
<
Node
*>
head
;
Node
*
tail
;
Node
*
tail
;
public:
public:
std
::
atomic_size_t
enqueue_count
=
0
;
std
::
atomic_size_t
enqueue_count
=
0
;
size_t
dequeue_count
=
0
;
size_t
dequeue_count
=
0
;
MPSCQueue
()
{
MPSCQueue
()
{
Node
*
dummy
=
new
Node
();
Node
*
dummy
=
new
Node
();
head
.
store
(
dummy
,
std
::
memory_order_seq_cst
);
head
.
store
(
dummy
,
std
::
memory_order_seq_cst
);
tail
=
dummy
;
tail
=
dummy
;
}
}
~
MPSCQueue
()
{
~
MPSCQueue
()
{
Node
*
node
=
tail
;
Node
*
node
=
tail
;
while
(
node
)
{
while
(
node
)
{
Node
*
next
=
node
->
next
.
load
(
std
::
memory_order_seq_cst
);
Node
*
next
=
node
->
next
.
load
(
std
::
memory_order_seq_cst
);
delete
node
;
delete
node
;
node
=
next
;
node
=
next
;
}
}
...
@@ -38,14 +37,14 @@ class MPSCQueue {
...
@@ -38,14 +37,14 @@ class MPSCQueue {
// 生产者调用
// 生产者调用
void
enqueue
(
T
data
)
{
void
enqueue
(
T
data
)
{
enqueue_count
.
fetch_add
(
1
);
enqueue_count
.
fetch_add
(
1
);
Node
*
node
=
new
Node
(
std
::
move
(
data
));
Node
*
node
=
new
Node
(
std
::
move
(
data
));
Node
*
prev_head
=
head
.
exchange
(
node
,
std
::
memory_order_seq_cst
);
Node
*
prev_head
=
head
.
exchange
(
node
,
std
::
memory_order_seq_cst
);
prev_head
->
next
.
store
(
node
,
std
::
memory_order_seq_cst
);
prev_head
->
next
.
store
(
node
,
std
::
memory_order_seq_cst
);
}
}
// 消费者调用
// 消费者调用
std
::
optional
<
T
>
dequeue
()
{
std
::
optional
<
T
>
dequeue
()
{
Node
*
next
=
tail
->
next
.
load
(
std
::
memory_order_seq_cst
);
Node
*
next
=
tail
->
next
.
load
(
std
::
memory_order_seq_cst
);
if
(
next
)
{
if
(
next
)
{
T
res
=
std
::
move
(
next
->
data
);
T
res
=
std
::
move
(
next
->
data
);
delete
tail
;
delete
tail
;
...
@@ -59,16 +58,16 @@ class MPSCQueue {
...
@@ -59,16 +58,16 @@ class MPSCQueue {
size_t
size
()
{
return
enqueue_count
.
load
()
-
dequeue_count
;
}
size_t
size
()
{
return
enqueue_count
.
load
()
-
dequeue_count
;
}
};
};
template
<
typename
T
>
template
<
typename
T
>
class
MPSCQueueConsumerLock
{
class
MPSCQueueConsumerLock
{
MPSCQueue
<
T
>
queue
;
MPSCQueue
<
T
>
queue
;
std
::
counting_semaphore
<>
sema
{
0
};
std
::
counting_semaphore
<>
sema
{
0
};
public:
public:
void
enqueue
(
T
data
)
{
void
enqueue
(
T
data
)
{
queue
.
enqueue
(
std
::
move
(
data
));
queue
.
enqueue
(
std
::
move
(
data
));
// std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this because the memory order might be wrong, I
// std::atomic_thread_fence(std::memory_order_seq_cst);// Inserting this
// am also not that sure about this.
// because the memory order might be wrong, I am also not that sure about
// this.
sema
.
release
();
sema
.
release
();
}
}
...
@@ -76,8 +75,10 @@ class MPSCQueueConsumerLock {
...
@@ -76,8 +75,10 @@ class MPSCQueueConsumerLock {
auto
re
=
queue
.
dequeue
();
auto
re
=
queue
.
dequeue
();
if
(
re
.
has_value
())
{
if
(
re
.
has_value
())
{
while
(
sema
.
try_acquire
()
==
false
)
{
while
(
sema
.
try_acquire
()
==
false
)
{
std
::
cerr
<<
__FILE__
<<
":"
<<
__FUNCTION__
<<
" sema try acquire should be success, retrying, please check"
std
::
cerr
<<
std
::
endl
;
<<
__FILE__
<<
":"
<<
__FUNCTION__
<<
" sema try acquire should be success, retrying, please check"
<<
std
::
endl
;
// assert(false);
// assert(false);
}
}
return
re
.
value
();
return
re
.
value
();
...
@@ -91,8 +92,10 @@ class MPSCQueueConsumerLock {
...
@@ -91,8 +92,10 @@ class MPSCQueueConsumerLock {
auto
re
=
queue
.
dequeue
();
auto
re
=
queue
.
dequeue
();
if
(
re
.
has_value
())
{
if
(
re
.
has_value
())
{
while
(
sema
.
try_acquire
()
==
false
)
{
while
(
sema
.
try_acquire
()
==
false
)
{
std
::
cerr
<<
__FILE__
<<
":"
<<
__FUNCTION__
<<
" sema try acquire should be success, retrying, please check"
std
::
cerr
<<
std
::
endl
;
<<
__FILE__
<<
":"
<<
__FUNCTION__
<<
" sema try acquire should be success, retrying, please check"
<<
std
::
endl
;
// assert(false);
// assert(false);
}
}
return
re
.
value
();
return
re
.
value
();
...
...
csrc/balance_serve/sched/utils/statistics.hpp
View file @
64de7843
...
@@ -7,59 +7,71 @@
...
@@ -7,59 +7,71 @@
#include <unordered_map>
#include <unordered_map>
class
Statistics
{
class
Statistics
{
public:
public:
// Increment the counter for a given key by a specified value (default is 1)
// Increment the counter for a given key by a specified value (default is 1)
void
increment_counter
(
const
std
::
string
&
key
,
int64_t
value
=
1
)
{
counters_
[
key
]
+=
value
;
}
void
increment_counter
(
const
std
::
string
&
key
,
int64_t
value
=
1
)
{
counters_
[
key
]
+=
value
;
}
int64_t
&
get_counter
(
const
std
::
string
&
key
)
{
return
counters_
[
key
];
}
int64_t
&
get_counter
(
const
std
::
string
&
key
)
{
return
counters_
[
key
];
}
// Start the timer for a given key
// Start the timer for a given key
void
start_timer
(
const
std
::
string
&
key
)
{
active_timers_
[
key
]
=
std
::
chrono
::
high_resolution_clock
::
now
();
}
void
start_timer
(
const
std
::
string
&
key
)
{
active_timers_
[
key
]
=
std
::
chrono
::
high_resolution_clock
::
now
();
}
// Stop the timer for a given key and update the total time and count
// Stop the timer for a given key and update the total time and count
void
stop_timer
(
const
std
::
string
&
key
)
{
void
stop_timer
(
const
std
::
string
&
key
)
{
auto
start_it
=
active_timers_
.
find
(
key
);
auto
start_it
=
active_timers_
.
find
(
key
);
if
(
start_it
!=
active_timers_
.
end
())
{
if
(
start_it
!=
active_timers_
.
end
())
{
auto
duration
=
std
::
chrono
::
high_resolution_clock
::
now
()
-
start_it
->
second
;
auto
duration
=
std
::
chrono
::
high_resolution_clock
::
now
()
-
start_it
->
second
;
timings_
[
key
].
total_time
+=
duration
;
timings_
[
key
].
total_time
+=
duration
;
timings_
[
key
].
count
+=
1
;
timings_
[
key
].
count
+=
1
;
active_timers_
.
erase
(
start_it
);
active_timers_
.
erase
(
start_it
);
}
else
{
}
else
{
// Handle error: stop_timer called without a matching start_timer
// Handle error: stop_timer called without a matching start_timer
std
::
cerr
<<
"Warning: stop_timer called for key '"
<<
key
<<
"' without a matching start_timer.
\n
"
;
std
::
cerr
<<
"Warning: stop_timer called for key '"
<<
key
<<
"' without a matching start_timer.
\n
"
;
}
}
}
}
// Print out the collected statistical information
// Print out the collected statistical information
void
report
()
const
{
void
report
()
const
{
std
::
cout
<<
"Counters:
\n
"
;
std
::
cout
<<
"Counters:
\n
"
;
for
(
const
auto
&
kv
:
counters_
)
{
for
(
const
auto
&
kv
:
counters_
)
{
std
::
cout
<<
" "
<<
kv
.
first
<<
": "
<<
kv
.
second
<<
"
\n
"
;
std
::
cout
<<
" "
<<
kv
.
first
<<
": "
<<
kv
.
second
<<
"
\n
"
;
}
}
std
::
cout
<<
"
\n
Timers:
\n
"
;
std
::
cout
<<
"
\n
Timers:
\n
"
;
for
(
const
auto
&
kv
:
timings_
)
{
for
(
const
auto
&
kv
:
timings_
)
{
std
::
cout
<<
" "
<<
kv
.
first
<<
": count = "
<<
kv
.
second
.
count
std
::
cout
<<
" "
<<
kv
.
first
<<
": count = "
<<
kv
.
second
.
count
<<
", total_time = "
<<
kv
.
second
.
total_time
.
count
()
<<
"s"
<<
", total_time = "
<<
kv
.
second
.
total_time
.
count
()
<<
"s"
<<
", average_time = "
<<
(
kv
.
second
.
count
>
0
?
kv
.
second
.
total_time
.
count
()
/
kv
.
second
.
count
:
0
)
<<
", average_time = "
<<
(
kv
.
second
.
count
>
0
?
kv
.
second
.
total_time
.
count
()
/
kv
.
second
.
count
:
0
)
<<
"s
\n
"
;
<<
"s
\n
"
;
}
}
}
}
private:
private:
// Mapping from key to counter
// Mapping from key to counter
std
::
unordered_map
<
std
::
string
,
int64_t
>
counters_
;
std
::
unordered_map
<
std
::
string
,
int64_t
>
counters_
;
// Struct to hold timing information for a key
// Struct to hold timing information for a key
struct
TimingInfo
{
struct
TimingInfo
{
int64_t
count
=
0
;
int64_t
count
=
0
;
std
::
chrono
::
duration
<
double
>
total_time
=
std
::
chrono
::
duration
<
double
>::
zero
();
std
::
chrono
::
duration
<
double
>
total_time
=
std
::
chrono
::
duration
<
double
>::
zero
();
};
};
// Mapping from key to timing information
// Mapping from key to timing information
std
::
unordered_map
<
std
::
string
,
TimingInfo
>
timings_
;
std
::
unordered_map
<
std
::
string
,
TimingInfo
>
timings_
;
// Mapping from key to the start time of active timers
// Mapping from key to the start time of active timers
std
::
unordered_map
<
std
::
string
,
std
::
chrono
::
high_resolution_clock
::
time_point
>
active_timers_
;
std
::
unordered_map
<
std
::
string
,
std
::
chrono
::
high_resolution_clock
::
time_point
>
active_timers_
;
};
};
#endif
// STATISTICS_HPP
#endif // STATISTICS_HPP
csrc/balance_serve/sched/utils/timer.hpp
View file @
64de7843
#pragma once
#pragma once
#include "readable_number.hpp"
#include <cassert>
#include <cassert>
#include <chrono>
#include <chrono>
#include <iomanip>
#include <iomanip>
...
@@ -6,7 +7,6 @@
...
@@ -6,7 +7,6 @@
#include <map>
#include <map>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include "readable_number.hpp"
inline
std
::
string
doubleToStringR2
(
double
value
)
{
inline
std
::
string
doubleToStringR2
(
double
value
)
{
std
::
stringstream
stream
;
std
::
stringstream
stream
;
...
@@ -15,7 +15,7 @@ inline std::string doubleToStringR2(double value) {
...
@@ -15,7 +15,7 @@ inline std::string doubleToStringR2(double value) {
}
}
class
Timer
{
class
Timer
{
public:
public:
std
::
string
name
;
std
::
string
name
;
bool
tmp_timer
=
false
;
bool
tmp_timer
=
false
;
...
@@ -49,10 +49,14 @@ class Timer {
...
@@ -49,10 +49,14 @@ class Timer {
endTime
=
m_endTime
;
endTime
=
m_endTime
;
}
}
return
std
::
chrono
::
duration_cast
<
std
::
chrono
::
nanoseconds
>
(
endTime
-
m_startTime
).
count
();
return
std
::
chrono
::
duration_cast
<
std
::
chrono
::
nanoseconds
>
(
endTime
-
m_startTime
)
.
count
();
}
}
void
printElapsedMilliseconds
()
{
std
::
cout
<<
elapsedNs
()
/
1e6
<<
" ms"
<<
std
::
endl
;
}
void
printElapsedMilliseconds
()
{
std
::
cout
<<
elapsedNs
()
/
1e6
<<
" ms"
<<
std
::
endl
;
}
static
std
::
string
ns_to_string
(
double
duration
)
{
static
std
::
string
ns_to_string
(
double
duration
)
{
auto
nano_sec
=
duration
;
auto
nano_sec
=
duration
;
...
@@ -100,13 +104,13 @@ class Timer {
...
@@ -100,13 +104,13 @@ class Timer {
return
readable_number
(
ops
)
+
"op/s"
;
return
readable_number
(
ops
)
+
"op/s"
;
}
}
void
merge
(
Timer
&
other
)
{
void
merge
(
Timer
&
other
)
{
assert
(
m_isRunning
==
false
);
assert
(
m_isRunning
==
false
);
assert
(
other
.
m_isRunning
==
false
);
assert
(
other
.
m_isRunning
==
false
);
m_runningNs
+=
other
.
runningTimeNs
();
m_runningNs
+=
other
.
runningTimeNs
();
}
}
private:
private:
std
::
chrono
::
time_point
<
std
::
chrono
::
high_resolution_clock
>
m_startTime
;
std
::
chrono
::
time_point
<
std
::
chrono
::
high_resolution_clock
>
m_startTime
;
std
::
chrono
::
time_point
<
std
::
chrono
::
high_resolution_clock
>
m_endTime
;
std
::
chrono
::
time_point
<
std
::
chrono
::
high_resolution_clock
>
m_endTime
;
bool
m_isRunning
=
false
;
bool
m_isRunning
=
false
;
...
@@ -114,14 +118,14 @@ class Timer {
...
@@ -114,14 +118,14 @@ class Timer {
};
};
class
Counter
{
class
Counter
{
public:
public:
Counter
()
{}
Counter
()
{}
std
::
map
<
std
::
string
,
size_t
>
counters
;
std
::
map
<
std
::
string
,
size_t
>
counters
;
void
inc
(
const
char
*
name
,
size_t
num
)
{
counters
[
name
]
+=
num
;
};
void
inc
(
const
char
*
name
,
size_t
num
)
{
counters
[
name
]
+=
num
;
};
void
print
()
{
void
print
()
{
for
(
auto
&
p
:
counters
)
{
for
(
auto
&
p
:
counters
)
{
std
::
cout
<<
p
.
first
<<
" : "
<<
p
.
second
<<
std
::
endl
;
std
::
cout
<<
p
.
first
<<
" : "
<<
p
.
second
<<
std
::
endl
;
}
}
};
};
...
...
ktransformers/configs/model_configs.json
deleted
100644 → 0
View file @
9dd24ecd
{
"DeepSeek-Coder-V2-Instruct"
:
{
"hidden_size"
:
5120
,
"intermediate_size"
:
12288
,
"max_position_embeddings"
:
163840
,
"model_type"
:
"deepseek_v2"
,
"num_attention_heads"
:
128
,
"num_hidden_layers"
:
60
,
"num_key_value_heads"
:
128
,
"vocab_size"
:
102400
},
"DeepSeek-R1"
:
{
"hidden_size"
:
7168
,
"intermediate_size"
:
18432
,
"max_position_embeddings"
:
163840
,
"model_type"
:
"deepseek_v3"
,
"num_attention_heads"
:
128
,
"num_hidden_layers"
:
61
,
"num_key_value_heads"
:
128
,
"vocab_size"
:
129280
},
"DeepSeek-V2-Lite-Chat"
:
{
"hidden_size"
:
2048
,
"intermediate_size"
:
10944
,
"max_position_embeddings"
:
163840
,
"model_type"
:
"deepseek_v2"
,
"num_attention_heads"
:
16
,
"num_hidden_layers"
:
27
,
"num_key_value_heads"
:
16
,
"vocab_size"
:
102400
},
"DeepSeek-V3"
:
{
"hidden_size"
:
7168
,
"intermediate_size"
:
18432
,
"max_position_embeddings"
:
163840
,
"model_type"
:
"deepseek_v3"
,
"num_attention_heads"
:
128
,
"num_hidden_layers"
:
3
,
"num_key_value_heads"
:
128
,
"vocab_size"
:
129280
},
"DeepSeek-V3-bf16"
:
{
"hidden_size"
:
7168
,
"intermediate_size"
:
18432
,
"max_position_embeddings"
:
163840
,
"model_type"
:
"deepseek_v3"
,
"num_attention_heads"
:
128
,
"num_hidden_layers"
:
61
,
"num_key_value_heads"
:
128
,
"vocab_size"
:
129280
},
"LLaMA-2-7B-32K"
:
{
"hidden_size"
:
4096
,
"intermediate_size"
:
11008
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"llama"
,
"num_attention_heads"
:
32
,
"num_hidden_layers"
:
32
,
"num_key_value_heads"
:
32
,
"vocab_size"
:
32000
},
"Moonlight-16B-A3B-Instruct"
:
{
"hidden_size"
:
2048
,
"intermediate_size"
:
11264
,
"max_position_embeddings"
:
8192
,
"model_type"
:
"deepseek_v3"
,
"num_attention_heads"
:
16
,
"num_hidden_layers"
:
27
,
"num_key_value_heads"
:
16
,
"vocab_size"
:
163840
},
"Qwen2.5-32B-Instruct"
:
{
"hidden_size"
:
5120
,
"intermediate_size"
:
27648
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"qwen2"
,
"num_attention_heads"
:
40
,
"num_hidden_layers"
:
64
,
"num_key_value_heads"
:
8
,
"vocab_size"
:
152064
},
"Qwen2.5-32B-Instruct-GPTQ-Int4"
:
{
"hidden_size"
:
5120
,
"intermediate_size"
:
27648
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"qwen2"
,
"num_attention_heads"
:
40
,
"num_hidden_layers"
:
64
,
"num_key_value_heads"
:
8
,
"vocab_size"
:
152064
},
"Qwen2.5-7B-Instruct"
:
{
"hidden_size"
:
3584
,
"intermediate_size"
:
18944
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"qwen2"
,
"num_attention_heads"
:
28
,
"num_hidden_layers"
:
28
,
"num_key_value_heads"
:
4
,
"vocab_size"
:
152064
},
"Qwen2.5-7B-Instruct-GPTQ-Int4"
:
{
"hidden_size"
:
3584
,
"intermediate_size"
:
18944
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"qwen2"
,
"num_attention_heads"
:
28
,
"num_hidden_layers"
:
28
,
"num_key_value_heads"
:
4
,
"vocab_size"
:
152064
},
"qwen2-72b-instruct"
:
{
"hidden_size"
:
8192
,
"intermediate_size"
:
29568
,
"max_position_embeddings"
:
32768
,
"model_type"
:
"qwen2"
,
"num_attention_heads"
:
64
,
"num_hidden_layers"
:
80
,
"num_key_value_heads"
:
8
,
"vocab_size"
:
152064
}
}
\ No newline at end of file
ktransformers/configs/quant_configs.json
deleted
100644 → 0
View file @
9dd24ecd
{
"BF16"
:
{
"block_element_count"
:
1
,
"block_element_size"
:
2
,
"bytes_per_element"
:
2.0
,
"can_be_used_as_vector"
:
true
,
"has_min"
:
false
,
"has_scale"
:
false
,
"name"
:
"BF16"
,
"reference"
:
""
,
"type_of_dot_vector"
:
"BF16"
},
"FP16"
:
{
"block_element_count"
:
1
,
"block_element_size"
:
2
,
"bytes_per_element"
:
2.0
,
"can_be_used_as_vector"
:
true
,
"has_min"
:
false
,
"has_scale"
:
false
,
"name"
:
"FP16"
,
"reference"
:
""
,
"type_of_dot_vector"
:
"FP16"
},
"FP32"
:
{
"block_element_count"
:
1
,
"block_element_size"
:
4
,
"bytes_per_element"
:
4.0
,
"can_be_used_as_vector"
:
true
,
"has_min"
:
false
,
"has_scale"
:
false
,
"name"
:
"FP32"
,
"reference"
:
""
,
"type_of_dot_vector"
:
"FP32"
},
"Q4_0"
:
{
"block_element_count"
:
32
,
"block_element_size"
:
18
,
"bytes_per_element"
:
0.5625
,
"can_be_used_as_vector"
:
false
,
"has_min"
:
false
,
"has_scale"
:
true
,
"name"
:
"Q4_0"
,
"reference"
:
"https://huggingface.co/docs/hub/gguf"
,
"type_of_dot_vector"
:
"Q8_0"
},
"Q8_0"
:
{
"block_element_count"
:
32
,
"block_element_size"
:
34
,
"bytes_per_element"
:
1.0625
,
"can_be_used_as_vector"
:
true
,
"has_min"
:
false
,
"has_scale"
:
true
,
"name"
:
"Q8_0"
,
"reference"
:
"https://huggingface.co/docs/hub/gguf"
,
"type_of_dot_vector"
:
"Q8_0"
}
}
\ No newline at end of file
ktransformers/server/args.py
View file @
64de7843
...
@@ -70,6 +70,9 @@ class ArgumentParser:
...
@@ -70,6 +70,9 @@ class ArgumentParser:
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
self
.
cfg
.
batch_size
)
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
self
.
cfg
.
batch_size
)
parser
.
add_argument
(
"--cache_lens"
,
type
=
int
,
default
=
self
.
cfg
.
cache_lens
)
parser
.
add_argument
(
"--cache_lens"
,
type
=
int
,
default
=
self
.
cfg
.
cache_lens
)
# kvc2 config
parser
.
add_argument
(
"--kvc2_config_dir"
,
type
=
str
,
default
=
self
.
cfg
.
kvc2_config_dir
)
# log configs
# log configs
# log level: debug, info, warn, error, crit
# log level: debug, info, warn, error, crit
parser
.
add_argument
(
"--log_dir"
,
type
=
str
,
default
=
self
.
cfg
.
log_dir
)
parser
.
add_argument
(
"--log_dir"
,
type
=
str
,
default
=
self
.
cfg
.
log_dir
)
...
...
ktransformers/server/balance_serve/settings.py
View file @
64de7843
...
@@ -7,9 +7,7 @@ import sys, os
...
@@ -7,9 +7,7 @@ import sys, os
import
yaml
,
json
import
yaml
,
json
from
time
import
sleep
from
time
import
sleep
current_dir
=
os
.
path
.
dirname
(
__file__
)
# sched_path = os.path.abspath(os.path.join(current_dir, '../../../build/balance_serve/sched'))
# sys.path.insert(0, sched_path)
import
sched_ext
import
sched_ext
from
transformers
import
AutoConfig
from
transformers
import
AutoConfig
...
@@ -52,8 +50,7 @@ def create_sched_settings(args):
...
@@ -52,8 +50,7 @@ def create_sched_settings(args):
settings
.
v_cache_on
=
False
settings
.
v_cache_on
=
False
settings
.
kvc2_root_path
=
'/mnt/data/persist-kvc'
settings
.
kvc2_root_path
=
'/mnt/data/persist-kvc'
settings
.
kvc2_config_path
=
os
.
path
.
join
(
current_dir
,
".."
,
".."
,
"configs"
)
settings
.
kvc2_config_path
=
args
.
kvc2_config_dir
print
(
os
.
path
.
join
(
current_dir
,
".."
,
".."
,
"configs"
))
settings
.
memory_pool_size_GB
=
args
.
cpu_memory_size_GB
settings
.
memory_pool_size_GB
=
args
.
cpu_memory_size_GB
settings
.
evict_count
=
40
settings
.
evict_count
=
40
settings
.
kvc2_metrics_port
=
args
.
kvc2_metrics_port
settings
.
kvc2_metrics_port
=
args
.
kvc2_metrics_port
...
...
ktransformers/server/config/config.py
View file @
64de7843
...
@@ -34,12 +34,15 @@ class Config(metaclass=Singleton):
...
@@ -34,12 +34,15 @@ class Config(metaclass=Singleton):
user_path
:
str
=
os
.
path
.
expanduser
(
"~"
)
user_path
:
str
=
os
.
path
.
expanduser
(
"~"
)
localstore_path
:
str
=
os
.
path
.
join
(
user_path
,
".ktransformers"
)
localstore_path
:
str
=
os
.
path
.
join
(
user_path
,
".ktransformers"
)
kvc2_config_dir
=
os
.
path
.
join
(
localstore_path
,
"kvc2"
)
config_path
:
str
=
os
.
path
.
join
(
localstore_path
,
Config
.
CONFIG_FILE_NAME
)
config_path
:
str
=
os
.
path
.
join
(
localstore_path
,
Config
.
CONFIG_FILE_NAME
)
if
not
os
.
path
.
exists
(
config_yaml
):
if
not
os
.
path
.
exists
(
config_yaml
):
print
(
f
"Can't find config file,
{
config_yaml
}
"
)
print
(
f
"Can't find config file,
{
config_yaml
}
"
)
exit
(
-
1
)
exit
(
-
1
)
if
not
os
.
path
.
exists
(
localstore_path
):
if
not
os
.
path
.
exists
(
localstore_path
):
os
.
mkdir
(
localstore_path
)
os
.
mkdir
(
localstore_path
)
if
not
os
.
path
.
exists
(
kvc2_config_dir
):
os
.
mkdir
(
kvc2_config_dir
)
if
not
os
.
path
.
exists
(
config_path
):
if
not
os
.
path
.
exists
(
config_path
):
shutil
.
copyfile
(
config_yaml
,
config_path
)
shutil
.
copyfile
(
config_yaml
,
config_path
)
with
open
(
config_path
,
"r"
,
encoding
=
"utf-8"
)
as
fp
:
with
open
(
config_path
,
"r"
,
encoding
=
"utf-8"
)
as
fp
:
...
@@ -62,10 +65,13 @@ class Config(metaclass=Singleton):
...
@@ -62,10 +65,13 @@ class Config(metaclass=Singleton):
self
.
localstore_path
:
str
=
os
.
path
.
join
(
self
.
user_path
,
".ktransformers"
)
self
.
localstore_path
:
str
=
os
.
path
.
join
(
self
.
user_path
,
".ktransformers"
)
# log configs
# log configs
self
.
log_dir
=
os
.
path
.
join
(
self
.
localstore_path
,
cfg
[
"log"
][
"dir"
])
self
.
log_dir
=
os
.
path
.
join
(
self
.
localstore_path
,
cfg
[
"log"
][
"dir"
])
if
not
os
.
path
.
exists
(
self
.
log_dir
):
os
.
mkdir
(
self
.
log_dir
)
self
.
log_file
=
cfg
[
"log"
][
"file"
]
self
.
log_file
=
cfg
[
"log"
][
"file"
]
self
.
log_level
=
cfg
[
"log"
][
"level"
]
self
.
log_level
=
cfg
[
"log"
][
"level"
]
self
.
backup_count
=
cfg
[
"log"
][
"backup_count"
]
self
.
backup_count
=
cfg
[
"log"
][
"backup_count"
]
self
.
kvc2_config_dir
=
os
.
path
.
join
(
self
.
localstore_path
,
"kvc2"
)
# server configs
# server configs
self
.
server
:
dict
=
cfg
.
get
(
"server"
,
{})
self
.
server
:
dict
=
cfg
.
get
(
"server"
,
{})
self
.
server_ip
=
self
.
server
.
get
(
"ip"
,
"0.0.0.0"
)
self
.
server_ip
=
self
.
server
.
get
(
"ip"
,
"0.0.0.0"
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment