OpenDAS / ktransformers
"docs/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "1216e5fe7fbff3299d94f5e153cba74ce854f567"
support Moonlight

Commit e8e02e5c, authored Feb 23, 2025 by Atream
Parent: 95d937c5
Showing 3 changed files with 4 additions and 10 deletions (+4 -10):

ktransformers/local_chat.py         +2 -8
ktransformers/operators/experts.py  +1 -1
ktransformers/util/utils.py         +1 -1
ktransformers/local_chat.py

```diff
@@ -58,13 +58,12 @@ def local_chat(
     gguf_path: str | None = None,
     max_new_tokens: int = 300,
     cpu_infer: int = Config().cpu_infer,
-    use_cuda_graph: bool = False,
+    use_cuda_graph: bool = True,
     prompt_file: str | None = None,
     mode: str = "normal",
     force_think: bool = False,
 ):
     torch.set_grad_enabled(False)
     Config().cpu_infer = cpu_infer
```
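The functional change in this hunk flips the `use_cuda_graph` default from `False` to `True`, so decoding replays a captured CUDA graph instead of re-launching every kernel per token. For readers unfamiliar with the mechanism, here is a minimal capture/replay sketch using the stock `torch.cuda.CUDAGraph` API; it is an illustration only, not ktransformers' internal code, and `step` is a toy stand-in for one fixed-shape decode step.

```python
import torch

def step(x: torch.Tensor) -> torch.Tensor:
    return x * 2 + 1  # toy stand-in for the model's decode step

# Requires a CUDA device; graph capture needs fixed shapes and addresses,
# so inputs/outputs live in reused "static" buffers.
static_in = torch.zeros(1, 8, device="cuda")
static_out = torch.empty_like(static_in)

# Warm up on a side stream before capture, as the PyTorch docs recommend.
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    static_out.copy_(step(static_in))
torch.cuda.current_stream().wait_stream(s)

# Capture once...
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    static_out.copy_(step(static_in))

# ...then, per generated token: refill the static input and replay.
static_in.fill_(3.0)
g.replay()         # one graph launch instead of many kernel launches
print(static_out)  # tensor([[7., 7., ...]], device='cuda:0')
```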
```diff
@@ -160,9 +159,6 @@ def local_chat(
     input_tensor = tokenizer.apply_chat_template(
         messages, add_generation_prompt=True, return_tensors="pt"
     )
-    # input_tensor = torch.tensor([[0, 6657, 84646]], device=input_tensor.device)
     if force_think:
         token_thinks = torch.tensor([tokenizer.encode("<think>\n", add_special_tokens=False)], device=input_tensor.device)
         input_tensor = torch.cat(
```
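For context, the `force_think` branch encodes `"<think>\n"` and splices it into the prompt so the model is forced to open a thinking block; the `torch.cat(` call itself is cut off by the collapsed diff above. Below is a sketch of that splice under the assumption that the think tokens are appended after the chat-template prompt along `dim=1`; the toy `encode` stands in for a real tokenizer, so only the tensor plumbing is meaningful.

```python
import torch

def encode(text: str) -> list[int]:
    return [ord(c) for c in text]  # toy stand-in for tokenizer.encode(..., add_special_tokens=False)

input_tensor = torch.tensor([encode("User: hi\nAssistant:")])  # [1, seq] prompt ids
token_thinks = torch.tensor([encode("<think>\n")], device=input_tensor.device)

# Concatenate along the sequence dimension so the reply starts inside <think>.
input_tensor = torch.cat([input_tensor, token_thinks], dim=1)
print(input_tensor.shape)  # torch.Size([1, prompt_len + think_len])
```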
@@ -184,6 +180,4 @@ def local_chat(
...
@@ -184,6 +180,4 @@ def local_chat(
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# fire.Fire(local_chat)
fire
.
Fire
(
local_chat
)
# local_chat(model_path="/mnt/data/model/DeepSeek-R1", gguf_path="/mnt/data/model/DeepseekV3-q4km-gguf", cpu_infer=33, force_think=False)
\ No newline at end of file
local_chat
(
model_path
=
"/mnt/data/model/Moonlight-16B-A3B-Instruct"
,
gguf_path
=
"/mnt/data/model/Moonlight-16B-A3B-Instruct-GGUF"
,
cpu_infer
=
33
,
force_think
=
False
)
\ No newline at end of file
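Note that this hunk swaps the `fire.Fire(local_chat)` CLI entry point for a hardcoded Moonlight smoke test. If you build on this commit and want the command line back, restoring the original entry point is enough, since python-fire maps `local_chat`'s keyword parameters directly to flags. A sketch, assuming the package is importable:

```python
import fire
from ktransformers.local_chat import local_chat

if __name__ == "__main__":
    fire.Fire(local_chat)

# Example invocation; flags mirror local_chat's signature:
#   python -m ktransformers.local_chat \
#       --model_path=/mnt/data/model/Moonlight-16B-A3B-Instruct \
#       --gguf_path=/mnt/data/model/Moonlight-16B-A3B-Instruct-GGUF \
#       --cpu_infer=33
```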
ktransformers/operators/experts.py

```diff
@@ -159,7 +159,7 @@ class KExpertsCPU(KExpertsBase):
         down_ptr = ctypes.addressof(
             ctypes.cast(self.down.ctypes.data, ctypes.POINTER(ctypes.c_uint64)).contents
         )
-        # print(self.gate_qtype, self.up_qtype, self.down_qtype)
+        #print(self.gate_type, self.up_type, self.down_type)
         n_routed_experts = self.n_routed_experts
         # n_routed_experts = len(self.orig_module)
         moe_config = MOEConfig(
```
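The unchanged context around the edited comment shows how `KExpertsCPU` hands raw weight addresses to the C++ MoE backend: a numpy array's `.ctypes.data` is already the integer address, and the `cast(...).contents` / `addressof(...)` round trip returns that same integer. A standalone demonstration with plain numpy, no ktransformers dependency:

```python
import ctypes
import numpy as np

# The weights live in a numpy buffer; a native kernel needs a raw address.
down = np.arange(16, dtype=np.uint64)

# Pattern from the diff: cast the integer address to a typed pointer,
# dereference it, then take addressof, which yields the same integer back.
down_ptr = ctypes.addressof(
    ctypes.cast(down.ctypes.data, ctypes.POINTER(ctypes.c_uint64)).contents
)
assert down_ptr == down.ctypes.data  # the round trip is an identity

# Reading through the typed pointer shows it really aliases the array:
p = ctypes.cast(down_ptr, ctypes.POINTER(ctypes.c_uint64))
assert p[3] == down[3]
```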
ktransformers/util/utils.py

```diff
@@ -207,7 +207,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
         tokens.append(int(next_token))
         seq_length += 1
-        if next_token[0].item() == tokenizer.eos_token_id or tokenizer.decode(next_token) == '<|im_end|>':
+        if next_token[0].item() == tokenizer.eos_token_id or tokenizer.decode(next_token.tolist()) == '<|im_end|>':
             print(stream.end(), end="", flush=True)
             break
         else:
```
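The single utils.py change converts `next_token` (a tensor) to a plain Python list before `tokenizer.decode`. Stock Hugging Face tokenizers coerce tensors themselves, so this reads as a compatibility fix, plausibly for Moonlight's custom `trust_remote_code` tokenizer, whose `decode` may expect real ints. A toy reproduction of that failure mode, with `decode` standing in for such a strict tokenizer:

```python
import torch

# A strict decode that indexes a vocab dict per id: it needs plain ints.
vocab = {0: "<|im_end|>", 1: "hi"}

def decode(ids):
    return "".join(vocab[i] for i in ids)

next_token = torch.tensor([0])

# decode(next_token) would raise KeyError: iterating a tensor yields
# 0-d tensors, which do not match the int keys in `vocab`.
print(decode(next_token.tolist()))  # "<|im_end|>", works after .tolist()
```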