ox696c / ktransformers · Commits

Commit 170b7a60, authored Aug 21, 2024 by TangJingqi (parent: 43587228)

fix server don't accept yaml path as param; fix server static cache device problem
Showing 3 changed files with 12 additions and 9 deletions:

  ktransformers/server/backend/interfaces/ktransformers.py   +9 -7
  ktransformers/server/main.py                                +2 -2
  requirements-local_chat.txt                                 +1 -0
ktransformers/server/backend/interfaces/ktransformers.py

@@ -25,8 +25,10 @@ class KTransformersInterface(TransformersInterface):
         with torch.device("meta"):
             self.model = custom_models[config.architectures[0]](config)
-        optimize_rule_path = default_optimize_rules[config.architectures[0]]
+        if default_args.optimize_config_path is not None:
+            optimize_rule_path = default_optimize_rules[config.architectures[0]]
+        else:
+            optimize_rule_path = args.optimize_config_path
         # print(optimize_config)
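
For orientation, this hunk replaces an unconditional lookup of the bundled optimize-rule YAML with a choice between the bundled table and a user-supplied path. A minimal standalone sketch of the behavior the commit message describes (honor a user YAML path, otherwise fall back to the per-architecture default); the helper name and dict contents here are illustrative, not from the repo. Note that the committed condition tests `is not None` before taking the default table, the reverse of this sketch:

    # Sketch only: the real default_optimize_rules maps architecture names to
    # YAML rule files shipped with ktransformers; this stub stands in for it.
    from typing import Optional

    default_optimize_rules = {"DeepseekV2ForCausalLM": "DeepSeek-V2-Chat.yaml"}  # illustrative stub

    def select_optimize_rule_path(architecture: str, user_yaml_path: Optional[str]) -> str:
        # Behavior per the commit message: honor a user-supplied YAML path,
        # otherwise fall back to the bundled default for this architecture.
        if user_yaml_path is None:
            return default_optimize_rules[architecture]
        return user_yaml_path

    assert select_optimize_rule_path("DeepseekV2ForCausalLM", None) == "DeepSeek-V2-Chat.yaml"
    assert select_optimize_rule_path("DeepseekV2ForCausalLM", "my_rules.yaml") == "my_rules.yaml"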
@@ -38,10 +40,10 @@ class KTransformersInterface(TransformersInterface):
         optimize_and_load_gguf(self.model, optimize_rule_path, gguf_path, config)
-        logger.info(f'{args.model_name} loaded from {args.model_dir} to {args.device}')
-        self.cache = StaticCache(config=self.model.config, max_batch_size=args.batch_size, max_cache_len=args.cache_lens, device=args.device, dtype=self.model.dtype)
-        logger.info(f'StaticCache (length={args.cache_lens}) created at {args.device}, batch size:{args.batch_size}')
+        device_map = self.model.gguf_loader.tensor_device_map
+        logger.info(f'{args.model_name} loaded from {args.model_dir} to {device_map}')
+        self.cache = StaticCache(config=self.model.config, max_batch_size=args.batch_size, max_cache_len=args.cache_lens, device=device_map, dtype=self.model.dtype)
+        logger.info(f'StaticCache (length={args.cache_lens}) created at {device_map}, batch size:{args.batch_size}')
         self.model.generation_config = GenerationConfig.from_pretrained(args.model_dir)
         if self.model.generation_config.pad_token_id is None:
             self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id
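
This is the "static cache device problem" from the commit message: once optimize rules spread layers across devices, a single `args.device` no longer describes where the KV cache lives, so the cache is now built from the GGUF loader's `tensor_device_map`. A minimal sketch of per-layer cache placement, assuming (the diff does not show this) a nested map shaped like `{"blk.0.self_attn": {"generate_device": "cuda:0"}}`:

    # Sketch, not ktransformers' implementation: put each layer's KV buffer on
    # the device that executes that layer, per an assumed tensor_device_map shape.
    import torch

    def layer_cache_device(tensor_device_map: dict, layer_idx: int, fallback: str = "cpu") -> torch.device:
        entry = tensor_device_map.get(f"blk.{layer_idx}.self_attn", {})
        return torch.device(entry.get("generate_device", fallback))

    device_map = {
        "blk.0.self_attn": {"generate_device": "cpu"},  # "cuda:0" in a real split
        "blk.1.self_attn": {"generate_device": "cpu"},  # "cuda:1" in a real split
    }
    key_cache = [
        torch.zeros((1, 8, 128, 64), device=layer_cache_device(device_map, i))
        for i in range(2)
    ]
    print([t.device for t in key_cache])  # each buffer lands on its layer's device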
@@ -63,7 +65,7 @@ class KTransformersInterface(TransformersInterface):
             return self.logits_to_token(logits)

         if self.use_static_cache:
-            mask = torch.ones((1, self.seq_length)).to(self.args.device)
+            mask = torch.ones((1, self.seq_length)).to(torch_device)
             logits = self.model(
                 self.current_ids,
                 cache_position=self.active_cache_position,
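
The decode path had the same mismatch: the attention mask was pinned to `args.device` even when the consuming layer lives elsewhere, so it now moves to `torch_device` (presumably defined upstream of this hunk). A small self-contained illustration of the failure mode, runnable on CPU-only machines:

    # Tensors that meet in one op must share a device; the hunk moves the mask
    # to the model's device before use. This sketch shows why.
    import torch

    dev = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    mask = torch.ones((1, 8))                # torch.ones defaults to CPU
    hidden = torch.randn((1, 8), device=dev)
    # With dev = cuda:0, `hidden * mask` would raise "Expected all tensors to be
    # on the same device"; moving the mask first is safe on any box:
    out = hidden * mask.to(dev)
    print(out.device)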
ktransformers/server/main.py

@@ -107,9 +107,9 @@ def main():
     parser.add_argument("--web", type=bool, default=False)
     parser.add_argument("--model_name", type=str, default=cfg.model_name)
     parser.add_argument("--model_path", type=str, default=cfg.model_path)
-    parser.add_argument("--device", type=str, default=cfg.model_device)
+    parser.add_argument("--device", type=str, default=cfg.model_device, help="Warning: Abandoning this parameter")
     parser.add_argument("--gguf_path", type=str, default=cfg.gguf_path)
-    parser.add_argument("--optimize_config_path", type=str, required=False)
+    parser.add_argument("--optimize_config_path", default=None, type=str, required=False)
     parser.add_argument("--cpu_infer", type=int, default=cfg.cpu_infer)
     parser.add_argument("--type", type=str, default=cfg.backend_type)
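
On the CLI side, `--optimize_config_path` gains an explicit `default=None`. argparse already defaults omitted optional flags to `None`, so this documents the fallback rather than changing it; the interface code above tests that `None` to decide between bundled rules and a user YAML. The `--device` flag keeps working, but its new help text marks it as being abandoned in favor of per-tensor placement. A minimal sketch of the flag's two states (the YAML filename is hypothetical):

    # Sketch: None means "no user YAML supplied"; a string is the rule-file path.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--optimize_config_path", default=None, type=str, required=False)

    print(parser.parse_args([]).optimize_config_path)  # None
    print(parser.parse_args(["--optimize_config_path", "my_rules.yaml"]).optimize_config_path)  # my_rules.yaml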
requirements-local_chat.txt

@@ -3,3 +3,4 @@ transformers
 numpy
 torch>=2.3.0
 packaging
+cpufeature
\ No newline at end of file
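
The new `cpufeature` dependency exposes a dict of host-CPU capability flags (AVX2, AVX-512 variants, core counts) that CPU-side code can branch on. A minimal probe, assuming only the package's documented `CPUFeature` dict; available keys vary by platform:

    # Sketch: query cpufeature's capability dict (x86-oriented; keys differ elsewhere).
    from cpufeature import CPUFeature

    print(CPUFeature.get("AVX2"), CPUFeature.get("AVX512f"))
    print(CPUFeature.get("num_physical_cores"))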