Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ktransformers
Commits
08f0bd5e
Unverified
Commit
08f0bd5e
authored
Apr 17, 2025
by
Atream
Committed by
GitHub
Apr 17, 2025
Browse files
Merge pull request #1168 from kvcache-ai/Atream-patch-1
remove hard code max_length
parents
22a30d70
e6fb4d5a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
5 deletions
+2
-5
ktransformers/server/balance_serve/inference/query_manager.py
...nsformers/server/balance_serve/inference/query_manager.py
+2
-5
No files found.
ktransformers/server/balance_serve/inference/query_manager.py
View file @
08f0bd5e
...
@@ -70,13 +70,11 @@ class QueryInfo:
...
@@ -70,13 +70,11 @@ class QueryInfo:
class
QueryManager
:
class
QueryManager
:
max_length
:
int
=
65536
page_size
:
int
=
256
page_size
:
int
=
256
device
:
torch
.
device
device
:
torch
.
device
query_map
:
dict
[
int
,
QueryInfo
]
query_map
:
dict
[
int
,
QueryInfo
]
def
__init__
(
self
,
max_length
=
65536
,
page_size
=
256
,
device
=
torch
.
device
(
'cuda'
)):
def
__init__
(
self
,
page_size
=
256
,
device
=
torch
.
device
(
'cuda'
)):
self
.
max_length
=
max_length
self
.
page_size
=
page_size
self
.
page_size
=
page_size
self
.
device
=
device
self
.
device
=
device
self
.
query_map
=
{}
self
.
query_map
=
{}
...
@@ -87,7 +85,6 @@ class QueryManager:
...
@@ -87,7 +85,6 @@ class QueryManager:
id
=
batch
.
query_ids
[
i
]
id
=
batch
.
query_ids
[
i
]
if
id
not
in
self
.
query_map
:
if
id
not
in
self
.
query_map
:
print
(
f
"add query id:
{
id
}
, batch.query_lengths:
{
batch
.
query_lengths
[
i
]
}
, batch_query_tokens:
{
batch
.
query_tokens
[
i
].
shape
}
, batch.block_indexes:
{
batch
.
block_indexes
[
i
]
}
"
)
print
(
f
"add query id:
{
id
}
, batch.query_lengths:
{
batch
.
query_lengths
[
i
]
}
, batch_query_tokens:
{
batch
.
query_tokens
[
i
].
shape
}
, batch.block_indexes:
{
batch
.
block_indexes
[
i
]
}
"
)
assert
batch
.
query_tokens
[
i
].
size
(
0
)
<
self
.
max_length
,
"query max length in batchquerytodo exceeds internal max_length"
query_info
=
QueryInfo
(
id
=
id
,
query_length
=
batch
.
query_lengths
[
i
],
max_length
=
batch
.
query_tokens
[
i
].
size
(
0
)
+
1
,
page_size
=
self
.
page_size
,
device
=
self
.
device
,
temperature
=
batch
.
sample_options
[
i
].
temperature
,
top_p
=
batch
.
sample_options
[
i
].
top_p
)
query_info
=
QueryInfo
(
id
=
id
,
query_length
=
batch
.
query_lengths
[
i
],
max_length
=
batch
.
query_tokens
[
i
].
size
(
0
)
+
1
,
page_size
=
self
.
page_size
,
device
=
self
.
device
,
temperature
=
batch
.
sample_options
[
i
].
temperature
,
top_p
=
batch
.
sample_options
[
i
].
top_p
)
query_info
.
query_tokens
[:
query_info
.
query_length
].
copy_
(
batch
.
query_tokens
[
i
][:
query_info
.
query_length
].
to
(
self
.
device
))
query_info
.
query_tokens
[:
query_info
.
query_length
].
copy_
(
batch
.
query_tokens
[
i
][:
query_info
.
query_length
].
to
(
self
.
device
))
...
@@ -155,4 +152,4 @@ class QueryManager:
...
@@ -155,4 +152,4 @@ class QueryManager:
query_update
.
active_position
=
query_info
.
active_position
query_update
.
active_position
=
query_info
.
active_position
query_updates
.
append
(
query_update
)
query_updates
.
append
(
query_update
)
return
query_updates
return
query_updates
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment