ox696c / ktransformers · Commit b90362b5 (unverified)

Authored Apr 25, 2025 by wang jiahao; committed by GitHub on Apr 25, 2025
Parents: 67042d11, 7af83f9e

Merge pull request #1198 from kvcache-ai/fix-max_new_tokens

fix load default max_new_tokens
Showing 4 changed files with 21 additions and 10 deletions (+21, -10)
ktransformers/server/api/openai/endpoints/chat.py       +13  -2
ktransformers/server/api/openai/legacy/completions.py    +4  -4
ktransformers/server/schemas/endpoints/chat.py           +2  -2
ktransformers/server/schemas/legacy/completions.py       +2  -2
ktransformers/server/api/openai/endpoints/chat.py

@@ -138,12 +138,23 @@ async def chat_completion(request: Request, create: ChatCompletionCreate):
     # Process messages with tool functionality if needed
     enhanced_messages = list(create.messages)
-    if create.max_tokens < 0 or create.max_completion_tokens < 0:
+    if create.max_tokens is not None and create.max_tokens < 0:
         return JSONResponse(
             status_code=400,
             content={
                 "object": "error",
-                "message": f"max_new_tokens must be at least 0, got {create.max_tokens}.",
+                "message": f"max_tokens must be at least 0, got {create.max_tokens}.",
                 "type": "BadRequestError",
                 "param": None,
                 "code": 400
             })
+    if create.max_completion_tokens is not None and create.max_completion_tokens < 0:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "object": "error",
+                "message": f"max_completion_tokens must be at least 0, got {create.max_completion_tokens}.",
+                "type": "BadRequestError",
+                "param": None,
+                "code": 400
+            })
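Note on the endpoint change above: once the schema defaults move to None (see the schema diffs below), the old combined check `if create.max_tokens < 0 or create.max_completion_tokens < 0` would raise a TypeError for any request that omits either field, because Python 3 cannot order NoneType against int. A minimal standalone sketch of the pitfall and of the guarded pattern the new code adopts (variable names here are illustrative, not the project's):

# Illustrative only: why the `is not None` guard matters once the
# schema default becomes None instead of Config().max_new_tokens.
max_tokens = None  # client omitted the field

try:
    if max_tokens < 0:  # old-style combined check
        print("reject")
except TypeError as e:
    # Python 3: '<' not supported between instances of 'NoneType' and 'int'
    print(f"old check crashes on an omitted field: {e}")

# New-style check: compare only when the client actually sent a value.
if max_tokens is not None and max_tokens < 0:
    print("reject")
else:
    print("accept (field unset or non-negative)")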
ktransformers/server/api/openai/legacy/completions.py

@@ -14,22 +14,22 @@ router = APIRouter()
 @router.post("/completions", tags=['openai'])
 async def create_completion(request: Request, create: CompletionCreate):
     id = str(uuid4())
-    if create.max_tokens < 0:
+    if create.max_tokens is not None and create.max_tokens < 0:
         return JSONResponse(
             status_code=400,
             content={
                 "object": "error",
-                "message": f"max_new_tokens must be at least 0, got {create.max_tokens}.",
+                "message": f"max_tokens must be at least 0, got {create.max_tokens}.",
                 "type": "BadRequestError",
                 "param": None,
                 "code": 400
             })
-    if create.max_completion_tokens < 0:
+    if create.max_completion_tokens is not None and create.max_completion_tokens < 0:
         return JSONResponse(
             status_code=400,
             content={
                 "object": "error",
-                "message": f"max_new_tokens must be at least 0, got {create.max_completion_tokens}.",
+                "message": f"max_completion_tokens must be at least 0, got {create.max_completion_tokens}.",
                 "type": "BadRequestError",
                 "param": None,
                 "code": 400
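For illustration, the corrected validation should now reject a negative max_tokens with a message that names the request parameter itself rather than the internal max_new_tokens setting. A hypothetical client call against a local ktransformers server (the URL and port are assumptions about the deployment, not part of this commit):

import requests

# Hypothetical local endpoint; adjust host/port to your deployment.
resp = requests.post(
    "http://localhost:10002/v1/completions",
    json={"model": "ktransformers", "prompt": "hello", "max_tokens": -1},
)
print(resp.status_code)  # expected: 400
print(resp.json())       # expected "message": "max_tokens must be at least 0, got -1."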
ktransformers/server/schemas/endpoints/chat.py

@@ -73,8 +73,8 @@ class ChatCompletionCreate(BaseModel):
     stream_options: Optional[Dict[str, Any]] = None
     frequency_penalty: float = 0
     presence_penalty: float = 0
-    max_tokens: Optional[int] = Field(default=Config().max_new_tokens)
-    max_completion_tokens: Optional[int] = Field(default=Config().max_new_tokens)
+    max_tokens: Optional[int] = Field(default=None)
+    max_completion_tokens: Optional[int] = Field(default=None)
     return_speed: Optional[bool] = Field(default=False)
     def get_tokenizer_messages(self):
         return [m.to_tokenizer_message() for m in self.messages]
ktransformers/server/schemas/legacy/completions.py

@@ -10,8 +10,8 @@ class CompletionCreate(BaseModel):
     stream: bool = False
     temperature: Optional[float] = Field(default=Config().temperature)
     top_p: Optional[float] = Field(default=Config().top_p)
-    max_tokens: Optional[int] = Field(default=Config().max_new_tokens)
-    max_completion_tokens: Optional[int] = Field(default=Config().max_new_tokens)
+    max_tokens: Optional[int] = Field(default=None)
+    max_completion_tokens: Optional[int] = Field(default=None)

     def get_tokenizer_messages(self):
         if isinstance(self.prompt, List):
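The two schema diffs are the core of the fix: with Field(default=Config().max_new_tokens), the server could not tell an omitted field apart from an explicitly supplied value, and the config default was baked in when the schema module was imported. Defaulting to None defers the fallback to request time. A minimal self-contained sketch of that pattern, with a stand-in Config class and a hypothetical resolution helper (neither is the project's actual code):

from typing import Optional
from pydantic import BaseModel, Field


class Config:
    # Stand-in for the project's Config; assume max_new_tokens comes from YAML.
    max_new_tokens = 2048


class CompletionCreate(BaseModel):
    # None means "the client did not set it"; the server picks the fallback later.
    max_tokens: Optional[int] = Field(default=None)
    max_completion_tokens: Optional[int] = Field(default=None)


def resolve_max_new_tokens(create: CompletionCreate) -> int:
    # Hypothetical helper: prefer the newer OpenAI field, then the legacy
    # one, then fall back to the server-side config default.
    if create.max_completion_tokens is not None:
        return create.max_completion_tokens
    if create.max_tokens is not None:
        return create.max_tokens
    return Config().max_new_tokens


print(resolve_max_new_tokens(CompletionCreate()))                # 2048 (config default)
print(resolve_max_new_tokens(CompletionCreate(max_tokens=64)))   # 64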