change / sglang · Commits · 1a8f995c

Commit 1a8f995c (unverified), authored Mar 03, 2025 by Lianmin Zheng, committed via GitHub on Mar 03, 2025

remove cache configs in model definitions (#4031)

Parent: a3ab768a

Changes: 4 files changed, 0 additions and 72 deletions (+0 -72)
python/sglang/srt/models/grok.py (+0 -1)
python/sglang/srt/models/llama_eagle.py (+0 -1)
python/sglang/srt/models/qwen2_eagle.py (+0 -1)
python/sglang/test/test_utils.py (+0 -69)
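All three model files lose the same unused `cache_config` constructor parameter. A minimal sketch of the call-site consequence, using an illustrative stub rather than the real sglang class: constructors that still receive the keyword now fail with a `TypeError`.

```python
from typing import Optional

class QuantizationConfig:  # stand-in for sglang's real class
    pass

class Grok1ForCausalLM:  # illustrative stub, not the real model
    # Signature shape after this commit: the cache_config parameter is gone.
    def __init__(self, config, quant_config: Optional[QuantizationConfig] = None) -> None:
        self.config = config

Grok1ForCausalLM(config={})  # fine
try:
    Grok1ForCausalLM(config={}, cache_config=None)  # old call shape
except TypeError as e:
    print(f"stale call sites now fail fast: {e}")
```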
python/sglang/srt/models/grok.py (view file @ 1a8f995c)

```diff
@@ -359,7 +359,6 @@ class Grok1ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
```
python/sglang/srt/models/llama_eagle.py (view file @ 1a8f995c)

```diff
@@ -106,7 +106,6 @@ class LlamaForCausalLMEagle(LlamaForCausalLM):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config=None,
     ) -> None:
         nn.Module.__init__(self)
         self.config = config
```
python/sglang/srt/models/qwen2_eagle.py (view file @ 1a8f995c)

```diff
@@ -107,7 +107,6 @@ class Qwen2ForCausalLMEagle(Qwen2ForCausalLM):
         self,
         config: Qwen2Config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config=None,
     ) -> None:
         nn.Module.__init__(self)
         self.config = config
```
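Both eagle draft models call `nn.Module.__init__(self)` rather than `super().__init__()`, which skips the heavyweight constructor of the `LlamaForCausalLM` / `Qwen2ForCausalLM` parent. A minimal sketch of that pattern with toy classes (names here are illustrative, not from sglang):

```python
import torch.nn as nn

class Base(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.heavy = nn.Linear(4096, 4096)  # expensive setup the subclass wants to skip

class EagleVariant(Base):
    def __init__(self) -> None:
        # Call nn.Module.__init__ directly so only module bookkeeping runs;
        # Base.__init__ (and its heavy allocation) is bypassed entirely.
        nn.Module.__init__(self)
        self.small = nn.Linear(16, 16)

print(hasattr(EagleVariant(), "heavy"))  # False: Base.__init__ never ran
```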
python/sglang/test/test_utils.py (view file @ 1a8f995c)

```diff
@@ -159,45 +159,6 @@ def call_generate_guidance(
     return rets if n > 1 else rets[0]
 
 
-async def call_generate_lmql(
-    prompt, temperature, max_tokens, stop=None, n=1, max_len=4096, model=None, **kwargs
-):
-    assert model is not None
-    import lmql
-
-    if stop != None:
-
-        @lmql.query(model=model)
-        async def program(question, max_tokens, stop):
-            '''lmql
-            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens and STOPS_AT(ANSWER, stop)
-            return ANSWER
-            '''
-
-    else:
-
-        @lmql.query(model=model)
-        async def program(question, max_tokens):
-            '''lmql
-            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens
-            return ANSWER
-            '''
-
-    tasks = [
-        program(
-            question=prompt,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            stop=stop,
-            max_len=max_len,
-            **kwargs,
-        )
-        for _ in range(n)
-    ]
-    rets = await asyncio.gather(*tasks)
-    return rets if n > 1 else rets[0]
-
-
 def call_select_lightllm(context, choices, url=None):
     assert url is not None
```
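The deleted helper fanned out `n` concurrent sampling calls with `asyncio.gather` and unwrapped the single-result case; the surviving backends in this file share that shape. A self-contained sketch of the fan-out pattern (`fake_generate` is a stand-in for a real backend call):

```python
import asyncio

async def fake_generate(prompt: str) -> str:
    await asyncio.sleep(0)  # stand-in for a real model/backend call
    return prompt + " world"

async def call_generate(prompt: str, n: int = 1):
    # Launch n identical sampling tasks, await them together,
    # and unwrap the list when only one sample was requested.
    tasks = [fake_generate(prompt) for _ in range(n)]
    rets = await asyncio.gather(*tasks)
    return rets if n > 1 else rets[0]

print(asyncio.run(call_generate("Hello", n=2)))  # ['Hello world', 'Hello world']
```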
```diff
@@ -247,23 +208,6 @@ def call_select_guidance(context, choices, model=None):
     return choices.index(out["answer"])
 
 
-async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=None):
-    assert model is not None
-    import lmql
-
-    @lmql.query(model=model)
-    async def program(ctx, choices):
-        '''lmql
-        """{ctx}[ANSWER]""" where ANSWER in set(choices)
-        return ANSWER
-        '''
-
-    answer = await program(
-        ctx=context, choices=choices, temperature=temperature, max_len=max_len
-    )
-    return choices.index(answer)
-
-
 def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
     parser.add_argument("--parallel", type=int, default=64)
     parser.add_argument("--host", type=str, default="http://127.0.0.1")
```
```diff
@@ -278,7 +222,6 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
             "lightllm",
             "gserver",
             "guidance",
-            "lmql",
             "srt-raw",
             "llama.cpp",
         ],
```
```diff
@@ -295,7 +238,6 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
         "vllm": 21000,
         "outlines": 21000,
         "lightllm": 22000,
-        "lmql": 23000,
         "srt-raw": 30000,
         "gserver": 9988,
     }
```
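These two hunks drop `lmql` from both the `--backend` choices and the backend-to-default-port table. A condensed, hypothetical sketch of how such a table typically pairs with the parser (the real `add_common_other_args_and_parse` registers more arguments):

```python
import argparse

# Backend -> default port, as left after this commit (lmql removed).
default_port = {
    "vllm": 21000,
    "outlines": 21000,
    "lightllm": 22000,
    "srt-raw": 30000,
    "gserver": 9988,
}

parser = argparse.ArgumentParser()
parser.add_argument("--backend", choices=list(default_port), default="srt-raw")
parser.add_argument("--port", type=int, default=None)
args = parser.parse_args([])
if args.port is None:
    args.port = default_port[args.backend]  # fall back to the table
print(args.backend, args.port)  # srt-raw 30000
```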
```diff
@@ -343,11 +285,6 @@ def _get_call_generate(args: argparse.Namespace):
         call_generate = partial(call_generate_guidance, model=model)
         call_generate("Hello,", 1.0, 8, ".")
         return call_generate
-    elif args.backend == "lmql":
-        import lmql
-
-        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
-        return partial(call_generate_lmql, model=model)
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
```
```diff
@@ -365,12 +302,6 @@ def _get_call_select(args: argparse.Namespace):
         call_select("Hello,", ["world", "earth"])
         return call_select
-    elif args.backend == "lmql":
-        import lmql
-
-        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
-        return partial(call_select_lmql, model=model)
-
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
```
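Both dispatchers bind the backend-specific model once with `functools.partial` and hand back a callable with the common signature; after this commit the `lmql` branch falls through to the `ValueError`. A minimal sketch of that binding pattern (the function body is a stand-in, not the real guidance call):

```python
from functools import partial

def call_generate_guidance(prompt, temperature, max_tokens, stop, model=None):
    # Stand-in body; the real helper in test_utils.py calls the guidance backend.
    return f"[{model}] {prompt}"

# Bind the model once; callers only supply the common generation arguments.
call_generate = partial(call_generate_guidance, model="guidance-model")
print(call_generate("Hello,", 1.0, 8, "."))  # [guidance-model] Hello,
```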