Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
56503d9b
Unverified
Commit
56503d9b
authored
Oct 14, 2024
by
Byron Hsu
Committed by
GitHub
Oct 14, 2024
Browse files
[1/N] Remove `CacheConfig` import in all model files (#1658)
parent
02bc9579
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
16 additions
and
26 deletions
+16
-26
python/sglang/srt/models/mixtral_quant.py
python/sglang/srt/models/mixtral_quant.py
+1
-2
python/sglang/srt/models/olmoe.py
python/sglang/srt/models/olmoe.py
+1
-2
python/sglang/srt/models/qwen.py
python/sglang/srt/models/qwen.py
+1
-2
python/sglang/srt/models/qwen2.py
python/sglang/srt/models/qwen2.py
+1
-2
python/sglang/srt/models/qwen2_moe.py
python/sglang/srt/models/qwen2_moe.py
+4
-5
python/sglang/srt/models/stablelm.py
python/sglang/srt/models/stablelm.py
+1
-2
python/sglang/srt/models/torch_native_llama.py
python/sglang/srt/models/torch_native_llama.py
+1
-2
python/sglang/srt/models/xverse.py
python/sglang/srt/models/xverse.py
+1
-2
python/sglang/srt/models/xverse_moe.py
python/sglang/srt/models/xverse_moe.py
+4
-5
python/sglang/srt/models/yivl.py
python/sglang/srt/models/yivl.py
+1
-2
No files found.
python/sglang/srt/models/mixtral_quant.py
View file @
56503d9b
...
@@ -23,7 +23,6 @@ import torch
...
@@ -23,7 +23,6 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
MixtralConfig
from
transformers
import
MixtralConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
...
@@ -325,7 +324,7 @@ class QuantMixtralForCausalLM(nn.Module):
...
@@ -325,7 +324,7 @@ class QuantMixtralForCausalLM(nn.Module):
self
,
self
,
config
:
MixtralConfig
,
config
:
MixtralConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
...
...
python/sglang/srt/models/olmoe.py
View file @
56503d9b
...
@@ -23,7 +23,6 @@ import torch
...
@@ -23,7 +23,6 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
...
@@ -298,7 +297,7 @@ class OlmoeForCausalLM(nn.Module):
...
@@ -298,7 +297,7 @@ class OlmoeForCausalLM(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
...
python/sglang/srt/models/qwen.py
View file @
56503d9b
...
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
...
@@ -243,7 +242,7 @@ class QWenLMHeadModel(nn.Module):
...
@@ -243,7 +242,7 @@ class QWenLMHeadModel(nn.Module):
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
):
):
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
...
...
python/sglang/srt/models/qwen2.py
View file @
56503d9b
...
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
...
@@ -268,7 +267,7 @@ class Qwen2ForCausalLM(nn.Module):
...
@@ -268,7 +267,7 @@ class Qwen2ForCausalLM(nn.Module):
self
,
self
,
config
:
Qwen2Config
,
config
:
Qwen2Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
...
...
python/sglang/srt/models/qwen2_moe.py
View file @
56503d9b
...
@@ -23,7 +23,6 @@ import torch
...
@@ -23,7 +23,6 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
...
@@ -160,7 +159,7 @@ class Qwen2MoeAttention(nn.Module):
...
@@ -160,7 +159,7 @@ class Qwen2MoeAttention(nn.Module):
rope_theta
:
float
=
10000
,
rope_theta
:
float
=
10000
,
rope_scaling
:
Optional
[
Dict
[
str
,
Any
]]
=
None
,
rope_scaling
:
Optional
[
Dict
[
str
,
Any
]]
=
None
,
max_position_embeddings
:
int
=
8192
,
max_position_embeddings
:
int
=
8192
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -236,7 +235,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
...
@@ -236,7 +235,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
layer_id
:
int
,
layer_id
:
int
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -306,7 +305,7 @@ class Qwen2MoeModel(nn.Module):
...
@@ -306,7 +305,7 @@ class Qwen2MoeModel(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -355,7 +354,7 @@ class Qwen2MoeForCausalLM(nn.Module):
...
@@ -355,7 +354,7 @@ class Qwen2MoeForCausalLM(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
...
python/sglang/srt/models/stablelm.py
View file @
56503d9b
...
@@ -22,7 +22,6 @@ from typing import Iterable, Optional, Tuple
...
@@ -22,7 +22,6 @@ from typing import Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
...
@@ -241,7 +240,7 @@ class StableLmForCausalLM(nn.Module):
...
@@ -241,7 +240,7 @@ class StableLmForCausalLM(nn.Module):
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
...
...
python/sglang/srt/models/torch_native_llama.py
View file @
56503d9b
...
@@ -24,7 +24,6 @@ import torch
...
@@ -24,7 +24,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
from
transformers
import
LlamaConfig
from
transformers
import
LlamaConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
...
@@ -380,7 +379,7 @@ class TorchNativeLlamaForCausalLM(nn.Module):
...
@@ -380,7 +379,7 @@ class TorchNativeLlamaForCausalLM(nn.Module):
self
,
self
,
config
:
LlamaConfig
,
config
:
LlamaConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
...
...
python/sglang/srt/models/xverse.py
View file @
56503d9b
...
@@ -22,7 +22,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -22,7 +22,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
LlamaConfig
from
transformers
import
LlamaConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
@@ -297,7 +296,7 @@ class XverseForCausalLM(nn.Module):
...
@@ -297,7 +296,7 @@ class XverseForCausalLM(nn.Module):
self
,
self
,
config
:
LlamaConfig
,
config
:
LlamaConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
efficient_weight_load
=
False
,
efficient_weight_load
=
False
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
...
python/sglang/srt/models/xverse_moe.py
View file @
56503d9b
...
@@ -19,7 +19,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -19,7 +19,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
...
@@ -183,7 +182,7 @@ class XverseAttention(nn.Module):
...
@@ -183,7 +182,7 @@ class XverseAttention(nn.Module):
rope_theta
:
float
=
10000
,
rope_theta
:
float
=
10000
,
rope_scaling
:
Optional
[
Dict
[
str
,
Any
]]
=
None
,
rope_scaling
:
Optional
[
Dict
[
str
,
Any
]]
=
None
,
max_position_embeddings
:
int
=
8192
,
max_position_embeddings
:
int
=
8192
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -260,7 +259,7 @@ class XverseDecoderLayer(nn.Module):
...
@@ -260,7 +259,7 @@ class XverseDecoderLayer(nn.Module):
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
layer_id
:
int
,
layer_id
:
int
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -328,7 +327,7 @@ class XverseModel(nn.Module):
...
@@ -328,7 +327,7 @@ class XverseModel(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
@@ -371,7 +370,7 @@ class XverseMoeForCausalLM(nn.Module):
...
@@ -371,7 +370,7 @@ class XverseMoeForCausalLM(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
...
python/sglang/srt/models/yivl.py
View file @
56503d9b
...
@@ -20,7 +20,6 @@ from typing import Iterable, Optional, Tuple
...
@@ -20,7 +20,6 @@ from typing import Iterable, Optional, Tuple
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
vllm.config
import
CacheConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
@@ -32,7 +31,7 @@ class YiVLForCausalLM(LlavaLlamaForCausalLM):
...
@@ -32,7 +31,7 @@ class YiVLForCausalLM(LlavaLlamaForCausalLM):
self
,
self
,
config
:
LlavaConfig
,
config
:
LlavaConfig
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
(
config
,
quant_config
,
cache_config
)
super
().
__init__
(
config
,
quant_config
,
cache_config
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment