Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fa5b0b39
Commit
fa5b0b39
authored
Oct 25, 2024
by
zhuwenwen
Browse files
support telechat-12b
parent
e96edbbe
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
2 deletions
+8
-2
csrc/attention/static_switch.h
csrc/attention/static_switch.h
+3
-0
csrc/attention/static_switch_tc.h
csrc/attention/static_switch_tc.h
+3
-0
vllm/model_executor/models/telechat_12B.py
vllm/model_executor/models/telechat_12B.py
+2
-2
No files found.
csrc/attention/static_switch.h
View file @
fa5b0b39
...
@@ -48,6 +48,9 @@
...
@@ -48,6 +48,9 @@
} else if (HEADDIM == 128) { \
} else if (HEADDIM == 128) { \
constexpr static int HEAD_SIZE = 128; \
constexpr static int HEAD_SIZE = 128; \
return __VA_ARGS__(); \
return __VA_ARGS__(); \
} else if (HEADDIM == 160) { \
constexpr static int HEAD_SIZE = 160; \
return __VA_ARGS__(); \
} else if (HEADDIM == 192) { \
} else if (HEADDIM == 192) { \
constexpr static int HEAD_SIZE = 192; \
constexpr static int HEAD_SIZE = 192; \
return __VA_ARGS__(); \
return __VA_ARGS__(); \
...
...
csrc/attention/static_switch_tc.h
View file @
fa5b0b39
...
@@ -40,6 +40,9 @@
...
@@ -40,6 +40,9 @@
} else if (HEADDIM == 128) { \
} else if (HEADDIM == 128) { \
constexpr static int HEAD_SIZE = 128; \
constexpr static int HEAD_SIZE = 128; \
return __VA_ARGS__(); \
return __VA_ARGS__(); \
} else if (HEADDIM == 160) { \
constexpr static int HEAD_SIZE = 160; \
return __VA_ARGS__(); \
} else if (HEADDIM == 256) { \
} else if (HEADDIM == 256) { \
constexpr static int HEAD_SIZE = 256; \
constexpr static int HEAD_SIZE = 256; \
return __VA_ARGS__(); \
return __VA_ARGS__(); \
...
...
vllm/model_executor/models/telechat_12B.py
View file @
fa5b0b39
...
@@ -45,13 +45,13 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
...
@@ -45,13 +45,13 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
QuantizationConfig
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.sampler
import
Sampler
,
SamplerOutput
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.model_loader.weight_utils
import
(
from
vllm.model_executor.model_loader.weight_utils
import
(
default_weight_loader
,
kv_cache_scales_loader
)
default_weight_loader
,
kv_cache_scales_loader
)
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.sequence
import
IntermediateTensors
,
SamplerOutput
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils
import
is_hip
,
print_warning_once
from
vllm.utils
import
is_hip
,
print_warning_once
from
.interfaces
import
SupportsLoRA
from
.interfaces
import
SupportsLoRA
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment