Unverified commit d9b3b018, authored by Enrique Shockwave, committed by GitHub
Browse files

enable marlin kernels (#286)

parent 745ea007
...@@ -13,12 +13,13 @@ from sglang.srt.utils import is_multimodal_model ...@@ -13,12 +13,13 @@ from sglang.srt.utils import is_multimodal_model
from sglang.utils import get_available_gpu_memory from sglang.utils import get_available_gpu_memory
from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
from vllm.model_executor.model_loader import _set_default_torch_dtype from vllm.model_executor.model_loader import _set_default_torch_dtype
from vllm.model_executor.parallel_utils.parallel_state import initialize_model_parallel from vllm.model_executor.parallel_utils.parallel_state import initialize_model_parallel
import sglang import sglang
QUANTIONCONFIG_MAPPING = {"awq": AWQConfig, "gptq": GPTQConfig} QUANTIONCONFIG_MAPPING = {"awq": AWQConfig, "gptq": GPTQConfig, "marlin": MarlinConfig}
logger = logging.getLogger("model_runner") logger = logging.getLogger("model_runner")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment