Fix circular imports in gptq.py and unblock test explorer (#4736)

4c584fc6 · Stefan He · GitHub · 77cf771e · 4c584fc6 · 4c584fc6
Unverified commit 4c584fc6, authored Mar 24, 2025 by Stefan He; committed by GitHub on Mar 24, 2025.
3 changed files
--- a/python/sglang/srt/layers/quantization/gptq.py
+++ b/python/sglang/srt/layers/quantization/gptq.py
@@ -6,7 +6,6 @@ import torch

 from sglang.srt.layers.linear import LinearBase
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
-from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
 from sglang.srt.utils import is_cuda

 _is_cuda = is_cuda()
@@ -434,6 +433,9 @@ class MarlinConfig(QuantizationConfig):

        from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod

+        # Delay import to avoid circular dependency
+        from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
+
        if isinstance(layer, LinearBase) or (
            isinstance(layer, ParallelLMHead) and self.lm_head_quantized
        ):

--- a/python/sglang/test/__init__.py
+++ b/python/sglang/test/__init__.py
--- a/python/sglang/test/attention/__init__.py
+++ b/python/sglang/test/attention/__init__.py