Commit d76078ef authored by yangql's avatar yangql
Browse files

initing develop

parent a2630e0f
......@@ -756,39 +756,39 @@ class BaseGPTQForCausalLM(nn.Module, PushToHubMixin):
use_triton = False
use_tritonv2 = False
if not disable_exllama and not EXLLAMA_KERNELS_AVAILABLE:
logger.warning(
"Exllama kernel is not installed, reset disable_exllama to True. "
"This may because you installed auto_gptq using a pre-build wheel "
"on Windows, in which exllama_kernels are not compiled. To use "
"exllama_kernels to further speedup inference, you can re-install "
"auto_gptq from source."
)
# logger.warning(
# "Exllama kernel is not installed, reset disable_exllama to True. "
# "This may because you installed auto_gptq using a pre-build wheel "
# "on Windows, in which exllama_kernels are not compiled. To use "
# "exllama_kernels to further speedup inference, you can re-install "
# "auto_gptq from source."
# )
disable_exllama = True
if not disable_exllamav2 and not EXLLAMAV2_KERNELS_AVAILABLE:
logger.warning(
"Exllamav2 kernel is not installed, reset disable_exllamav2 to True. "
"This may because you installed auto_gptq using a pre-build wheel "
"on Windows, in which exllama_kernels are not compiled. To use "
"exllama_kernels to further speedup inference, you can re-install "
"auto_gptq from source."
)
disable_exllamav2 = True
if not AUTOGPTQ_CUDA_AVAILABLE:
logger.warning(
"CUDA kernels for auto_gptq are not installed, this will result in "
"very slow inference speed. This may because:\n"
"1. You disabled CUDA extensions compilation by setting BUILD_CUDA_EXT=0 when install auto_gptq from source.\n"
"2. You are using pytorch without CUDA support.\n"
"3. CUDA and nvcc are not installed in your device."
)
if use_qigen and QIGEN_AVAILABLE:
logger.warning("QIgen is active. Ignores all settings related to cuda.")
inject_fused_attention = False
inject_fused_mlp = False
use_triton = False
disable_exllama = False
# logger.warning(
# "Exllamav2 kernel is not installed, reset disable_exllamav2 to True. "
# "This may because you installed auto_gptq using a pre-build wheel "
# "on Windows, in which exllama_kernels are not compiled. To use "
# "exllama_kernels to further speedup inference, you can re-install "
# "auto_gptq from source."
# )
disable_exllamav2 = True
# if not AUTOGPTQ_CUDA_AVAILABLE:
# logger.warning(
# "CUDA kernels for auto_gptq are not installed, this will result in "
# "very slow inference speed. This may because:\n"
# "1. You disabled CUDA extensions compilation by setting BUILD_CUDA_EXT=0 when install auto_gptq from source.\n"
# "2. You are using pytorch without CUDA support.\n"
# "3. CUDA and nvcc are not installed in your device."
# )
# if use_qigen and QIGEN_AVAILABLE:
# logger.warning("QIgen is active. Ignores all settings related to cuda.")
# inject_fused_attention = False
# inject_fused_mlp = False
# use_triton = False
# disable_exllama = False
# disable_exllamav2 = True
if not disable_exllamav2 and not disable_exllama:
logger.warning(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment