chore: add pre-commit (#1569)

9946165e · OlivierDehaene · GitHub · 142cdabe · 9946165e · 9946165e
Unverified commit 9946165e, authored Feb 16, 2024 by OlivierDehaene; committed by GitHub on Feb 16, 2024.
20 changed files
--- a/server/Makefile-flash-att
+++ b/server/Makefile-flash-att
--- a/server/Makefile-selective-scan
+++ b/server/Makefile-selective-scan
--- a/server/README.md
+++ b/server/README.md
--- a/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu
+++ b/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu
--- a/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh
--- a/server/exllama_kernels/exllama_kernels/hip_compat.cuh
+++ b/server/exllama_kernels/exllama_kernels/hip_compat.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh
@@ -40,5 +40,3 @@ __forceinline__ __device__ void dequant_6bit_16
 #endif
 #endif
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -251,9 +251,9 @@ class LlamaMLP(nn.Module):
            if "gelu" not in act
            else lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        )
        # Fuse gate and up proj

--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -255,9 +255,9 @@ class MistralMLP(nn.Module):
            if "gelu" not in act
            else lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        )
        # Fuse gate and up proj

--- a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -344,9 +344,9 @@ class BlockSparseMoE(nn.Module):
        if "gelu" in act:
            self.act = lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        elif "silu" in act:
            self.act = torch.nn.functional.silu
@@ -600,9 +600,9 @@ class DenseMoE(nn.Module):
        if "gelu" in act:
            self.act = lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        elif "silu" in act:
            self.act = torch.nn.functional.silu

--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -187,9 +187,9 @@ class FlashMLP(nn.Module):
            if "gelu" not in act
            else lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        )

--- a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -225,9 +225,9 @@ class PhiMLP(nn.Module):
            if "gelu" not in act
            else lambda x: torch.nn.functional.gelu(
                x,
-                approximate="tanh"
+                approximate=(
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
-                else "none",
+                ),
            )
        )