Unverified commit 9946165e, authored by OlivierDehaene, committed by GitHub

chore: add pre-commit (#1569)

parent 142cdabe
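The commit message only says "chore: add pre-commit", so the actual hook set is not visible in these hunks. The changes below (trailing-whitespace removal, missing final newlines added, black-style Python reformatting) are consistent with a config along these lines — a minimal sketch with hypothetical `rev` pins, not necessarily the repository's real `.pre-commit-config.yaml`:

```yaml
# Hypothetical .pre-commit-config.yaml; hook ids are the standard ones,
# but the exact config and pinned revisions in this commit may differ.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace   # strips trailing spaces (Makefile hunks below)
      - id: end-of-file-fixer     # adds missing final newlines ("\ No newline at end of file")
  - repo: https://github.com/psf/black
    rev: 24.2.0
    hooks:
      - id: black                 # produces the approximate=(...) reformatting below
```

A single `pre-commit run --all-files` with hooks like these would generate exactly this kind of whitespace-and-formatting-only diff.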
@@ -13,4 +13,4 @@ build-flash-attention: flash-attention

 install-flash-attention: build-flash-attention
 	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
\ No newline at end of file
+	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
@@ -13,7 +13,7 @@ install-causal-conv1d: build-causal-conv1d
 	cd causal-conv1d/ && pip install .

 # selective-scan depends on causal-conv1d
-selective-scan: 
+selective-scan:
 	rm -rf mamba
 	git clone https://github.com/state-spaces/mamba.git mamba
@@ -21,8 +21,8 @@ build-selective-scan: selective-scan
 	cd mamba/ && git fetch && git checkout $(selective_scan_commit)
 	cd mamba && python setup.py build

-install-selective-scan: install-causal-conv1d build-selective-scan 
+install-selective-scan: install-causal-conv1d build-selective-scan
 	pip uninstall selective-scan-cuda -y || true
 	cd mamba && pip install .

-build-all: build-causal-conv1d build-selective-scan
\ No newline at end of file
+build-all: build-causal-conv1d build-selective-scan
@@ -12,4 +12,4 @@ make install
 ```shell
 make run-dev
-```
\ No newline at end of file
+```
@@ -247,4 +247,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
     &forward,
     "Bloom attention mechanism forward (CUDA)"
   );
-}
\ No newline at end of file
+}
@@ -16,4 +16,4 @@ void column_remap_cuda
     const uint32_t* x_map
 );

-#endif
\ No newline at end of file
+#endif
@@ -50,4 +50,4 @@ private:
 void g_q4_keep_matrix(Q4Matrix* m);
 void g_q4_free_matrices();

-#endif
\ No newline at end of file
+#endif
@@ -48,4 +48,4 @@ __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t
 #define rocblas_set_stream hipblasSetStream
 #define rocblas_hgemm __compat_hipblasHgemm

-#endif
\ No newline at end of file
+#endif
@@ -118,4 +118,4 @@ public:
     }
 };

-#endif
\ No newline at end of file
+#endif
@@ -33,4 +33,4 @@ void clear_tensor_cuda
     int size_n
 );

-#endif
\ No newline at end of file
+#endif
@@ -100,4 +100,4 @@ __forceinline__ __device__ void dequant_2bit_16
 #endif

-#endif
\ No newline at end of file
+#endif
@@ -224,4 +224,4 @@ __forceinline__ __device__ void dequant_4bit_8_gptq
 #endif

-#endif
\ No newline at end of file
+#endif
@@ -204,4 +204,4 @@ __forceinline__ __device__ void dequant_5bit_32
 #endif

-#endif
\ No newline at end of file
+#endif
@@ -40,5 +40,3 @@ __forceinline__ __device__ void dequant_6bit_16
 #endif

 #endif
-
-
@@ -35,4 +35,4 @@ __forceinline__ __device__ void dequant_8bit_8
 #endif

-#endif
\ No newline at end of file
+#endif
@@ -51,4 +51,4 @@ inline void gpu_assert(cudaError_t code, const char *file, int line, bool abort=
 void print_global_mem(const half* ptr, int rows, int columns, int stride);

-#endif
\ No newline at end of file
+#endif
@@ -251,9 +251,9 @@ class LlamaMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
         # Fuse gate and up proj
@@ -255,9 +255,9 @@ class MistralMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
         # Fuse gate and up proj
@@ -344,9 +344,9 @@ class BlockSparseMoE(nn.Module):
         if "gelu" in act:
             self.act = lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         elif "silu" in act:
             self.act = torch.nn.functional.silu
@@ -600,9 +600,9 @@ class DenseMoE(nn.Module):
         if "gelu" in act:
             self.act = lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
            )
         elif "silu" in act:
             self.act = torch.nn.functional.silu
@@ -187,9 +187,9 @@ class FlashMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
@@ -225,9 +225,9 @@ class PhiMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
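Every Python hunk in this commit is the same mechanical black reformatting: the inline conditional that selects the GELU approximation is wrapped in parentheses, with no behavioral change. A minimal standalone sketch of the unchanged selection logic (`act` here is a hypothetical activation name standing in for the value each model reads from its config):

```python
import torch

# Hypothetical activation name; in the models above it comes from the config.
act = "gelu_pytorch_tanh"

# Same logic before and after the reformat: the "fast"/"pytorch_tanh" GELU
# variants use the tanh approximation, plain "gelu" uses the exact kernel.
gelu_act = lambda x: torch.nn.functional.gelu(
    x,
    approximate=("tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"),
)

print(gelu_act(torch.randn(4)))
```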