@@ -9,4 +9,3 @@ Standard attention mechanism uses High Bandwidth Memory (HBM) to store, read and
...
Flash Attention is implemented for supported models. You can check out the complete list of models that support it [here](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models); look for the models with the `flash` prefix.
You can learn more about Flash Attention by reading the [paper](https://arxiv.org/abs/2205.14135).
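To illustrate the idea (this is not TGI's internal implementation), here is a minimal PyTorch sketch: on recent GPUs, `torch.nn.functional.scaled_dot_product_attention` can dispatch to a fused Flash Attention kernel that tiles the computation through fast on-chip SRAM instead of writing the full attention matrix to HBM. The tensor shapes below are arbitrary examples.

```python
import torch
import torch.nn.functional as F

# Arbitrary example shapes: batch=2, heads=8, sequence length=1024, head dim=64
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

q = torch.randn(2, 8, 1024, 64, device=device, dtype=dtype)
k = torch.randn(2, 8, 1024, 64, device=device, dtype=dtype)
v = torch.randn(2, 8, 1024, 64, device=device, dtype=dtype)

# On supported GPUs this call can be dispatched to a fused Flash Attention
# kernel, which avoids materializing the (seq_len x seq_len) attention matrix
# in HBM; on CPU it falls back to the standard math implementation.
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)  # torch.Size([2, 8, 1024, 64])
```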