Add tp hint for deployment (#555)

* add tp hint for deploy
* fix lint
* assert tp in turbomind
* fix lint

Add tp hint for deployment (#555)
* add tp hint for deploy
* fix lint
* assert tp in turbomind
* fix lint
77a26812 · Chen Xin · GitHub · 6904053f · 77a26812 · 77a26812
Commit 77a26812 (unverified), authored Oct 13, 2023 by Chen Xin; committed by GitHub on Oct 13, 2023
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 1 deletion

lmdeploy/serve/turbomind/deploy.py lmdeploy/serve/turbomind/deploy.py +3 -1

lmdeploy/turbomind/turbomind.py lmdeploy/turbomind/turbomind.py +1 -0

No files found.
--- a/lmdeploy/serve/turbomind/deploy.py
+++ b/lmdeploy/serve/turbomind/deploy.py
@@ -972,7 +972,7 @@ def main(model_name: str,
            META's llama format, and 'hf' means huggingface format
        tokenizer_path (str): the path of tokenizer model
        dst_path (str): the destination path that saves outputs
-        tp (int): the number of GPUs used for tensor parallelism
+        tp (int): the number of GPUs used for tensor parallelism, should be 2^n
        quant_path (str): path of the quantized model, which can be None
        group_size (int): a parameter used in AWQ to quantize fp16 weights
            to 4 bits
@@ -981,6 +981,8 @@ def main(model_name: str,
        f"'{model_name}' is not supported. " \
        f'The supported models are: {MODELS.module_dict.keys()}'

+    assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
+
    if model_format is None:
        model_format = 'qwen' if model_name == 'qwen-7b' else 'hf'


--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -86,6 +86,7 @@ class TurboMind:
        node_num = 1

        # read meta from model path
+        assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
        self.gpu_count = tp
        self.session_len = 2048
        data_type = 'fp16'