Unverified Commit 77a26812 authored by Chen Xin's avatar Chen Xin Committed by GitHub
Browse files

Add tp hint for deployment (#555)

* add tp hint for deploy

* fix lint

* assert tp in turbomind

* fix lint
parent 6904053f
...@@ -972,7 +972,7 @@ def main(model_name: str, ...@@ -972,7 +972,7 @@ def main(model_name: str,
META's llama format, and 'hf' means huggingface format META's llama format, and 'hf' means huggingface format
tokenizer_path (str): the path of tokenizer model tokenizer_path (str): the path of tokenizer model
dst_path (str): the destination path that saves outputs dst_path (str): the destination path that saves outputs
tp (int): the number of GPUs used for tensor parallelism tp (int): the number of GPUs used for tensor parallelism, should be 2^n
quant_path (str): path of the quantized model, which can be None quant_path (str): path of the quantized model, which can be None
group_size (int): a parameter used in AWQ to quantize fp16 weights group_size (int): a parameter used in AWQ to quantize fp16 weights
to 4 bits to 4 bits
...@@ -981,6 +981,8 @@ def main(model_name: str, ...@@ -981,6 +981,8 @@ def main(model_name: str,
f"'{model_name}' is not supported. " \ f"'{model_name}' is not supported. " \
f'The supported models are: {MODELS.module_dict.keys()}' f'The supported models are: {MODELS.module_dict.keys()}'
assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
if model_format is None: if model_format is None:
model_format = 'qwen' if model_name == 'qwen-7b' else 'hf' model_format = 'qwen' if model_name == 'qwen-7b' else 'hf'
......
...@@ -86,6 +86,7 @@ class TurboMind: ...@@ -86,6 +86,7 @@ class TurboMind:
node_num = 1 node_num = 1
# read meta from model path # read meta from model path
assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
self.gpu_count = tp self.gpu_count = tp
self.session_len = 2048 self.session_len = 2048
data_type = 'fp16' data_type = 'fp16'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment