Unverified Commit e76bb324 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #238 from InfiniTensor/issue/237

issue/237 - support hygon in bench and inf server
parents 39b594ff 8f71a5ec
...@@ -15,7 +15,7 @@ xmake && xmake install ...@@ -15,7 +15,7 @@ xmake && xmake install
- 运行模型推理测试 - 运行模型推理测试
```bash ```bash
python scripts/jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] path/to/model_dir [n_device] python scripts/jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon | --ali] path/to/model_dir [n_device]
``` ```
- 部署模型推理服务 - 部署模型推理服务
...@@ -77,11 +77,11 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA ...@@ -77,11 +77,11 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
- 单次推理测试 - 单次推理测试
- llama示例 - llama示例
```bash ```bash
python examples/llama.py [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --ali] --model_path=<path/to/model_dir> python examples/jiuge.py [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --ali | --cambricon | --hygon] --model_path=<path/to/model_dir>
``` ```
- 例如: - 例如:
```bash ```bash
python examples/llama.py --nvidia --model_path=/models/TinyLlama-1.1B-Chat-v1.0 python examples/jiuge.py --nvidia --model_path=/models/TinyLlama-1.1B-Chat-v1.0
``` ```
- 分布式推理测试 - 分布式推理测试
- 9g示例 - 9g示例
...@@ -119,7 +119,7 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA ...@@ -119,7 +119,7 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
- 运行推理基准测试(C-Eval/MMLU) - 运行推理基准测试(C-Eval/MMLU)
```bash ```bash
python test/bench/test_benchmark.py [--cpu | --nvidia | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> --bench {ceval|mmlu} [--backend cpp] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH] python test/bench/test_benchmark.py [--cpu | --nvidia | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon | --ali] <path/to/model_dir> --bench {ceval|mmlu} [--backend cpp] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]
``` ```
- 参数说明: - 参数说明:
......
...@@ -167,6 +167,11 @@ def get_args(): ...@@ -167,6 +167,11 @@ def get_args():
action="store_true", action="store_true",
help="Run alippu test", help="Run alippu test",
) )
parser.add_argument(
"--hygon",
action="store_true",
help="Run hygon test",
)
parser.add_argument( parser.add_argument(
"--model", "--model",
type=str, type=str,
...@@ -387,6 +392,8 @@ if __name__ == "__main__": ...@@ -387,6 +392,8 @@ if __name__ == "__main__":
device_str = "mlu" device_str = "mlu"
elif args.ali: elif args.ali:
device_str = "cuda" device_str = "cuda"
elif args.hygon:
device_str = "cuda"
else: else:
print( print(
"python examples/bench.py --nvidia --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50" "python examples/bench.py --nvidia --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50"
......
...@@ -594,6 +594,7 @@ def parse_args(): ...@@ -594,6 +594,7 @@ def parse_args():
parser.add_argument("--iluvatar", action="store_true", help="Use Iluvatar device") parser.add_argument("--iluvatar", action="store_true", help="Use Iluvatar device")
parser.add_argument("--cambricon", action="store_true", help="Use Cambricon device") parser.add_argument("--cambricon", action="store_true", help="Use Cambricon device")
parser.add_argument("--ali", action="store_true", help="Use Ali PPU device") parser.add_argument("--ali", action="store_true", help="Use Ali PPU device")
parser.add_argument("--hygon", action="store_true", help="Use Hygon DCU device")
parser.add_argument( parser.add_argument(
"--enable-graph", "--enable-graph",
action="store_true", action="store_true",
...@@ -631,9 +632,11 @@ def main(): ...@@ -631,9 +632,11 @@ def main():
device = "mlu" device = "mlu"
elif args.ali: elif args.ali:
device = "cuda" device = "cuda"
elif args.hygon:
device = "cuda"
else: else:
print( print(
"Usage: python infinilm.server.inference_server [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --cambricon | --ali] " "Usage: python infinilm.server.inference_server [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --cambricon | --ali | --hygon] "
"--model_path=<path/to/model_dir> --max_tokens=MAX_TOKENS --max_batch_size=MAX_BATCH_SIZE" "--model_path=<path/to/model_dir> --max_tokens=MAX_TOKENS --max_batch_size=MAX_BATCH_SIZE"
"\n" "\n"
"Example: python infinilm.server.inference_server --nvidia --model_path=/data/shared/models/9G7B_MHA/ " "Example: python infinilm.server.inference_server --nvidia --model_path=/data/shared/models/9G7B_MHA/ "
......
...@@ -73,6 +73,7 @@ class InfiniLMBenchmark(BaseBenchmark): ...@@ -73,6 +73,7 @@ class InfiniLMBenchmark(BaseBenchmark):
"iluvatar": "cuda", "iluvatar": "cuda",
"kunlun": "cuda", "kunlun": "cuda",
"hygon": "cuda", "hygon": "cuda",
"ali": "cuda",
} }
device_name = device_map.get(device_type_str.lower(), "cpu") device_name = device_map.get(device_type_str.lower(), "cpu")
...@@ -184,11 +185,17 @@ class InfiniLMBenchmark(BaseBenchmark): ...@@ -184,11 +185,17 @@ class InfiniLMBenchmark(BaseBenchmark):
start_time = time.perf_counter() start_time = time.perf_counter()
# For cpp backend, reset cache before generation if use_cache is enabled # For cpp backend, reset cache before generation if use_cache is enabled
if self.model.use_cache and hasattr(self.model, "_model") and hasattr(self.model._model, "reset_cache"): if (
self.model.use_cache
and hasattr(self.model, "_model")
and hasattr(self.model._model, "reset_cache")
):
batch_size = input_ids.shape[0] batch_size = input_ids.shape[0]
seq_len = input_ids.shape[1] seq_len = input_ids.shape[1]
max_cache_len = max_steps + seq_len max_cache_len = max_steps + seq_len
self.model.reset_cache(batch_size=batch_size, initial_capacity=max_cache_len) self.model.reset_cache(
batch_size=batch_size, initial_capacity=max_cache_len
)
# Use model's built-in generate() method which properly handles KV cache # Use model's built-in generate() method which properly handles KV cache
# Pass sampling parameters (temperature, topk, topp) via kwargs # Pass sampling parameters (temperature, topk, topp) via kwargs
...@@ -656,7 +663,7 @@ def test(): ...@@ -656,7 +663,7 @@ def test():
# Parse arguments manually to handle device flags properly # Parse arguments manually to handle device flags properly
if len(sys.argv) < 4: if len(sys.argv) < 4:
print( print(
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--split {test|val|all}] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]" "Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon | --ali] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--split {test|val|all}] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
) )
sys.exit(1) sys.exit(1)
...@@ -739,9 +746,11 @@ def test(): ...@@ -739,9 +746,11 @@ def test():
device_type_str = "kunlun" device_type_str = "kunlun"
elif device_flag == "--hygon": elif device_flag == "--hygon":
device_type_str = "hygon" device_type_str = "hygon"
elif device_flag == "--ali":
device_type_str = "ali"
else: else:
print( print(
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]" "Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon | --ali] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
) )
sys.exit(1) sys.exit(1)
...@@ -935,9 +944,7 @@ def test(): ...@@ -935,9 +944,7 @@ def test():
splits_to_load = ( splits_to_load = (
["test"] ["test"]
if split == "test" if split == "test"
else ["validation"] else ["validation"] if split == "val" else ["validation", "test"]
if split == "val"
else ["validation", "test"]
) )
# Load each subject individually from hardcoded list, excluding "all" # Load each subject individually from hardcoded list, excluding "all"
for subject_name in mmlu_subjects: for subject_name in mmlu_subjects:
...@@ -959,9 +966,7 @@ def test(): ...@@ -959,9 +966,7 @@ def test():
splits_to_load = ( splits_to_load = (
["test"] ["test"]
if split == "test" if split == "test"
else ["validation"] else ["validation"] if split == "val" else ["validation", "test"]
if split == "val"
else ["validation", "test"]
) )
records = [] records = []
for sp in splits_to_load: for sp in splits_to_load:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment