"...scripts/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "ecf9459561a71f9a54056c8d627ecd673084c390"
Commit 5dc85bf4 authored by wooway777

issue/248 - change default attn backend to classic impl

parent 84fbe5b1
@@ -176,5 +176,5 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
 ```
 - Select the attention backend (using the flash attention backend requires completing the relevant configuration and build in InfiniCore first)
 ```bash
-python examples/bench.py --nvidia --model=<model-path> --enable-paged-attn [--attn=flash-attn | --attn=default]
+python examples/bench.py --nvidia --model=<model-path> --enable-paged-attn [--attn=default | --attn=flash-attn]
 ```
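With the new default, `bench.py` falls back to the classic attention implementation unless flash attention is requested explicitly. A minimal sketch of exercising both backends from a driver script, using the flags from the README line above; the model path is a placeholder:

```python
# Sketch: run the benchmark once per backend. The CLI flags are taken from the
# README command above; "/path/to/model" is a placeholder, not a real path.
import subprocess

for backend in ("default", "flash-attn"):
    subprocess.run(
        [
            "python", "examples/bench.py",
            "--nvidia",
            "--model=/path/to/model",
            "--enable-paged-attn",
            f"--attn={backend}",
        ],
        check=True,
    )
```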
@@ -255,7 +255,7 @@ def get_args():
     parser.add_argument(
         "--attn",
         type=str,
-        default="flash-attn",
+        default="default",
         choices=["default", "flash-attn"],
         help="attention backend to use: 'default' or 'flash-attn'",
     )
@@ -285,7 +285,7 @@ class TestModel:
         skip_load=False,
         cache_config=None,
         enable_graph=False,
-        attn_backend="flash-attn",
+        attn_backend="default",
     ) -> None:
         model_path = os.path.expanduser(model_path)
         # ---------------------------------------------------------------------------- #
...
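The behavioral effect of the `get_args()` hunk can be checked in isolation: the parser definition below is copied verbatim from the diff above, and only the two assertions are new.

```python
import argparse

# Flag definition copied from the hunk above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--attn",
    type=str,
    default="default",
    choices=["default", "flash-attn"],
    help="attention backend to use: 'default' or 'flash-attn'",
)

# Omitting --attn now selects the classic implementation; flash attention is opt-in.
assert parser.parse_args([]).attn == "default"
assert parser.parse_args(["--attn", "flash-attn"]).attn == "flash-attn"
```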
@@ -145,7 +145,7 @@ def get_args():
     parser.add_argument(
         "--attn",
         type=str,
-        default="flash-attn",
+        default="default",
         choices=["default", "flash-attn"],
         help="attention backend to use: 'default' or 'flash-attn'",
     )
@@ -164,7 +164,7 @@ def test(
     top_k=1,
     top_p=1.0,
     temperature=1.0,
-    attn_backend="flash-attn",
+    attn_backend="default",
 ):
     model_path = os.path.expanduser(model_path)
     # ---------------------------------------------------------------------------- #
...
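The same opt-in rule applies to direct calls of `test()`. A stand-in with the keyword defaults from the hunk above; its body is hypothetical and only echoes the chosen backend so the default is observable (the real function loads and runs the model):

```python
# Stand-in for test() with the post-commit defaults; body is illustrative only.
def test(model_path, top_k=1, top_p=1.0, temperature=1.0, attn_backend="default"):
    return attn_backend

assert test("model") == "default"                                # classic impl by default
assert test("model", attn_backend="flash-attn") == "flash-attn"  # explicit opt-in
```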
@@ -29,7 +29,7 @@ class InferEngine(_infinilm.InferEngine):
         distributed_config=DistConfig(1),
         cache_config=None,
         enable_graph_compiling=False,
-        attention_backend="flash-attn",
+        attention_backend="default",
     ):
         self.config = AutoConfig.from_pretrained(model_path)
...
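For downstream users of the Python API, the practical upshot is that flash attention must now be requested at construction time. A hedged usage sketch, assuming an `infinilm` package that exposes `InferEngine` and `DistConfig` as shown in the diff, with a placeholder model path:

```python
from infinilm import DistConfig, InferEngine  # import path is an assumption

# Previously attention_backend defaulted to "flash-attn"; after this commit it
# must be passed explicitly to keep the old behavior.
engine = InferEngine(
    "/path/to/model",                  # placeholder model path
    distributed_config=DistConfig(1),
    cache_config=None,
    enable_graph_compiling=False,
    attention_backend="flash-attn",
)
```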