Commit 300470cb authored by pengcheng888's avatar pengcheng888
Browse files

issue/114 - 添加读取.bin文件权重的代码,更新readme

parent 81081f3c
...@@ -37,29 +37,38 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA ...@@ -37,29 +37,38 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
``` ```
## 使用方式(新版) ## 使用方式(新版)
#### 一、编译并安装 `InfiniCore`
编译并安装 `InfiniCore`, 详情见 InfiniCore的 [`README`](https://github.com/InfiniTensor/InfiniCore) :
- 编译并安装 `InfiniCore`, 详情见 InfiniCore的 [`README`](https://github.com/InfiniTensor/InfiniCore) : - 注意根据提示设置好 `INFINI_ROOT` 环境变量(默认为 `$HOME/.infini`)
- 根据硬件平台,选择 xmake 构建配置
- 编译安装InfiniCore
- 安装 C++ 库
- 安装 Python 包
- 注意根据提示设置好 `INFINI_ROOT` 环境变量(默认为 `$HOME/.infini`)
- 根据硬件平台,选择 xmake 构建配置
- 编译安装InfiniCore
- 安装 C++ 库
- 安装 Python 包
#### 二、编译并安装 `InfiniLM`
- 克隆项目
由于仓库中含有子模块,所以在克隆时请添加 `--recursive` 或 `--recurse-submodules`,如:
- 编译并安装 `InfiniLM` Python 包 ```shell
- 安装第三方依赖 git clone --recursive https://github.com/InfiniTensor/InfiniLM.git
```bash ```
或者在普通克隆后进行更新:
```shell
git submodule update --init --recursive git submodule update --init --recursive
``` ```
- 安装 InfiniLM Python 包 - 安装 InfiniLM Python 包
```bash ```bash
pip install -e . pip install -e .
``` ```
- 单次推理测试 - 单次推理测试
- llama示例 - llama示例
```bash ```bash
python examples/llama.py [--cpu | --nvidia | --metax | --moore | --iluvatar] --model_path=<path/to/model_dir> python examples/llama.py [--cpu | --nvidia | --metax | --moore | --iluvatar] --model_path=<path/to/model_dir>
...@@ -68,3 +77,13 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA ...@@ -68,3 +77,13 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
```bash ```bash
python examples/llama.py --nvidia --model_path=/models/TinyLlama-1.1B-Chat-v1.0 python examples/llama.py --nvidia --model_path=/models/TinyLlama-1.1B-Chat-v1.0
``` ```
- 分布式推理测试
- 9g示例
```bash
python examples/jiuge.py [--nvidia] --model_path=<path/to/model_dir> --backend=cpp --tp=NDEV --batch_size=MAX_BATCH
```
- 例如: 9G7B模型,cpp后端,batch_size为16,4卡分布式
```bash
python examples/jiuge.py --nvidia --model_path=/models/9G7B_MHA/ --backend=cpp --tp=4 --batch_size=16
```
\ No newline at end of file
...@@ -30,15 +30,20 @@ void InferEngine::load_param(const std::string &name, const infinicore::Tensor & ...@@ -30,15 +30,20 @@ void InferEngine::load_param(const std::string &name, const infinicore::Tensor &
worker->load_param(name, param); worker->load_param(name, param);
} }
} }
//------------------------------------------------------ //------------------------------------------------------
// state_dict // state_dict
//------------------------------------------------------ //------------------------------------------------------
std::unordered_map<std::string, infinicore::nn::Parameter> InferEngine::state_dict() { std::vector<std::unordered_map<std::string, infinicore::nn::Parameter>> InferEngine::state_dict() {
std::vector<std::unordered_map<std::string, infinicore::nn::Parameter>> results;
if (0 == workers_.size()) { if (0 == workers_.size()) {
throw std::runtime_error(" Model object not found. "); throw std::runtime_error(" Model object not found. ");
} }
return workers_[0]->state_dict();
for (auto &worker : workers_) {
results.push_back(worker->state_dict());
}
return results;
} }
//------------------------------------------------------ //------------------------------------------------------
......
...@@ -20,7 +20,7 @@ public: ...@@ -20,7 +20,7 @@ public:
void load_param(const std::string &name, const infinicore::Tensor &param); void load_param(const std::string &name, const infinicore::Tensor &param);
// return the parameters (i.e. weights and biases). // return the parameters (i.e. weights and biases).
std::unordered_map<std::string, infinicore::nn::Parameter> state_dict(); std::vector<std::unordered_map<std::string, infinicore::nn::Parameter>> state_dict();
// Run a single forward pass on all workers and return the outputs from all ranks // Run a single forward pass on all workers and return the outputs from all ranks
infinicore::Tensor generate(const infinicore::Tensor &input_ids, infinicore::Tensor generate(const infinicore::Tensor &input_ids,
......
...@@ -44,17 +44,19 @@ inline void bind_infer_engine(py::module &m) { ...@@ -44,17 +44,19 @@ inline void bind_infer_engine(py::module &m) {
"Load a parameter tensor into all workers (each worker picks its shard)") "Load a parameter tensor into all workers (each worker picks its shard)")
.def("state_dict", [](InferEngine &self) { .def("state_dict", [](InferEngine &self) {
// Return a dictionary containing references to the whole state of the module. // Return a dictionary containing references to the whole state of the module.
auto state_dict = self.state_dict(); py::list state_dict_tp_all;
for (const auto &state_dict_tp : self.state_dict()) {
py::dict result; py::dict result;
for (const auto &[name, param] : state_dict) { for (const auto &[name, param] : state_dict_tp) {
result[py::cast(name)] = infinicore::Tensor(param); result[py::cast(name)] = infinicore::Tensor(param);
} }
return result; state_dict_tp_all.append(result);
}
return state_dict_tp_all;
}) })
.def("generate", [](InferEngine &self, py::object input_ids, py::object position_ids) -> infinicore::Tensor { return self.generate(input_ids.cast<infinicore::Tensor>(), position_ids.cast<infinicore::Tensor>()); }, "Run inference on all ranks with arbitrary arguments") .def("generate", [](InferEngine &self, py::object input_ids, py::object position_ids) -> infinicore::Tensor { return self.generate(input_ids.cast<infinicore::Tensor>(), position_ids.cast<infinicore::Tensor>()); }, "Run inference on all ranks with arbitrary arguments")
.def("reset_cache", &InferEngine::reset_cache, .def("reset_cache", &InferEngine::reset_cache, py::arg("pos") = 0, py::arg("async") = false, "Reset the internal cache in all workers to a specific position (clears state between generations). "
py::arg("pos") = 0, py::arg("async") = false,
"Reset the internal cache in all workers to a specific position (clears state between generations). "
"By default, this is synchronous. If async=True, this becomes asynchronous (unstable - use with caution)."); "By default, this is synchronous. If async=True, this becomes asynchronous (unstable - use with caution).");
// Optionally, you can add __repr__ for debugging // Optionally, you can add __repr__ for debugging
......
...@@ -4,7 +4,7 @@ from typing import Dict, Union ...@@ -4,7 +4,7 @@ from typing import Dict, Union
import torch import torch
from safetensors import safe_open from safetensors import safe_open
import glob import glob
from tqdm import tqdm
import infinicore import infinicore
str_to_torch_dtype = { str_to_torch_dtype = {
...@@ -33,20 +33,22 @@ def check_parameters(model_keys: list, already_loaded_keys: list): ...@@ -33,20 +33,22 @@ def check_parameters(model_keys: list, already_loaded_keys: list):
error_msgs: list[str] = [] error_msgs: list[str] = []
if len(unexpected_keys) > 0: if len(unexpected_keys) > 0:
error_msgs.insert( error_msgs.append(
0,
"Unexpected key(s) in state_dict: {}. ".format( "Unexpected key(s) in state_dict: {}. ".format(
", ".join('"{}"'.format(k) for k in unexpected_keys) ", ".join('"{}"'.format(k) for k in unexpected_keys)
), )
) )
if len(missing_keys) > 0: if len(missing_keys) > 0:
error_msgs.insert( error_msgs.append(
0,
"Missing key(s) in state_dict: {}. ".format( "Missing key(s) in state_dict: {}. ".format(
", ".join('"{}"'.format(k) for k in missing_keys) ", ".join('"{}"'.format(k) for k in missing_keys)
),
) )
return error_msgs )
if len(error_msgs) > 0:
raise RuntimeError(
"Error(s) in loading state_dict\n\t{}".format("\n\t".join(error_msgs))
)
def load_state_dict( def load_state_dict(
...@@ -86,6 +88,9 @@ def get_model_state_dict( ...@@ -86,6 +88,9 @@ def get_model_state_dict(
""" """
Load the model weights. Load the model weights.
""" """
print(" load weights ......")
torch_device = device.type torch_device = device.type
torch_dtype = infinicore.utils.to_torch_dtype(dtype) torch_dtype = infinicore.utils.to_torch_dtype(dtype)
...@@ -117,18 +122,27 @@ def load_model_state_dict_by_file( ...@@ -117,18 +122,27 @@ def load_model_state_dict_by_file(
dtype=infinicore.dtype, dtype=infinicore.dtype,
) -> Dict[str, infinicore.Tensor]: ) -> Dict[str, infinicore.Tensor]:
""" """
Load the model weights by file. Load the model weights from file.
""" """
print(" load weights ......")
torch_device = "cpu" torch_device = "cpu"
torch_dtype = infinicore.utils.to_torch_dtype(dtype) torch_dtype = infinicore.utils.to_torch_dtype(dtype)
model_keys = model.state_dict().keys() model_keys = model.state_dict_keyname()
already_loaded_keys = [] already_loaded_keys = []
for file_path in glob.glob(os.path.join(model_path, "*.safetensors")):
file_list = glob.glob(os.path.join(model_path, "*.safetensors"))
if len(file_list) > 0:
for file_path in tqdm(file_list, desc="Processing files"):
tqdm.write(f"Processing: {os.path.basename(file_path)}")
# --------------------------------------------------------- # # --------------------------------------------------------- #
# Load weights from *.safetensors file # Load weights from *.safetensors file
# --------------------------------------------------------- # # --------------------------------------------------------- #
model_param = load_state_dict(file_path, device=torch_device, dtype=torch_dtype) model_param = load_state_dict(
file_path, device=torch_device, dtype=torch_dtype
)
already_loaded_keys.extend(model_param.keys()) already_loaded_keys.extend(model_param.keys())
# --------------------------------------------------------- # # --------------------------------------------------------- #
...@@ -141,12 +155,25 @@ def load_model_state_dict_by_file( ...@@ -141,12 +155,25 @@ def load_model_state_dict_by_file(
model.load_state_dict(model_param_infini, strict=False) model.load_state_dict(model_param_infini, strict=False)
infinicore.sync_device() infinicore.sync_device()
error_msgs = check_parameters(model_keys, already_loaded_keys) elif os.path.exists(os.path.join(model_path, "pytorch_model.bin")):
if len(error_msgs) > 0: file_path = os.path.join(model_path, "pytorch_model.bin")
raise RuntimeError( model_params = torch.load(file_path, weights_only=True, map_location="cpu")
"Error(s) in loading state_dict\n\t{}".format("\n\t".join(error_msgs))
model_param_infini = {}
for key in model_params.keys():
model_param_infini[key] = infinicore.from_torch(
model_params[key].to(dtype=torch_dtype)
) )
already_loaded_keys.append(key)
model.load_state_dict(model_param_infini, strict=True)
infinicore.sync_device()
else:
raise KeyError("Weight file not found.")
check_parameters(model_keys, already_loaded_keys)
def load_model_state_dict_by_tensor( def load_model_state_dict_by_tensor(
model: infinicore.nn.Module, model: infinicore.nn.Module,
...@@ -156,23 +183,37 @@ def load_model_state_dict_by_tensor( ...@@ -156,23 +183,37 @@ def load_model_state_dict_by_tensor(
""" """
Load the model weights by tensor. Load the model weights by tensor.
""" """
print(" load weights ......")
torch_dtype = infinicore.utils.to_torch_dtype(dtype) torch_dtype = infinicore.utils.to_torch_dtype(dtype)
model_keys = model.state_dict().keys() model_keys = model.state_dict_keyname()
already_loaded_keys = [] already_loaded_keys = []
for file in glob.glob(os.path.join(model_path, "*.safetensors")): file_list = glob.glob(os.path.join(model_path, "*.safetensors"))
with safe_open(file, "pt", "cpu") as f: if len(file_list) > 0:
for file_path in tqdm(file_list, desc="Processing files"):
tqdm.write(f"Processing: {os.path.basename(file_path)}")
with safe_open(file_path, "pt", "cpu") as f:
for name in f.keys(): for name in f.keys():
param_infini = infinicore.from_torch( weight_infini = infinicore.from_torch(
f.get_tensor(name).to(dtype=torch_dtype) f.get_tensor(name).to(dtype=torch_dtype)
) )
model.load_parameter(name, param_infini) model.load_param(name, weight_infini)
already_loaded_keys.append(name) already_loaded_keys.append(name)
infinicore.sync_stream() infinicore.sync_stream()
error_msgs = check_parameters(model_keys, already_loaded_keys) elif os.path.exists(os.path.join(model_path, "pytorch_model.bin")):
if len(error_msgs) > 0: file_path = os.path.join(model_path, "pytorch_model.bin")
raise RuntimeError( model_params = torch.load(file_path, weights_only=True, map_location="cpu")
"Error(s) in loading state_dict\n\t{}".format("\n\t".join(error_msgs))
for key in model_params.keys():
weight_infini = infinicore.from_torch(
model_params[key].to(dtype=torch_dtype)
) )
model.load_param(key, weight_infini)
already_loaded_keys.append(key)
else:
raise KeyError("Weight file not found.")
check_parameters(model_keys, already_loaded_keys)
...@@ -189,12 +189,9 @@ class LlamaForCausalLM(GenerationMixin): ...@@ -189,12 +189,9 @@ class LlamaForCausalLM(GenerationMixin):
config._underlying, distributed_config._underlying, device._underlying.type config._underlying, distributed_config._underlying, device._underlying.type
) )
def state_dict(self): def state_dict_keyname(self):
"""Get model state dictionary with parameter shapes""" """Get model key name."""
destination = OrderedDict() return self._model.state_dict()[0].keys()
for name, param in self._model.state_dict().items():
destination[name] = infinicore.Tensor(param)
return destination
def load_state_dict(self, state_dict, strict=None): def load_state_dict(self, state_dict, strict=None):
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment