"src/rpc/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "c8d4d6fbf828b1fde6bbb67867e66c509b3356e4"
Commit 0a8cfdda authored by zhangwq5's avatar zhangwq5
Browse files

all

parent 7052e81b
Pipeline #2914 failed with stages
in 0 seconds
# Graphormer_pytorch
<div align=center>
<img src="./doc/GF.png"/>
</div>

Graphormer 是一个深度学习软件包,它能让研究人员和开发人员为分子建模任务训练自定义模型。其旨在加速分子科学领域人工智能的研究与应用,例如材料发现、药物发现等,[项目网站](https://www.microsoft.com/en-us/research/project/graphormer/)
Graphormer 的高级预训练版本仅在以下平台提供:[Azure Quantum Elements](https://quantum.microsoft.com/en-us/our-story/quantum-elements-overview)
关于Graphormer的更多信息请前往[源码仓库](https://github.com/microsoft/Graphormer) 和 [用户手册]()
## 环境配置
### 硬件需求
DCU型号:K100_AI,节点数量:1台,卡数:2张。
### Docker
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu22.04-dtk24.04.2-py3.10
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro {imageID} bash
cd /your_code_path/graphormer
# 克隆并安装 fairseq
git clone https://github.com/facebookresearch/fairseq.git
cd fairseq
git checkout 98ebe4f
pip install -e .
python setup.py build_ext --inplace
# 安装其他库
pip install -r requirements.txt
# 克隆 graphormer
git clone https://github.com/microsoft/Graphormer.git
# 替换文件
cp -f ./algos.pyx ./Graphormer/graphormer/data/algos.pyx
cp -f ./pyg_dataset_lookup_table.py ./Graphormer/graphormer/data/pyg_datasets/pyg_dataset_lookup_table.py
```
## 数据集
[示例数据集ZINC下载地址](https://uc2b1e201a894fcdcefc45447d3d.dl.dropboxusercontent.com/cd/0/get/CxApk-pOjXqveTD0AlvvU9WEEA2_kVEX8ekJVJe1vichmkjzq-LY7rmslrJFughtnKezOb8HyijbDxpMWW0mc-B4TtGE2UofmHz5-2_jnkaFWigtF6opQQvQiwMge5JCCyCMUsPJV__YUagJZOFRucbe/file?dl=1#)
## 训练
```bash
cd ./Graphormer/examples/property_prediction
mkdir ./dataset/raw
# 下载ZINC数据集并解压,将文件cp到./dataset/raw下
unzip molecules.zip
cp ./molecules/atom_dict.pickle ./dataset/raw
cp ./molecules/bond_dict.pickle ./dataset/raw
cp ./molecules/train.pickle ./dataset/raw
cp ./molecules/test.pickle ./dataset/raw
cp ./molecules/val.pickle ./dataset/raw
# 修改fairseq安装路径,将训练命令复制粘贴到./zinc.sh
export PYTHONPATH=/your_path_of/fairseq:$PYTHONPATH
fairseq-train \
--user-dir ../../graphormer \
--num-workers 0 \
--find-unused-parameters \
--dataset-name zinc \
--dataset-source pyg \
--task graph_prediction \
--criterion l1_loss \
--arch graphormer_slim \
--num-classes 1 \
--attention-dropout 0.1 --act-dropout 0.1 --dropout 0.0 \
--optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-8 --clip-norm 5.0 --weight-decay 0.01 \
--lr-scheduler polynomial_decay --power 1 --warmup-updates 60000 --total-num-update 400000 \
--lr 2e-4 --end-learning-rate 1e-9 \
--batch-size 64 \
--fp16 \
--data-buffer-size 20 \
--encoder-layers 12 \
--encoder-embed-dim 80 \
--encoder-ffn-embed-dim 80 \
--encoder-attention-heads 8 \
--max-epoch 2 \
--save-dir ./ckpts
# 启动训练, 第一次启动时会花费一些时间用于数据集转换,转换后的训练数据存放在./property_prediction/dataset/full/processed目录下
# 权重文件会保存在./property_prediction/ckpts下
bash zinc.sh
```
## 推理
## result
详见/graphormer/res文件夹
## 应用场景
### 算法类别
`训练微调`
### 热点应用行业
`金融,教育,政府,科研,制造,能源,交通`
## 源码仓库及问题反馈
- https://github.com/microsoft/Graphormer
## 参考资料
- https://github.com/ibm-granite/granite-speech-models
\ No newline at end of file
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import cython
from cython.parallel cimport prange, parallel
cimport numpy
import numpy
def floyd_warshall(adjacency_matrix):
    """All-pairs shortest paths via Floyd-Warshall on a dense adjacency matrix.

    Returns a pair ``(M, path)``: ``M[i][j]`` is the shortest-path length
    between nodes i and j (510 marks an unreachable pair), and ``path[i][j]``
    is an intermediate node on the shortest i->j path (-1 when the edge is
    direct, 510 when the pair is unreachable).
    """
    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols
    cdef unsigned int n = nrows
    # Work on a C-contiguous int64 copy so the raw pointer arithmetic below is valid.
    adj_mat_copy = adjacency_matrix.astype(numpy.int64, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[long, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[long, ndim=2, mode='c'] path = -1 * numpy.ones([n, n], dtype=numpy.int64)
    cdef unsigned int i, j, k
    cdef long M_ij, M_ik, cost_ikkj
    # Raw pointers into M's buffer to avoid bounds-checked indexing in the hot loop.
    cdef long* M_ptr = &M[0,0]
    cdef long* M_i_ptr
    cdef long* M_k_ptr
    # set unreachable nodes distance to 510 (510 acts as "infinity" here)
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i][j] = 0
            elif M[i][j] == 0:
                M[i][j] = 510
    # Floyd-Warshall main triple loop
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i][j] = k
    # clamp any still-unreachable pair back to the 510 sentinel in both outputs
    for i in range(n):
        for j in range(n):
            if M[i][j] >= 510:
                path[i][j] = 510
                M[i][j] = 510
    return M, path
def get_all_edges(path, i, j):
    """Recursively reconstruct the intermediate nodes on the shortest i->j path.

    ``path`` is the predecessor-style matrix produced by ``floyd_warshall``;
    an entry of -1 means the edge i->j is direct (no intermediates).
    """
    cdef int k = path[i][j]
    if k == -1:
        # Direct edge: no intermediate nodes between i and j.
        return []
    else:
        # Path goes i -> ... -> k -> ... -> j; expand both halves around k.
        return get_all_edges(path, i, k) + [k] + get_all_edges(path, k, j)
def gen_edge_input(max_dist, path, edge_feat):
    """Collect edge features along every shortest path.

    Returns ``edge_fea_all`` of shape [n, n, max_dist, feat_dim], where
    ``[i, j, k, :]`` holds the feature of the k-th edge on the shortest
    i->j path; unused slots keep the -1 fill value.
    """
    (nrows, ncols) = path.shape
    assert nrows == ncols
    cdef unsigned int n = nrows
    cdef unsigned int max_dist_copy = max_dist
    # C-contiguous int64 copies so typed buffer indexing below is valid.
    path_copy = path.astype(numpy.int64, order='C', casting='safe', copy=True)
    edge_feat_copy = edge_feat.astype(numpy.int64, order='C', casting='safe', copy=True)
    assert path_copy.flags['C_CONTIGUOUS']
    assert edge_feat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[long, ndim=4, mode='c'] edge_fea_all = -1 * numpy.ones([n, n, max_dist_copy, edge_feat.shape[-1]], dtype=numpy.int64)
    cdef unsigned int i, j, k, num_path, cur
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            if path_copy[i][j] == 510:
                # 510 marks an unreachable pair (see floyd_warshall); keep -1 padding.
                continue
            # Full node sequence of the shortest i->j path, endpoints included.
            path = [i] + get_all_edges(path_copy, i, j) + [j]
            num_path = len(path) - 1
            for k in range(num_path):
                edge_fea_all[i, j, k, :] = edge_feat_copy[path[k], path[k+1], :]
    return edge_fea_all
doc/GF.png

25.6 KB

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from typing import Optional
from torch_geometric.datasets import *
from torch_geometric.data import Dataset
from .pyg_dataset import GraphormerPYGDataset
import torch.distributed as dist
import os.path as osp
import pickle
import torch
from torch_geometric.datasets import ZINC
from torch_geometric.data import Data
class MyQM7b(QM7b):
    """QM7b wrapper that is safe under torch.distributed: only rank 0
    downloads/processes the data while the other ranks wait at a barrier."""

    def download(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().download()
        if distributed:
            dist.barrier()

    def process(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().process()
        if distributed:
            dist.barrier()
class MyQM9(QM9):
    """QM9 wrapper that is safe under torch.distributed: only rank 0
    downloads/processes the data while the other ranks wait at a barrier."""

    def download(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().download()
        if distributed:
            dist.barrier()

    def process(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().process()
        if distributed:
            dist.barrier()
class MyZINC(ZINC):
    """ZINC variant for distributed runs: downloading is disabled (raw pickles
    are expected to be placed in ``raw_dir`` manually), and only rank 0
    performs processing while other ranks wait at a barrier."""

    def download(self):
        if not dist.is_initialized() or dist.get_rank() == 0:
            # Intentionally a no-op: raw files are provided manually, not downloaded.
            pass
        if dist.is_initialized():
            dist.barrier()

    def process(self):
        # Only rank 0 converts the raw pickles into processed tensor files.
        if not dist.is_initialized() or dist.get_rank() == 0:
            for i, split in enumerate(['train', 'val', 'test']):
                input_path = osp.join(self.raw_dir, f'{split}.pickle')
                with open(input_path, 'rb') as f:
                    graphs = pickle.load(f)
                data_list = []
                for g in graphs:
                    # Node features: atom types reshaped to (num_nodes, 1) int64.
                    x = g['atom_type'].to(torch.long).view(-1, 1)
                    bond_info = g['bond_type']
                    # Regression target taken from 'logP_SA_cycle_normalized',
                    # reshaped to a (1, -1) float tensor.
                    y = g['logP_SA_cycle_normalized'].clone().detach().view(1, -1).to(torch.float)
                    # First two columns of bond_info are edge endpoints;
                    # the third column is the edge (bond) attribute.
                    edge_index = bond_info[:, :2].t().contiguous().to(torch.long)
                    edge_attr = bond_info[:, 2].to(torch.long)
                    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
                    data.num_nodes = len(x)
                    data_list.append(data)
                if self.pre_filter is not None:
                    data_list = [d for d in data_list if self.pre_filter(d)]
                if self.pre_transform is not None:
                    data_list = [self.pre_transform(d) for d in data_list]
                data, slices = self.collate(data_list)
                torch.save((data, slices), self.processed_paths[i])
        if dist.is_initialized():
            dist.barrier()
class MyMoleculeNet(MoleculeNet):
    """MoleculeNet wrapper that is safe under torch.distributed: only rank 0
    downloads/processes the data while the other ranks wait at a barrier."""

    def download(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().download()
        if distributed:
            dist.barrier()

    def process(self):
        distributed = dist.is_initialized()
        if not distributed or dist.get_rank() == 0:
            super().process()
        if distributed:
            dist.barrier()
class PYGDatasetLookupTable:
    """Maps a dataset spec string to a :class:`GraphormerPYGDataset`."""

    @staticmethod
    def GetPYGDataset(dataset_spec: str, seed: int) -> Optional[Dataset]:
        """Build a Graphormer dataset from a spec string.

        Args:
            dataset_spec: Either ``"name"`` or ``"name:key=value,..."``
                (e.g. ``"moleculenet:name=bbbp"``).
            seed: Random seed forwarded to GraphormerPYGDataset.

        Returns:
            A GraphormerPYGDataset, or None when no inner dataset was built.

        Raises:
            ValueError: On a malformed spec or an unknown dataset name.
        """
        split_result = dataset_spec.split(":")
        if len(split_result) == 2:
            name, params = split_result[0], split_result[1]
            params = params.split(",")
        elif len(split_result) == 1:
            name = dataset_spec
            params = []
        else:
            # Previously a spec with more than one ':' fell through with
            # `name`/`params` unbound, crashing with UnboundLocalError.
            raise ValueError(
                f"Invalid dataset spec {dataset_spec!r}: "
                "expected 'name' or 'name:key=value,...'."
            )
        inner_dataset = None
        train_set = None
        valid_set = None
        test_set = None
        root = "dataset"
        if name == "qm7b":
            inner_dataset = MyQM7b(root=root)
        elif name == "qm9":
            inner_dataset = MyQM9(root=root)
        elif name == "zinc":
            # ZINC ships with predefined splits, so all three are loaded.
            inner_dataset = MyZINC(root=root)
            train_set = MyZINC(root=root, split="train")
            valid_set = MyZINC(root=root, split="val")
            test_set = MyZINC(root=root, split="test")
        elif name == "moleculenet":
            nm = None
            for param in params:
                # NOTE: use fresh names here — the original reused `name`,
                # clobbering the dataset name used in the error path below.
                key, value = param.split("=")
                if key == "name":
                    nm = value
            inner_dataset = MyMoleculeNet(root=root, name=nm)
        else:
            raise ValueError(f"Unknown dataset name {name} for pyg source.")
        if train_set is not None:
            # Predefined splits: hand them to GraphormerPYGDataset directly.
            return GraphormerPYGDataset(
                None,
                seed,
                None,
                None,
                None,
                train_set,
                valid_set,
                test_set,
            )
        else:
            # Single dataset: GraphormerPYGDataset performs the split itself.
            return (
                None
                if inner_dataset is None
                else GraphormerPYGDataset(inner_dataset, seed)
            )
# -*- coding: utf-8 -*-
"""
一个用于比较两个 PyTorch checkpoint (.pt 或 .ckpt) 文件中模型权重的脚本。
它会逐层比较权重,并根据预设的“平均绝对差异”阈值来判断是否“过关”。
"""
import torch
from collections import OrderedDict
# ==============================================================================
# 1. Configuration: checkpoint file paths, model-weights key, and pass threshold
# ==============================================================================
CKPT_PATH_1 = '/home/zwq/project/shangchaun/external/graphormer_pytorch/res/res_of_A800/checkpoint1.pt'
CKPT_PATH_2 = '/home/zwq/project/shangchaun/external/graphormer_pytorch/res/res_of_K100AI/checkpoint1.pt'
# Determined by prior inspection: the model weights live under the 'model' key.
MODEL_WEIGHTS_KEY = 'model'
# !! Core pass criterion !!
# Threshold on the per-layer mean absolute difference: the comparison PASSES
# only if every shared layer's difference stays below this value.
MEAN_ABS_DIFF_THRESHOLD = 0.02
# ==============================================================================
def extract_state_dict(checkpoint, model_key):
    """Return the state_dict stored under *model_key* in a loaded checkpoint.

    Raises:
        TypeError: if *checkpoint* is not a dict.
        KeyError: if *model_key* is not present, listing the keys that are.
    """
    if not isinstance(checkpoint, dict):
        raise TypeError(f"Checkpoint 文件加载后不是一个字典,而是一个 {type(checkpoint)}。")
    if model_key not in checkpoint:
        keys_found = list(checkpoint.keys())
        raise KeyError(
            f"在 checkpoint 中找不到指定的键 '{model_key}'。\n"
            f"文件中实际存在的键是: {keys_found}"
        )
    return checkpoint[model_key]
def normalize_keys(state_dict):
    """Strip the common 'module.' prefix (e.g. from DataParallel) off every key.

    Returns a new OrderedDict; insertion order and values are preserved.
    """
    prefix = 'module.'
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        cleaned[key[len(prefix):] if key.startswith(prefix) else key] = value
    return cleaned
def compare_checkpoints(ckpt_path1, ckpt_path2, model_key, threshold):
    """Load two checkpoints and compare their per-layer weights.

    Prints a key-set summary, a per-layer mean-absolute-difference report for
    every shared layer whose tensors differ, and a final PASS/FAIL verdict
    based on *threshold*. Returns None; all results go to stdout.
    """
    print(f"[*] 正在加载 Checkpoint 1: {ckpt_path1}")
    ckpt1 = torch.load(ckpt_path1, map_location='cpu')
    print(f"[*] 正在加载 Checkpoint 2: {ckpt_path2}")
    ckpt2 = torch.load(ckpt_path2, map_location='cpu')
    print(f"\n[*] 正在从键 '{model_key}' 中提取并标准化 state_dict...")
    # Normalize keys so 'module.'-prefixed and plain checkpoints line up.
    sd1 = normalize_keys(extract_state_dict(ckpt1, model_key))
    sd2 = normalize_keys(extract_state_dict(ckpt2, model_key))
    keys1, keys2 = set(sd1.keys()), set(sd2.keys())
    common_keys = sorted(list(keys1.intersection(keys2)))
    unique_to_1, unique_to_2 = sorted(list(keys1 - keys2)), sorted(list(keys2 - keys1))
    print("\n" + "="*60)
    print(" 层名称比较摘要 (Layer Name Comparison Summary)")
    print("="*60)
    print(f"总层数 (文件1): {len(keys1)}")
    print(f"总层数 (文件2): {len(keys2)}")
    print(f"共有层数: {len(common_keys)}")
    if unique_to_1: print(f"文件1独有层数: {len(unique_to_1)}")
    if unique_to_2: print(f"文件2独有层数: {len(unique_to_2)}")
    print("\n" + "="*60)
    print(" 共有层权重差异详细分析 (Shared Layer Weight-Diff Analysis)")
    print(f" - 阈值 (Threshold for Mean Abs Diff): {threshold}")
    print("="*60)
    failing_layers = []
    for key in common_keys:
        tensor1, tensor2 = sd1[key], sd2[key]
        if tensor1.shape != tensor2.shape:
            print(f"层: {key} - [形状不匹配!] Shape Mismatch! {tensor1.shape} vs {tensor2.shape}")
            failing_layers.append((key, float('inf'), "形状不匹配"))  # mark as failed
            continue
        if torch.equal(tensor1, tensor2):
            continue  # skip identical layers to keep the output concise
        # Compare in float32 so integer/half-precision tensors diff cleanly.
        abs_diff = torch.abs(tensor1.float() - tensor2.float())
        mean_abs_diff = abs_diff.mean().item()
        # Core check: does the mean absolute difference exceed the threshold?
        if mean_abs_diff > threshold:
            status = f"❌ [不通过] (>{threshold})"
            failing_layers.append((key, mean_abs_diff, "超过阈值"))
        else:
            status = f"✅ [通过] (<={threshold})"
        print(f"层: {key}")
        print(f" - 平均绝对差 (Mean Abs Diff): {mean_abs_diff:.8f} --- {status}")
    print("\n" + "="*60)
    print(" 最终总结 (Final Conclusion)")
    print("="*60)
    # Warn when the two models do not share an identical layer structure.
    if unique_to_1 or unique_to_2:
        print("警告: 两个模型的层结构不完全一致,存在独有层。")
        print(" - 文件1 独有层:", unique_to_1 if unique_to_1 else "无")
        print(" - 文件2 独有层:", unique_to_2 if unique_to_2 else "无")
        print("-" * 20)
    # Final verdict based on the collected failing_layers list.
    if not failing_layers:
        print(f"✅ 过关 (PASS): 所有共有层的平均绝对差异都在阈值 {threshold} 之内。")
    else:
        print(f"❌ 不通过 (FAIL): 发现 {len(failing_layers)} 个层的差异不满足要求。")
        print("\n详细信息如下:")
        for layer_name, diff_value, reason in failing_layers:
            if reason == "形状不匹配":
                print(f" - 层: {layer_name}, 原因: {reason}")
            else:
                print(f" - 层: {layer_name}, 平均绝对差: {diff_value:.8f} (原因: {reason})")
if __name__ == '__main__':
    try:
        compare_checkpoints(CKPT_PATH_1, CKPT_PATH_2, MODEL_WEIGHTS_KEY, MEAN_ABS_DIFF_THRESHOLD)
    except Exception as e:
        # Top-level boundary: surface any failure as a short message
        # instead of an unhandled traceback.
        print(f"\n[程序执行出错]: {e}")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment