README.md 3.94 KB
Newer Older
wangsen's avatar
wangsen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# 下载数据

```bash
# Chromosome sizes for hg19. Use the raw-content URL: the github.com/.../blob/...
# page URL returns the HTML viewer page instead of the text file.
wget https://raw.githubusercontent.com/hanfang/Topsorter/master/data/hg19.chrom.sizes.txt
# NOTE: this is a Google Drive *folder* link. wget only saves the folder's HTML
# page, so open the link in a browser (or use a Drive-aware downloader) to get
# the actual files. The URL is quoted because it contains '?', a shell glob character.
wget "https://drive.google.com/drive/folders/1S0KOMAj60MxQP6mgPV1OKjn_J-lVpzKM?usp=sharing"
# hg19 cytoband annotation from UCSC.
wget https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/cytoBand.txt.gz
```

# 创建环境


```bash
# Create and activate a dedicated conda environment named "higashi" (Python 3.10).
conda create -n higashi python=3.10
source activate higashi 
# Install the PyTorch 2.1.0 wheel (file name indicates a cp310 / dtk2404 build).
# NOTE(review): the package list further down shows torch 2.1.0+das1.1.git3ac1bdd,
# which differs from this wheel's +gitf643949 tag -- confirm the URL is current.
pip install https://cancon.hpccube.com:65024/directlink/4/pytorch/DAS1.1.1/torch-2.1.0+gitf643949.abi1.dtk2404-cp310-cp310-manylinux_2_31_x86_64.whl
# Fetch the Higashi sources and install the package from the checkout.
git clone https://github.com/ma-compbio/Higashi/
cd Higashi
python setup.py install
# Pin matplotlib to 3.7.3, installing from the Tsinghua PyPI mirror.
 pip install matplotlib==3.7.3  -i https://pypi.tuna.tsinghua.edu.cn/simple
```
安装后环境如下:

```
asciitree          0.3.3
bokeh              3.5.1
click              8.1.7
contourpy          1.2.1
cooler             0.9.0
cycler             0.12.1
Cython             0.29.24
cytoolz            0.12.3
dill               0.3.8
fbpca              1.0
filelock           3.15.4
fonttools          4.53.1
fsspec             2024.6.1
h5py               3.11.0
higashi            0.1.0a0
importlib_metadata 8.2.0
Jinja2             3.1.4
joblib             1.4.2
kiwisolver         1.4.5
llvmlite           0.43.0
MarkupSafe         2.1.5
matplotlib         3.7.3
mpmath             1.3.0
multiprocess       0.70.16
networkx           3.3
numba              0.60.0
numpy              1.23.0
packaging          24.1
pandas             1.3.4
pillow             10.4.0
pip                24.2
pyfaidx            0.8.1.2
pynndescent        0.5.13
pyparsing          3.1.2
python-dateutil    2.9.0.post0
pytz               2024.1
PyYAML             6.0.2
scikit-learn       1.5.1
scipy              1.7.3
seaborn            0.11.2
setuptools         72.1.0
simplejson         3.19.3
six                1.16.0
sympy              1.13.2
threadpoolctl      3.5.0
toolz              0.12.1
torch              2.1.0+das1.1.git3ac1bdd.abi1.dtk2404
tornado            6.4.1
tqdm               4.66.5
typing_extensions  4.12.2
tzdata             2024.1
umap-learn         0.5.6
wheel              0.43.0
xyzservices        2024.6.0
zipp               3.20.0

```

# 结合测试数据和Higashi模型生成具备超图分析与接触图嵌入能力的demo

```python
# Build a Higashi model from a JSON config, then run data processing, model
# preparation and embedding training, in that order.
from higashi.Higashi_wrapper import Higashi  # explicit import; only `Higashi` is used here

# Path to the Higashi JSON config file. Change it to wherever the downloaded
# files live; if the customer specifies a dataset, point it at that dataset's
# config instead.
config = "/work/magroup/ruochiz/Higashi/config_dir/config_ramani.JSON"
higashi_model = Higashi(config)
higashi_model.process_data()
higashi_model.prep_model()
higashi_model.train_for_embeddings()

```


# 验证单细胞Hi-C数据的超图分析与接触图嵌入能力

```python
# Run the Higashi training / imputation steps, then plot the learned cell
# embeddings in 2-D (PCA in the left panel, UMAP in the right panel), colored
# by cell type. Requires `higashi_model` created by the previous snippet.
#
# Plotting dependencies are imported first so that a missing package fails
# immediately rather than after the (long-running) training calls.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from umap import UMAP

# NOTE(review): train_for_embeddings() is also the last call of the previous
# snippet; if that snippet has already been run in this session, this call
# looks redundant -- confirm before removing it.
higashi_model.train_for_embeddings()
higashi_model.train_for_imputation_nbr_0()
higashi_model.impute_no_nbr()
higashi_model.train_for_imputation_with_nbr()
higashi_model.impute_with_nbr()

# Visualize embedding results
cell_embeddings = higashi_model.fetch_cell_embeddings()
print(cell_embeddings.shape)

cell_type = higashi_model.label_info['cell type']
fig = plt.figure(figsize=(14, 5))

# One panel per 2-D projection; both panels share the same scatter/legend code.
for panel, reducer_cls in enumerate((PCA, UMAP), start=1):
    ax = plt.subplot(1, 2, panel)
    vec = reducer_cls(n_components=2).fit_transform(cell_embeddings)
    sns.scatterplot(x=vec[:, 0], y=vec[:, 1], hue=cell_type, ax=ax, s=6, linewidth=0)
    # Sort legend entries alphabetically by label and place the legend
    # outside the axes, to the right of the panel.
    handles, labels = ax.get_legend_handles_labels()
    labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
    ax.legend(handles=handles, labels=labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncol=1)

plt.tight_layout()
plt.show()

```


# 参考文档

https://github.com/ma-compbio/Higashi/