# 下载数据

```bash
wget https://raw.githubusercontent.com/hanfang/Topsorter/master/data/hg19.chrom.sizes.txt
# 注意:Google Drive 文件夹链接无法用 wget 直接下载(只会得到 HTML 页面),
# 请在浏览器中打开该链接手动下载,或改用 gdown 等工具。
wget "https://drive.google.com/drive/folders/1S0KOMAj60MxQP6mgPV1OKjn_J-lVpzKM?usp=sharing"
wget https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/cytoBand.txt.gz
```

# 创建环境

```bash
conda create -n higashi python=3.10
source activate higashi
pip install https://cancon.hpccube.com:65024/directlink/4/pytorch/DAS1.1.1/torch-2.1.0+gitf643949.abi1.dtk2404-cp310-cp310-manylinux_2_31_x86_64.whl
git clone https://github.com/ma-compbio/Higashi/
cd Higashi
python setup.py install
pip install matplotlib==3.7.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
```

安装后环境如下:

```text
asciitree 0.3.3
bokeh 3.5.1
click 8.1.7
contourpy 1.2.1
cooler 0.9.0
cycler 0.12.1
Cython 0.29.24
cytoolz 0.12.3
dill 0.3.8
fbpca 1.0
filelock 3.15.4
fonttools 4.53.1
fsspec 2024.6.1
h5py 3.11.0
higashi 0.1.0a0
importlib_metadata 8.2.0
Jinja2 3.1.4
joblib 1.4.2
kiwisolver 1.4.5
llvmlite 0.43.0
MarkupSafe 2.1.5
matplotlib 3.7.3
mpmath 1.3.0
multiprocess 0.70.16
networkx 3.3
numba 0.60.0
numpy 1.23.0
packaging 24.1
pandas 1.3.4
pillow 10.4.0
pip 24.2
pyfaidx 0.8.1.2
pynndescent 0.5.13
pyparsing 3.1.2
python-dateutil 2.9.0.post0
pytz 2024.1
PyYAML 6.0.2
scikit-learn 1.5.1
scipy 1.7.3
seaborn 0.11.2
setuptools 72.1.0
simplejson 3.19.3
six 1.16.0
sympy 1.13.2
threadpoolctl 3.5.0
toolz 0.12.1
torch 2.1.0+das1.1.git3ac1bdd.abi1.dtk2404
tornado 6.4.1
tqdm 4.66.5
typing_extensions 4.12.2
tzdata 2024.1
umap-learn 0.5.6
wheel 0.43.0
xyzservices 2024.6.0
zipp 3.20.0
```

# 结合测试数据和Higashi模型生成具备超图分析与接触图嵌入能力的demo

```python
from higashi.Higashi_wrapper import *

# 请将下面的路径修改为本地配置文件的实际路径;如客户对数据集有指定,则根据客户数据集进行修改
config = "/work/magroup/ruochiz/Higashi/config_dir/config_ramani.JSON"
higashi_model = Higashi(config)
higashi_model.process_data()
higashi_model.prep_model()
higashi_model.train_for_embeddings()
```

# 验证单细胞Hi-C数据的超图分析与接触图嵌入能力

```python
higashi_model.train_for_embeddings()
higashi_model.train_for_imputation_nbr_0()
higashi_model.impute_no_nbr()
higashi_model.train_for_imputation_with_nbr()
higashi_model.impute_with_nbr()

# Visualize embedding results
cell_embeddings = higashi_model.fetch_cell_embeddings()
print (cell_embeddings.shape)

from umap import UMAP
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt

cell_type = higashi_model.label_info['cell type']
fig = plt.figure(figsize=(14, 5))

ax = plt.subplot(1, 2, 1)
vec = PCA(n_components=2).fit_transform(cell_embeddings)
sns.scatterplot(x=vec[:, 0], y=vec[:, 1], hue=cell_type, ax=ax, s=6, linewidth=0)
handles, labels = ax.get_legend_handles_labels()
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
ax.legend(handles=handles, labels=labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncol=1)

ax = plt.subplot(1, 2, 2)
vec = UMAP(n_components=2).fit_transform(cell_embeddings)
sns.scatterplot(x=vec[:, 0], y=vec[:, 1], hue=cell_type, ax=ax, s=6, linewidth=0)
handles, labels = ax.get_legend_handles_labels()
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
ax.legend(handles=handles, labels=labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncol=1)

plt.tight_layout()
plt.show()
```

# 参考文档

https://github.com/ma-compbio/Higashi/