"doc/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "4f07b8f1d933bc78d7664fb30cd6bbc750dd772b"
Commit b14e47f4 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge branch 'main' of https://github.com/hpcaitech/FastFold

parents 490cb6f5 05681304
Pipeline #234 failed with stages
in 0 seconds
name: Build
on:
pull_request:
types: [synchronize, labeled]
jobs:
build:
name: Build and Test FastFold
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/FastFold' &&
contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/fastfold:/data/scratch/fastfold
timeout-minutes: 40
steps:
- uses: actions/checkout@v2
with:
repository: hpcaitech/FastFold
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Install FastFold
run: |
[ ! -z "$(ls -A /github/home/fastfold_cache/)" ] && cp -r /github/home/fastfold_cache/* /__w/FastFold/FastFold/
pip install -r requirements/requirements.txt
pip install -e .
pip install -r requirements/test_requirements.txt
cp -r /__w/FastFold/FastFold/build /github/home/fastfold_cache/
cp /__w/FastFold/FastFold/*.so /github/home/fastfold_cache/
- name: Unit Testing
run: |
PYTHONPATH=$PWD pytest tests
env:
NCCL_SHM_DISABLE: 1
name: Release bdist wheel
on:
workflow_dispatch:
inputs:
torch_version:
type: string
description: torch version, separated by comma
required: true
default: "all"
cuda_version:
type: string
description: cuda version, separated by comma
required: true
github_ref:
type: string
description: Branch or Tag
default: 'main'
required: true
jobs:
matrix_preparation:
name: Prepare Container List
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- id: set-matrix
env:
TORCH_VERSIONS: ${{ inputs.torch_version }}
CUDA_VERSIONS: ${{ inputs.cuda_version }}
run: |
echo $TORCH_VERSIONS
echo $CUDA_VERSIONS
IFS=','
DOCKER_IMAGE=()
for cv in $CUDA_VERSIONS
do
DOCKER_IMAGE+=("\"hpcaitech/cuda-conda:${cv}\"")
done
container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
container="[${container}]"
echo "$container"
echo "::set-output name=matrix::{\"container\":$(echo "$container")}"
build:
name: Release bdist wheels
needs: matrix_preparation
if: github.repository == 'hpcaitech/FastFold' && contains(fromJson('["FrankLeeeee", "feifeibear", "Shenggan", "Gy-Lu"]'), github.actor)
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: ${{ matrix.container }}
options: --gpus all --rm
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Copy scripts and checkout
run: |
cp -r ./.github/workflows/scripts/build* ./
ln -s /github/home/pip_wheels ./pip_wheels
git checkout $git_ref
env:
git_ref: ${{ github.event.inputs.github_ref }}
- name: Build bdist wheel
run: |
pip install beautifulsoup4 requests packaging
python ./build_fastfold_wheel.py --torch_version $TORCH_VERSIONS
env:
TORCH_VERSIONS: ${{ inputs.torch_version }}
- name: 🚀 Deploy
uses: garygrossgarten/github-action-scp@release
with:
local: all_dist
remote: ${{ secrets.PRIVATE_PYPI_DIR }}
host: ${{ secrets.PRIVATE_PYPI_HOST }}
username: ${{ secrets.PRIVATE_PYPI_USER }}
password: ${{ secrets.PRIVATE_PYPI_PASSWD }}
\ No newline at end of file
import requests
from bs4 import BeautifulSoup
import argparse
import os
import subprocess
from packaging import version
from functools import cmp_to_key
WHEEL_TEXT_ROOT_URL = 'https://github.com/hpcaitech/public_assets/tree/main/colossalai/torch_build/torch_wheels'
RAW_TEXT_FILE_PREFIX = 'https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/torch_build/torch_wheels'
CUDA_HOME = os.environ['CUDA_HOME']
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--torch_version', type=str)
return parser.parse_args()
def get_cuda_bare_metal_version():
raw_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True)
output = raw_output.split()
release_idx = output.index("release") + 1
release = output[release_idx].split(".")
bare_metal_major = release[0]
bare_metal_minor = release[1][0]
return bare_metal_major, bare_metal_minor
def all_wheel_info():
page_text = requests.get(WHEEL_TEXT_ROOT_URL).text
soup = BeautifulSoup(page_text)
all_a_links = soup.find_all('a')
wheel_info = dict()
for a_link in all_a_links:
if 'cuda' in a_link.text and '.txt' in a_link.text:
filename = a_link.text
torch_version, cuda_version = filename.rstrip('.txt').split('-')
cuda_version = cuda_version.lstrip('cuda')
if float(cuda_version) < 11.1:
continue
if torch_version not in wheel_info:
wheel_info[torch_version] = dict()
wheel_info[torch_version][cuda_version] = dict()
file_text = requests.get(f'{RAW_TEXT_FILE_PREFIX}/{filename}').text
lines = file_text.strip().split('\n')
for line in lines:
parts = line.split('\t')
method, url, python_version = parts[:3]
if float(python_version) < 3.8 or method == "conda":
continue
wheel_info[torch_version][cuda_version][python_version] = dict(url=url)
return wheel_info
def build_fastfold(wheel_info):
cuda_version_major, cuda_version_minor = get_cuda_bare_metal_version()
cuda_version_on_host = f'{cuda_version_major}.{cuda_version_minor}'
for torch_version, cuda_versioned_wheel_info in wheel_info.items():
for cuda_version, python_versioned_wheel_info in cuda_versioned_wheel_info.items():
if cuda_version_on_host == cuda_version:
for python_version, wheel_info in python_versioned_wheel_info.items():
url = wheel_info['url']
filename = url.split('/')[-1].replace('%2B', '+')
cmd = f'bash ./build_fastfold_wheel.sh {url} {filename} {cuda_version} {python_version}'
os.system(cmd)
def main():
args = parse_args()
wheel_info = all_wheel_info()
# filter wheels on condition
all_torch_versions = list(wheel_info.keys())
def _compare_version(a, b):
if version.parse(a) > version.parse(b):
return 1
else:
return -1
all_torch_versions.sort(key=cmp_to_key(_compare_version))
if args.torch_version != 'all':
torch_versions = args.torch_version.split(',')
# only keep the torch versions specified
for key in all_torch_versions:
if key not in torch_versions:
wheel_info.pop(key)
build_fastfold(wheel_info)
if __name__ == '__main__':
main()
#!/usr/bin/env bash
url=${1}
filename=${2}
cuda_version=${3}
python_version=${4}
git reset --hard HEAD
mkdir -p ./all_dist
source activate base
conda create -n $python_version -y python=$python_version
source activate $python_version
wget -nc -q -O ./$filename $url
pip install ./$filename
pip install numpy
python setup.py bdist_wheel
mv ./dist/* ./all_dist
python setup.py clean
conda deactivate
conda env remove -n $python_version
\ No newline at end of file
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# vscode
.vscode/
# setup
dist/
build/
\ No newline at end of file
This diff is collapsed.
![](/assets/fold.jpg)
# FastFold
[![](https://img.shields.io/badge/Paper-PDF-green?style=flat&logo=arXiv&logoColor=green)](https://arxiv.org/abs/2203.00854)
![](https://img.shields.io/badge/Made%20with-ColossalAI-blueviolet?style=flat)
![](https://img.shields.io/badge/Habana-support-blue?style=flat&logo=intel&logoColor=blue)
![](https://img.shields.io/github/v/release/hpcaitech/FastFold)
[![GitHub license](https://img.shields.io/github/license/hpcaitech/FastFold)](https://github.com/hpcaitech/FastFold/blob/main/LICENSE)
## News :triangular_flag_on_post:
- [2023/01] Compatible with AlphaFold v2.3
- [2023/01] Added support for inference and training of AlphaFold on [Intel Habana](https://habana.ai/) platform. For usage instructions, see [here](#Inference-or-Training-on-Intel-Habana).
<br>
Optimizing Protein Structure Prediction Model Training and Inference on Heterogeneous Clusters
FastFold provides a **high-performance implementation of Evoformer** with the following characteristics.
1. Excellent kernel performance on GPU platform
2. Supporting Dynamic Axial Parallelism(DAP)
* Break the memory limit of single GPU and reduce the overall training time
* DAP can significantly speed up inference and make ultra-long sequence inference possible
3. Ease of use
* Huge performance gains with a few lines changes
* You don't need to care about how the parallel part is implemented
4. Faster data processing, about 3x times faster on monomer, about 3Nx times faster on multimer with N sequence.
5. Great Reduction on GPU memory, able to inference sequence containing more than **10000** residues.
## Installation
To install FastFold, you will need:
+ Python 3.8 or 3.9.
+ [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) 11.3 or above
+ PyTorch 1.12 or above
For now, You can install FastFold:
### Using Conda (Recommended)
We highly recommend installing an Anaconda or Miniconda environment and install PyTorch with conda.
Lines below would create a new conda environment called "fastfold":
```shell
git clone https://github.com/hpcaitech/FastFold
cd FastFold
conda env create --name=fastfold -f environment.yml
conda activate fastfold
python setup.py install
```
#### Advanced
To leverage the power of FastFold, we recommend you to install [Triton](https://github.com/openai/triton).
**NOTE: Triron needs CUDA 11.4 to run.**
```bash
pip install -U --pre triton
```
## Use Docker
### Build On Your Own
Run the following command to build a docker image from Dockerfile provided.
> Building FastFold from scratch requires GPU support, you need to use Nvidia Docker Runtime as the default when doing `docker build`. More details can be found [here](https://stackoverflow.com/questions/59691207/docker-build-with-nvidia-runtime).
```shell
cd FastFold
docker build -t fastfold ./docker
```
Run the following command to start the docker container in interactive mode.
```shell
docker run -ti --gpus all --rm --ipc=host fastfold bash
```
## Usage
You can use `Evoformer` as `nn.Module` in your project after `from fastfold.model.fastnn import Evoformer`:
```python
from fastfold.model.fastnn import Evoformer
evoformer_layer = Evoformer()
```
If you want to use Dynamic Axial Parallelism, add a line of initialize with `fastfold.distributed.init_dap`.
```python
from fastfold.distributed import init_dap
init_dap(args.dap_size)
```
### Download the dataset
You can down the dataset used to train FastFold by the script `download_all_data.sh`:
./scripts/download_all_data.sh data/
### Inference
You can use FastFold with `inject_fastnn`. This will replace the evoformer from OpenFold with the high performance evoformer from FastFold.
```python
from fastfold.utils import inject_fastnn
model = AlphaFold(config)
import_jax_weights_(model, args.param_path, version=args.model_name)
model = inject_fastnn(model)
```
For Dynamic Axial Parallelism, you can refer to `./inference.py`. Here is an example of 2 GPUs parallel inference:
```shell
python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir .outputs/ \
--gpus 2 \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2022_05.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniref30_database_path data/uniref30/UniRef30_2021_03 \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign` \
--enable_workflow \
--inplace
```
or run the script `./inference.sh`, you can change the parameter in the script, especisally those data path.
```shell
./inference.sh
```
Alphafold's data pre-processing takes a lot of time, so we speed up the data pre-process by [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow, which achieves a 3x times faster speed. To run the inference with ray workflow, we add parameter `--enable_workflow` by default.
To reduce memory usage of embedding presentations, we also add parameter `--inplace` to share memory by defaul.
#### inference with lower memory usage
Alphafold's embedding presentations take up a lot of memory as the sequence length increases. To reduce memory usage,
you should add parameter `--chunk_size [N]` to cmdline or shell script `./inference.sh`.
The smaller you set N, the less memory will be used, but it will affect the speed. We can inference
a sequence of length 10000 in bf16 with 61GB memory on a Nvidia A100(80GB). For fp32, the max length is 8000.
> You need to set `PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:15000` to inference such an extreme long sequence.
```shell
python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir .outputs/ \
--gpus 2 \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2022_05.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniref30_database_path data/uniref30/UniRef30_2021_03 \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign` \
--enable_workflow \
--inplace
--chunk_size N \
```
#### inference multimer sequence
Alphafold Multimer is supported. You can the following cmd or shell script `./inference_multimer.sh`.
Workflow and memory parameters mentioned above can also be used.
```shell
python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir ./ \
--gpus 2 \
--model_preset multimer \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2022_05.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniref30_database_path data/uniref30/UniRef30_2021_03 \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--uniprot_database_path data/uniprot/uniprot.fasta \
--pdb_seqres_database_path data/pdb_seqres/pdb_seqres.txt \
--param_path data/params/params_model_1_multimer.npz \
--model_name model_1_multimer \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign`
```
### Inference or Training on Intel Habana
To run AlphaFold inference or training on Intel Habana, you can follow the instructions in the [Installation Guide](https://docs.habana.ai/en/latest/Installation_Guide/) to set up your environment on Amazon EC2 DL1 instances or on-premise environments, and please use SynapseAI R1.7.1 to test as it was verified internally.
Once you have prepared your dataset and installed fastfold, you can use the following scripts:
```shell
cd fastfold/habana/fastnn/custom_op/; python setup.py build (this is for Gaudi, for Gaudi2 please use setup2.py) ; cd -
bash habana/inference.sh
bash habana/train.sh
```
## Performance Benchmark
We have included a performance benchmark script in `./benchmark`. You can benchmark the performance of Evoformer using different settings.
```shell
cd ./benchmark
torchrun --nproc_per_node=1 perf.py --msa-length 128 --res-length 256
```
Benchmark Dynamic Axial Parallelism with 2 GPUs:
```shell
cd ./benchmark
torchrun --nproc_per_node=2 perf.py --msa-length 128 --res-length 256 --dap-size 2
```
If you want to benchmark with [OpenFold](https://github.com/aqlaboratory/openfold), you need to install OpenFold first and benchmark with option `--openfold`:
```shell
torchrun --nproc_per_node=1 perf.py --msa-length 128 --res-length 256 --openfold
```
## Cite us
Cite this paper, if you use FastFold in your research publication.
```
@misc{cheng2022fastfold,
title={FastFold: Reducing AlphaFold Training Time from 11 Days to 67 Hours},
author={Shenggan Cheng and Ruidong Wu and Zhongming Yu and Binrui Li and Xiwen Zhang and Jian Peng and Yang You},
year={2022},
eprint={2203.00854},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
```
## Acknowledgments
We would like to extend our special thanks to the Intel Habana team for their support in providing us with technology and resources on the Habana platform.
import argparse
import os
import torch
import torch.nn as nn
from fastfold.distributed import init_dap
from fastfold.model.fastnn import Evoformer
def main():
parser = argparse.ArgumentParser(description='Evoformer Standalone Perf Benchmark')
parser.add_argument("--dap-size", default=1, type=int, help='batch size')
parser.add_argument('--batch-size', default=1, type=int, help='batch size')
parser.add_argument('--msa-length', default=132, type=int, help='Sequence Length of MSA')
parser.add_argument('--res-length',
default=256,
type=int,
help='Sequence Length of Residues')
parser.add_argument('--trials', default=50, type=int, help='Number of Trials to Execute')
parser.add_argument('--warmup-trials', default=5, type=int, help='Warmup Trials to discard')
parser.add_argument('--layers',
default=12,
type=int,
help='Evoformer Layers to Execute')
parser.add_argument('--cm', default=256, type=int, help='MSA hidden dimension')
parser.add_argument('--cz', default=128, type=int, help='Pair hidden dimension')
parser.add_argument('--heads', default=8, type=int, help='Number of Multihead Attention heads')
parser.add_argument('--openfold',
action='store_true',
help='Benchmark with Evoformer Implementation from OpenFold.')
parser.add_argument('--fwd', action='store_true', help='Only execute Fwd Pass.')
parser.add_argument('--prof', action='store_true', help='run with profiler.')
args = parser.parse_args()
init_dap(args.dap_size)
precision = torch.bfloat16
if args.dap_size > 1:
# (PyTorch issue) Currently All2All communication does not support the Bfloat16 datatype in PyTorch
precision = torch.float16
if not torch.cuda.is_available():
raise NotImplementedError('Running on CPU is not supported')
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)
if args.openfold:
from openfold.model.evoformer import EvoformerBlock
class OpenFoldEvoformer(nn.Module):
def __init__(self, d_node, d_pair):
super(OpenFoldEvoformer, self).__init__()
self.d_node = d_node
self.d_pair = d_pair
self.c_hidden_msa_att = int(d_node / 8)
self.c_hidden_pair_att = int(d_pair / 8)
self.EvoformerBlock = EvoformerBlock(c_m=d_node,
c_z=d_pair,
c_hidden_msa_att=self.c_hidden_msa_att,
c_hidden_opm=self.c_hidden_msa_att,
c_hidden_mul=self.d_pair,
c_hidden_pair_att=self.c_hidden_pair_att,
no_heads_msa=8,
no_heads_pair=4,
transition_n=4,
msa_dropout=0.15,
pair_dropout=0.25,
inf=1e9,
eps=1e-10)
def forward(self, node, pair, node_mask, pair_mask):
node, pair = self.EvoformerBlock(node, pair, node_mask, pair_mask)
return node, pair
attn_layers = []
for idx in range(0, args.layers):
if args.openfold:
attn_layers.append(OpenFoldEvoformer(d_node=args.cm, d_pair=args.cz))
else:
attn_layers.append(Evoformer(d_node=args.cm, d_pair=args.cz))
attn_layers[idx].cuda()
attn_layers[idx].to(dtype=precision)
start_evt_fwd = []
start_evt_bwd = []
stop_evt_bwd = []
for recorded_trial in range(0, args.trials):
start_evt_fwd.append(torch.cuda.Event(enable_timing=True))
start_evt_bwd.append(torch.cuda.Event(enable_timing=True))
stop_evt_bwd.append(torch.cuda.Event(enable_timing=True))
inputs_node = torch.randn(args.batch_size,
args.msa_length // args.dap_size,
args.res_length,
args.cm,
dtype=precision,
device=torch.device("cuda")).requires_grad_(True)
inputs_pair = torch.randn(args.batch_size,
args.res_length // args.dap_size,
args.res_length,
args.cz,
dtype=precision,
device=torch.device("cuda")).requires_grad_(True)
node_mask = torch.ones((args.batch_size, args.msa_length, args.res_length),
dtype=precision,
device=torch.device("cuda")).requires_grad_(False)
pair_mask = torch.ones((args.batch_size, args.res_length, args.res_length),
dtype=precision,
device=torch.device("cuda")).requires_grad_(False)
grads_node = torch.randn_like(inputs_pair)
if args.prof:
prof = torch.profiler.profile(
schedule=torch.profiler.schedule(wait=1,
warmup=args.warmup_trials,
active=args.trials,
repeat=1),
on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/fastfold'),
profile_memory=False,
record_shapes=False,
with_stack=False)
prof.start()
for trial in range(0, args.trials + args.warmup_trials):
layer_inputs = inputs_node, inputs_pair
evt_idx = trial - args.warmup_trials
torch.distributed.barrier()
torch.cuda.synchronize()
if evt_idx >= 0:
start_evt_fwd[evt_idx].record()
for lyr_idx in range(0, args.layers):
layer_inputs = attn_layers[lyr_idx].forward(*layer_inputs, node_mask, pair_mask)
torch.cuda.synchronize()
if evt_idx >= 0:
start_evt_bwd[evt_idx].record()
if not args.fwd:
layer_inputs[1].backward(grads_node)
if evt_idx >= 0:
stop_evt_bwd[evt_idx].record()
if args.prof:
prof.step()
if args.prof:
prof.stop()
torch.distributed.barrier()
torch.cuda.synchronize()
elapsed_time_fwd = 0.0
elapsed_time_bwd = 0.0
for evt_idx in range(0, args.trials):
elapsed_time_fwd += start_evt_fwd[evt_idx].elapsed_time(start_evt_bwd[evt_idx])
elapsed_time_bwd += start_evt_bwd[evt_idx].elapsed_time(stop_evt_bwd[evt_idx])
print("[ MSA Attn ] Input: {:4d}, {:4d}, {:4d}, ({:4d} {:4d}) Fwd Time / Layer: {:.3f} ms Bwd Time / Layer: {:.3f} ms".format(
args.batch_size, args.msa_length, args.res_length, \
args.cm, args.cz, \
elapsed_time_fwd / ( args.trials * args.layers ), \
elapsed_time_bwd / ( args.trials * args.layers )))
if __name__ == '__main__':
main()
# Copyright 2023 HPC-AI Tech Inc.
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time
import fastfold
import numpy as np
import torch
import torch.multiprocessing as mp
from fastfold.config import model_config
from fastfold.data import data_transforms
from fastfold.model.fastnn import set_chunk_size
from fastfold.model.hub import AlphaFold
from fastfold.utils.inject_fastnn import inject_fastnn
from fastfold.utils.tensor_utils import tensor_tree_map
if int(torch.__version__.split(".")[0]) >= 1 and int(torch.__version__.split(".")[1]) > 11:
torch.backends.cuda.matmul.allow_tf32 = True
def random_template_feats(n_templ, n):
b = []
batch = {
"template_mask": np.random.randint(0, 2, (*b, n_templ)),
"template_pseudo_beta_mask": np.random.randint(0, 2, (*b, n_templ, n)),
"template_pseudo_beta": np.random.rand(*b, n_templ, n, 3),
"template_aatype": np.random.randint(0, 22, (*b, n_templ, n)),
"template_all_atom_mask": np.random.randint(0, 2, (*b, n_templ, n, 37)),
"template_all_atom_positions": np.random.rand(*b, n_templ, n, 37, 3) * 10,
"template_torsion_angles_sin_cos": np.random.rand(*b, n_templ, n, 7, 2),
"template_alt_torsion_angles_sin_cos": np.random.rand(*b, n_templ, n, 7, 2),
"template_torsion_angles_mask": np.random.rand(*b, n_templ, n, 7),
}
batch = {k: v.astype(np.float32) for k, v in batch.items()}
batch["template_aatype"] = batch["template_aatype"].astype(np.int64)
return batch
def random_extra_msa_feats(n_extra, n):
b = []
batch = {
"extra_msa": np.random.randint(0, 22, (*b, n_extra, n)).astype(np.int64),
"extra_has_deletion": np.random.randint(0, 2, (*b, n_extra, n)).astype(np.float32),
"extra_deletion_value": np.random.rand(*b, n_extra, n).astype(np.float32),
"extra_msa_mask": np.random.randint(0, 2, (*b, n_extra, n)).astype(np.float32),
}
return batch
def generate_batch(n_res):
batch = {}
tf = torch.randint(21, size=(n_res,))
batch["target_feat"] = torch.nn.functional.one_hot(tf, 22).float()
batch["aatype"] = torch.argmax(batch["target_feat"], dim=-1)
batch["residue_index"] = torch.arange(n_res)
batch["msa_feat"] = torch.rand((128, n_res, 49))
t_feats = random_template_feats(4, n_res)
batch.update({k: torch.tensor(v) for k, v in t_feats.items()})
extra_feats = random_extra_msa_feats(5120, n_res)
batch.update({k: torch.tensor(v) for k, v in extra_feats.items()})
batch["msa_mask"] = torch.randint(low=0, high=2, size=(128, n_res)).float()
batch["seq_mask"] = torch.randint(low=0, high=2, size=(n_res,)).float()
batch.update(data_transforms.make_atom14_masks(batch))
batch["no_recycling_iters"] = torch.tensor(2.)
add_recycling_dims = lambda t: (t.unsqueeze(-1).expand(*t.shape, 3))
batch = tensor_tree_map(add_recycling_dims, batch)
return batch
def inference_model(rank, world_size, result_q, batch, args):
os.environ['RANK'] = str(rank)
os.environ['LOCAL_RANK'] = str(rank)
os.environ['WORLD_SIZE'] = str(world_size)
# init distributed for Dynamic Axial Parallelism
fastfold.distributed.init_dap()
torch.cuda.set_device(rank)
config = model_config(args.model_name)
if args.chunk_size:
config.globals.chunk_size = args.chunk_size
config.globals.inplace = args.inplace
config.globals.is_multimer = False
model = AlphaFold(config)
model = inject_fastnn(model)
model = model.eval()
model = model.cuda()
set_chunk_size(model.globals.chunk_size)
with torch.no_grad():
batch = {k: torch.as_tensor(v).cuda() for k, v in batch.items()}
t = time.perf_counter()
out = model(batch)
print(f"Inference time: {time.perf_counter() - t}")
out = tensor_tree_map(lambda x: np.array(x.cpu()), out)
result_q.put(out)
torch.distributed.barrier()
torch.cuda.synchronize()
def inference_monomer_model(args):
batch = generate_batch(args.n_res)
manager = mp.Manager()
result_q = manager.Queue()
torch.multiprocessing.spawn(inference_model, nprocs=args.gpus, args=(args.gpus, result_q, batch, args))
out = result_q.get()
# get unrelexed pdb and save
# batch = tensor_tree_map(lambda x: np.array(x[..., -1].cpu()), batch)
# plddt = out["plddt"]
# plddt_b_factors = np.repeat(plddt[..., None], residue_constants.atom_type_num, axis=-1)
# unrelaxed_protein = protein.from_prediction(features=batch,
# result=out,
# b_factors=plddt_b_factors)
# with open('demo_unrelex.pdb', 'w+') as fp:
# fp.write(unrelaxed_protein)
def main(args):
inference_monomer_model(args)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--gpus", type=int, default=1, help="""Number of GPUs with which to run inference""")
parser.add_argument("--n_res", type=int, default=50, help="virtual residue number of random data")
parser.add_argument("--model_name", type=str, default="model_1", help="model name of alphafold")
parser.add_argument('--chunk_size', type=int, default=None)
parser.add_argument('--inplace', default=False, action='store_true')
args = parser.parse_args()
main(args)
\ No newline at end of file
FROM hpcaitech/pytorch-cuda:1.12.0-11.3.0
RUN conda install openmm=7.7.0 pdbfixer -c conda-forge -y \
&& conda install hmmer==3.3.2 hhsuite=3.3.0 kalign2=2.04 -c bioconda -y
RUN pip install biopython==1.79 dm-tree==0.1.6 ml-collections==0.1.0 \
scipy==1.7.1 ray pyarrow pandas einops
RUN pip install colossalai
Run git clone https://github.com/hpcaitech/FastFold.git \
&& cd ./FastFold \
&& python setup.py install
name: fastfold
channels:
- conda-forge
- bioconda
- pytorch
dependencies:
- pip:
- biopython==1.79
- dm-tree==0.1.6
- ml-collections==0.1.0
- PyYAML==5.4.1
- requests==2.26.0
- scipy==1.7.1
- tqdm==4.62.2
- typing-extensions==4.3.0
- einops
- ray==2.0.0
- pyarrow
- pandas
- colossalai==0.2.7
- pytorch::pytorch=1.12
- pytorch::torchvision
- pytorch::torchaudio
- conda-forge::cudatoolkit=11.3
- conda-forge::python=3.8
- conda-forge::setuptools=59.5.0
- conda-forge::pip
- conda-forge::openmm=7.7.0
- conda-forge::pdbfixer
- bioconda::hmmer==3.3.2
- bioconda::hhsuite==3.3.0
- bioconda::kalign2==2.04
VERSION = "0.1.0-beta"
\ No newline at end of file
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Protein data type."""
import dataclasses
import io
from typing import Any, Mapping, Optional
import re
from fastfold.common import residue_constants
from Bio.PDB import PDBParser
import numpy as np
FeatureDict = Mapping[str, np.ndarray]
ModelOutput = Mapping[str, Any] # Is a nested dict.
PICO_TO_ANGSTROM = 0.01
PDB_CHAIN_IDS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
PDB_MAX_CHAINS = len(PDB_CHAIN_IDS)
assert(PDB_MAX_CHAINS == 62)
@dataclasses.dataclass(frozen=True)
class Protein:
"""Protein structure representation."""
# Cartesian coordinates of atoms in angstroms. The atom types correspond to
# residue_constants.atom_types, i.e. the first three are N, CA, CB.
atom_positions: np.ndarray # [num_res, num_atom_type, 3]
# Amino-acid type for each residue represented as an integer between 0 and
# 20, where 20 is 'X'.
aatype: np.ndarray # [num_res]
# Binary float mask to indicate presence of a particular atom. 1.0 if an atom
# is present and 0.0 if not. This should be used for loss masking.
atom_mask: np.ndarray # [num_res, num_atom_type]
# Residue index as used in PDB. It is not necessarily continuous or 0-indexed.
residue_index: np.ndarray # [num_res]
# 0-indexed number corresponding to the chain in the protein that this
# residue belongs to
chain_index: np.ndarray # [num_res]
# B-factors, or temperature factors, of each residue (in sq. angstroms units),
# representing the displacement of the residue from its ground truth mean
# value.
b_factors: np.ndarray # [num_res, num_atom_type]
def __post_init__(self):
if(len(np.unique(self.chain_index)) > PDB_MAX_CHAINS):
raise ValueError(
f"Cannot build an instance with more than {PDB_MAX_CHAINS} "
"chains because these cannot be written to PDB format"
)
def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
"""Takes a PDB string and constructs a Protein object.
WARNING: All non-standard residue types will be converted into UNK. All
non-standard atoms will be ignored.
Args:
pdb_str: The contents of the pdb file
chain_id: If chain_id is specified (e.g. A), then only that chain is
parsed. Else, all chains are parsed.
Returns:
A new `Protein` parsed from the pdb contents.
"""
pdb_fh = io.StringIO(pdb_str)
parser = PDBParser(QUIET=True)
structure = parser.get_structure("none", pdb_fh)
models = list(structure.get_models())
if len(models) != 1:
raise ValueError(
f"Only single model PDBs are supported. Found {len(models)} models."
)
model = models[0]
atom_positions = []
aatype = []
atom_mask = []
residue_index = []
chain_ids = []
b_factors = []
for chain in model:
if(chain_id is not None and chain.id != chain_id):
continue
for res in chain:
if res.id[2] != " ":
raise ValueError(
f"PDB contains an insertion code at chain {chain.id} and residue "
f"index {res.id[1]}. These are not supported."
)
res_shortname = residue_constants.restype_3to1.get(res.resname, "X")
restype_idx = residue_constants.restype_order.get(
res_shortname, residue_constants.restype_num
)
pos = np.zeros((residue_constants.atom_type_num, 3))
mask = np.zeros((residue_constants.atom_type_num,))
res_b_factors = np.zeros((residue_constants.atom_type_num,))
for atom in res:
if atom.name not in residue_constants.atom_types:
continue
pos[residue_constants.atom_order[atom.name]] = atom.coord
mask[residue_constants.atom_order[atom.name]] = 1.0
res_b_factors[
residue_constants.atom_order[atom.name]
] = atom.bfactor
if np.sum(mask) < 0.5:
# If no known atom positions are reported for the residue then skip it.
continue
aatype.append(restype_idx)
atom_positions.append(pos)
atom_mask.append(mask)
residue_index.append(res.id[1])
chain_ids.append(chain.id)
b_factors.append(res_b_factors)
# Chain IDs are usually characters so map these to ints
unique_chain_ids = np.unique(chain_ids)
chain_id_mapping = {cid: n for n, cid in enumerate(unique_chain_ids)}
chain_index = np.array([chain_id_mapping[cid] for cid in chain_ids])
return Protein(
atom_positions=np.array(atom_positions),
atom_mask=np.array(atom_mask),
aatype=np.array(aatype),
residue_index=np.array(residue_index),
chain_index=chain_index,
b_factors=np.array(b_factors),
)
def from_proteinnet_string(proteinnet_str: str) -> Protein:
tag_re = r'(\[[A-Z]+\]\n)'
tags = [
tag.strip() for tag in re.split(tag_re, proteinnet_str) if len(tag) > 0
]
groups = zip(tags[0::2], [l.split('\n') for l in tags[1::2]])
atoms = ['N', 'CA', 'C']
aatype = None
atom_positions = None
atom_mask = None
for g in groups:
if("[PRIMARY]" == g[0]):
seq = g[1][0].strip()
for i in range(len(seq)):
if(seq[i] not in residue_constants.restypes):
seq[i] = 'X'
aatype = np.array([
residue_constants.restype_order.get(
res_symbol, residue_constants.restype_num
) for res_symbol in seq
])
elif("[TERTIARY]" == g[0]):
tertiary = []
for axis in range(3):
tertiary.append(list(map(float, g[1][axis].split())))
tertiary_np = np.array(tertiary)
atom_positions = np.zeros(
(len(tertiary[0])//3, residue_constants.atom_type_num, 3)
).astype(np.float32)
for i, atom in enumerate(atoms):
atom_positions[:, residue_constants.atom_order[atom], :] = (
np.transpose(tertiary_np[:, i::3])
)
atom_positions *= PICO_TO_ANGSTROM
elif("[MASK]" == g[0]):
mask = np.array(list(map({'-': 0, '+': 1}.get, g[1][0].strip())))
atom_mask = np.zeros(
(len(mask), residue_constants.atom_type_num,)
).astype(np.float32)
for i, atom in enumerate(atoms):
atom_mask[:, residue_constants.atom_order[atom]] = 1
atom_mask *= mask[..., None]
return Protein(
atom_positions=atom_positions,
atom_mask=atom_mask,
aatype=aatype,
residue_index=np.arange(len(aatype)),
b_factors=None,
)
def _chain_end(atom_index, end_resname, chain_name, residue_index) -> str:
chain_end = 'TER'
return(
f'{chain_end:<6}{atom_index:>5} {end_resname:>3} '
f'{chain_name:>1}{residue_index:>4}'
)
def to_pdb(prot: Protein) -> str:
"""Converts a `Protein` instance to a PDB string.
Args:
prot: The protein to convert to PDB.
Returns:
PDB string.
"""
restypes = residue_constants.restypes + ["X"]
res_1to3 = lambda r: residue_constants.restype_1to3.get(restypes[r], "UNK")
atom_types = residue_constants.atom_types
pdb_lines = []
atom_mask = prot.atom_mask
aatype = prot.aatype
atom_positions = prot.atom_positions
residue_index = prot.residue_index.astype(np.int32)
chain_index = prot.chain_index.astype(np.int32)
b_factors = prot.b_factors
if np.any(aatype > residue_constants.restype_num):
raise ValueError("Invalid aatypes.")
# Construct a mapping from chain integer indices to chain ID strings.
chain_ids = {}
for i in np.unique(chain_index): # np.unique gives sorted output.
if i >= PDB_MAX_CHAINS:
raise ValueError(
f"The PDB format supports at most {PDB_MAX_CHAINS} chains."
)
chain_ids[i] = PDB_CHAIN_IDS[i]
pdb_lines.append("MODEL 1")
atom_index = 1
last_chain_index = chain_index[0]
# Add all atom sites.
for i in range(aatype.shape[0]):
# Close the previous chain if in a multichain PDB.
if last_chain_index != chain_index[i]:
pdb_lines.append(
_chain_end(
atom_index,
res_1to3(aatype[i - 1]),
chain_ids[chain_index[i - 1]],
residue_index[i - 1]
)
)
last_chain_index = chain_index[i]
atom_index += 1 # Atom index increases at the TER symbol.
res_name_3 = res_1to3(aatype[i])
for atom_name, pos, mask, b_factor in zip(
atom_types, atom_positions[i], atom_mask[i], b_factors[i]
):
if mask < 0.5:
continue
record_type = "ATOM"
name = atom_name if len(atom_name) == 4 else f" {atom_name}"
alt_loc = ""
insertion_code = ""
occupancy = 1.00
element = atom_name[
0
] # Protein supports only C, N, O, S, this works.
charge = ""
# PDB is a columnar format, every space matters here!
atom_line = (
f"{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}"
f"{res_name_3:>3} {chain_ids[chain_index[i]]:>1}"
f"{residue_index[i]:>4}{insertion_code:>1} "
f"{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}"
f"{occupancy:>6.2f}{b_factor:>6.2f} "
f"{element:>2}{charge:>2}"
)
pdb_lines.append(atom_line)
atom_index += 1
# Close the final chain.
pdb_lines.append(
_chain_end(
atom_index,
res_1to3(aatype[-1]),
chain_ids[chain_index[-1]],
residue_index[-1]
)
)
pdb_lines.append("ENDMDL")
pdb_lines.append("END")
# Pad all lines to 80 characters
pdb_lines = [line.ljust(80) for line in pdb_lines]
return '\n'.join(pdb_lines) + '\n' # Add terminating newline.
def ideal_atom_mask(prot: Protein) -> np.ndarray:
"""Computes an ideal atom mask.
`Protein.atom_mask` typically is defined according to the atoms that are
reported in the PDB. This function computes a mask according to heavy atoms
that should be present in the given sequence of amino acids.
Args:
prot: `Protein` whose fields are `numpy.ndarray` objects.
Returns:
An ideal atom mask.
"""
return residue_constants.STANDARD_ATOM_MASK[prot.aatype]
def from_prediction(
features: FeatureDict,
result: ModelOutput,
b_factors: Optional[np.ndarray] = None,
remove_leading_feature_dimension: bool = False,
) -> Protein:
"""Assembles a protein from a prediction.
Args:
features: Dictionary holding model inputs.
result: Dictionary holding model outputs.
b_factors: (Optional) B-factors to use for the protein.
remove_leading_feature_dimension: Whether to remove the leading dimension
of the `features` values
Returns:
A protein instance.
"""
def _maybe_remove_leading_dim(arr: np.ndarray) -> np.ndarray:
return arr[0] if remove_leading_feature_dimension else arr
if 'asym_id' in features:
chain_index = _maybe_remove_leading_dim(features["asym_id"])
else:
chain_index = np.zeros_like(
_maybe_remove_leading_dim(features["aatype"])
)
if b_factors is None:
b_factors = np.zeros_like(result["final_atom_mask"])
return Protein(
aatype=_maybe_remove_leading_dim(features["aatype"]),
atom_positions=result["final_atom_positions"],
atom_mask=result["final_atom_mask"],
residue_index=_maybe_remove_leading_dim(features["residue_index"]) + 1,
chain_index=chain_index,
b_factors=b_factors,
)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment