Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
3c89c1c7
Commit
3c89c1c7
authored
Jun 15, 2022
by
Brian Loyal
Browse files
Docker testing
parent
148afc98
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
234 additions
and
20 deletions
+234
-20
Dockerfile
Dockerfile
+29
-20
ec2-testing.txt
ec2-testing.txt
+172
-0
run_batch_job.sh
run_batch_job.sh
+33
-0
No files found.
Dockerfile
View file @
3c89c1c7
FROM
nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04
FROM
nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04
RUN
apt-key del 7fa2af80
RUN
apt-key adv
--fetch-keys
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
RUN
apt-key adv
--fetch-keys
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN
apt-key adv
--fetch-keys
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN
apt-get update
&&
apt-get
install
-y
wget cuda-minimal-build-10-2 git
RUN
apt-get update
\
RUN
wget
-P
/tmp
\
&&
apt-get
install
-y
wget cuda-minimal-build-10-2 git zip
\
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
\
&&
apt-get clean
&&
bash /tmp/Miniconda3-latest-Linux-x86_64.sh
-b
-p
/opt/conda
\
&&
rm
/tmp/Miniconda3-latest-Linux-x86_64.sh
ENV
PATH /opt/conda/bin:$PATH
COPY
environment.yml /opt/openfold/environment.yml
RUN
wget
-q
-P
/tmp
\
https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
\
&&
bash /tmp/Mambaforge-Linux-x86_64.sh
-b
-p
/opt/mamba
\
&&
rm
/tmp/Mambaforge-Linux-x86_64.sh
# Installing into the base environment since the Docker container won't do anything other than run OpenFold.
ENV
PATH="/opt/mamba/bin:$PATH"
RUN
conda
env
update
-n
base
--file
/opt/openfold/environment.yml
&&
conda clean
--all
RUN
wget
-O
"awscliv2.zip"
"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip"
\
&&
unzip awscliv2.zip
\
&&
./aws/install
\
&&
rm
awscliv2.zip
RUN
git clone
--branch
main
--single-branch
https://github.com/aqlaboratory/openfold.git /opt/openfold
\
&&
cd
/opt/openfold
\
&&
git reset ec5619fc970e28e7b81ce452f5e08e7dd6a7cb31
\
&&
rm
-rf
/opt/openfold/imgs /opt/openfold/notebooks /opt/openfold/tests
RUN
mamba
env
update
-n
base
--file
/opt/openfold/environment.yml
\
&&
mamba clean
--all
COPY
openfold /opt/openfold/openfold
COPY
scripts /opt/openfold/scripts
COPY
run_pretrained_openfold.py /opt/openfold/run_pretrained_openfold.py
COPY
train_openfold.py /opt/openfold/train_openfold.py
COPY
setup.py /opt/openfold/setup.py
COPY
lib/openmm.patch /opt/openfold/lib/openmm.patch
RUN
wget
-q
-P
/opt/openfold/openfold/resources
\
RUN
wget
-q
-P
/opt/openfold/openfold/resources
\
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
RUN
patch
-p0
-d
/opt/conda/lib/python3.7/site-packages/ < /opt/openfold/lib/openmm.patch
RUN
patch
-p0
-d
/opt/mamba/lib/python3.7/site-packages/ < /opt/openfold/lib/openmm.patch
COPY
run_batch_job.sh /opt/openfold
WORKDIR
/opt/openfold
WORKDIR
/opt/openfold
RUN
python3 setup.py
install
RUN
python3 setup.py
install
\ No newline at end of file
ec2-testing.txt
0 → 100644
View file @
3c89c1c7
python3 run_pretrained_openfold.py \
target.fasta \
data/pdb_mmcif/mmcif_files/ \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir ./ \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--model_device cuda:1 \
--jackhmmer_binary_path lib/conda/envs/openfold_venv/bin/jackhmmer \
--hhblits_binary_path lib/conda/envs/openfold_venv/bin/hhblits \
--hhsearch_binary_path lib/conda/envs/openfold_venv/bin/hhsearch \
--kalign_binary_path lib/conda/envs/openfold_venv/bin/kalign
docker run \
--gpus all \
-v $PWD/:/data \
-v /mnt/alphafold_database/:/database \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--bfd_database_path /database/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/conda/bin/jackhmmer \
--hhblits_binary_path /opt/conda/bin/hhblits \
--hhsearch_binary_path /opt/conda/bin/hhsearch \
--kalign_binary_path /opt/conda/bin/kalign \
--param_path /database/params/params_model_1.npz
# T1083
docker run \
--rm \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-e CUDA_VISIBLE_DEVICES=0 \
617302ff1b5a \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/T1083/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data \
--output_dir /data/T1083 \
--model_device cuda:0 \
--param_path /database/params/params_model_1.npz
# T1070
docker run \
--rm \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-e CUDA_VISIBLE_DEVICES=0 \
617302ff1b5a \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/T1070/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data \
--output_dir /data/T1070 \
--model_device cuda:0 \
--param_path /database/params/params_model_1.npz
# Notes: The current implementation of OpenFold assumes that the sequences
# in the input FASTA file are each in a single line. Sequences that span multiple
# lines are interpreted as sequence IDs and cause errors.
# https://github.com/aqlaboratory/openfold/issues/89
# Also, it seems like it's not possible to run the MSA step with the reduced DB at this time
### MMseqs 2
python3 scripts/precompute_alignments_mmseqs.py \
/home/ubuntu/data/T1082/input.fasta \
/fsx/mmseqs_dbs \
uniref30_2103_db \
/home/ubuntu/data/T1082 \
/home/ubuntu/openfold/mmseqs/bin/mmseqs \
--hhsearch_binary_path /home/ubuntu/openfold/lib/conda/envs/openfold_venv/bin/hhsearch \
--pdb70 pdb70 \
--env_db colabfold_envdb_202108_db
bash scripts/colabfold_search.sh \
/home/ubuntu/openfold/mmseqs/bin/mmseqs \
/home/ubuntu/data/T1080/input.fasta \
/fsx/mmseqs_dbs \
/home/ubuntu/data/T1080/output \
uniref30_2103_db \
"" \
colabfold_envdb_202108_db \
1 0 1 1 0
# Note: this seems to take a fair amount of time to finish (30 minutes - not faster than hhblits).
Need to retry using the memory recommendations from https://colabfold.mmseqs.com/,
e.g. 768 GiB RAM. An x2gd.12xlarge looks to be the most cost-effective instance for this ($4/hr), or else
an r5.24xlarge if we need an AMD64 (x86_64) instance ($6/hr).
sudo wget -P /tmp "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
&& sudo bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
&& sudo rm /tmp/Miniconda3-latest-Linux-x86_64.sh
220614
docker run \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data/alignments/ \
--output_dir /data \
--model_device cuda:0 \
--jax_param_path /database/params/params_model_1.npz
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data/alignments/ \
--output_dir /data \
--model_device cuda:0 \
--jax_param_path /database/params/params_model_1.npz
docker run \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/conda/bin/jackhmmer \
--hhblits_binary_path /opt/conda/bin/hhblits \
--hhsearch_binary_path /opt/conda/bin/hhsearch \
--kalign_binary_path /opt/conda/bin/kalign \
--jax_param_path /database/params/params_model_1.npz
docker run --gpus all -v /home/ec2-user/data:/data -v /fsx/:/database/ -ti openfold:latest bash run_batch_job.sh \
s3://sagemaker-us-east-2-032243382548/openfold_testing/T1084.fasta \
/data/fasta_dir \
"python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/mamba/bin/jackhmmer \
--hhblits_binary_path /opt/mamba/bin/hhblits \
--hhsearch_binary_path /opt/mamba/bin/hhsearch \
--kalign_binary_path /opt/mamba/bin/kalign \
--jax_param_path /database/params/params_model_1.npz" \
/data \
s3://sagemaker-us-east-2-032243382548/openfold_testing/
\ No newline at end of file
run_batch_job.sh
0 → 100644
View file @
3c89c1c7
#!/bin/bash

################
# Stage an input from S3, run a command, then copy the results back to S3.
#
# Usage:
#   ./run_batch_job.sh INPUT_SOURCE INPUT_DESTINATION SCRIPT OUTPUT_SOURCE OUTPUT_DESTINATION
#
#   INPUT_SOURCE        source passed to the first `aws s3 cp`
#   INPUT_DESTINATION   destination passed to the first `aws s3 cp`
#   SCRIPT              command line to run, passed as ONE quoted argument
#   OUTPUT_SOURCE       source passed to the final `aws s3 cp`
#   OUTPUT_DESTINATION  destination passed to the final `aws s3 cp`
#
# Example CMD
# (Kept commented out: as live code the script would re-invoke itself with
# these fixed arguments before doing any work.)
#
#   ./run_batch_job.sh \
#     s3://sagemaker-us-east-2-032243382548/openfold_testing/T1084.fasta \
#     /data/fasta_dir \
#     "python3 /opt/openfold/run_pretrained_openfold.py \
#       /data/fasta_dir \
#       /database/pdb_mmcif/mmcif_files/ \
#       --uniref90_database_path /database/uniref90/uniref90.fasta \
#       --mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
#       --pdb70_database_path /database/pdb70/pdb70 \
#       --uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
#       --output_dir /data \
#       --model_device cuda:0 \
#       --jackhmmer_binary_path /opt/mamba/bin/jackhmmer \
#       --hhblits_binary_path /opt/mamba/bin/hhblits \
#       --hhsearch_binary_path /opt/mamba/bin/hhsearch \
#       --kalign_binary_path /opt/mamba/bin/kalign \
#       --jax_param_path /database/params/params_model_1.npz" \
#     /data \
#     s3://sagemaker-us-east-2-032243382548/openfold_testing/
################

# Fail early with a usage message instead of calling `aws s3 cp` with empty
# operands. Extra arguments beyond the fifth are ignored, as before.
if [ "$#" -lt 5 ]; then
  echo "Usage: $0 INPUT_SOURCE INPUT_DESTINATION SCRIPT OUTPUT_SOURCE OUTPUT_DESTINATION" >&2
  exit 1
fi

input_source=$1
input_destination=$2
script=$3
output_source=$4
output_destination=$5

# Download the job input.
aws s3 cp "$input_source" "$input_destination"

# Intentionally unquoted: $script holds a whole command line in a single
# argument and relies on word splitting to become a command plus its arguments.
$script

# Upload the results.
# NOTE(review): if OUTPUT_SOURCE is a directory (as in the example above),
# `aws s3 cp` presumably needs `--recursive` -- confirm against the AWS CLI docs.
aws s3 cp "$output_source" "$output_destination"
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment