Commit 3c89c1c7 authored by Brian Loyal's avatar Brian Loyal
Browse files

Docker testing

parent 148afc98
FROM nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04 FROM nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN apt-get update && apt-get install -y wget cuda-minimal-build-10-2 git RUN apt-get update \
RUN wget -P /tmp \ && apt-get install -y wget cuda-minimal-build-10-2 git zip \
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \ && apt-get clean
&& bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
&& rm /tmp/Miniconda3-latest-Linux-x86_64.sh
ENV PATH /opt/conda/bin:$PATH
COPY environment.yml /opt/openfold/environment.yml RUN wget -q -P /tmp \
https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \
&& bash /tmp/Mambaforge-Linux-x86_64.sh -b -p /opt/mamba \
&& rm /tmp/Mambaforge-Linux-x86_64.sh
# installing into the base environment since the docker container wont do anything other than run openfold ENV PATH="/opt/mamba/bin:$PATH"
RUN conda env update -n base --file /opt/openfold/environment.yml && conda clean --all
# Install AWS CLI v2; run_batch_job.sh calls `aws s3 cp`.
# The archive is unpacked under /tmp, and both the zip and the extracted
# installer directory are removed in this same layer so that neither is
# left behind in the image.
RUN wget -q -O /tmp/awscliv2.zip "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" \
    && unzip -q /tmp/awscliv2.zip -d /tmp \
    && /tmp/aws/install \
    && rm -rf /tmp/awscliv2.zip /tmp/aws
# Fetch OpenFold and pin the checkout to a known commit.
# `--hard` is required here: a plain `git reset <commit>` only moves HEAD and
# the index and leaves the files on disk at the tip of main, so the image
# would not actually contain the pinned revision.
# The imgs, notebooks and tests directories are removed after the checkout.
RUN git clone --branch main --single-branch https://github.com/aqlaboratory/openfold.git /opt/openfold \
    && git -C /opt/openfold reset --hard ec5619fc970e28e7b81ce452f5e08e7dd6a7cb31 \
    && rm -rf /opt/openfold/imgs /opt/openfold/notebooks /opt/openfold/tests
# Install the OpenFold dependencies straight into the base environment, since
# the container won't do anything other than run OpenFold.
# Package caches are cleaned in the same layer; --yes makes the clean step
# explicit about not waiting on a confirmation prompt during the build.
RUN mamba env update -n base --file /opt/openfold/environment.yml \
    && mamba clean --all --yes
COPY openfold /opt/openfold/openfold
COPY scripts /opt/openfold/scripts
COPY run_pretrained_openfold.py /opt/openfold/run_pretrained_openfold.py
COPY train_openfold.py /opt/openfold/train_openfold.py
COPY setup.py /opt/openfold/setup.py
COPY lib/openmm.patch /opt/openfold/lib/openmm.patch
RUN wget -q -P /opt/openfold/openfold/resources \ RUN wget -q -P /opt/openfold/openfold/resources \
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
RUN patch -p0 -d /opt/conda/lib/python3.7/site-packages/ < /opt/openfold/lib/openmm.patch
RUN patch -p0 -d /opt/mamba/lib/python3.7/site-packages/ < /opt/openfold/lib/openmm.patch
COPY run_batch_job.sh /opt/openfold
WORKDIR /opt/openfold WORKDIR /opt/openfold
RUN python3 setup.py install RUN python3 setup.py install
\ No newline at end of file
python3 run_pretrained_openfold.py \
target.fasta \
data/pdb_mmcif/mmcif_files/ \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir ./ \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--model_device cuda:1 \
--jackhmmer_binary_path lib/conda/envs/openfold_venv/bin/jackhmmer \
--hhblits_binary_path lib/conda/envs/openfold_venv/bin/hhblits \
--hhsearch_binary_path lib/conda/envs/openfold_venv/bin/hhsearch \
--kalign_binary_path lib/conda/envs/openfold_venv/bin/kalign
docker run \
--gpus all \
-v $PWD/:/data \
-v /mnt/alphafold_database/:/database \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--bfd_database_path /database/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/conda/bin/jackhmmer \
--hhblits_binary_path /opt/conda/bin/hhblits \
--hhsearch_binary_path /opt/conda/bin/hhsearch \
--kalign_binary_path /opt/conda/bin/kalign \
--param_path /database/params/params_model_1.npz
# T1083
docker run \
--rm \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-e CUDA_VISIBLE_DEVICES=0 \
617302ff1b5a \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/T1083/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data \
--output_dir /data/T1083 \
--model_device cuda:0 \
--param_path /database/params/params_model_1.npz
# T1070
docker run \
--rm \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-e CUDA_VISIBLE_DEVICES=0 \
617302ff1b5a \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/T1070/input.fasta \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data \
--output_dir /data/T1070 \
--model_device cuda:0 \
--param_path /database/params/params_model_1.npz
# Notes: The current implementation of OpenFold assumes that the sequences
# in the input FASTA file are each in a single line. Sequences that span multiple
# lines are interpreted as sequence IDs and cause errors.
# https://github.com/aqlaboratory/openfold/issues/89
# Also, it seems like it's not possible to run the MSA step with the reduced DB at this time
### MMseqs 2
python3 scripts/precompute_alignments_mmseqs.py \
/home/ubuntu/data/T1082/input.fasta \
/fsx/mmseqs_dbs \
uniref30_2103_db \
/home/ubuntu/data/T1082 \
/home/ubuntu/openfold/mmseqs/bin/mmseqs \
--hhsearch_binary_path /home/ubuntu/openfold/lib/conda/envs/openfold_venv/bin/hhsearch \
--pdb70 pdb70 \
--env_db colabfold_envdb_202108_db
bash scripts/colabfold_search.sh \
/home/ubuntu/openfold/mmseqs/bin/mmseqs \
/home/ubuntu/data/T1080/input.fasta \
/fsx/mmseqs_dbs \
/home/ubuntu/data/T1080/output \
uniref30_2103_db \
"" \
colabfold_envdb_202108_db \
1 0 1 1 0
# Note: this seems to take a fair amount of time to finish (about 30 minutes, i.e. not faster than hhblits).
# Need to retry using the memory recommendations from https://colabfold.mmseqs.com/,
# e.g. 768 GiB RAM. An x2gd.12xlarge looks to be the most cost-effective instance for this ($4/hr), or else
# an r5.24xlarge if we need an AMD ($6/hr).
sudo wget -P /tmp "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
&& sudo bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
&& sudo rm /tmp/Miniconda3-latest-Linux-x86_64.sh
220614
docker run \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data/alignments/ \
--output_dir /data \
--model_device cuda:0 \
--jax_param_path /database/params/params_model_1.npz
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--use_precomputed_alignments /data/alignments/ \
--output_dir /data \
--model_device cuda:0 \
--jax_param_path /database/params/params_model_1.npz
docker run \
--gpus all \
-v /home/ec2-user/data:/data \
-v /fsx/:/database/ \
-ti openfold:latest \
python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/conda/bin/jackhmmer \
--hhblits_binary_path /opt/conda/bin/hhblits \
--hhsearch_binary_path /opt/conda/bin/hhsearch \
--kalign_binary_path /opt/conda/bin/kalign \
--jax_param_path /database/params/params_model_1.npz
docker run --gpus all -v /home/ec2-user/data:/data -v /fsx/:/database/ -ti openfold:latest bash run_batch_job.sh \
s3://sagemaker-us-east-2-032243382548/openfold_testing/T1084.fasta \
/data/fasta_dir \
"python3 /opt/openfold/run_pretrained_openfold.py \
/data/fasta_dir \
/database/pdb_mmcif/mmcif_files/ \
--uniref90_database_path /database/uniref90/uniref90.fasta \
--mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /database/pdb70/pdb70 \
--uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--output_dir /data \
--model_device cuda:0 \
--jackhmmer_binary_path /opt/mamba/bin/jackhmmer \
--hhblits_binary_path /opt/mamba/bin/hhblits \
--hhsearch_binary_path /opt/mamba/bin/hhsearch \
--kalign_binary_path /opt/mamba/bin/kalign \
--jax_param_path /database/params/params_model_1.npz" \
/data \
s3://sagemaker-us-east-2-032243382548/openfold_testing/
\ No newline at end of file
#!/bin/bash
################
# Download an input from S3, run a command, then upload its output to S3.
#
# Usage:
#   ./run_batch_job.sh INPUT_SOURCE INPUT_DESTINATION SCRIPT OUTPUT_SOURCE OUTPUT_DESTINATION
#
#   INPUT_SOURCE        source given to `aws s3 cp` for the input
#   INPUT_DESTINATION   destination given to `aws s3 cp` for the input
#   SCRIPT              command line to execute, passed as one string
#   OUTPUT_SOURCE       local file or directory to upload once SCRIPT is done
#   OUTPUT_DESTINATION  destination given to `aws s3 cp` for the output
#
# The script stops at the first failing step and exits non-zero.
#
# Example CMD (kept as a comment so that the script does not invoke itself)
# ./run_batch_job.sh \
# s3://sagemaker-us-east-2-032243382548/openfold_testing/T1084.fasta \
# /data/fasta_dir \
# "python3 /opt/openfold/run_pretrained_openfold.py \
# /data/fasta_dir \
# /database/pdb_mmcif/mmcif_files/ \
# --uniref90_database_path /database/uniref90/uniref90.fasta \
# --mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
# --pdb70_database_path /database/pdb70/pdb70 \
# --uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
# --output_dir /data \
# --model_device cuda:0 \
# --jackhmmer_binary_path /opt/mamba/bin/jackhmmer \
# --hhblits_binary_path /opt/mamba/bin/hhblits \
# --hhsearch_binary_path /opt/mamba/bin/hhsearch \
# --kalign_binary_path /opt/mamba/bin/kalign \
# --jax_param_path /database/params/params_model_1.npz" \
# /data \
# s3://sagemaker-us-east-2-032243382548/openfold_testing/
################

# Abort on the first failing command and on use of an unset variable.
set -eu

if [ "$#" -lt 5 ]; then
    echo "Usage: $0 INPUT_SOURCE INPUT_DESTINATION SCRIPT OUTPUT_SOURCE OUTPUT_DESTINATION" >&2
    exit 1
fi

input_source=$1
input_destination=$2
script=$3
output_source=$4
output_destination=$5

# Stage the input.
aws s3 cp "$input_source" "$input_destination"

# Deliberately unquoted: the command arrives as a single string and has to be
# split into words by the shell before it can be executed.
$script

# Upload the results. `aws s3 cp` only copies a directory when --recursive is
# given, so pick the form that matches what OUTPUT_SOURCE is.
if [ -d "$output_source" ]; then
    aws s3 cp --recursive "$output_source" "$output_destination"
else
    aws s3 cp "$output_source" "$output_destination"
fi
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment