# Local (non-container) OpenFold run on target.fasta. Output goes to the
# current directory and the model device is cuda:1. All database paths are
# relative to ./data, and the alignment tool binaries are taken from the
# openfold_venv conda environment under ./lib.
python3 run_pretrained_openfold.py target.fasta data/pdb_mmcif/mmcif_files/ \
  --uniref90_database_path data/uniref90/uniref90.fasta \
  --mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
  --pdb70_database_path data/pdb70/pdb70 \
  --uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
  --output_dir ./ \
  --bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
  --model_device cuda:1 \
  --jackhmmer_binary_path lib/conda/envs/openfold_venv/bin/jackhmmer \
  --hhblits_binary_path lib/conda/envs/openfold_venv/bin/hhblits \
  --hhsearch_binary_path lib/conda/envs/openfold_venv/bin/hhsearch \
  --kalign_binary_path lib/conda/envs/openfold_venv/bin/kalign

# OpenFold run inside the openfold:latest container.
# Mounts: the current directory at /data (holds input.fasta, receives the
# output) and the host directory /mnt/alphafold_database/ at /database.
# "$PWD" is quoted so the bind mount is still passed as a single argument
# when the working directory path contains spaces or glob characters.
docker run \
  --gpus all \
  -v "$PWD/:/data" \
  -v /mnt/alphafold_database/:/database \
  -ti openfold:latest \
  python3 /opt/openfold/run_pretrained_openfold.py \
    /data/input.fasta \
    /database/pdb_mmcif/mmcif_files/ \
    --uniref90_database_path /database/uniref90/uniref90.fasta \
    --mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
    --pdb70_database_path /database/pdb70/pdb70 \
    --uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
    --output_dir /data \
    --bfd_database_path /database/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
    --model_device cuda:0 \
    --jackhmmer_binary_path /opt/conda/bin/jackhmmer \
    --hhblits_binary_path /opt/conda/bin/hhblits \
    --hhsearch_binary_path /opt/conda/bin/hhsearch \
    --kalign_binary_path /opt/conda/bin/kalign \
    --param_path /database/params/params_model_1.npz

# T1083
# Runs image 617302ff1b5a with precomputed alignments
# (--use_precomputed_alignments /data) instead of the sequence databases.
# Host /home/ec2-user/data is mounted at /data and /fsx/ at /database/;
# output is written to /data/T1083. The container is removed on exit (--rm).
docker run --rm --gpus all \
  -v /home/ec2-user/data:/data \
  -v /fsx/:/database/ \
  -e CUDA_VISIBLE_DEVICES=0 \
  617302ff1b5a \
  python3 /opt/openfold/run_pretrained_openfold.py \
    /data/T1083/input.fasta \
    /database/pdb_mmcif/mmcif_files/ \
    --use_precomputed_alignments /data \
    --output_dir /data/T1083 \
    --model_device cuda:0 \
    --param_path /database/params/params_model_1.npz

# T1070
# Runs image 617302ff1b5a with precomputed alignments
# (--use_precomputed_alignments /data) instead of the sequence databases.
# Host /home/ec2-user/data is mounted at /data and /fsx/ at /database/;
# output is written to /data/T1070. The container is removed on exit (--rm).
docker run --rm --gpus all \
  -v /home/ec2-user/data:/data \
  -v /fsx/:/database/ \
  -e CUDA_VISIBLE_DEVICES=0 \
  617302ff1b5a \
  python3 /opt/openfold/run_pretrained_openfold.py \
    /data/T1070/input.fasta \
    /database/pdb_mmcif/mmcif_files/ \
    --use_precomputed_alignments /data \
    --output_dir /data/T1070 \
    --model_device cuda:0 \
    --param_path /database/params/params_model_1.npz

# Note: the current implementation of OpenFold assumes that each sequence
# in the input FASTA file is on a single line. Sequences that span multiple
# lines are interpreted as sequence IDs and cause errors.
# https://github.com/aqlaboratory/openfold/issues/89

# Also, it seems like it's not possible to run the MSA step with the reduced DB at this time

### MMseqs 2
# Precompute alignments for T1082 with scripts/precompute_alignments_mmseqs.py.
# Positional arguments, in order: input FASTA, MMseqs database directory,
# uniref30_2103_db, output directory, mmseqs binary.
# The hhsearch path must be absolute (leading '/'): the script is started via
# the relative path scripts/..., so a relative "home/ubuntu/..." would be
# resolved against the current directory and would not match the absolute
# mmseqs path used on the line before it.
python3 scripts/precompute_alignments_mmseqs.py \
    /home/ubuntu/data/T1082/input.fasta \
    /fsx/mmseqs_dbs \
    uniref30_2103_db \
    /home/ubuntu/data/T1082 \
    /home/ubuntu/openfold/mmseqs/bin/mmseqs \
    --hhsearch_binary_path /home/ubuntu/openfold/lib/conda/envs/openfold_venv/bin/hhsearch \
    --pdb70 pdb70 \
    --env_db colabfold_envdb_202108_db

# Run scripts/colabfold_search.sh for T1080.
# Positional arguments, in order: mmseqs binary, input FASTA, MMseqs database
# directory, output directory, uniref30_2103_db, an empty string,
# colabfold_envdb_202108_db, then five numeric switches (1 0 1 1 0).
# NOTE(review): the meaning of each switch is defined by colabfold_search.sh;
# check that script before changing them.
# Nothing may follow a line-continuation backslash: with trailing whitespace
# the backslash escapes the space instead of the newline, the command ends on
# that line, and the switches run as a separate (non-existent) command.
bash scripts/colabfold_search.sh \
    /home/ubuntu/openfold/mmseqs/bin/mmseqs \
    /home/ubuntu/data/T1080/input.fasta \
    /fsx/mmseqs_dbs \
    /home/ubuntu/data/T1080/output \
    uniref30_2103_db \
    "" \
    colabfold_envdb_202108_db \
    1 0 1 1 0

# Note: this seems to take a fair amount of time to finish (about 30 minutes,
# so not faster than hhblits).
# Need to retry using the memory recommendations from https://colabfold.mmseqs.com/,
# e.g. 768 GiB RAM. An x2gd.12xlarge looks to be the most cost-effective instance
# for this ($4/hr), or else an r5.24xlarge if we need an AMD instance ($6/hr).


# Download the latest Miniconda3 Linux x86_64 installer into /tmp, run it
# with -b -p /opt/conda, then delete the installer. Each step runs only if
# the previous one succeeded.
sudo wget -P /tmp "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" &&
  sudo bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda &&
  sudo rm /tmp/Miniconda3-latest-Linux-x86_64.sh


# 220614

# Run openfold:latest on every FASTA under /data/fasta_dir using the
# precomputed alignments in /data/alignments/; output is written to /data.
# Host /home/ec2-user/data is mounted at /data and /fsx/ at /database/.
docker run --gpus all \
  -v /home/ec2-user/data:/data \
  -v /fsx/:/database/ \
  -ti openfold:latest \
  python3 /opt/openfold/run_pretrained_openfold.py \
    /data/fasta_dir \
    /database/pdb_mmcif/mmcif_files/ \
    --use_precomputed_alignments /data/alignments/ \
    --output_dir /data \
    --model_device cuda:0 \
    --jax_param_path /database/params/params_model_1.npz

# The same precomputed-alignment run without the `docker run` wrapper.
# NOTE(review): the /data and /database paths are the container-side mount
# points, so this is presumably meant to be run from a shell inside the
# container; confirm before running it on the host.
python3 /opt/openfold/run_pretrained_openfold.py \
  /data/fasta_dir /database/pdb_mmcif/mmcif_files/ \
  --use_precomputed_alignments /data/alignments/ \
  --output_dir /data \
  --model_device cuda:0 \
  --jax_param_path /database/params/params_model_1.npz


# Run openfold:latest on /data/fasta_dir with the uniref90, mgnify, pdb70 and
# uniclust30 databases (no --bfd_database_path is passed here); output is
# written to /data. Host /home/ec2-user/data is mounted at /data and /fsx/
# at /database/. Alignment tool binaries are taken from /opt/conda/bin.
docker run --gpus all \
  -v /home/ec2-user/data:/data \
  -v /fsx/:/database/ \
  -ti openfold:latest \
  python3 /opt/openfold/run_pretrained_openfold.py \
    /data/fasta_dir \
    /database/pdb_mmcif/mmcif_files/ \
    --uniref90_database_path /database/uniref90/uniref90.fasta \
    --mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
    --pdb70_database_path /database/pdb70/pdb70 \
    --uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
    --output_dir /data \
    --model_device cuda:0 \
    --jackhmmer_binary_path /opt/conda/bin/jackhmmer \
    --hhblits_binary_path /opt/conda/bin/hhblits \
    --hhsearch_binary_path /opt/conda/bin/hhsearch \
    --kalign_binary_path /opt/conda/bin/kalign \
    --jax_param_path /database/params/params_model_1.npz



# Run run_batch_job.sh inside the openfold:latest container with five
# arguments, in order:
#   1. an S3 URI of the input FASTA (T1084.fasta)
#   2. /data/fasta_dir
#   3. the OpenFold command line, passed as ONE double-quoted argument
#   4. /data
#   5. an S3 prefix
# NOTE(review): presumably the script downloads (1) into (2), runs (3), and
# uploads (4) to (5) -- confirm against run_batch_job.sh.
# Inside the double quotes each backslash-newline is a line continuation, so
# the command string reaches the script as a single line; the leading spaces
# of the continued lines remain part of that string.
# The alignment tool binaries in this command are under /opt/mamba/bin.
docker run --gpus all -v /home/ec2-user/data:/data -v /fsx/:/database/ -ti openfold:latest bash run_batch_job.sh \
    s3://sagemaker-us-east-2-032243382548/openfold_testing/T1084.fasta \
    /data/fasta_dir \
    "python3 /opt/openfold/run_pretrained_openfold.py \
    /data/fasta_dir \
    /database/pdb_mmcif/mmcif_files/ \
    --uniref90_database_path /database/uniref90/uniref90.fasta \
    --mgnify_database_path /database/mgnify/mgy_clusters_2018_12.fa \
    --pdb70_database_path /database/pdb70/pdb70 \
    --uniclust30_database_path /database/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
    --output_dir /data \
    --model_device cuda:0 \
    --jackhmmer_binary_path /opt/mamba/bin/jackhmmer \
    --hhblits_binary_path /opt/mamba/bin/hhblits \
    --hhsearch_binary_path /opt/mamba/bin/hhsearch \
    --kalign_binary_path /opt/mamba/bin/kalign \
    --jax_param_path /database/params/params_model_1.npz" \
    /data \
    s3://sagemaker-us-east-2-032243382548/openfold_testing/