Update workflow store dir and readme (#74)

update workflow store dir and readme

Update workflow store dir and readme (#74)
update workflow store dir and readme
1a33718b · oahzxl · GitHub · 2a67dc33 · 1a33718b · 1a33718b
Unverified Commit 1a33718b authored Sep 27, 2022 by oahzxl Committed by GitHub Sep 27, 2022
4 changed files
--- a/README.md
+++ b/README.md
@@ -165,6 +165,29 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
    --inplace
 ```

+#### inference multimer sequence
+AlphaFold Multimer is supported. You can use the following command or the shell script `./inference_multimer.sh`.
+Workflow and memory parameters mentioned above can also be used.
+```shell
+python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
+    --output_dir ./ \
+    --gpus 2 \
+    --model_preset multimer \
+    --uniref90_database_path data/uniref90/uniref90.fasta \
+    --mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path data/pdb70/pdb70 \
+    --uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --uniprot_database_path data/uniprot/uniprot_sprot.fasta \
+    --pdb_seqres_database_path data/pdb_seqres/pdb_seqres.txt  \
+    --param_path data/params/params_model_1_multimer.npz \
+    --model_name model_1_multimer \
+    --jackhmmer_binary_path `which jackhmmer` \
+    --hhblits_binary_path `which hhblits` \
+    --hhsearch_binary_path `which hhsearch` \
+    --kalign_binary_path `which kalign`
+```
+
 ## Performance Benchmark

 We have included a performance benchmark script in `./benchmark`. You can benchmark the performance of Evoformer using different settings.

--- a/fastfold/workflow/template/fastfold_data_workflow.py
+++ b/fastfold/workflow/template/fastfold_data_workflow.py
@@ -119,10 +119,10 @@ class FastFoldDataWorkFlow:


    def run(self, fasta_path: str, alignment_dir: str=None, storage_dir: str=None) -> None:
-        storage_dir = "file:///tmp/ray/lcmql/workflow_data"
+        storage_dir = "file:///tmp/ray/" + os.getlogin() + "/workflow_data"
        if storage_dir is not None:
            if not os.path.exists(storage_dir):
-                os.makedirs(storage_dir)
+                os.makedirs(storage_dir, exist_ok=True)
            if not ray.is_initialized():
                ray.init(storage=storage_dir)


--- a/fastfold/workflow/template/fastfold_multimer_data_workflow.py
+++ b/fastfold/workflow/template/fastfold_multimer_data_workflow.py
@@ -137,7 +137,7 @@ class FastFoldMultimerDataWorkFlow:


    def run(self, fasta_path: str, alignment_dir: str=None, storage_dir: str=None) -> None:
-        storage_dir = "file:///tmp/ray/lcmql/workflow_data"
+        storage_dir = "file:///tmp/ray/" + os.getlogin() + "/workflow_data"
        if storage_dir is not None:
            if not os.path.exists(storage_dir):
                os.makedirs(storage_dir)

--- a/inference_multimer.sh
+++ b/inference_multimer.sh
+# add '--gpus [N]' to use N gpus for inference
+# add '--enable_workflow' to use parallel workflow for data processing
+# add '--use_precomputed_alignments [path_to_alignments]' to use precomputed msa
+# add '--chunk_size [N]' to use chunk to reduce peak memory
+# add '--inplace' to use inplace to save memory
+
+python inference.py target.fasta data/pdb_mmcif/mmcif_files \
+    --output_dir ./ \
+    --gpus 1 \
+    --uniref90_database_path data/uniref90/uniref90.fasta \
+    --mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path data/pdb70/pdb70 \
+    --pdb_seqres_database_path data/pdb_seqres/pdb_seqres.txt \
+    --uniprot_database_path data/uniprot/uniprot_sprot.fasta \
+    --uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --jackhmmer_binary_path `which jackhmmer` \
+    --hhblits_binary_path `which hhblits` \
+    --hhsearch_binary_path `which hhsearch` \
+    --kalign_binary_path `which kalign`  \
+    --model_preset multimer \
+    --param_path data/params/params_model_1_multimer.npz \
+    --model_name model_1_multimer \