Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
FastFold
Commits
19ce8406
Unverified
Commit
19ce8406
authored
Jan 17, 2023
by
shenggan
Committed by
GitHub
Jan 17, 2023
Browse files
Align dataset with alphafold v2.3 (#140)
* Update to AlphaFold 2.3 dataset
* Fix UniProt dataset
parent
da5fe1a6
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
70 additions
and
66 deletions
+70
-66
README.md
README.md
+9
-9
fastfold/data/data_pipeline.py
fastfold/data/data_pipeline.py
+25
-25
fastfold/workflow/template/fastfold_data_workflow.py
fastfold/workflow/template/fastfold_data_workflow.py
+3
-3
fastfold/workflow/template/fastfold_multimer_data_workflow.py
...fold/workflow/template/fastfold_multimer_data_workflow.py
+3
-3
inference.py
inference.py
+6
-6
inference.sh
inference.sh
+2
-2
inference_multimer.sh
inference_multimer.sh
+4
-4
scripts/download_all_data.sh
scripts/download_all_data.sh
+6
-6
scripts/download_alphafold_params.sh
scripts/download_alphafold_params.sh
+1
-1
scripts/download_mgnify.sh
scripts/download_mgnify.sh
+2
-2
scripts/download_pdb_seqres.sh
scripts/download_pdb_seqres.sh
+4
-0
scripts/download_uniref30.sh
scripts/download_uniref30.sh
+5
-5
No files found.
README.md
View file @
19ce8406
...
...
@@ -126,9 +126,9 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir
./
\
--gpus
2
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--jackhmmer_binary_path
`
which jackhmmer
`
\
--hhblits_binary_path
`
which hhblits
`
\
...
...
@@ -150,9 +150,9 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir
./
\
--gpus
2
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--jackhmmer_binary_path
`
which jackhmmer
`
\
--hhblits_binary_path
`
which hhblits
`
\
...
...
@@ -173,9 +173,9 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir
./
\
--gpus
2
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--jackhmmer_binary_path
`
which jackhmmer
`
\
--hhblits_binary_path
`
which hhblits
`
\
...
...
@@ -194,11 +194,11 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--gpus
2
\
--model_preset
multimer
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--uniprot_database_path
data/uniprot/uniprot
_sprot
.fasta
\
--uniprot_database_path
data/uniprot/uniprot.fasta
\
--pdb_seqres_database_path
data/pdb_seqres/pdb_seqres.txt
\
--param_path
data/params/params_model_1_multimer.npz
\
--model_name
model_1_multimer
\
...
...
fastfold/data/data_pipeline.py
View file @
19ce8406
...
...
@@ -270,7 +270,7 @@ class AlignmentRunner:
uniref90_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
uni
clust
30_database_path
:
Optional
[
str
]
=
None
,
uni
ref
30_database_path
:
Optional
[
str
]
=
None
,
pdb70_database_path
:
Optional
[
str
]
=
None
,
use_small_bfd
:
Optional
[
bool
]
=
None
,
no_cpus
:
Optional
[
int
]
=
None
,
...
...
@@ -296,14 +296,14 @@ class AlignmentRunner:
Path to BFD database. Depending on the value of use_small_bfd,
one of hhblits_binary_path or jackhmmer_binary_path must be
provided.
uni
clust
30_database_path:
Path to uni
clust
30. Searched alongside BFD if use_small_bfd is
uni
ref
30_database_path:
Path to uni
ref
30. Searched alongside BFD if use_small_bfd is
false.
pdb70_database_path:
Path to pdb70 database.
use_small_bfd:
Whether to search the BFD database alone with jackhmmer or
in conjunction with uni
clust
30 with hhblits.
in conjunction with uni
ref
30 with hhblits.
no_cpus:
The number of CPUs available for alignment. By default, all
CPUs are used.
...
...
@@ -367,7 +367,7 @@ class AlignmentRunner:
)
self
.
jackhmmer_small_bfd_runner
=
None
self
.
hhblits_bfd_uni
clust
_runner
=
None
self
.
hhblits_bfd_uni
ref
_runner
=
None
if
(
bfd_database_path
is
not
None
):
if
use_small_bfd
:
self
.
jackhmmer_small_bfd_runner
=
jackhmmer
.
Jackhmmer
(
...
...
@@ -377,9 +377,9 @@ class AlignmentRunner:
)
else
:
dbs
=
[
bfd_database_path
]
if
(
uni
clust
30_database_path
is
not
None
):
dbs
.
append
(
uni
clust
30_database_path
)
self
.
hhblits_bfd_uni
clust
_runner
=
hhblits
.
HHBlits
(
if
(
uni
ref
30_database_path
is
not
None
):
dbs
.
append
(
uni
ref
30_database_path
)
self
.
hhblits_bfd_uni
ref
_runner
=
hhblits
.
HHBlits
(
binary_path
=
hhblits_binary_path
,
databases
=
dbs
,
n_cpu
=
no_cpus
,
...
...
@@ -446,14 +446,14 @@ class AlignmentRunner:
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"small_bfd_hits.sto"
)
with
open
(
bfd_out_path
,
"w"
)
as
f
:
f
.
write
(
jackhmmer_small_bfd_result
[
"sto"
])
elif
(
self
.
hhblits_bfd_uni
clust
_runner
is
not
None
):
hhblits_bfd_uni
clust
_result
=
(
self
.
hhblits_bfd_uni
clust
_runner
.
query
(
fasta_path
)
elif
(
self
.
hhblits_bfd_uni
ref
_runner
is
not
None
):
hhblits_bfd_uni
ref
_result
=
(
self
.
hhblits_bfd_uni
ref
_runner
.
query
(
fasta_path
)
)
if
output_dir
is
not
None
:
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"bfd_uni
clust
_hits.a3m"
)
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"bfd_uni
ref
_hits.a3m"
)
with
open
(
bfd_out_path
,
"w"
)
as
f
:
f
.
write
(
hhblits_bfd_uni
clust
_result
[
"a3m"
])
f
.
write
(
hhblits_bfd_uni
ref
_result
[
"a3m"
])
...
...
@@ -470,7 +470,7 @@ class AlignmentRunnerMultimer:
uniref90_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
uni
clust
30_database_path
:
Optional
[
str
]
=
None
,
uni
ref
30_database_path
:
Optional
[
str
]
=
None
,
uniprot_database_path
:
Optional
[
str
]
=
None
,
pdb_seqres_database_path
:
Optional
[
str
]
=
None
,
use_small_bfd
:
Optional
[
bool
]
=
None
,
...
...
@@ -495,12 +495,12 @@ class AlignmentRunnerMultimer:
Path to BFD database. Depending on the value of use_small_bfd,
one of hhblits_binary_path or jackhmmer_binary_path must be
provided.
uni
clust
30_database_path:
Path to uni
clust
30. Searched alongside BFD if use_small_bfd is
uni
ref
30_database_path:
Path to uni
ref
30. Searched alongside BFD if use_small_bfd is
false.
use_small_bfd:
Whether to search the BFD database alone with jackhmmer or
in conjunction with uni
clust
30 with hhblits.
in conjunction with uni
ref
30 with hhblits.
no_cpus:
The number of CPUs available for alignment. By default, all
CPUs are used.
...
...
@@ -559,7 +559,7 @@ class AlignmentRunnerMultimer:
)
self
.
jackhmmer_small_bfd_runner
=
None
self
.
hhblits_bfd_uni
clust
_runner
=
None
self
.
hhblits_bfd_uni
ref
_runner
=
None
if
(
bfd_database_path
is
not
None
):
if
use_small_bfd
:
self
.
jackhmmer_small_bfd_runner
=
jackhmmer
.
Jackhmmer
(
...
...
@@ -569,9 +569,9 @@ class AlignmentRunnerMultimer:
)
else
:
dbs
=
[
bfd_database_path
]
if
(
uni
clust
30_database_path
is
not
None
):
dbs
.
append
(
uni
clust
30_database_path
)
self
.
hhblits_bfd_uni
clust
_runner
=
hhblits
.
HHBlits
(
if
(
uni
ref
30_database_path
is
not
None
):
dbs
.
append
(
uni
ref
30_database_path
)
self
.
hhblits_bfd_uni
ref
_runner
=
hhblits
.
HHBlits
(
binary_path
=
hhblits_binary_path
,
databases
=
dbs
,
n_cpu
=
no_cpus
,
...
...
@@ -647,10 +647,10 @@ class AlignmentRunnerMultimer:
msa_out_path
=
bfd_out_path
,
msa_format
=
"sto"
,
)
elif
(
self
.
hhblits_bfd_uni
clust
_runner
is
not
None
):
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"bfd_uni
clust
_hits.a3m"
)
hhblits_bfd_uni
clust
_result
=
run_msa_tool
(
msa_runner
=
self
.
hhblits_bfd_uni
clust
_runner
,
elif
(
self
.
hhblits_bfd_uni
ref
_runner
is
not
None
):
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"bfd_uni
ref
_hits.a3m"
)
hhblits_bfd_uni
ref
_result
=
run_msa_tool
(
msa_runner
=
self
.
hhblits_bfd_uni
ref
_runner
,
fasta_path
=
fasta_path
,
msa_out_path
=
bfd_out_path
,
msa_format
=
"a3m"
,
...
...
fastfold/workflow/template/fastfold_data_workflow.py
View file @
19ce8406
...
...
@@ -16,7 +16,7 @@ class FastFoldDataWorkFlow:
uniref90_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
uni
clust
30_database_path
:
Optional
[
str
]
=
None
,
uni
ref
30_database_path
:
Optional
[
str
]
=
None
,
pdb70_database_path
:
Optional
[
str
]
=
None
,
use_small_bfd
:
Optional
[
bool
]
=
None
,
no_cpus
:
Optional
[
int
]
=
None
,
...
...
@@ -154,13 +154,13 @@ class FastFoldDataWorkFlow:
if
not
self
.
use_small_bfd
:
# Run HHBlits on BFD
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
clust
_hits.a3m"
)
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
ref
_hits.a3m"
)
# generate workflow for STEP4
bfd_node
=
self
.
hhblits_bfd_factory
.
gen_node
(
fasta_path
,
bfd_out_path
)
else
:
# Run Jackhmmer on small_bfd
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
clust
_hits.a3m"
)
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
ref
_hits.a3m"
)
# generate workflow for STEP4_2
bfd_node
=
self
.
jackhmmer_small_bfd_factory
.
gen_node
(
fasta_path
,
bfd_out_path
,
output_format
=
"sto"
)
...
...
fastfold/workflow/template/fastfold_multimer_data_workflow.py
View file @
19ce8406
...
...
@@ -19,7 +19,7 @@ class FastFoldMultimerDataWorkFlow:
uniref90_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
uni
clust
30_database_path
:
Optional
[
str
]
=
None
,
uni
ref
30_database_path
:
Optional
[
str
]
=
None
,
uniprot_database_path
:
Optional
[
str
]
=
None
,
pdb_seqres_database_path
:
Optional
[
str
]
=
None
,
use_small_bfd
:
Optional
[
bool
]
=
None
,
...
...
@@ -171,13 +171,13 @@ class FastFoldMultimerDataWorkFlow:
if
not
self
.
use_small_bfd
:
# Run HHBlits on BFD
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
clust
_hits.a3m"
)
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
ref
_hits.a3m"
)
# generate workflow for STEP4
bfd_node
=
self
.
hhblits_bfd_factory
.
gen_node
(
fasta_path
,
bfd_out_path
)
else
:
# Run Jackhmmer on small_bfd
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
clust
_hits.sto"
)
bfd_out_path
=
os
.
path
.
join
(
alignment_dir
,
"bfd_uni
ref
_hits.sto"
)
# generate workflow for STEP4_2
bfd_node
=
self
.
jackhmmer_small_bfd_factory
.
gen_node
(
fasta_path
,
bfd_out_path
,
output_format
=
"sto"
)
...
...
inference.py
View file @
19ce8406
...
...
@@ -71,7 +71,7 @@ def add_data_args(parser: argparse.ArgumentParser):
default
=
None
,
)
parser
.
add_argument
(
'--uni
clust
30_database_path'
,
'--uni
ref
30_database_path'
,
type
=
str
,
default
=
None
,
)
...
...
@@ -181,7 +181,7 @@ def inference_multimer_model(args):
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uni
clust
30_database_path
=
args
.
uni
clust
30_database_path
,
uni
ref
30_database_path
=
args
.
uni
ref
30_database_path
,
uniprot_database_path
=
args
.
uniprot_database_path
,
pdb_seqres_database_path
=
args
.
pdb_seqres_database_path
,
use_small_bfd
=
(
args
.
bfd_database_path
is
None
),
...
...
@@ -196,7 +196,7 @@ def inference_multimer_model(args):
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uni
clust
30_database_path
=
args
.
uni
clust
30_database_path
,
uni
ref
30_database_path
=
args
.
uni
ref
30_database_path
,
uniprot_database_path
=
args
.
uniprot_database_path
,
pdb_seqres_database_path
=
args
.
pdb_seqres_database_path
,
use_small_bfd
=
(
args
.
bfd_database_path
is
None
),
...
...
@@ -341,7 +341,7 @@ def inference_monomer_model(args):
assert
args
.
bfd_database_path
is
not
None
else
:
assert
args
.
bfd_database_path
is
not
None
assert
args
.
uni
clust
30_database_path
is
not
None
assert
args
.
uni
ref
30_database_path
is
not
None
data_processor
=
data_pipeline
.
DataPipeline
(
template_featurizer
=
template_featurizer
,)
...
...
@@ -385,7 +385,7 @@ def inference_monomer_model(args):
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uni
clust
30_database_path
=
args
.
uni
clust
30_database_path
,
uni
ref
30_database_path
=
args
.
uni
ref
30_database_path
,
pdb70_database_path
=
args
.
pdb70_database_path
,
use_small_bfd
=
use_small_bfd
,
no_cpus
=
args
.
cpus
,
...
...
@@ -401,7 +401,7 @@ def inference_monomer_model(args):
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uni
clust
30_database_path
=
args
.
uni
clust
30_database_path
,
uni
ref
30_database_path
=
args
.
uni
ref
30_database_path
,
pdb70_database_path
=
args
.
pdb70_database_path
,
use_small_bfd
=
use_small_bfd
,
no_cpus
=
args
.
cpus
,
...
...
inference.sh
View file @
19ce8406
...
...
@@ -7,9 +7,9 @@
python inference.py target.fasta data/pdb_mmcif/mmcif_files
\
--output_dir
./
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--jackhmmer_binary_path
`
which jackhmmer
`
\
--hhblits_binary_path
`
which hhblits
`
\
...
...
inference_multimer.sh
View file @
19ce8406
...
...
@@ -8,16 +8,16 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files \
--output_dir
./
\
--gpus
1
\
--uniref90_database_path
data/uniref90/uniref90.fasta
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
18_12
.fa
\
--mgnify_database_path
data/mgnify/mgy_clusters_20
22_05
.fa
\
--pdb70_database_path
data/pdb70/pdb70
\
--pdb_seqres_database_path
data/pdb_seqres/pdb_seqres.txt
\
--uniprot_database_path
data/uniprot/uniprot
_sprot
.fasta
\
--uni
clust
30_database_path
data/uni
clust
30/
u
ni
clust30_2018_08/uniclust30_2018_08
\
--uniprot_database_path
data/uniprot/uniprot.fasta
\
--uni
ref
30_database_path
data/uni
ref
30/
U
ni
Ref30_2021_03
\
--bfd_database_path
data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
\
--jackhmmer_binary_path
`
which jackhmmer
`
\
--hhblits_binary_path
`
which hhblits
`
\
--hhsearch_binary_path
`
which hhsearch
`
\
--kalign_binary_path
`
which kalign
`
\
--model_preset
multimer
\
--param_path
data/params/params_model_1_multimer_v
2
.npz
\
--param_path
data/params/params_model_1_multimer_v
3
.npz
\
--model_name
model_1_multimer
\
scripts/download_all_data.sh
View file @
19ce8406
...
...
@@ -59,17 +59,17 @@ bash "${SCRIPT_DIR}/download_pdb70.sh" "${DOWNLOAD_DIR}"
echo
"Downloading PDB mmCIF files..."
bash
"
${
SCRIPT_DIR
}
/download_pdb_mmcif.sh"
"
${
DOWNLOAD_DIR
}
"
echo
"Downloading Uni
clust
30..."
bash
"
${
SCRIPT_DIR
}
/download_uni
clust
30.sh"
"
${
DOWNLOAD_DIR
}
"
echo
"Downloading Uni
ref
30..."
bash
"
${
SCRIPT_DIR
}
/download_uni
ref
30.sh"
"
${
DOWNLOAD_DIR
}
"
echo
"Downloading Uniref90..."
bash
"
${
SCRIPT_DIR
}
/download_uniref90.sh"
"
${
DOWNLOAD_DIR
}
"
# UniProt and PDB SeqRes for multimer version
#
echo "Downloading UniProt..."
#
bash "${SCRIPT_DIR}/download_uniprot.sh" "${DOWNLOAD_DIR}"
echo
"Downloading UniProt..."
bash
"
${
SCRIPT_DIR
}
/download_uniprot.sh"
"
${
DOWNLOAD_DIR
}
"
#
echo "Downloading PDB SeqRes..."
#
bash "${SCRIPT_DIR}/download_pdb_seqres.sh" "${DOWNLOAD_DIR}"
echo
"Downloading PDB SeqRes..."
bash
"
${
SCRIPT_DIR
}
/download_pdb_seqres.sh"
"
${
DOWNLOAD_DIR
}
"
echo
"All data downloaded."
scripts/download_alphafold_params.sh
View file @
19ce8406
...
...
@@ -31,7 +31,7 @@ fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/params"
SOURCE_URL
=
"https://storage.googleapis.com/alphafold/alphafold_params_2022-
03
-0
2
.tar"
SOURCE_URL
=
"https://storage.googleapis.com/alphafold/alphafold_params_2022-
12
-0
6
.tar"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
...
...
scripts/download_mgnify.sh
View file @
19ce8406
...
...
@@ -32,8 +32,8 @@ fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/mgnify"
# Mirror of:
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/20
18_12
/mgy_clusters.fa.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/
casp14_versions
/mgy_clusters_20
18_12
.fa.gz"
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/20
22_05
/mgy_clusters.fa.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/
v2.3
/mgy_clusters_20
22_05
.fa.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
...
...
scripts/download_pdb_seqres.sh
View file @
19ce8406
...
...
@@ -36,3 +36,7 @@ BASENAME=$(basename "${SOURCE_URL}")
mkdir
--parents
"
${
ROOT_DIR
}
"
aria2c
"
${
SOURCE_URL
}
"
--dir
=
"
${
ROOT_DIR
}
"
# Keep only protein sequences.
grep
--after-context
=
1
--no-group-separator
'>.* mol:protein'
"
${
ROOT_DIR
}
/pdb_seqres.txt"
>
"
${
ROOT_DIR
}
/pdb_seqres_filtered.txt"
mv
"
${
ROOT_DIR
}
/pdb_seqres_filtered.txt"
"
${
ROOT_DIR
}
/pdb_seqres.txt"
scripts/download_uni
clust
30.sh
→
scripts/download_uni
ref
30.sh
View file @
19ce8406
...
...
@@ -14,9 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and unzips the
U
ni
clust
30 database for AlphaFold.
# Downloads and unzips the
U
ni
Ref
30 database for AlphaFold.
#
# Usage: bash download_uni
clust
30.sh /path/to/download/directory
# Usage: bash download_uni
ref
30.sh /path/to/download/directory
set
-e
if
[[
$#
-eq
0
]]
;
then
...
...
@@ -30,10 +30,10 @@ if ! command -v aria2c &> /dev/null ; then
fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/uni
clust
30"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/uni
ref
30"
# Mirror of:
# http://wwwuser.gwdg.de/~compbiol/uniclust/20
18_08/uniclust30_2018_08_hhsuite
.tar.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/
casp14_versions/uniclust30_2018_08_hhsuite
.tar.gz"
# http
s
://wwwuser.gwdg.de/~compbiol/uniclust/20
21_03/UniRef30_2021_03
.tar.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/
v2.3/UniRef30_2021_03
.tar.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment