Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
7642bef9
Commit
7642bef9
authored
Jun 21, 2022
by
Gustaf Ahdritz
Browse files
Add better AlphaFold-Gap support to PDB parsing scripts
parent
2549752d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
50 additions
and
42 deletions
+50
-42
openfold/np/protein.py
openfold/np/protein.py
+50
-42
No files found.
openfold/np/protein.py
View file @
7642bef9
...
...
@@ -92,24 +92,16 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
)
model
=
models
[
0
]
if
chain_id
is
not
None
:
chain
=
model
[
chain_id
]
else
:
chains
=
list
(
model
.
get_chains
())
if
len
(
chains
)
!=
1
:
raise
ValueError
(
"Only single chain PDBs are supported when chain_id not specified. "
f
"Found
{
len
(
chains
)
}
chains."
)
else
:
chain
=
chains
[
0
]
atom_positions
=
[]
aatype
=
[]
atom_mask
=
[]
residue_index
=
[]
chain_ids
=
[]
b_factors
=
[]
for
chain
in
model
:
if
(
chain_id
is
not
None
and
chain
.
id
!=
chain_id
):
continue
for
res
in
chain
:
if
res
.
id
[
2
]
!=
" "
:
raise
ValueError
(
...
...
@@ -138,22 +130,38 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
atom_positions
.
append
(
pos
)
atom_mask
.
append
(
mask
)
residue_index
.
append
(
res
.
id
[
1
])
chain_ids
.
append
(
chain
.
id
)
b_factors
.
append
(
res_b_factors
)
parents
=
None
parents_chain_index
=
None
if
(
"PARENT"
in
pdb_str
):
parents
=
[]
parents_chain_index
=
[]
chain_id
=
0
for
l
in
pdb_str
.
split
(
"
\n
"
):
if
(
"PARENT"
in
l
and
not
"N/A"
in
l
):
parents
=
l
.
split
()[
1
:]
break
if
(
"PARENT"
in
l
):
if
(
not
"N/A"
in
l
):
parent_names
=
l
.
split
()[
1
:]
parents
.
extend
(
parent_names
)
parents_chain_index
.
extend
([
chain_id
for
_
in
parent_names
])
chain_id
+=
1
unique_chain_ids
=
np
.
unique
(
chain_ids
)
chain_id_mapping
=
{
cid
:
n
for
n
,
cid
in
enumerate
(
string
.
ascii_uppercase
)}
chain_index
=
np
.
array
([
chain_id_mapping
[
cid
]
for
cid
in
chain_ids
])
return
Protein
(
atom_positions
=
np
.
array
(
atom_positions
),
atom_mask
=
np
.
array
(
atom_mask
),
aatype
=
np
.
array
(
aatype
),
residue_index
=
np
.
array
(
residue_index
),
chain_index
=
chain_index
,
b_factors
=
np
.
array
(
b_factors
),
parents
=
parents
,
parents_chain_index
=
parents_chain_index
,
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment