Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
7642bef9
Commit
7642bef9
authored
Jun 21, 2022
by
Gustaf Ahdritz
Browse files
Add better AlphaFold-Gap support to PDB parsing scripts
parent
2549752d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
50 additions
and
42 deletions
+50
-42
openfold/np/protein.py
openfold/np/protein.py
+50
-42
No files found.
openfold/np/protein.py
View file @
7642bef9
...
@@ -92,68 +92,76 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
...
@@ -92,68 +92,76 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
)
)
model
=
models
[
0
]
model
=
models
[
0
]
if
chain_id
is
not
None
:
chain
=
model
[
chain_id
]
else
:
chains
=
list
(
model
.
get_chains
())
if
len
(
chains
)
!=
1
:
raise
ValueError
(
"Only single chain PDBs are supported when chain_id not specified. "
f
"Found
{
len
(
chains
)
}
chains."
)
else
:
chain
=
chains
[
0
]
atom_positions
=
[]
atom_positions
=
[]
aatype
=
[]
aatype
=
[]
atom_mask
=
[]
atom_mask
=
[]
residue_index
=
[]
residue_index
=
[]
chain_ids
=
[]
b_factors
=
[]
b_factors
=
[]
for
res
in
chain
:
for
chain
in
model
:
if
res
.
id
[
2
]
!=
" "
:
if
(
chain_id
is
not
None
and
chain
.
id
!=
chain_id
):
raise
ValueError
(
continue
f
"PDB contains an insertion code at chain
{
chain
.
id
}
and residue "
for
res
in
chain
:
f
"index
{
res
.
id
[
1
]
}
. These are not supported."
if
res
.
id
[
2
]
!=
" "
:
raise
ValueError
(
f
"PDB contains an insertion code at chain
{
chain
.
id
}
and residue "
f
"index
{
res
.
id
[
1
]
}
. These are not supported."
)
res_shortname
=
residue_constants
.
restype_3to1
.
get
(
res
.
resname
,
"X"
)
restype_idx
=
residue_constants
.
restype_order
.
get
(
res_shortname
,
residue_constants
.
restype_num
)
)
res_shortname
=
residue_constants
.
restype_3to1
.
get
(
res
.
resname
,
"X"
)
pos
=
np
.
zeros
((
residue_constants
.
atom_type_num
,
3
))
restype_idx
=
residue_constants
.
restype_order
.
get
(
mask
=
np
.
zeros
((
residue_constants
.
atom_type_num
,))
res_shortname
,
residue_constants
.
restype_num
res_b_factors
=
np
.
zeros
((
residue_constants
.
atom_type_num
,))
)
for
atom
in
res
:
pos
=
np
.
zeros
((
residue_constants
.
atom_type_num
,
3
))
if
atom
.
name
not
in
residue_constants
.
atom_types
:
mask
=
np
.
zeros
((
residue_constants
.
atom_type_num
,))
continue
res_b_factors
=
np
.
zeros
((
residue_constants
.
atom_type_num
,))
pos
[
residue_constants
.
atom_order
[
atom
.
name
]]
=
atom
.
coord
for
atom
in
res
:
mask
[
residue_constants
.
atom_order
[
atom
.
name
]]
=
1.0
if
atom
.
name
not
in
residue_constants
.
atom_types
:
res_b_factors
[
residue_constants
.
atom_order
[
atom
.
name
]
]
=
atom
.
bfactor
if
np
.
sum
(
mask
)
<
0.5
:
# If no known atom positions are reported for the residue then skip it.
continue
continue
pos
[
residue_constants
.
atom_order
[
atom
.
name
]]
=
atom
.
coord
aatype
.
append
(
restype_idx
)
mask
[
residue_constants
.
atom_order
[
atom
.
name
]]
=
1.0
atom_positions
.
append
(
pos
)
res_b_factors
[
atom_mask
.
append
(
mask
)
residue_constants
.
atom_order
[
atom
.
name
]
residue_index
.
append
(
res
.
id
[
1
])
]
=
atom
.
bfactor
chain_ids
.
append
(
chain
.
id
)
if
np
.
sum
(
mask
)
<
0.5
:
b_factors
.
append
(
res_b_factors
)
# If no known atom positions are reported for the residue then skip it.
continue
aatype
.
append
(
restype_idx
)
atom_positions
.
append
(
pos
)
atom_mask
.
append
(
mask
)
residue_index
.
append
(
res
.
id
[
1
])
b_factors
.
append
(
res_b_factors
)
parents
=
None
parents
=
None
parents_chain_index
=
None
if
(
"PARENT"
in
pdb_str
):
if
(
"PARENT"
in
pdb_str
):
parents
=
[]
parents_chain_index
=
[]
chain_id
=
0
for
l
in
pdb_str
.
split
(
"
\n
"
):
for
l
in
pdb_str
.
split
(
"
\n
"
):
if
(
"PARENT"
in
l
and
not
"N/A"
in
l
):
if
(
"PARENT"
in
l
):
parents
=
l
.
split
()[
1
:]
if
(
not
"N/A"
in
l
):
break
parent_names
=
l
.
split
()[
1
:]
parents
.
extend
(
parent_names
)
parents_chain_index
.
extend
([
chain_id
for
_
in
parent_names
])
chain_id
+=
1
unique_chain_ids
=
np
.
unique
(
chain_ids
)
chain_id_mapping
=
{
cid
:
n
for
n
,
cid
in
enumerate
(
string
.
ascii_uppercase
)}
chain_index
=
np
.
array
([
chain_id_mapping
[
cid
]
for
cid
in
chain_ids
])
return
Protein
(
return
Protein
(
atom_positions
=
np
.
array
(
atom_positions
),
atom_positions
=
np
.
array
(
atom_positions
),
atom_mask
=
np
.
array
(
atom_mask
),
atom_mask
=
np
.
array
(
atom_mask
),
aatype
=
np
.
array
(
aatype
),
aatype
=
np
.
array
(
aatype
),
residue_index
=
np
.
array
(
residue_index
),
residue_index
=
np
.
array
(
residue_index
),
chain_index
=
chain_index
,
b_factors
=
np
.
array
(
b_factors
),
b_factors
=
np
.
array
(
b_factors
),
parents
=
parents
,
parents
=
parents
,
parents_chain_index
=
parents_chain_index
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment