Commit 7642bef9 authored by Gustaf Ahdritz
Browse files

Add better AlphaFold-Gap support to PDB parsing scripts

parent 2549752d
...@@ -92,68 +92,76 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein: ...@@ -92,68 +92,76 @@ def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
) )
model = models[0] model = models[0]
if chain_id is not None:
chain = model[chain_id]
else:
chains = list(model.get_chains())
if len(chains) != 1:
raise ValueError(
"Only single chain PDBs are supported when chain_id not specified. "
f"Found {len(chains)} chains."
)
else:
chain = chains[0]
atom_positions = [] atom_positions = []
aatype = [] aatype = []
atom_mask = [] atom_mask = []
residue_index = [] residue_index = []
chain_ids = []
b_factors = [] b_factors = []
for res in chain: for chain in model:
if res.id[2] != " ": if(chain_id is not None and chain.id != chain_id):
raise ValueError( continue
f"PDB contains an insertion code at chain {chain.id} and residue " for res in chain:
f"index {res.id[1]}. These are not supported." if res.id[2] != " ":
raise ValueError(
f"PDB contains an insertion code at chain {chain.id} and residue "
f"index {res.id[1]}. These are not supported."
)
res_shortname = residue_constants.restype_3to1.get(res.resname, "X")
restype_idx = residue_constants.restype_order.get(
res_shortname, residue_constants.restype_num
) )
res_shortname = residue_constants.restype_3to1.get(res.resname, "X") pos = np.zeros((residue_constants.atom_type_num, 3))
restype_idx = residue_constants.restype_order.get( mask = np.zeros((residue_constants.atom_type_num,))
res_shortname, residue_constants.restype_num res_b_factors = np.zeros((residue_constants.atom_type_num,))
) for atom in res:
pos = np.zeros((residue_constants.atom_type_num, 3)) if atom.name not in residue_constants.atom_types:
mask = np.zeros((residue_constants.atom_type_num,)) continue
res_b_factors = np.zeros((residue_constants.atom_type_num,)) pos[residue_constants.atom_order[atom.name]] = atom.coord
for atom in res: mask[residue_constants.atom_order[atom.name]] = 1.0
if atom.name not in residue_constants.atom_types: res_b_factors[
residue_constants.atom_order[atom.name]
] = atom.bfactor
if np.sum(mask) < 0.5:
# If no known atom positions are reported for the residue then skip it.
continue continue
pos[residue_constants.atom_order[atom.name]] = atom.coord aatype.append(restype_idx)
mask[residue_constants.atom_order[atom.name]] = 1.0 atom_positions.append(pos)
res_b_factors[ atom_mask.append(mask)
residue_constants.atom_order[atom.name] residue_index.append(res.id[1])
] = atom.bfactor chain_ids.append(chain.id)
if np.sum(mask) < 0.5: b_factors.append(res_b_factors)
# If no known atom positions are reported for the residue then skip it.
continue
aatype.append(restype_idx)
atom_positions.append(pos)
atom_mask.append(mask)
residue_index.append(res.id[1])
b_factors.append(res_b_factors)
parents = None parents = None
parents_chain_index = None
if("PARENT" in pdb_str): if("PARENT" in pdb_str):
parents = []
parents_chain_index = []
chain_id = 0
for l in pdb_str.split("\n"): for l in pdb_str.split("\n"):
if("PARENT" in l and not "N/A" in l): if("PARENT" in l):
parents = l.split()[1:] if(not "N/A" in l):
break parent_names = l.split()[1:]
parents.extend(parent_names)
parents_chain_index.extend([
chain_id for _ in parent_names
])
chain_id += 1
unique_chain_ids = np.unique(chain_ids)
chain_id_mapping = {cid: n for n, cid in enumerate(string.ascii_uppercase)}
chain_index = np.array([chain_id_mapping[cid] for cid in chain_ids])
return Protein( return Protein(
atom_positions=np.array(atom_positions), atom_positions=np.array(atom_positions),
atom_mask=np.array(atom_mask), atom_mask=np.array(atom_mask),
aatype=np.array(aatype), aatype=np.array(aatype),
residue_index=np.array(residue_index), residue_index=np.array(residue_index),
chain_index=chain_index,
b_factors=np.array(b_factors), b_factors=np.array(b_factors),
parents=parents, parents=parents,
parents_chain_index=parents_chain_index,
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment