Commit 50ecd75d authored by Augustin Zidek, committed by Copybara-Service
Browse files

Skip obsolete PDB templates that don't have a replacement.

Fixes https://github.com/deepmind/alphafold/issues/163.

PiperOrigin-RevId: 395878875
Change-Id: I99a9defa95547582bbfd9fec3ae821386274d77f
parent 98caef21
...@@ -132,19 +132,26 @@ def _is_after_cutoff( ...@@ -132,19 +132,26 @@ def _is_after_cutoff(
return False return False
def _parse_obsolete(obsolete_file_path: str) -> Mapping[str, Optional[str]]:
  """Parses the data file from PDB that lists which pdb_ids are obsolete.

  Records are parsed as whitespace-separated fields rather than by fixed
  column offsets, so a record is still recognised when its spacing differs
  from the canonical column layout.

  Args:
    obsolete_file_path: Path to the PDB listing of obsolete entries.

  Returns:
    A mapping from each obsolete pdb_id (lower-cased) to the lower-cased id of
    the first replacement listed for it, or to None when the entry was removed
    without any replacement.
  """
  result = {}
  with open(obsolete_file_path) as f:
    for line in f:
      line = line.strip()
      # Format:    Date      From     To
      # 'OBSLTE    06-NOV-19 6G9Y'                  - Removed, rare
      # 'OBSLTE    31-JUL-94 116L     216L'         - Replaced, common
      # 'OBSLTE    26-SEP-06 2H33     2JM5 2OWI'    - Replaced by multiple, rare
      if not line.startswith('OBSLTE'):
        continue
      fields = line.split()
      if len(fields) < 3:
        # No 'From' id present; nothing to record for this line.
        continue
      from_id = fields[2].lower()
      # Only the first replacement is kept when several are listed; None marks
      # an entry that was removed without a replacement.
      to_id = fields[3].lower() if len(fields) > 3 else None
      result[from_id] = to_id
  return result
...@@ -673,13 +680,18 @@ def _process_single_hit( ...@@ -673,13 +680,18 @@ def _process_single_hit(
mmcif_dir: str, mmcif_dir: str,
max_template_date: datetime.datetime, max_template_date: datetime.datetime,
release_dates: Mapping[str, datetime.datetime], release_dates: Mapping[str, datetime.datetime],
obsolete_pdbs: Mapping[str, str], obsolete_pdbs: Mapping[str, Optional[str]],
kalign_binary_path: str, kalign_binary_path: str,
strict_error_check: bool = False) -> SingleHitResult: strict_error_check: bool = False) -> SingleHitResult:
"""Tries to extract template features from a single HHSearch hit.""" """Tries to extract template features from a single HHSearch hit."""
# Fail hard if we can't get the PDB ID and chain name from the hit. # Fail hard if we can't get the PDB ID and chain name from the hit.
hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit) hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit)
# This hit has been removed (obsoleted) from PDB, skip it.
if hit_pdb_code in obsolete_pdbs and obsolete_pdbs[hit_pdb_code] is None:
return SingleHitResult(
features=None, error=None, warning=f'Hit {hit_pdb_code} is obsolete.')
if hit_pdb_code not in release_dates: if hit_pdb_code not in release_dates:
if hit_pdb_code in obsolete_pdbs: if hit_pdb_code in obsolete_pdbs:
hit_pdb_code = obsolete_pdbs[hit_pdb_code] hit_pdb_code = obsolete_pdbs[hit_pdb_code]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment