Commit f6d02cd9 authored by Tim O'Donnell
Browse files

fix

parent 1abe6160
...@@ -57,6 +57,9 @@ class OpenFoldSingleDataset(torch.utils.data.Dataset): ...@@ -57,6 +57,9 @@ class OpenFoldSingleDataset(torch.utils.data.Dataset):
Path to a directory containing template mmCIF files. Path to a directory containing template mmCIF files.
config: config:
A dataset config object. See openfold.config A dataset config object. See openfold.config
chain_data_cache_path:
Path to cache of data_dir generated by
scripts/generate_chain_data_cache.py
kalign_binary_path: kalign_binary_path:
Path to kalign binary. Path to kalign binary.
max_template_hits: max_template_hits:
...@@ -121,22 +124,24 @@ class OpenFoldSingleDataset(torch.utils.data.Dataset): ...@@ -121,22 +124,24 @@ class OpenFoldSingleDataset(torch.utils.data.Dataset):
c for c in self._chain_ids if c in chains_to_include c for c in self._chain_ids if c in chains_to_include
] ]
if self.chain_data_cache is not None:
# Filter to include only chains where we have structure data # Filter to include only chains where we have structure data
# (i.e. entries in chain_data_cache) # (entries in chain_data_cache)
original_chain_ids = self._chain_ids original_chain_ids = self._chain_ids
self._chain_ids = [ self._chain_ids = [
c for c in self._chain_ids if c in self.chain_data_cache c for c in self._chain_ids if c in self.chain_data_cache
] ]
if len(self._chain_ids) < len(original_chain_ids): if len(self._chain_ids) < len(original_chain_ids):
missing = [ missing = [
c for c in original_chain_ids if c not in self.chain_data_cache c for c in original_chain_ids
if c not in self.chain_data_cache
] ]
max_to_print = 10 max_to_print = 10
missing_examples = ", ".join(missing[:max_to_print]) missing_examples = ", ".join(missing[:max_to_print])
if len(missing) > max_to_print: if len(missing) > max_to_print:
missing_examples += ", ..." missing_examples += ", ..."
logging.warning( logging.warning(
"Ignoring %d alignment entries (%s) that have no corresponding " "Removing %d alignment entries (%s) with no corresponding "
"entries in chain_data_cache (%s).", "entries in chain_data_cache (%s).",
len(missing), len(missing),
missing_examples, missing_examples,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment