Commit cef198e0 authored by Augustin Zidek, committed by Copybara-Service
Browse files

Use pLDDT in the B-factor column of the output PDBs.

PiperOrigin-RevId: 390566020
Change-Id: I3fafbe8246d0a5ad018f0398b39bf7dacee00468
parent f65e94fc
...@@ -273,6 +273,10 @@ The contents of each output file are as follows: ...@@ -273,6 +273,10 @@ The contents of each output file are as follows:
serve for a visualisation of domain packing confidence within the serve for a visualisation of domain packing confidence within the
structure. structure.
The pLDDT confidence measure is stored in the B-factor field of the output PDB
files (although, unlike a B-factor, a higher pLDDT is better, so care must be
taken when using it for tasks such as molecular replacement).
This code has been tested to match mean top-1 accuracy on a CASP14 test set with This code has been tested to match mean top-1 accuracy on a CASP14 test set with
pLDDT ranking over 5 model predictions (some CASP targets were run with earlier pLDDT ranking over 5 model predictions (some CASP targets were run with earlier
versions of AlphaFold and some had manual interventions; see our forthcoming versions of AlphaFold and some had manual interventions; see our forthcoming
......
...@@ -26,6 +26,7 @@ from absl import app ...@@ -26,6 +26,7 @@ from absl import app
from absl import flags from absl import flags
from absl import logging from absl import logging
from alphafold.common import protein from alphafold.common import protein
from alphafold.common import residue_constants
from alphafold.data import pipeline from alphafold.data import pipeline
from alphafold.data import templates from alphafold.data import templates
from alphafold.model import data from alphafold.model import data
...@@ -158,15 +159,22 @@ def predict_structure( ...@@ -158,15 +159,22 @@ def predict_structure(
timings[f'predict_benchmark_{model_name}'] = time.time() - t_0 timings[f'predict_benchmark_{model_name}'] = time.time() - t_0
# Get mean pLDDT confidence metric. # Get mean pLDDT confidence metric.
plddts[model_name] = np.mean(prediction_result['plddt']) plddt = prediction_result['plddt']
plddts[model_name] = np.mean(plddt)
# Save the model outputs. # Save the model outputs.
result_output_path = os.path.join(output_dir, f'result_{model_name}.pkl') result_output_path = os.path.join(output_dir, f'result_{model_name}.pkl')
with open(result_output_path, 'wb') as f: with open(result_output_path, 'wb') as f:
pickle.dump(prediction_result, f, protocol=4) pickle.dump(prediction_result, f, protocol=4)
unrelaxed_protein = protein.from_prediction(processed_feature_dict, # Add the predicted LDDT in the b-factor column.
prediction_result) # Note that higher predicted LDDT value means higher model confidence.
plddt_b_factors = np.repeat(
plddt[:, None], residue_constants.atom_type_num, axis=-1)
unrelaxed_protein = protein.from_prediction(
features=processed_feature_dict,
result=prediction_result,
b_factors=plddt_b_factors)
unrelaxed_pdb_path = os.path.join(output_dir, f'unrelaxed_{model_name}.pdb') unrelaxed_pdb_path = os.path.join(output_dir, f'unrelaxed_{model_name}.pdb')
with open(unrelaxed_pdb_path, 'w') as f: with open(unrelaxed_pdb_path, 'w') as f:
......
...@@ -45,7 +45,7 @@ class RunAlphafoldTest(parameterized.TestCase): ...@@ -45,7 +45,7 @@ class RunAlphafoldTest(parameterized.TestCase):
'predicted_lddt': { 'predicted_lddt': {
'logits': np.ones((10, 50)), 'logits': np.ones((10, 50)),
}, },
'plddt': np.zeros(10), 'plddt': np.ones(10) * 42,
'ptm': np.array(0.), 'ptm': np.array(0.),
'aligned_confidence_probs': np.zeros((10, 10, 50)), 'aligned_confidence_probs': np.zeros((10, 10, 50)),
'predicted_aligned_error': np.zeros((10, 10)), 'predicted_aligned_error': np.zeros((10, 10)),
...@@ -71,6 +71,22 @@ class RunAlphafoldTest(parameterized.TestCase): ...@@ -71,6 +71,22 @@ class RunAlphafoldTest(parameterized.TestCase):
benchmark=False, benchmark=False,
random_seed=0) random_seed=0)
base_output_files = os.listdir(out_dir)
self.assertIn('target.fasta', base_output_files)
self.assertIn('test', base_output_files)
target_output_files = os.listdir(os.path.join(out_dir, 'test'))
self.assertSequenceEqual(
['features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json',
'relaxed_model1.pdb', 'result_model1.pkl', 'timings.json',
'unrelaxed_model1.pdb'], target_output_files)
# Check that pLDDT is set in the B-factor column.
with open(os.path.join(out_dir, 'test', 'unrelaxed_model1.pdb')) as f:
for line in f:
if line.startswith('ATOM'):
self.assertEqual(line[61:66], '42.00')
if __name__ == '__main__': if __name__ == '__main__':
absltest.main() absltest.main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment