Commit cef198e0 authored by Augustin Zidek, committed by Copybara-Service
Browse files

Use pLDDT in the B-factor column of the output PDBs.

PiperOrigin-RevId: 390566020
Change-Id: I3fafbe8246d0a5ad018f0398b39bf7dacee00468
parent f65e94fc
......@@ -273,6 +273,10 @@ The contents of each output file are as follows:
serve for a visualisation of domain packing confidence within the
structure.
The pLDDT confidence measure is stored in the B-factor field of the output PDB
files (although, unlike a B-factor, a higher pLDDT is better, so care must be taken
when using it for tasks such as molecular replacement).
This code has been tested to match mean top-1 accuracy on a CASP14 test set with
pLDDT ranking over 5 model predictions (some CASP targets were run with earlier
versions of AlphaFold and some had manual interventions; see our forthcoming
......
......@@ -26,6 +26,7 @@ from absl import app
from absl import flags
from absl import logging
from alphafold.common import protein
from alphafold.common import residue_constants
from alphafold.data import pipeline
from alphafold.data import templates
from alphafold.model import data
......@@ -158,15 +159,22 @@ def predict_structure(
timings[f'predict_benchmark_{model_name}'] = time.time() - t_0
# Get mean pLDDT confidence metric.
plddts[model_name] = np.mean(prediction_result['plddt'])
plddt = prediction_result['plddt']
plddts[model_name] = np.mean(plddt)
# Save the model outputs.
result_output_path = os.path.join(output_dir, f'result_{model_name}.pkl')
with open(result_output_path, 'wb') as f:
pickle.dump(prediction_result, f, protocol=4)
unrelaxed_protein = protein.from_prediction(processed_feature_dict,
prediction_result)
# Add the predicted LDDT in the b-factor column.
# Note that higher predicted LDDT value means higher model confidence.
plddt_b_factors = np.repeat(
plddt[:, None], residue_constants.atom_type_num, axis=-1)
unrelaxed_protein = protein.from_prediction(
features=processed_feature_dict,
result=prediction_result,
b_factors=plddt_b_factors)
unrelaxed_pdb_path = os.path.join(output_dir, f'unrelaxed_{model_name}.pdb')
with open(unrelaxed_pdb_path, 'w') as f:
......
......@@ -45,7 +45,7 @@ class RunAlphafoldTest(parameterized.TestCase):
'predicted_lddt': {
'logits': np.ones((10, 50)),
},
'plddt': np.zeros(10),
'plddt': np.ones(10) * 42,
'ptm': np.array(0.),
'aligned_confidence_probs': np.zeros((10, 10, 50)),
'predicted_aligned_error': np.zeros((10, 10)),
......@@ -71,6 +71,22 @@ class RunAlphafoldTest(parameterized.TestCase):
benchmark=False,
random_seed=0)
base_output_files = os.listdir(out_dir)
self.assertIn('target.fasta', base_output_files)
self.assertIn('test', base_output_files)
target_output_files = os.listdir(os.path.join(out_dir, 'test'))
self.assertSequenceEqual(
['features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json',
'relaxed_model1.pdb', 'result_model1.pkl', 'timings.json',
'unrelaxed_model1.pdb'], target_output_files)
# Check that pLDDT is set in the B-factor column.
with open(os.path.join(out_dir, 'test', 'unrelaxed_model1.pdb')) as f:
for line in f:
if line.startswith('ATOM'):
self.assertEqual(line[61:66], '42.00')
if __name__ == '__main__':
absltest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment