Unverified Commit 46f05aeb authored by Ignacio Pickering's avatar Ignacio Pickering Committed by GitHub
Browse files

Consistent units (#422)

* bypass slow code

* delete reference to all_atoms which was unused

* delete whitespace

* make flake8 happy

* rerun

* actual change was off

* typo did not turn on bypassing code

* Add CODATA and IUPAC unit conversion factors

* Add units to main module

* change neurochem and training-benchmark to use the new hartree2kcalmol conversion

* change training-benchmark to use hartree2kcalmol conversion

* change comp6 to use the new hartree2kcalmol conversion

* change training-benchmark without nsys to also use hartree2kcalmol conversion

* change examples to use hartree2kcalmol conversion

* Rename ase to ase_ani to avoid naming conflicts that come from naming internal module the same as an external one we are using

* fix import

* Add ASE's hartree_to_ev conversion to compare with codata 2018

* Standarize our units to be essentially the same as ASE and CODATA 2014

* Change vibration_analysis to use the consistent constants

* Fix explanation in docstring

* Add millieV conversion factor

* Add millieV conversion factor to vibrational analysis

* Add clarification

* flake8

* revert name change

* Fix typo in function name

* Add units to __all__'

* Improve units docstring and comments in preparation for docs

* delete pad from docs

* Add units to api.rst

* minor formatting of docstrings and comments

* Add functions into the API

* final formatting changes

* flake8

* try to make mypy happy

* Checking if mypy prefers this

* rerun
parent 0d0e6d5b
......@@ -39,7 +39,6 @@ Utilities
=========
.. automodule:: torchani.utils
.. autofunction:: torchani.utils.pad
.. autofunction:: torchani.utils.pad_atomic_properties
.. autofunction:: torchani.utils.present_species
.. autofunction:: torchani.utils.strip_redundant_padding
......@@ -77,3 +76,17 @@ TorchANI Optimizater
.. automodule:: torchani.optim
.. autoclass:: torchani.optim.AdamW
Units
=====
.. automodule:: torchani.units
.. autofunction:: torchani.units.hartree2ev
.. autofunction:: torchani.units.hartree2kcalmol
.. autofunction:: torchani.units.hartree2kjoulemol
.. autofunction:: torchani.units.ev2kcalmol
.. autofunction:: torchani.units.ev2kjoulemol
.. autofunction:: torchani.units.mhessian2fconst
.. autofunction:: torchani.units.sqrt_mhessian2invcm
.. autofunction:: torchani.units.sqrt_mhessian2milliev
......@@ -27,6 +27,9 @@ import math
import torch.utils.tensorboard
import tqdm
# helper function to convert energy unit from Hartree to kcal/mol
from torchani.units import hartree2kcalmol
# device to run the training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
......@@ -272,11 +275,6 @@ if os.path.isfile(latest_checkpoint):
# is better than the best, then save the new best model to a checkpoint
# helper function to convert energy unit from Hartree to kcal/mol
def hartree2kcal(x):
return 627.509 * x
def validate():
# run validation
mse_sum = torch.nn.MSELoss(reduction='sum')
......@@ -291,7 +289,7 @@ def validate():
predicted_energies = torch.cat(predicted_energies)
total_mse += mse_sum(predicted_energies, true_energies).item()
count += predicted_energies.shape[0]
return hartree2kcal(math.sqrt(total_mse / count))
return hartree2kcalmol(math.sqrt(total_mse / count))
###############################################################################
......
......@@ -22,6 +22,9 @@ import math
import torch.utils.tensorboard
import tqdm
# helper function to convert energy unit from Hartree to kcal/mol
from torchani.units import hartree2kcalmol
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Rcr = 5.2000e+00
......@@ -217,11 +220,6 @@ if os.path.isfile(latest_checkpoint):
# is better than the best, then save the new best model to a checkpoint
# helper function to convert energy unit from Hartree to kcal/mol
def hartree2kcal(x):
return 627.509 * x
def validate():
# run validation
mse_sum = torch.nn.MSELoss(reduction='sum')
......@@ -236,7 +234,7 @@ def validate():
predicted_energies = torch.cat(predicted_energies)
total_mse += mse_sum(predicted_energies, true_energies).item()
count += predicted_energies.shape[0]
return hartree2kcal(math.sqrt(total_mse / count))
return hartree2kcalmol(math.sqrt(total_mse / count))
###############################################################################
......
......@@ -2,13 +2,12 @@ import os
import torch
import torchani
from torchani.data._pyanitools import anidataloader
from torchani.units import hartree2kcalmol
import argparse
import math
import tqdm
HARTREE2KCAL = 627.509
# parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('dir', help='Path to the COMP6 directory')
......@@ -97,13 +96,12 @@ def do_benchmark(model):
rmse_averager_energy.update(ediff ** 2)
rmse_averager_relative_energy.update(relative_ediff ** 2)
rmse_averager_force.update(fdiff ** 2)
mae_energy = mae_averager_energy.compute() * HARTREE2KCAL
rmse_energy = math.sqrt(rmse_averager_energy.compute()) * HARTREE2KCAL
mae_relative_energy = mae_averager_relative_energy.compute() * HARTREE2KCAL
rmse_relative_energy = math.sqrt(rmse_averager_relative_energy.compute()) \
* HARTREE2KCAL
mae_force = mae_averager_force.compute() * HARTREE2KCAL
rmse_force = math.sqrt(rmse_averager_force.compute()) * HARTREE2KCAL
mae_energy = hartree2kcalmol(mae_averager_energy.compute())
rmse_energy = hartree2kcalmol(math.sqrt(rmse_averager_energy.compute()))
mae_relative_energy = hartree2kcalmol(mae_averager_relative_energy.compute())
rmse_relative_energy = hartree2kcalmol(math.sqrt(rmse_averager_relative_energy.compute()))
mae_force = hartree2kcalmol(mae_averager_force.compute())
rmse_force = hartree2kcalmol(math.sqrt(rmse_averager_force.compute()))
print("Energy:", mae_energy, rmse_energy)
print("Relative Energy:", mae_relative_energy, rmse_relative_energy)
print("Forces:", mae_force, rmse_force)
......
......@@ -2,6 +2,7 @@ import torch
import torchani
import argparse
import pkbar
from torchani.units import hartree2kcalmol
WARM_UP_BATCHES = 50
......@@ -32,10 +33,6 @@ def time_func(key, func):
return wrapper
def hartree2kcal(x):
return 627.509 * x
def enable_timers(model):
torchani.aev.cutoff_cosine = time_func('cutoff_cosine', torchani.aev.cutoff_cosine)
torchani.aev.radial_terms = time_func('radial_terms', torchani.aev.radial_terms)
......@@ -171,7 +168,7 @@ if __name__ == "__main__":
num_atoms = torch.cat(num_atoms)
predicted_energies = torch.cat(predicted_energies).to(true_energies.dtype)
loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
rmse = hartree2kcal((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
if PROFILING_STARTED:
torch.cuda.nvtx.range_push("backward")
......
......@@ -4,6 +4,7 @@ import time
import timeit
import argparse
import pkbar
from torchani.units import hartree2kcalmol
def atomic():
......@@ -32,10 +33,6 @@ def time_func(key, func):
return wrapper
def hartree2kcal(x):
return 627.509 * x
if __name__ == "__main__":
# parse command line arguments
parser = argparse.ArgumentParser()
......@@ -166,7 +163,7 @@ if __name__ == "__main__":
num_atoms = torch.cat(num_atoms)
predicted_energies = torch.cat(predicted_energies).to(true_energies.dtype)
loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
rmse = hartree2kcal((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
loss.backward()
optimizer.step()
......
......@@ -30,6 +30,7 @@ from . import utils
from . import neurochem
from . import models
from . import optim
from . import units
from pkg_resources import get_distribution, DistributionNotFound
try:
......@@ -39,7 +40,7 @@ except DistributionNotFound:
pass
__all__ = ['AEVComputer', 'EnergyShifter', 'ANIModel', 'Ensemble', 'SpeciesConverter',
'utils', 'neurochem', 'models', 'optim']
'utils', 'neurochem', 'models', 'optim', 'units']
try:
from . import ase # noqa: F401
......
......@@ -16,6 +16,7 @@ from ..utils import EnergyShifter, ChemicalSymbolsToInts
from ..aev import AEVComputer
from ..optim import AdamW
from collections import OrderedDict
from torchani.units import hartree2kcalmol
class Constants(collections.abc.Mapping):
......@@ -266,10 +267,6 @@ def load_model_ensemble(species, prefix, count):
return Ensemble(models)
def hartree2kcal(x):
return 627.509 * x
if sys.version_info[0] > 2:
class Trainer:
......@@ -577,7 +574,7 @@ if sys.version_info[0] > 2:
predicted_energies = torch.cat(predicted_energies)
total_mse += self.mse_sum(predicted_energies, true_energies).item()
count += predicted_energies.shape[0]
return hartree2kcal(math.sqrt(total_mse / count))
return hartree2kcalmol(math.sqrt(total_mse / count))
def run(self):
"""Run the training"""
......
r"""Unit conversion factors used in torchani
The torchani networks themselves works internally entirely in Hartrees
(energy), Angstroms (distance) and AMU (mass). In some example code and scripts
we convert to other more commonly used units. Our conversion factors are
consistent with `CODATA 2014 recommendations`_, which is also consistent with
the `units used in ASE`_. (However, take into account that ASE uses
electronvolt as its base energy unit, so the appropriate conversion factors
should always be applied when converting from ASE to torchani) Joule-to-kcal
conversion taken from the `IUPAC Goldbook`_. All the conversion factors we use
are defined in this module, and convenience functions to convert between
different units are provided.
.. _units used in ASE:
https://wiki.fysik.dtu.dk/ase/ase/units.html#units
.. _CODATA 2014 recommendations:
https://arxiv.org/pdf/1507.07956.pdf
.. _IUPAC Goldbook:
https://goldbook.iupac.org/terms/view/C00784
"""
import math
# Comments on ASE:
# ASE uses its own constants, which they take from CODATA 2014 as of
# 03/10/2019. ASE's units are created when ase.units is imported.
# Since we don't want to be dependent on ASE changing their module
# we define here our own units for our own purposes, but right now we define
# them to be consistent with ASE values (i. e. our values are also CODATA 2014)
# the difference between CODATA 2014 and CODATA 2018 is negligible.
# General conversion:
# the codata value for hartree in units of eV can be obtained from
# m_e * e^3 / ( 16 * pi^2 * eps_0^2 hbar^2 )
HARTREE_TO_EV = 27.211386024367243 # equal to ase.units.Hartree
EV_TO_JOULE = 1.6021766208e-19 # equal to ase.units._e (electron charge)
JOULE_TO_KCAL = 1 / 4184. # exact
HARTREE_TO_JOULE = HARTREE_TO_EV * EV_TO_JOULE
AVOGADROS_NUMBER = 6.022140857e+23 # equal to ase.units._Nav
SPEED_OF_LIGHT = 299792458.0 # equal to ase.units._c
AMU_TO_KG = 1.660539040e-27 # equal to ase.units._amu
ANGSTROM_TO_METER = 1e-10
NEWTON_TO_MILLIDYNE = 1e8 # exact relation
HARTREE_TO_KCALMOL = HARTREE_TO_JOULE * JOULE_TO_KCAL * AVOGADROS_NUMBER
HARTREE_TO_KJOULEMOL = HARTREE_TO_JOULE * AVOGADROS_NUMBER / 1000
EV_TO_KCALMOL = EV_TO_JOULE * JOULE_TO_KCAL * AVOGADROS_NUMBER
EV_TO_KJOULEMOL = EV_TO_JOULE * AVOGADROS_NUMBER / 1000
# For vibrational analysis:
INVCM_TO_EV = 0.0001239841973964072 # equal to ase.units.invcm
# To convert from the sqrt of eigenvalues (mass-scaled hessian units of
# sqrt(hartree / (amu * A^2)) into cm^-1
# it is necessary to multiply by the sqrt ( HARTREE_TO_JOULE / AMU_TO_KG ),
# then we convert angstroms to meters and divide by 1/ speed_of_light (to
# convert seconds into meters). Finally divide by 100 to get inverse cm
# The resulting value should be close to 17092
SQRT_MHESSIAN_TO_INVCM = (math.sqrt(HARTREE_TO_JOULE / AMU_TO_KG) / ANGSTROM_TO_METER / SPEED_OF_LIGHT) / 100
# meV is other common unit used to present vibrational transition energies
SQRT_MHESSIAN_TO_MILLIEV = SQRT_MHESSIAN_TO_INVCM * INVCM_TO_EV * 1000
# To convert units form mass-scaled hessian units into mDyne / Angstrom (force
# constant units) factor should be close to 4.36
MHESSIAN_TO_FCONST = HARTREE_TO_JOULE * NEWTON_TO_MILLIDYNE / ANGSTROM_TO_METER
def sqrt_mhessian2invcm(x):
r"""Converts sqrt(mass-scaled hessian units) into cm^-1
Converts form units of sqrt(Hartree / (amu * Angstrom^2))
which are sqrt(units of the mass-scaled hessian matrix)
into units of inverse centimeters.
Take into account that to convert the actual eigenvalues of the hessian
into wavenumbers it is necessary to multiply by an extra factor of 1 / (2 *
pi)"""
return x * SQRT_MHESSIAN_TO_INVCM
def sqrt_mhessian2milliev(x):
r"""Converts sqrt(mass-scaled hessian units) into meV
Converts form units of sqrt(Hartree / (amu * Angstrom^2))
which are sqrt(units of the mass-scaled hessian matrix)
into units of milli-electronvolts.
Take into account that to convert the actual eigenvalues of the hessian
into wavenumbers it is necessary to multiply by an extra factor of 1 / (2 *
pi)"""
return x * SQRT_MHESSIAN_TO_MILLIEV
def mhessian2fconst(x):
r"""Converts mass-scaled hessian units into mDyne/Angstrom
Converts from units of mass-scaled hessian (Hartree / (amu * Angstrom^2)
into force constant units (mDyne/Angstom), where 1 N = 1 * 10^8 mDyne"""
return x * MHESSIAN_TO_FCONST
def hartree2ev(x):
r"""Hartree to eV conversion factor from 2014 CODATA"""
return x * HARTREE_TO_EV
def ev2kjoulemol(x):
r"""Electronvolt to kJ/mol conversion factor from CODATA 2014"""
return x * EV_TO_KJOULEMOL
def ev2kcalmol(x):
r"""Electronvolt to kcal/mol conversion factor from CODATA 2014"""
return x * EV_TO_KCALMOL
def hartree2kjoulemol(x):
r"""Hartree to kJ/mol conversion factor from CODATA 2014"""
return x * HARTREE_TO_KJOULEMOL
def hartree2kcalmol(x):
r"""Hartree to kJ/mol conversion factor from CODATA 2014"""
return x * HARTREE_TO_KCALMOL
# Add actual values to docstrings on import
hartree2ev.__doc__ = str(hartree2ev.__doc__) + f'\n\n1 Hartree = {hartree2ev(1)} eV'
hartree2kcalmol.__doc__ = str(hartree2kcalmol.__doc__) + f'\n\n1 Hartree = {hartree2kcalmol(1)} kcal/mol'
hartree2kjoulemol.__doc__ = str(hartree2kjoulemol) + f'\n\n1 Hartree = {hartree2kjoulemol(1)} kJ/mol'
ev2kjoulemol.__doc__ = str(ev2kjoulemol.__doc__) + f'\n\n1 eV = {ev2kjoulemol(1)} kJ/mol'
ev2kcalmol.__doc__ = str(ev2kcalmol.__doc__) + f'\n\n1 eV = {ev2kcalmol(1)} kcal/mol'
mhessian2fconst.__doc__ = str(mhessian2fconst.__doc__) + f'\n\n1 Hartree / (AMU * Angstrom^2) = {ev2kcalmol(1)} mDyne/Angstrom'
sqrt_mhessian2milliev.__doc__ = str(sqrt_mhessian2milliev.__doc__) + f'\n\n1 sqrt(Hartree / (AMU * Angstrom^2)) = {sqrt_mhessian2milliev(1)} meV'
sqrt_mhessian2invcm.__doc__ = str(sqrt_mhessian2invcm.__doc__) + f'\n\n1 sqrt(Hartree / (AMU * Angstrom^2)) = {sqrt_mhessian2invcm(1)} cm^-1'
......@@ -5,6 +5,7 @@ import math
import numpy as np
from collections import defaultdict
from typing import Tuple, NamedTuple, Optional
from torchani.units import sqrt_mhessian2invcm, sqrt_mhessian2milliev, mhessian2fconst
from .nn import SpeciesEnergies
......@@ -278,12 +279,18 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
- MDU (mass deweighted unnormalized)
- MDN (mass deweighted normalized)
MDU modes are orthogonal but not normalized, MDN modes are normalized
but not orthogonal. MWN modes are orthonormal, but they correspond
MDU modes are not orthogonal, and not normalized,
MDN modes are not orthogonal, and normalized.
MWN modes are orthonormal, but they correspond
to mass weighted cartesian coordinates (x' = sqrt(m)x).
"""
if unit != 'cm^-1':
raise ValueError('Only cm^-1 are supported right now')
if unit == 'meV':
unit_converter = sqrt_mhessian2milliev
elif unit == 'cm^-1':
unit_converter = sqrt_mhessian2invcm
else:
raise ValueError(f'Only meV and cm^-1 are supported right now')
assert hessian.shape[0] == 1, 'Currently only supporting computing one molecule a time'
# Solving the eigenvalue problem: Hq = w^2 * T q
# where H is the Hessian matrix, q is the normal coordinates,
......@@ -300,8 +307,8 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
eigenvalues, eigenvectors = torch.symeig(mass_scaled_hessian, eigenvectors=True)
angular_frequencies = eigenvalues.sqrt()
frequencies = angular_frequencies / (2 * math.pi)
# converting from sqrt(hartree / (amu * angstrom^2)) to cm^-1
wavenumbers = frequencies * 17092
# converting from sqrt(hartree / (amu * angstrom^2)) to cm^-1 or meV
wavenumbers = unit_converter(frequencies)
# Note that the normal modes are the COLUMNS of the eigenvectors matrix
mw_normalized = eigenvectors.t()
......@@ -310,8 +317,8 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
md_normalized = md_unnormalized * norm_factors.unsqueeze(1)
rmasses = norm_factors**2 # units are AMU
# The conversion factor for Ha/(AMU*A^2) to mDyne/(A*AMU) is 4.3597482
fconstants = eigenvalues * rmasses * 4.3597482 # units are mDyne/A
# The conversion factor for Ha/(AMU*A^2) to mDyne/(A*AMU) is about 4.3597482
fconstants = mhessian2fconst(eigenvalues) * rmasses # units are mDyne/A
if mode_type == 'MDN':
modes = (md_normalized).reshape(frequencies.numel(), -1, 3)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment