Consistent units (#422)

* bypass slow code * delete reference to all_atoms which was unused * delete whitespace * make flake8 happy * rerun * actual change was off * typo did not turn on bypassing code * Add CODATA and IUPAC unit conversion factors * Add units to main module * change neurochem and training-benchmark to use the new hartree2kcalmol conversion * change training-benchmark to use hartree2kcalmol conversion * change comp6 to use the new hartree2kcalmol conversion * change training-benchmark without nsys to also use hartree2kcalmol conversion * change examples to use hartree2kcalmol conversion * Rename ase to ase_ani to avoid naming conflicts that come from naming internal module the same as an external one we are using * fix import * Add ASE's hartree_to_ev conversion to compare with codata 2018 * Standarize our units to be essentially the same as ASE and CODATA 2014 * Change vibration_analysis to use the consistent constants * Fix explanation in docstring * Add millieV conversion factor * Add millieV conversion factor to vibrational analysis * Add clarification * flake8 * revert name change * Fix typo in function name * Add units to __all__' * Improve units docstring and comments in preparation for docs * delete pad from docs * Add units to api.rst * minor formatting of docstrings and comments * Add functions into the API * final formatting changes * flake8 * try to make mypy happy * Checking if mypy prefers this * rerun

Consistent units (#422)
* bypass slow code * delete reference to all_atoms which was unused * delete whitespace * make flake8 happy * rerun * actual change was off * typo did not turn on bypassing code * Add CODATA and IUPAC unit conversion factors * Add units to main module * change neurochem and training-benchmark to use the new hartree2kcalmol conversion * change training-benchmark to use hartree2kcalmol conversion * change comp6 to use the new hartree2kcalmol conversion * change training-benchmark without nsys to also use hartree2kcalmol conversion * change examples to use hartree2kcalmol conversion * Rename ase to ase_ani to avoid naming conflicts that come from naming internal module the same as an external one we are using * fix import * Add ASE's hartree_to_ev conversion to compare with codata 2018 * Standarize our units to be essentially the same as ASE and CODATA 2014 * Change vibration_analysis to use the consistent constants * Fix explanation in docstring * Add millieV conversion factor * Add millieV conversion factor to vibrational analysis * Add clarification * flake8 * revert name change * Fix typo in function name * Add units to __all__' * Improve units docstring and comments in preparation for docs * delete pad from docs * Add units to api.rst * minor formatting of docstrings and comments * Add functions into the API * final formatting changes * flake8 * try to make mypy happy * Checking if mypy prefers this * rerun
46f05aeb · Ignacio Pickering · GitHub · 0d0e6d5b · 46f05aeb · 46f05aeb
Unverified Commit 46f05aeb authored Mar 12, 2020 by Ignacio Pickering Committed by GitHub Mar 12, 2020
10 changed files
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -39,7 +39,6 @@ Utilities
 =========

 .. automodule:: torchani.utils
-.. autofunction:: torchani.utils.pad
 .. autofunction:: torchani.utils.pad_atomic_properties
 .. autofunction:: torchani.utils.present_species
 .. autofunction:: torchani.utils.strip_redundant_padding
@@ -77,3 +76,17 @@ TorchANI Optimizater

 .. automodule:: torchani.optim
 .. autoclass:: torchani.optim.AdamW
+
+
+Units
+=====
+
+.. automodule:: torchani.units
+.. autofunction:: torchani.units.hartree2ev
+.. autofunction:: torchani.units.hartree2kcalmol
+.. autofunction:: torchani.units.hartree2kjoulemol
+.. autofunction:: torchani.units.ev2kcalmol
+.. autofunction:: torchani.units.ev2kjoulemol
+.. autofunction:: torchani.units.mhessian2fconst
+.. autofunction:: torchani.units.sqrt_mhessian2invcm
+.. autofunction:: torchani.units.sqrt_mhessian2milliev
--- a/examples/nnp_training.py
+++ b/examples/nnp_training.py
@@ -27,6 +27,9 @@ import math
 import torch.utils.tensorboard
 import tqdm

+# helper function to convert energy unit from Hartree to kcal/mol
+from torchani.units import hartree2kcalmol
+
 # device to run the training
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

@@ -272,11 +275,6 @@ if os.path.isfile(latest_checkpoint):
 # is better than the best, then save the new best model to a checkpoint


-# helper function to convert energy unit from Hartree to kcal/mol
-def hartree2kcal(x):
-    return 627.509 * x
-
-
 def validate():
    # run validation
    mse_sum = torch.nn.MSELoss(reduction='sum')
@@ -291,7 +289,7 @@ def validate():
        predicted_energies = torch.cat(predicted_energies)
        total_mse += mse_sum(predicted_energies, true_energies).item()
        count += predicted_energies.shape[0]
-    return hartree2kcal(math.sqrt(total_mse / count))
+    return hartree2kcalmol(math.sqrt(total_mse / count))


 ###############################################################################

--- a/examples/nnp_training_force.py
+++ b/examples/nnp_training_force.py
@@ -22,6 +22,9 @@ import math
 import torch.utils.tensorboard
 import tqdm

+# helper function to convert energy unit from Hartree to kcal/mol
+from torchani.units import hartree2kcalmol
+
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

 Rcr = 5.2000e+00
@@ -217,11 +220,6 @@ if os.path.isfile(latest_checkpoint):
 # is better than the best, then save the new best model to a checkpoint


-# helper function to convert energy unit from Hartree to kcal/mol
-def hartree2kcal(x):
-    return 627.509 * x
-
-
 def validate():
    # run validation
    mse_sum = torch.nn.MSELoss(reduction='sum')
@@ -236,7 +234,7 @@ def validate():
        predicted_energies = torch.cat(predicted_energies)
        total_mse += mse_sum(predicted_energies, true_energies).item()
        count += predicted_energies.shape[0]
-    return hartree2kcal(math.sqrt(total_mse / count))
+    return hartree2kcalmol(math.sqrt(total_mse / count))


 ###############################################################################

--- a/tools/comp6.py
+++ b/tools/comp6.py
@@ -2,13 +2,12 @@ import os
 import torch
 import torchani
 from torchani.data._pyanitools import anidataloader
+from torchani.units import hartree2kcalmol
 import argparse
 import math
 import tqdm


-HARTREE2KCAL = 627.509
-
 # parse command line arguments
 parser = argparse.ArgumentParser()
 parser.add_argument('dir', help='Path to the COMP6 directory')
@@ -97,13 +96,12 @@ def do_benchmark(model):
        rmse_averager_energy.update(ediff ** 2)
        rmse_averager_relative_energy.update(relative_ediff ** 2)
        rmse_averager_force.update(fdiff ** 2)
-    mae_energy = mae_averager_energy.compute() * HARTREE2KCAL
-    rmse_energy = math.sqrt(rmse_averager_energy.compute()) * HARTREE2KCAL
-    mae_relative_energy = mae_averager_relative_energy.compute() * HARTREE2KCAL
-    rmse_relative_energy = math.sqrt(rmse_averager_relative_energy.compute()) \
-        * HARTREE2KCAL
-    mae_force = mae_averager_force.compute() * HARTREE2KCAL
-    rmse_force = math.sqrt(rmse_averager_force.compute()) * HARTREE2KCAL
+    mae_energy = hartree2kcalmol(mae_averager_energy.compute())
+    rmse_energy = hartree2kcalmol(math.sqrt(rmse_averager_energy.compute()))
+    mae_relative_energy = hartree2kcalmol(mae_averager_relative_energy.compute())
+    rmse_relative_energy = hartree2kcalmol(math.sqrt(rmse_averager_relative_energy.compute()))
+    mae_force = hartree2kcalmol(mae_averager_force.compute())
+    rmse_force = hartree2kcalmol(math.sqrt(rmse_averager_force.compute()))
    print("Energy:", mae_energy, rmse_energy)
    print("Relative Energy:", mae_relative_energy, rmse_relative_energy)
    print("Forces:", mae_force, rmse_force)

--- a/tools/training-benchmark-nsys-profile.py
+++ b/tools/training-benchmark-nsys-profile.py
@@ -2,6 +2,7 @@ import torch
 import torchani
 import argparse
 import pkbar
+from torchani.units import hartree2kcalmol


 WARM_UP_BATCHES = 50
@@ -32,10 +33,6 @@ def time_func(key, func):
    return wrapper


-def hartree2kcal(x):
-    return 627.509 * x
-
-
 def enable_timers(model):
    torchani.aev.cutoff_cosine = time_func('cutoff_cosine', torchani.aev.cutoff_cosine)
    torchani.aev.radial_terms = time_func('radial_terms', torchani.aev.radial_terms)
@@ -171,7 +168,7 @@ if __name__ == "__main__":
            num_atoms = torch.cat(num_atoms)
            predicted_energies = torch.cat(predicted_energies).to(true_energies.dtype)
            loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
-            rmse = hartree2kcal((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
+            rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()

            if PROFILING_STARTED:
                torch.cuda.nvtx.range_push("backward")

--- a/tools/training-benchmark.py
+++ b/tools/training-benchmark.py
@@ -4,6 +4,7 @@ import time
 import timeit
 import argparse
 import pkbar
+from torchani.units import hartree2kcalmol


 def atomic():
@@ -32,10 +33,6 @@ def time_func(key, func):
    return wrapper


-def hartree2kcal(x):
-    return 627.509 * x
-
-
 if __name__ == "__main__":
    # parse command line arguments
    parser = argparse.ArgumentParser()
@@ -166,7 +163,7 @@ if __name__ == "__main__":
            num_atoms = torch.cat(num_atoms)
            predicted_energies = torch.cat(predicted_energies).to(true_energies.dtype)
            loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
-            rmse = hartree2kcal((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
+            rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
            loss.backward()
            optimizer.step()


--- a/torchani/__init__.py
+++ b/torchani/__init__.py
@@ -30,6 +30,7 @@ from . import utils
 from . import neurochem
 from . import models
 from . import optim
+from . import units
 from pkg_resources import get_distribution, DistributionNotFound

 try:
@@ -39,7 +40,7 @@ except DistributionNotFound:
    pass

 __all__ = ['AEVComputer', 'EnergyShifter', 'ANIModel', 'Ensemble', 'SpeciesConverter',
-           'utils', 'neurochem', 'models', 'optim']
+           'utils', 'neurochem', 'models', 'optim', 'units']

 try:
    from . import ase  # noqa: F401

--- a/torchani/neurochem/__init__.py
+++ b/torchani/neurochem/__init__.py
@@ -16,6 +16,7 @@ from ..utils import EnergyShifter, ChemicalSymbolsToInts
 from ..aev import AEVComputer
 from ..optim import AdamW
 from collections import OrderedDict
+from torchani.units import hartree2kcalmol


 class Constants(collections.abc.Mapping):
@@ -266,10 +267,6 @@ def load_model_ensemble(species, prefix, count):
    return Ensemble(models)


-def hartree2kcal(x):
-    return 627.509 * x
-
-
 if sys.version_info[0] > 2:

    class Trainer:
@@ -577,7 +574,7 @@ if sys.version_info[0] > 2:
                predicted_energies = torch.cat(predicted_energies)
                total_mse += self.mse_sum(predicted_energies, true_energies).item()
                count += predicted_energies.shape[0]
-            return hartree2kcal(math.sqrt(total_mse / count))
+            return hartree2kcalmol(math.sqrt(total_mse / count))

        def run(self):
            """Run the training"""

--- a/torchani/units.py
+++ b/torchani/units.py
+r"""Unit conversion factors used in torchani
+
+The torchani networks themselves works internally entirely in Hartrees
+(energy), Angstroms (distance) and AMU (mass). In some example code and scripts
+we convert to other more commonly used units. Our conversion factors are
+consistent with `CODATA 2014 recommendations`_, which is also consistent with
+the `units used in ASE`_. (However, take into account that ASE uses
+electronvolt as its base energy unit, so the appropriate conversion factors
+should always be applied when converting from ASE to torchani) Joule-to-kcal
+conversion taken from the `IUPAC Goldbook`_.  All the conversion factors we use
+are defined in this module, and convenience functions to convert between
+different units are provided.
+
+
+.. _units used in ASE:
+    https://wiki.fysik.dtu.dk/ase/ase/units.html#units
+
+.. _CODATA 2014 recommendations:
+    https://arxiv.org/pdf/1507.07956.pdf
+
+.. _IUPAC Goldbook:
+    https://goldbook.iupac.org/terms/view/C00784
+
+"""
+import math
+
+
+# Comments on ASE:
+
+# ASE uses its own constants, which they take from CODATA 2014 as of
+# 03/10/2019. ASE's units are created when ase.units is imported.
+# Since we don't want to be dependent on ASE changing their module
+# we define here our own units for our own purposes, but right now we define
+# them to be consistent with ASE values (i. e. our values are also CODATA 2014)
+# the difference between CODATA 2014 and CODATA 2018 is negligible.
+
+
+# General conversion:
+
+# the codata value for hartree in units of eV can be obtained from
+# m_e * e^3 / ( 16 * pi^2 * eps_0^2 hbar^2 )
+HARTREE_TO_EV = 27.211386024367243  # equal to ase.units.Hartree
+EV_TO_JOULE = 1.6021766208e-19  # equal to ase.units._e (electron charge)
+JOULE_TO_KCAL = 1 / 4184.  # exact
+HARTREE_TO_JOULE = HARTREE_TO_EV * EV_TO_JOULE
+AVOGADROS_NUMBER = 6.022140857e+23  # equal to ase.units._Nav
+SPEED_OF_LIGHT = 299792458.0  # equal to ase.units._c
+AMU_TO_KG = 1.660539040e-27  # equal to ase.units._amu
+ANGSTROM_TO_METER = 1e-10
+NEWTON_TO_MILLIDYNE = 1e8  # exact relation
+HARTREE_TO_KCALMOL = HARTREE_TO_JOULE * JOULE_TO_KCAL * AVOGADROS_NUMBER
+HARTREE_TO_KJOULEMOL = HARTREE_TO_JOULE * AVOGADROS_NUMBER / 1000
+EV_TO_KCALMOL = EV_TO_JOULE * JOULE_TO_KCAL * AVOGADROS_NUMBER
+EV_TO_KJOULEMOL = EV_TO_JOULE * AVOGADROS_NUMBER / 1000
+
+# For vibrational analysis:
+
+INVCM_TO_EV = 0.0001239841973964072  # equal to ase.units.invcm
+# To convert from the sqrt of eigenvalues (mass-scaled hessian units of
+# sqrt(hartree / (amu * A^2)) into cm^-1
+# it is necessary to multiply by the sqrt ( HARTREE_TO_JOULE / AMU_TO_KG ),
+# then we convert angstroms to meters and divide by 1/ speed_of_light (to
+# convert seconds into meters). Finally divide by 100 to get inverse cm
+# The resulting value should be close to 17092
+SQRT_MHESSIAN_TO_INVCM = (math.sqrt(HARTREE_TO_JOULE / AMU_TO_KG) / ANGSTROM_TO_METER / SPEED_OF_LIGHT) / 100
+# meV is other common unit used to present vibrational transition energies
+SQRT_MHESSIAN_TO_MILLIEV = SQRT_MHESSIAN_TO_INVCM * INVCM_TO_EV * 1000
+# To convert units form mass-scaled hessian units into mDyne / Angstrom (force
+# constant units) factor should be close to 4.36
+MHESSIAN_TO_FCONST = HARTREE_TO_JOULE * NEWTON_TO_MILLIDYNE / ANGSTROM_TO_METER
+
+
+def sqrt_mhessian2invcm(x):
+    r"""Converts sqrt(mass-scaled hessian units) into cm^-1
+
+    Converts form units of sqrt(Hartree / (amu * Angstrom^2))
+    which are sqrt(units of the mass-scaled hessian matrix)
+    into units of inverse centimeters.
+
+    Take into account that to convert the actual eigenvalues of the hessian
+    into wavenumbers it is necessary to multiply by an extra factor of 1 / (2 *
+    pi)"""
+    return x * SQRT_MHESSIAN_TO_INVCM
+
+
+def sqrt_mhessian2milliev(x):
+    r"""Converts sqrt(mass-scaled hessian units) into meV
+
+    Converts form units of sqrt(Hartree / (amu * Angstrom^2))
+    which are sqrt(units of the mass-scaled hessian matrix)
+    into units of milli-electronvolts.
+
+    Take into account that to convert the actual eigenvalues of the hessian
+    into wavenumbers it is necessary to multiply by an extra factor of 1 / (2 *
+    pi)"""
+    return x * SQRT_MHESSIAN_TO_MILLIEV
+
+
+def mhessian2fconst(x):
+    r"""Converts mass-scaled hessian units into mDyne/Angstrom
+
+    Converts from units of mass-scaled hessian (Hartree / (amu * Angstrom^2)
+    into force constant units (mDyne/Angstom), where 1 N = 1 * 10^8 mDyne"""
+    return x * MHESSIAN_TO_FCONST
+
+
+def hartree2ev(x):
+    r"""Hartree to eV conversion factor from 2014 CODATA"""
+    return x * HARTREE_TO_EV
+
+
+def ev2kjoulemol(x):
+    r"""Electronvolt to kJ/mol conversion factor from CODATA 2014"""
+    return x * EV_TO_KJOULEMOL
+
+
+def ev2kcalmol(x):
+    r"""Electronvolt to kcal/mol conversion factor from CODATA 2014"""
+    return x * EV_TO_KCALMOL
+
+
+def hartree2kjoulemol(x):
+    r"""Hartree to kJ/mol conversion factor from CODATA 2014"""
+    return x * HARTREE_TO_KJOULEMOL
+
+
+def hartree2kcalmol(x):
+    r"""Hartree to kJ/mol conversion factor from CODATA 2014"""
+    return x * HARTREE_TO_KCALMOL
+
+
+# Add actual values to docstrings on import
+hartree2ev.__doc__ = str(hartree2ev.__doc__) + f'\n\n1 Hartree = {hartree2ev(1)} eV'
+hartree2kcalmol.__doc__ = str(hartree2kcalmol.__doc__) + f'\n\n1 Hartree = {hartree2kcalmol(1)} kcal/mol'
+hartree2kjoulemol.__doc__ = str(hartree2kjoulemol) + f'\n\n1 Hartree = {hartree2kjoulemol(1)} kJ/mol'
+ev2kjoulemol.__doc__ = str(ev2kjoulemol.__doc__) + f'\n\n1 eV = {ev2kjoulemol(1)} kJ/mol'
+ev2kcalmol.__doc__ = str(ev2kcalmol.__doc__) + f'\n\n1 eV = {ev2kcalmol(1)} kcal/mol'
+mhessian2fconst.__doc__ = str(mhessian2fconst.__doc__) + f'\n\n1 Hartree / (AMU * Angstrom^2) = {ev2kcalmol(1)} mDyne/Angstrom'
+sqrt_mhessian2milliev.__doc__ = str(sqrt_mhessian2milliev.__doc__) + f'\n\n1 sqrt(Hartree / (AMU * Angstrom^2)) = {sqrt_mhessian2milliev(1)} meV'
+sqrt_mhessian2invcm.__doc__ = str(sqrt_mhessian2invcm.__doc__) + f'\n\n1 sqrt(Hartree / (AMU * Angstrom^2)) = {sqrt_mhessian2invcm(1)} cm^-1'
--- a/torchani/utils.py
+++ b/torchani/utils.py
@@ -5,6 +5,7 @@ import math
 import numpy as np
 from collections import defaultdict
 from typing import Tuple, NamedTuple, Optional
+from torchani.units import sqrt_mhessian2invcm, sqrt_mhessian2milliev, mhessian2fconst
 from .nn import SpeciesEnergies


@@ -278,12 +279,18 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
    - MDU (mass deweighted unnormalized)
    - MDN (mass deweighted normalized)

-    MDU modes are orthogonal but not normalized, MDN modes are normalized
-    but not orthogonal. MWN modes are orthonormal, but they correspond
+    MDU modes are not orthogonal, and not normalized,
+    MDN modes are not orthogonal, and normalized.
+    MWN modes are orthonormal, but they correspond
    to mass weighted cartesian coordinates (x' = sqrt(m)x).
    """
-    if unit != 'cm^-1':
-        raise ValueError('Only cm^-1 are supported right now')
+    if unit == 'meV':
+        unit_converter = sqrt_mhessian2milliev
+    elif unit == 'cm^-1':
+        unit_converter = sqrt_mhessian2invcm
+    else:
+        raise ValueError(f'Only meV and cm^-1 are supported right now')
+
    assert hessian.shape[0] == 1, 'Currently only supporting computing one molecule a time'
    # Solving the eigenvalue problem: Hq = w^2 * T q
    # where H is the Hessian matrix, q is the normal coordinates,
@@ -300,8 +307,8 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
    eigenvalues, eigenvectors = torch.symeig(mass_scaled_hessian, eigenvectors=True)
    angular_frequencies = eigenvalues.sqrt()
    frequencies = angular_frequencies / (2 * math.pi)
-    # converting from sqrt(hartree / (amu * angstrom^2)) to cm^-1
-    wavenumbers = frequencies * 17092
+    # converting from sqrt(hartree / (amu * angstrom^2)) to cm^-1 or meV
+    wavenumbers = unit_converter(frequencies)

    # Note that the normal modes are the COLUMNS of the eigenvectors matrix
    mw_normalized = eigenvectors.t()
@@ -310,8 +317,8 @@ def vibrational_analysis(masses, hessian, mode_type='MDU', unit='cm^-1'):
    md_normalized = md_unnormalized * norm_factors.unsqueeze(1)

    rmasses = norm_factors**2  # units are AMU
-    # The conversion factor for Ha/(AMU*A^2) to mDyne/(A*AMU) is 4.3597482
-    fconstants = eigenvalues * rmasses * 4.3597482  # units are mDyne/A
+    # The conversion factor for Ha/(AMU*A^2) to mDyne/(A*AMU) is about 4.3597482
+    fconstants = mhessian2fconst(eigenvalues) * rmasses  # units are mDyne/A

    if mode_type == 'MDN':
        modes = (md_normalized).reshape(frequencies.numel(), -1, 3)