Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
nivren
ICT-CSP
Commits
ca86f720
Unverified
Commit
ca86f720
authored
Aug 24, 2025
by
zcxzcx1
Committed by
GitHub
Aug 24, 2025
Browse files
Add files via upload
parent
b75ed73c
Changes
81
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
37455 additions
and
0 deletions
+37455
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3.h
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3.h
+181
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.cu
...ch/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.cu
+2081
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.h
...nch/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.h
+263
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_pars.h
...-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_pars.h
+32484
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.cpp
...-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.cpp
+400
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.h
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.h
+54
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.cpp
...dparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.cpp
+897
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.h
...3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.h
+118
-0
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/patch_lammps.sh
...bench/3rdparty/SevenNet/sevenn/pair_e3gnn/patch_lammps.sh
+154
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/MF_0.yaml
mace-bench/3rdparty/SevenNet/sevenn/presets/MF_0.yaml
+108
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/base.yaml
mace-bench/3rdparty/SevenNet/sevenn/presets/base.yaml
+91
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/fine_tune.yaml
mace-bench/3rdparty/SevenNet/sevenn/presets/fine_tune.yaml
+80
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/fine_tune_le.yaml
...-bench/3rdparty/SevenNet/sevenn/presets/fine_tune_le.yaml
+83
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/multi_modal.yaml
mace-bench/3rdparty/SevenNet/sevenn/presets/multi_modal.yaml
+106
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/sevennet-0.yaml
mace-bench/3rdparty/SevenNet/sevenn/presets/sevennet-0.yaml
+79
-0
mace-bench/3rdparty/SevenNet/sevenn/presets/sevennet-l3i5.yaml
...bench/3rdparty/SevenNet/sevenn/presets/sevennet-l3i5.yaml
+92
-0
mace-bench/3rdparty/SevenNet/sevenn/scripts/__init__.py
mace-bench/3rdparty/SevenNet/sevenn/scripts/__init__.py
+0
-0
mace-bench/3rdparty/SevenNet/sevenn/scripts/__pycache__/__init__.cpython-310.pyc
...enNet/sevenn/scripts/__pycache__/__init__.cpython-310.pyc
+0
-0
mace-bench/3rdparty/SevenNet/sevenn/scripts/__pycache__/backward_compatibility.cpython-310.pyc
...cripts/__pycache__/backward_compatibility.cpython-310.pyc
+0
-0
mace-bench/3rdparty/SevenNet/sevenn/scripts/backward_compatibility.py
...rdparty/SevenNet/sevenn/scripts/backward_compatibility.py
+184
-0
No files found.
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3.h
0 → 100644
View file @
ca86f720
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS
// Register this class with LAMMPS under the pair_style keyword "d3".
PairStyle(d3, PairD3)
#else

#ifndef LMP_PAIR_D3
#define LMP_PAIR_D3

#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <unordered_map>
#include <cuda_runtime.h>

#include "pair.h"
#include "utils.h"
#include "atom.h"
#include "domain.h"
#include "error.h"
#include "comm.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_extra.h"
#include "pair_d3_pars.h"

// Removed dependencies to STL
// #include <stdlib.h> -> no more C style functions
// #define _USE_MATH_DEFINES -> no predefined constants
// Removed dependencies to LAMMPS
// #include "potential_file_reader.h" -> removed, PotentialFileReader
// #include "memory.h" -> already no dependency for CUDA version

namespace LAMMPS_NS {

// CUDA-accelerated implementation of the Grimme DFT-D3 dispersion
// correction as a LAMMPS pair style.  All large per-atom / per-pair
// arrays below are raw pointers because they are allocated with
// cudaMallocManaged (see the .cu file), not with LAMMPS Memory.
class PairD3 : public Pair {
 public:
    PairD3(class LAMMPS*);
    ~PairD3() override;

    // Main force/energy evaluation entry point (eflag, vflag).
    void compute(int, int) override;
    // Parse pair_style arguments.
    void settings(int, char**) override;
    // Parse pair_coeff arguments (element names -> D3 parameters).
    void coeff(int, char**) override;
    // Per-pair init; returns the cutoff for type pair (i, j).
    double init_one(int i, int j) override;
    void init_style() override;

    // Restart-file support.
    void write_restart(FILE*) override;
    void read_restart(FILE*) override;
    void write_restart_settings(FILE*) override;
    void read_restart_settings(FILE*) override;

 protected:
    virtual void allocate();

    /* ------- Read parameters ------- */
    int find_atomic_number(std::string&);
    int is_int_in_array(int*, int, int);
    void read_r0ab(int*, int);
    void get_limit_in_pars_array(int&, int&, int&, int&);
    void read_c6ab(int*, int);
    // One setter per damping variant; setfuncpar() dispatches on `damping`.
    void setfuncpar_zero();
    void setfuncpar_bj();
    void setfuncpar_zerom();
    void setfuncpar_bjm();
    void setfuncpar();
    /* ------- Read parameters ------- */

    /* ------- Lattice information ------- */
    void set_lattice_repetition_criteria(float, int*);
    void set_lattice_vectors();
    /* ------- Lattice information ------- */

    /* ------- Initialize & Precalculate ------- */
    void load_atom_info();
    void precalculate_tau_array();
    /* ------- Initialize & Precalculate ------- */

    /* ------- Reallocate (when number of atoms changed) ------- */
    void reallocate_arrays();
    /* ------- Reallocate (when number of atoms changed) ------- */

    /* ------- Coordination number ------- */
    void get_coordination_number();
    void get_dC6_dCNij();
    /* ------- Coordination number ------- */

    /* ------- Main workers ------- */
    void get_forces_without_dC6_zero();
    void get_forces_without_dC6_bj();
    void get_forces_without_dC6_zerom();
    void get_forces_without_dC6_bjm();
    void get_forces_without_dC6();
    void get_forces_with_dC6();
    void update(int, int);
    /* ------- Main workers ------- */

    /*--------- Constants ---------*/
    static constexpr int MAX_ELEM = 94;            // maximum of the element number
    static constexpr int MAXC = 5;                 // maximum coordination number references per element
    static constexpr double AU_TO_ANG = 0.52917726;   // conversion factors (atomic unit --> angstrom)
    static constexpr double AU_TO_EV = 27.21138505;   // conversion factors (atomic unit --> eV)
    static constexpr float K1 = 16.0;              // global ad hoc parameters
    static constexpr float K3 = -4.0;              // global ad hoc parameters
    /*--------- Constants ---------*/

    /*--------- Parameters to read ---------*/
    int damping;                    // damping variant code (set in settings())
    std::string functional;         // XC-functional name used to pick parameters
    float* r2r4 = nullptr;          // scale r4/r2 values of the atoms by sqrt(Z)
    float* rcov = nullptr;          // covalent radii
    int* mxc = nullptr;             // How large the grid for c6 interpolation
    float** r0ab = nullptr;         // cut-off radii for all element pairs
    float***** c6ab = nullptr;      // C6 for all element pairs
    float rthr;                     // R^2 distance to cutoff for C calculation
    float cnthr;                    // R^2 distance to cutoff for CN_calculation
    float s6, s8, s18, rs6, rs8, rs18, alp, alp6, alp8, a1, a2;   // parameters for D3
    /*--------- Parameters to read ---------*/

    /*--------- Lattice related values ---------*/
    double* lat_v_1 = nullptr;      // lattice coordination vector
    double* lat_v_2 = nullptr;      // lattice coordination vector
    double* lat_v_3 = nullptr;      // lattice coordination vector
    int* rep_vdw = nullptr;         // repetition of cell for calculating D3
    int* rep_cn = nullptr;          // repetition of cell for calculating
    double** sigma = nullptr;       // virial pressure on cell
    /*--------- Lattice related values ---------*/

    /*--------- Per-atom values/arrays ---------*/
    double* cn = nullptr;           // Coordination numbers
    float** x = nullptr;            // Positions
    double** f = nullptr;           // Forces
    double* dc6i = nullptr;         // dC6i(iat) saves dE_dsp/dCN(iat)
    /*--------- Per-atom values/arrays ---------*/

    /*--------- Per-pair values/arrays ---------*/
    float* c6_ij_tot = nullptr;
    float* dc6_iji_tot = nullptr;
    float* dc6_ijj_tot = nullptr;
    /*--------- Per-pair values/arrays ---------*/

    /*---------- Global values ---------*/
    int n_save;                     // to check whether the number of atoms has changed
    float disp_total;               // Dispersion energy
    /*---------- Global values ---------*/

    /*--------- For loop over tau (translation of cell) ---------*/
    float**** tau_vdw = nullptr;
    float**** tau_cn = nullptr;
    int* tau_idx_vdw = nullptr;
    int* tau_idx_cn = nullptr;
    int tau_idx_vdw_total_size;
    int tau_idx_cn_total_size;
    /*--------- For loop over tau (translation of cell) ---------*/

    /*--------- For cuda memory transfer (pointerized) ---------*/
    int* atomtype;
    double* disp;
    /*--------- For cuda memory transfer (pointerized) ---------*/
};

}

#endif // LMP_PAIR_D3
#endif // PAIR_CLASS
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.cu
0 → 100644
View file @
ca86f720
/*
This code is a skeleton of the LAMMPS pair_style d3 accelerated by CUDA.
All dependencies on LAMMPS have been removed.
The input and output variables are named based on the LAMMPS variables as much as possible.
*/
#include "pair_d3_for_ase.h"
/* --------- Macros for CUDA error handling --------- */

// Start a cudaEvent-based wall-clock timer.  Declares `start`/`stop` in the
// enclosing scope, so use at most once per scope and pair with STOP_CUDA_TIMER.
#define START_CUDA_TIMER() \
    cudaEvent_t start, stop; \
    cudaEventCreate(&start); \
    cudaEventCreate(&stop); \
    cudaEventRecord(start);

// Stop the timer started by START_CUDA_TIMER and print the elapsed time.
// Declares `msec`, so it shares the one-use-per-scope restriction above.
#define STOP_CUDA_TIMER(tag) \
    cudaEventRecord(stop); \
    cudaEventSynchronize(stop); \
    float msec = 0; \
    cudaEventElapsedTime(&msec, start, stop); \
    printf("Elapsed time for %s: %f ms\n", tag, msec); \
    cudaEventDestroy(start); \
    cudaEventDestroy(stop);

// Wrap a CUDA runtime call; on failure print file/line and abort the process.
#define CHECK_CUDA(call) do { \
    cudaError_t status_ = call; \
    if (status_ != cudaSuccess) { \
        fprintf(stderr, "CUDA Error (%s:%d) -> %s: %s\n", __FILE__, __LINE__, \
            cudaGetErrorName(status_), cudaGetErrorString(status_)); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

// Synchronize the device and abort on any pending asynchronous error
// (e.g. from a previously launched kernel).
#define CHECK_CUDA_ERROR() do { \
    cudaDeviceSynchronize(); \
    cudaError_t status_ = cudaGetLastError(); \
    if (status_ != cudaSuccess) { \
        fprintf(stderr, "CUDA Error (%s:%d) -> %s: %s\n", __FILE__, __LINE__, \
            cudaGetErrorName(status_), cudaGetErrorString(status_)); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

// Abort unless at least one CUDA-capable device is visible.
#define CHECK_CUDA_DEVICES() do { \
    int deviceCount = 0; \
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0) { \
        fprintf(stderr, "CUDA Error (%s:%d) -> No CUDA devices found\n", \
            __FILE__, __LINE__); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

/* --------- Macros for CUDA error handling --------- */
/* --------- Math functions for CUDA compatibility --------- */

// Invert the linearized lower-triangular pair index: given
// linij = i*(i+1)/2 + j (with j <= i), recover (i, j).
// Used to unroll the triangular double loop over atom pairs.
inline __host__ __device__ void ij_at_linij(int linij, int& i, int& j) {
    const int row = static_cast<int>((sqrt(1 + 8 * linij) - 1) / 2);
    i = row;
    j = linij - row * (row + 1) / 2;
}
// Squared Euclidean length of a 3-vector (float version).
// Mirrors MathExtra::lensq3 from LAMMPS.
inline __host__ __device__ float lensq3(const float* v) {
    float acc = v[0] * v[0];
    acc += v[1] * v[1];
    acc += v[2] * v[2];
    return acc;
}
// Cross product ans = v1 x v2 for 3-vectors.
// `ans` must not alias v1 or v2 less than component-wise: results are
// staged in locals, so even ans == v1 or ans == v2 is safe.
inline void cross3(const double* v1, const double* v2, double* ans) {
    const double ax = v1[1] * v2[2] - v1[2] * v2[1];
    const double ay = v1[2] * v2[0] - v1[0] * v2[2];
    const double az = v1[0] * v2[1] - v1[1] * v2[0];
    ans[0] = ax;
    ans[1] = ay;
    ans[2] = az;
}
// Dot product of two 3-vectors.
inline double dot3(const double* v1, const double* v2) {
    const double xx = v1[0] * v2[0];
    const double yy = v1[1] * v2[1];
    const double zz = v1[2] * v2[2];
    return xx + yy + zz;
}
// Euclidean length of a 3-vector.
inline double len3(const double* v) {
    const double sq = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
    return sqrt(sq);
}

/* --------- Math functions for CUDA compatibility --------- */
/* ----------------------------------------------------------------------
   Constructor (Required)
------------------------------------------------------------------------- */
PairD3::PairD3() {
    // Arrays are created lazily in allocate(); nothing is owned yet.
    allocated = 0;
}
/* ----------------------------------------------------------------------
   Destructor (Required)

   Releases every cudaMallocManaged buffer created in allocate() /
   reallocate_arrays(), mirroring the allocation structure exactly
   (ragged multi-level arrays are freed leaf-first).
------------------------------------------------------------------------- */
PairD3::~PairD3() {
    if (allocated) {
        // NOTE(review): sizes are re-derived from the current atom counts
        // and rep_* values; this assumes they still match the counts used
        // for the most recent (re)allocation — confirm against
        // reallocate_arrays().
        int n = atom->natoms;
        int np1 = atom->ntypes + 1;
        int vdw_range_x = 2 * rep_vdw[0] + 1;
        int vdw_range_y = 2 * rep_vdw[1] + 1;
        int vdw_range_z = 2 * rep_vdw[2] + 1;
        int cn_range_x = 2 * rep_cn[0] + 1;
        int cn_range_y = 2 * rep_cn[1] + 1;
        int cn_range_z = 2 * rep_cn[2] + 1;

        // setflag/cutsq are intentionally not freed here (kept commented
        // out, matching the allocation side).
        //for (int i = 0; i < np1; i++) { cudaFree(setflag[i]); }; cudaFree(setflag);
        //for (int i = 0; i < np1; i++) { cudaFree(cutsq[i]); }; cudaFree(cutsq);

        // Per-element parameter tables.
        cudaFree(r2r4);
        cudaFree(rcov);
        cudaFree(mxc);
        for (int i = 0; i < np1; i++) { cudaFree(r0ab[i]); }; cudaFree(r0ab);

        // 5-level ragged C6 reference table: free from the innermost level up.
        for (int i = 0; i < np1; i++) {
            for (int j = 0; j < np1; j++) {
                for (int k = 0; k < MAXC; k++) {
                    for (int l = 0; l < MAXC; l++) {
                        cudaFree(c6ab[i][j][k][l]);
                    }
                    cudaFree(c6ab[i][j][k]);
                }
                cudaFree(c6ab[i][j]);
            }
            cudaFree(c6ab[i]);
        }
        cudaFree(c6ab);

        // Lattice vectors and cell-repetition counts.
        cudaFree(lat_v_1);
        cudaFree(lat_v_2);
        cudaFree(lat_v_3);
        cudaFree(rep_vdw);
        cudaFree(rep_cn);

        // Per-atom arrays.
        cudaFree(cn);
        for (int i = 0; i < n; i++) { cudaFree(x[i]); }; cudaFree(x);
        cudaFree(dc6i);
        for (int i = 0; i < n; i++) { cudaFree(f[i]); }; cudaFree(f);

        // 3x3 virial tensor.
        for (int i = 0; i < 3; i++) { cudaFree(sigma[i]); }; cudaFree(sigma);

        // Per-pair arrays (triangular pair list).
        cudaFree(dc6_iji_tot);
        cudaFree(dc6_ijj_tot);
        cudaFree(c6_ij_tot);

        // Translation-vector (tau) grids for the vdW and CN cutoffs.
        for (int i = 0; i < vdw_range_x; i++) {
            for (int j = 0; j < vdw_range_y; j++) {
                for (int k = 0; k < vdw_range_z; k++) {
                    cudaFree(tau_vdw[i][j][k]);
                }
                cudaFree(tau_vdw[i][j]);
            }
            cudaFree(tau_vdw[i]);
        }
        cudaFree(tau_vdw);
        for (int i = 0; i < cn_range_x; i++) {
            for (int j = 0; j < cn_range_y; j++) {
                for (int k = 0; k < cn_range_z; k++) {
                    cudaFree(tau_cn[i][j][k]);
                }
                cudaFree(tau_cn[i][j]);
            }
            cudaFree(tau_cn[i]);
        }
        cudaFree(tau_cn);
        cudaFree(tau_idx_vdw);
        cudaFree(tau_idx_cn);

        // Buffers used for host<->device transfer.
        cudaFree(atomtype);
        cudaFree(disp);
    }
}
/* ----------------------------------------------------------------------
   Allocate all arrays (Required)

   Everything is allocated with cudaMallocManaged so the same pointers are
   usable from host and device code.  Sizes are derived from the current
   atom/type counts; the destructor must mirror this structure.
------------------------------------------------------------------------- */
void PairD3::allocate() {
    CHECK_CUDA_DEVICES();
    allocated = 1;

    /* atom->ntypes : # of elements; element index starts from 1 */
    int n = atom->natoms;
    int np1 = atom->ntypes + 1;
    n_save = n;
    np1_save = np1;

    //cudaMallocManaged(&setflag, np1 * sizeof(int*)); for (int i = 0; i < np1; i++) { cudaMallocManaged(&setflag[i], np1 * sizeof(int)); }
    //cudaMallocManaged(&cutsq, np1 * sizeof(double*)); for (int i = 0; i < np1; i++) { cudaMallocManaged(&cutsq[i], np1 * sizeof(double)); }

    // Per-element parameter tables (indexed 1..ntypes).
    cudaMallocManaged(&r2r4, np1 * sizeof(float));
    cudaMallocManaged(&rcov, np1 * sizeof(float));
    cudaMallocManaged(&mxc, np1 * sizeof(int));
    cudaMallocManaged(&r0ab, np1 * sizeof(float*));
    for (int i = 0; i < np1; i++) {
        cudaMallocManaged(&r0ab[i], np1 * sizeof(float));
    }

    // 5-level C6 reference table: [type][type][MAXC][MAXC][{c6, cn_i, cn_j}].
    cudaMallocManaged(&c6ab, np1 * sizeof(float****));
    for (int i = 0; i < np1; i++) {
        cudaMallocManaged(&c6ab[i], np1 * sizeof(float***));
        for (int j = 0; j < np1; j++) {
            cudaMallocManaged(&c6ab[i][j], MAXC * sizeof(float**));
            for (int k = 0; k < MAXC; k++) {
                cudaMallocManaged(&c6ab[i][j][k], MAXC * sizeof(float*));
                for (int l = 0; l < MAXC; l++) {
                    cudaMallocManaged(&c6ab[i][j][k][l], 3 * sizeof(float));
                }
            }
        }
    }

    // FIX: lat_v_1/2/3 are double* (see pair_d3.h), but were previously
    // allocated with 3 * sizeof(float) — a 12-byte buffer backing 24 bytes
    // of doubles, i.e. a heap overflow as soon as a full vector is written.
    cudaMallocManaged(&lat_v_1, 3 * sizeof(double));
    cudaMallocManaged(&lat_v_2, 3 * sizeof(double));
    cudaMallocManaged(&lat_v_3, 3 * sizeof(double));
    cudaMallocManaged(&rep_vdw, 3 * sizeof(int));
    cudaMallocManaged(&rep_cn, 3 * sizeof(int));

    // 3x3 virial tensor.
    cudaMallocManaged(&sigma, 3 * sizeof(double*));
    for (int i = 0; i < 3; i++) {
        cudaMallocManaged(&sigma[i], 3 * sizeof(double));
    }

    // Per-atom arrays.
    cudaMallocManaged(&cn, n * sizeof(double));
    cudaMallocManaged(&x, n * sizeof(float*));
    for (int i = 0; i < n; i++) {
        cudaMallocManaged(&x[i], 3 * sizeof(float));
    }
    cudaMallocManaged(&dc6i, n * sizeof(double));
    cudaMallocManaged(&f, n * sizeof(double*));
    for (int i = 0; i < n; i++) {
        cudaMallocManaged(&f[i], 3 * sizeof(double));
    }

    // Initialization
    // Initialize for lattice -> set_lattice_vectors()
    // Sentinel values mark the tau arrays / repetition counts as "not yet
    // computed" so the first compute() pass builds them.
    tau_idx_vdw_total_size = -1;
    tau_idx_cn_total_size = -1;
    for (int i = 0; i < 3; i++) {
        rep_vdw[i] = -1;
        rep_cn[i] = -1;
    }

    //for (int i = 1; i < np1; i++) {
    //    for (int j = 1; j < np1; j++) {
    //        setflag[i][j] = 0;
    //    }
    //}

    // Mark every C6 reference entry as unset; read_c6ab() fills real values.
    for (int idx1 = 0; idx1 < np1; idx1++) {
        for (int idx2 = 0; idx2 < np1; idx2++) {
            for (int idx3 = 0; idx3 < MAXC; idx3++) {
                for (int idx4 = 0; idx4 < MAXC; idx4++) {
                    for (int idx5 = 0; idx5 < 3; idx5++) {
                        c6ab[idx1][idx2][idx3][idx4][idx5] = -1;
                    }
                }
            }
        }
    }

    // Per-pair arrays over the triangular (i <= j) pair list.
    int n_ij_combination = n * (n + 1) / 2;
    cudaMallocManaged(&dc6_iji_tot, n_ij_combination * sizeof(float));
    cudaMallocManaged(&dc6_ijj_tot, n_ij_combination * sizeof(float));
    cudaMallocManaged(&c6_ij_tot, n_ij_combination * sizeof(float));

    // Buffers used for host<->device transfer.
    cudaMallocManaged(&atomtype, n * sizeof(int));
    cudaMallocManaged(&disp, sizeof(double));
}
/* ----------------------------------------------------------------------
Settings : read from pair_style (Required) -> pair_style d3 vdw_sq cn_sq damp_name func_name
------------------------------------------------------------------------- */
void
PairD3
::
settings
(
double
vdw_sq
,
double
cn_sq
,
std
::
string
damp_name
,
std
::
string
func_name
)
{
rthr
=
vdw_sq
;
cnthr
=
cn_sq
;
std
::
map
<
std
::
string
,
int
>
commandMap
=
{
{
"damp_zero"
,
0
},
{
"damp_bj"
,
1
},
{
"damp_zerom"
,
2
},
{
"damp_bjm"
,
3
},
};
if
(
commandMap
.
find
(
damp_name
)
==
commandMap
.
end
())
{
error
->
all
(
FLERR
,
"Unknown damping function"
);
}
damping
=
commandMap
[
damp_name
];
functional
=
func_name
;
setfuncpar
();
}
/* ----------------------------------------------------------------------
   finds atomic number (used in PairD3::coeff)

   Normalizes `key` in place to the two-character lowercase form used by
   the lookup table ("h ", "he", ...), then returns the 1-based atomic
   number, or -1 when the symbol is not recognized (elements up to Pu).
------------------------------------------------------------------------- */
int PairD3::find_atomic_number(std::string& key) {
    std::transform(key.begin(), key.end(), key.begin(), ::tolower);
    if (key.length() == 1) { key += " "; }
    key.resize(2);

    // Element symbols padded to width 2; index + 1 == atomic number.
    std::vector<std::string> element_table = {
        "h ", "he",
        "li", "be", "b ", "c ", "n ", "o ", "f ", "ne",
        "na", "mg", "al", "si", "p ", "s ", "cl", "ar",
        "k ", "ca", "sc", "ti", "v ", "cr", "mn", "fe", "co", "ni", "cu",
        "zn", "ga", "ge", "as", "se", "br", "kr",
        "rb", "sr", "y ", "zr", "nb", "mo", "tc", "ru", "rh", "pd", "ag",
        "cd", "in", "sn", "sb", "te", "i ", "xe",
        "cs", "ba", "la", "ce", "pr", "nd", "pm", "sm", "eu", "gd", "tb",
        "dy", "ho", "er", "tm", "yb", "lu", "hf", "ta", "w ", "re", "os",
        "ir", "pt", "au", "hg", "tl", "pb", "bi", "po", "at", "rn",
        "fr", "ra", "ac", "th", "pa", "u ", "np", "pu"
    };

    const auto match = std::find(element_table.begin(), element_table.end(), key);
    if (match != element_table.end()) {
        return static_cast<int>(match - element_table.begin()) + 1;
    }

    // if not the case
    return -1;
}
/* ----------------------------------------------------------------------
   Check whether an integer value in an integer array (used in PairD3::coeff)

   Returns the index of the first occurrence of `value` in arr[0..size),
   or -1 when it is absent.
------------------------------------------------------------------------- */
int PairD3::is_int_in_array(int arr[], int size, int value) {
    int idx = 0;
    while (idx < size) {
        if (arr[idx] == value) { return idx; }   // returns the index
        ++idx;
    }
    return -1;
}
/* ----------------------------------------------------------------------
   Read r0ab values from the table (used in PairD3::coeff)

   Fills r0ab[1..ntypes][1..ntypes] with pair cutoff radii (converted from
   angstrom to atomic units) for the elements selected in atomic_numbers.
------------------------------------------------------------------------- */
void PairD3::read_r0ab(int* atomic_numbers, int ntypes) {
    // `static`: R0AB_TABLE expands to a 94x94 constant initializer; keeping
    // it a plain local copied ~70 KB onto the stack on every call.
    static const double r0ab_table[94][94] = R0AB_TABLE;
    for (int i = 1; i <= ntypes; i++) {
        for (int j = 1; j <= ntypes; j++) {
            // atomic_numbers is 0-based over types; the table is 0-based
            // over atomic numbers, r0ab is 1-based over types.
            r0ab[i][j] = r0ab_table[atomic_numbers[i - 1] - 1][atomic_numbers[j - 1] - 1] / AU_TO_ANG;
        }
    }
}
/* ----------------------------------------------------------------------
   Get atom pair indices and grid indices (used in PairD3::read_c6ab)

   The C6 reference table encodes (atomic number, CN-grid index) pairs as
   a single 1-based integer: encoded = (grid - 1) * 100 + atomic_number.
   This decodes idx_atom_1/idx_atom_2 in place and writes the 1-based
   grid indices into idx_i/idx_j.
------------------------------------------------------------------------- */
void PairD3::get_limit_in_pars_array(int& idx_atom_1, int& idx_atom_2, int& idx_i, int& idx_j) {
    const int shift = 100;
    const int a1_zero = idx_atom_1 - 1;
    const int a2_zero = idx_atom_2 - 1;
    idx_i = a1_zero / shift + 1;
    idx_j = a2_zero / shift + 1;
    idx_atom_1 = a1_zero % shift + 1;
    idx_atom_2 = a2_zero % shift + 1;

    // the code above replaces the code below
    //idx_i = 1;
    //idx_j = 1;
    //int shift = 100;
    //while (idx_atom_1 > shift) { idx_atom_1 -= shift; idx_i++; }
    //while (idx_atom_2 > shift) { idx_atom_2 -= shift; idx_j++; }
}
/* ----------------------------------------------------------------------
   Read c6ab values from the table (used in PairD3::coeff)

   Scans the full 32385-row C6 reference list and copies the rows whose
   element pair is present in `atomic_numbers` into the 5-level c6ab
   table (symmetrically, swapping the CN references for the transposed
   entry).  Also records in mxc[] the largest CN-grid index seen per type.
------------------------------------------------------------------------- */
void PairD3::read_c6ab(int* atomic_numbers, int ntypes) {
    for (int i = 1; i <= ntypes; i++) { mxc[i] = 0; }

    int grid_i = 0, grid_j = 0;
    // `static`: C6AB_TABLE expands to 32385*5 doubles (~1.3 MB).  As a
    // plain local this blew past typical 1 MB thread stacks and copied
    // the whole table on every call.
    static const double c6ab_table[32385][5] = C6AB_TABLE;
    for (int i = 0; i < 32385; i++) {
        const double ref_c6 = c6ab_table[i][0];
        // Columns 1/2 carry (grid, atomic number) packed per
        // get_limit_in_pars_array(); decode before the membership test.
        int atom_number_1 = static_cast<int>(c6ab_table[i][1]);
        int atom_number_2 = static_cast<int>(c6ab_table[i][2]);
        get_limit_in_pars_array(atom_number_1, atom_number_2, grid_i, grid_j);
        // Skip rows for elements not present in this simulation.
        const int idx_atom_1 = is_int_in_array(atomic_numbers, ntypes, atom_number_1);
        if (idx_atom_1 < 0) { continue; }
        const int idx_atom_2 = is_int_in_array(atomic_numbers, ntypes, atom_number_2);
        if (idx_atom_2 < 0) { continue; }
        const double ref_cn1 = c6ab_table[i][3];
        const double ref_cn2 = c6ab_table[i][4];

        // Track the per-type CN interpolation grid extent (1-based types).
        mxc[idx_atom_1 + 1] = std::max(mxc[idx_atom_1 + 1], grid_i);
        mxc[idx_atom_2 + 1] = std::max(mxc[idx_atom_2 + 1], grid_j);

        // Store the entry and its transpose (CN references swapped).
        c6ab[idx_atom_1 + 1][idx_atom_2 + 1][grid_i - 1][grid_j - 1][0] = ref_c6;
        c6ab[idx_atom_1 + 1][idx_atom_2 + 1][grid_i - 1][grid_j - 1][1] = ref_cn1;
        c6ab[idx_atom_1 + 1][idx_atom_2 + 1][grid_i - 1][grid_j - 1][2] = ref_cn2;
        c6ab[idx_atom_2 + 1][idx_atom_1 + 1][grid_j - 1][grid_i - 1][0] = ref_c6;
        c6ab[idx_atom_2 + 1][idx_atom_1 + 1][grid_j - 1][grid_i - 1][1] = ref_cn2;
        c6ab[idx_atom_2 + 1][idx_atom_1 + 1][grid_j - 1][grid_i - 1][2] = ref_cn1;
    }
}
/* ----------------------------------------------------------------------
   Set functional parameters (used in PairD3::coeff)

   Zero-damping (original D3) parameter set: fills rs6/s18 (and where the
   functional requires it, s6/rs18) for the functional named in
   `functional`.  Unknown names fall through to the default and abort.
------------------------------------------------------------------------- */
void PairD3::setfuncpar_zero() {
    // Common defaults for zero damping.
    s6 = 1.0;
    alp = 14.0;
    rs18 = 1.0;   // default def2-QZVP (almost basis set limit)

    // Functional name -> case label.  NOTE: operator[] inserts 0 for an
    // unknown name, which lands in the error default below.
    std::unordered_map<std::string, int> commandMap = {
        {"slater-dirac-exchange", 1}, {"b-lyp", 2}, {"b-p", 3}, {"b97-d", 4},
        {"revpbe", 5}, {"pbe", 6}, {"pbesol", 7}, {"rpw86-pbe", 8},
        {"rpbe", 9}, {"tpss", 10}, {"b3-lyp", 11}, {"pbe0", 12},
        {"hse06", 13}, {"revpbe38", 14}, {"pw6b95", 15}, {"tpss0", 16},
        {"b2-plyp", 17}, {"pwpb95", 18}, {"b2gp-plyp", 19}, {"ptpss", 20},
        {"hf", 21}, {"mpwlyp", 22}, {"bpbe", 23}, {"bh-lyp", 24},
        {"tpssh", 25}, {"pwb6k", 26}, {"b1b95", 27}, {"bop", 28},
        {"o-lyp", 29}, {"o-pbe", 30}, {"ssb", 31}, {"revssb", 32},
        {"otpss", 33}, {"b3pw91", 34}, {"revpbe0", 35}, {"pbe38", 36},
        {"mpw1b95", 37}, {"mpwb1k", 38}, {"bmk", 39}, {"cam-b3lyp", 40},
        {"lc-wpbe", 41}, {"m05", 42}, {"m052x", 43}, {"m06l", 44},
        {"m06", 45}, {"m062x", 46}, {"m06hf", 47}, {"hcth120", 48}
    };
    int commandCode = commandMap[functional];
    switch (commandCode) {
    case 1: rs6 = 0.999; s18 = -1.957; rs18 = 0.697; break;
    case 2: rs6 = 1.094; s18 = 1.682; break;
    case 3: rs6 = 1.139; s18 = 1.683; break;
    case 4: rs6 = 0.892; s18 = 0.909; break;
    case 5: rs6 = 0.923; s18 = 1.010; break;
    case 6: rs6 = 1.217; s18 = 0.722; break;
    case 7: rs6 = 1.345; s18 = 0.612; break;
    case 8: rs6 = 1.224; s18 = 0.901; break;
    case 9: rs6 = 0.872; s18 = 0.514; break;
    case 10: rs6 = 1.166; s18 = 1.105; break;
    case 11: rs6 = 1.261; s18 = 1.703; break;
    case 12: rs6 = 1.287; s18 = 0.928; break;
    case 13: rs6 = 1.129; s18 = 0.109; break;
    case 14: rs6 = 1.021; s18 = 0.862; break;
    case 15: rs6 = 1.532; s18 = 0.862; break;
    case 16: rs6 = 1.252; s18 = 1.242; break;
    // Double hybrids also scale s6 below 1.
    case 17: rs6 = 1.427; s18 = 1.022; s6 = 0.64; break;
    case 18: rs6 = 1.557; s18 = 0.705; s6 = 0.82; break;
    case 19: rs6 = 1.586; s18 = 0.760; s6 = 0.56; break;
    case 20: rs6 = 1.541; s18 = 0.879; s6 = 0.75; break;
    case 21: rs6 = 1.158; s18 = 1.746; break;
    case 22: rs6 = 1.239; s18 = 1.098; break;
    case 23: rs6 = 1.087; s18 = 2.033; break;
    case 24: rs6 = 1.370; s18 = 1.442; break;
    case 25: rs6 = 1.223; s18 = 1.219; break;
    case 26: rs6 = 1.660; s18 = 0.550; break;
    case 27: rs6 = 1.613; s18 = 1.868; break;
    case 28: rs6 = 0.929; s18 = 1.975; break;
    case 29: rs6 = 0.806; s18 = 1.764; break;
    case 30: rs6 = 0.837; s18 = 2.055; break;
    case 31: rs6 = 1.215; s18 = 0.663; break;
    case 32: rs6 = 1.221; s18 = 0.560; break;
    case 33: rs6 = 1.128; s18 = 1.494; break;
    case 34: rs6 = 1.176; s18 = 1.775; break;
    case 35: rs6 = 0.949; s18 = 0.792; break;
    case 36: rs6 = 1.333; s18 = 0.998; break;
    case 37: rs6 = 1.605; s18 = 1.118; break;
    case 38: rs6 = 1.671; s18 = 1.061; break;
    case 39: rs6 = 1.931; s18 = 2.168; break;
    case 40: rs6 = 1.378; s18 = 1.217; break;
    case 41: rs6 = 1.355; s18 = 1.279; break;
    case 42: rs6 = 1.373; s18 = 0.595; break;
    // Minnesota functionals: dispersion is built in, so s18 = 0.
    case 43: rs6 = 1.417; s18 = 0.000; break;
    case 44: rs6 = 1.581; s18 = 0.000; break;
    case 45: rs6 = 1.325; s18 = 0.000; break;
    case 46: rs6 = 1.619; s18 = 0.000; break;
    case 47: rs6 = 1.446; s18 = 0.000; break;
    /* DFTB3(zeta = 4.0), old deprecated parameters; case ("dftb3"); rs6 = 1.235; s18 = 0.673; */
    case 48: rs6 = 1.221; s18 = 1.206; break;
    default:
        error->all(FLERR, "Functional name unknown");
        break;
    }
}
void
PairD3
::
setfuncpar_bj
()
{
s6
=
1.0
;
alp
=
14.0
;
std
::
unordered_map
<
std
::
string
,
int
>
commandMap
=
{
{
"b-p"
,
1
},
{
"b-lyp"
,
2
},
{
"revpbe"
,
3
},
{
"rpbe"
,
4
},
{
"b97-d"
,
5
},
{
"pbe"
,
6
},
{
"rpw86-pbe"
,
7
},
{
"b3-lyp"
,
8
},
{
"tpss"
,
9
},
{
"hf"
,
10
},
{
"tpss0"
,
11
},
{
"pbe0"
,
12
},
{
"hse06"
,
13
},
{
"revpbe38"
,
14
},
{
"pw6b95"
,
15
},
{
"b2-plyp"
,
16
},
{
"dsd-blyp"
,
17
},
{
"dsd-blyp-fc"
,
18
},
{
"bop"
,
19
},
{
"mpwlyp"
,
20
},
{
"o-lyp"
,
21
},
{
"pbesol"
,
22
},
{
"bpbe"
,
23
},
{
"opbe"
,
24
},
{
"ssb"
,
25
},
{
"revssb"
,
26
},
{
"otpss"
,
27
},
{
"b3pw91"
,
28
},
{
"bh-lyp"
,
29
},
{
"revpbe0"
,
30
},
{
"tpssh"
,
31
},
{
"mpw1b95"
,
32
},
{
"pwb6k"
,
33
},
{
"b1b95"
,
34
},
{
"bmk"
,
35
},
{
"cam-b3lyp"
,
36
},
{
"lc-wpbe"
,
37
},
{
"b2gp-plyp"
,
38
},
{
"ptpss"
,
39
},
{
"pwpb95"
,
40
},
{
"hf/mixed"
,
41
},
{
"hf/sv"
,
42
},
{
"hf/minis"
,
43
},
{
"b3-lyp/6-31gd"
,
44
},
{
"hcth120"
,
45
},
{
"pw1pw"
,
46
},
{
"pwgga"
,
47
},
{
"hsesol"
,
48
},
{
"hf3c"
,
49
},
{
"hf3cv"
,
50
},
{
"pbeh3c"
,
51
},
{
"pbeh-3c"
,
52
},
{
"wb97m"
,
53
}
};
int
commandCode
=
commandMap
[
functional
];
switch
(
commandCode
)
{
case
1
:
rs6
=
0.3946
;
s18
=
3.2822
;
rs18
=
4.8516
;
break
;
case
2
:
rs6
=
0.4298
;
s18
=
2.6996
;
rs18
=
4.2359
;
break
;
case
3
:
rs6
=
0.5238
;
s18
=
2.3550
;
rs18
=
3.5016
;
break
;
case
4
:
rs6
=
0.1820
;
s18
=
0.8318
;
rs18
=
4.0094
;
break
;
case
5
:
rs6
=
0.5545
;
s18
=
2.2609
;
rs18
=
3.2297
;
break
;
case
6
:
rs6
=
0.4289
;
s18
=
0.7875
;
rs18
=
4.4407
;
break
;
case
7
:
rs6
=
0.4613
;
s18
=
1.3845
;
rs18
=
4.5062
;
break
;
case
8
:
rs6
=
0.3981
;
s18
=
1.9889
;
rs18
=
4.4211
;
break
;
case
9
:
rs6
=
0.4535
;
s18
=
1.9435
;
rs18
=
4.4752
;
break
;
case
10
:
rs6
=
0.3385
;
s18
=
0.9171
;
rs18
=
2.8830
;
break
;
case
11
:
rs6
=
0.3768
;
s18
=
1.2576
;
rs18
=
4.5865
;
break
;
case
12
:
rs6
=
0.4145
;
s18
=
1.2177
;
rs18
=
4.8593
;
break
;
case
13
:
rs6
=
0.383
;
s18
=
2.310
;
rs18
=
5.685
;
break
;
case
14
:
rs6
=
0.4309
;
s18
=
1.4760
;
rs18
=
3.9446
;
break
;
case
15
:
rs6
=
0.2076
;
s18
=
0.7257
;
rs18
=
6.3750
;
break
;
case
16
:
rs6
=
0.3065
;
s18
=
0.9147
;
rs18
=
5.0570
;
break
;
s6
=
0.64
;
case
17
:
rs6
=
0.0000
;
s18
=
0.2130
;
rs18
=
6.0519
;
s6
=
0.50
;
break
;
case
18
:
rs6
=
0.0009
;
s18
=
0.2112
;
rs18
=
5.9807
;
s6
=
0.50
;
break
;
case
19
:
rs6
=
0.4870
;
s18
=
3.2950
;
rs18
=
3.5043
;
break
;
case
20
:
rs6
=
0.4831
;
s18
=
2.0077
;
rs18
=
4.5323
;
break
;
case
21
:
rs6
=
0.5299
;
s18
=
2.6205
;
rs18
=
2.8065
;
break
;
case
22
:
rs6
=
0.4466
;
s18
=
2.9491
;
rs18
=
6.1742
;
break
;
case
23
:
rs6
=
0.4567
;
s18
=
4.0728
;
rs18
=
4.3908
;
break
;
case
24
:
rs6
=
0.5512
;
s18
=
3.3816
;
rs18
=
2.9444
;
break
;
case
25
:
rs6
=
-
0.0952
;
s18
=
-
0.1744
;
rs18
=
5.2170
;
break
;
case
26
:
rs6
=
0.4720
;
s18
=
0.4389
;
rs18
=
4.0986
;
break
;
case
27
:
rs6
=
0.4634
;
s18
=
2.7495
;
rs18
=
4.3153
;
break
;
case
28
:
rs6
=
0.4312
;
s18
=
2.8524
;
rs18
=
4.4693
;
break
;
case
29
:
rs6
=
0.2793
;
s18
=
1.0354
;
rs18
=
4.9615
;
break
;
case
30
:
rs6
=
0.4679
;
s18
=
1.7588
;
rs18
=
3.7619
;
break
;
case
31
:
rs6
=
0.4529
;
s18
=
2.2382
;
rs18
=
4.6550
;
break
;
case
32
:
rs6
=
0.1955
;
s18
=
1.0508
;
rs18
=
6.4177
;
break
;
case
33
:
rs6
=
0.1805
;
s18
=
0.9383
;
rs18
=
7.7627
;
break
;
case
34
:
rs6
=
0.2092
;
s18
=
1.4507
;
rs18
=
5.5545
;
break
;
case
35
:
rs6
=
0.1940
;
s18
=
2.0860
;
rs18
=
5.9197
;
break
;
case
36
:
rs6
=
0.3708
;
s18
=
2.0674
;
rs18
=
5.4743
;
break
;
case
37
:
rs6
=
0.3919
;
s18
=
1.8541
;
rs18
=
5.0897
;
break
;
case
38
:
rs6
=
0.0000
;
s18
=
0.2597
;
rs18
=
6.3332
;
s6
=
0.560
;
break
;
case
39
:
rs6
=
0.0000
;
s18
=
0.2804
;
rs18
=
6.5745
;
s6
=
0.750
;
break
;
case
40
:
rs6
=
0.0000
;
s18
=
0.2904
;
rs18
=
7.3141
;
s6
=
0.820
;
break
;
// special HF / DFT with eBSSE correction;
case
41
:
rs6
=
0.5607
;
s18
=
3.9027
;
rs18
=
4.5622
;
break
;
case
42
:
rs6
=
0.4249
;
s18
=
2.1849
;
rs18
=
4.2783
;
break
;
case
43
:
rs6
=
0.1702
;
s18
=
0.9841
;
rs18
=
3.8506
;
break
;
case
44
:
rs6
=
0.5014
;
s18
=
4.0672
;
rs18
=
4.8409
;
break
;
case
45
:
rs6
=
0.3563
;
s18
=
1.0821
;
rs18
=
4.3359
;
break
;
/* DFTB3 old, deprecated parameters : ;
* case ("dftb3"); rs6 = 0.7461; s18 = 3.209; rs18 = 4.1906;
* special SCC - DFTB parametrization;
* full third order DFTB, self consistent charges, hydrogen pair damping with; exponent 4.2;
*/
case
46
:
rs6
=
0.3807
;
s18
=
2.3363
;
rs18
=
5.8844
;
break
;
case
47
:
rs6
=
0.2211
;
s18
=
2.6910
;
rs18
=
6.7278
;
break
;
case
48
:
rs6
=
0.4650
;
s18
=
2.9215
;
rs18
=
6.2003
;
break
;
// special HF - D3 - gCP - SRB / MINIX parametrization;
case
49
:
rs6
=
0.4171
;
s18
=
0.8777
;
rs18
=
2.9149
;
break
;
// special HF - D3 - gCP - SRB2 / ECP - 2G parametrization;
case
50
:
rs6
=
0.3063
;
s18
=
0.5022
;
rs18
=
3.9856
;
break
;
// special PBEh - D3 - gCP / def2 - mSVP parametrization;
case
51
:
rs6
=
0.4860
;
s18
=
0.0000
;
rs18
=
4.5000
;
break
;
case
52
:
rs6
=
0.4860
;
s18
=
0.0000
;
rs18
=
4.5000
;
break
;
case
53
:
rs6
=
0.5660
;
s18
=
0.3908
;
rs18
=
3.1280
;
break
;
default:
error
->
all
(
FLERR
,
"Functional name unknown"
);
break
;
}
}
/* ----------------------------------------------------------------------
   Set functional-dependent parameters for the modified zero-damping
   (zero-m) scheme. Writes rs6, s18, rs18 (and s6 for double hybrids);
   aborts via error->all for an unrecognized functional name.
------------------------------------------------------------------------- */
void PairD3::setfuncpar_zerom() {
    // Scheme-wide defaults; individual functionals may override s6 below.
    s6 = 1.0;
    alp = 14.0;

    // Per-functional parameter record: {rs6, s18, rs18, s6}.
    struct ZeroMParams {
        double rs6;
        double s18;
        double rs18;
        double s6;
    };

    // s6 stays at 1.0 unless the functional requires a different scaling
    // (only "b2-plyp" here).
    static const std::unordered_map<std::string, ZeroMParams> param_table = {
        { "b2-plyp", { 1.313134, 0.717543, 0.016035, 0.640000 } },
        { "b3-lyp",  { 1.338153, 1.532981, 0.013988, 1.0 } },
        { "b97-d",   { 1.151808, 1.020078, 0.035964, 1.0 } },
        { "b-lyp",   { 1.279637, 1.841686, 0.014370, 1.0 } },
        { "b-p",     { 1.233460, 1.945174, 0.000000, 1.0 } },
        { "pbe",     { 2.340218, 0.000000, 0.129434, 1.0 } },
        { "pbe0",    { 2.077949, 0.000081, 0.116755, 1.0 } },
        { "lc-wpbe", { 1.366361, 1.280619, 0.003160, 1.0 } }
    };

    const auto entry = param_table.find(functional);
    if (entry == param_table.end()) {
        error->all(FLERR, "Functional name unknown");
    } else {
        rs6 = entry->second.rs6;
        s18 = entry->second.s18;
        rs18 = entry->second.rs18;
        s6 = entry->second.s6;
    }
}
/* ----------------------------------------------------------------------
   Set functional-dependent parameters for the modified Becke-Johnson
   (BJ-m) damping scheme. Writes rs6, s18, rs18 (and s6 for double
   hybrids); aborts via error->all for an unrecognized functional name.
------------------------------------------------------------------------- */
void PairD3::setfuncpar_bjm() {
    // Scheme-wide defaults; individual functionals may override s6 below.
    s6 = 1.0;
    alp = 14.0;

    // Per-functional parameter record: {rs6, s18, rs18, s6}.
    struct BJMParams {
        double rs6;
        double s18;
        double rs18;
        double s6;
    };

    // s6 stays at 1.0 unless the functional requires a different scaling
    // (only "b2-plyp" here).
    static const std::unordered_map<std::string, BJMParams> param_table = {
        { "b2-plyp", { 0.486434, 0.672820, 3.656466, 0.640000 } },
        { "b3-lyp",  { 0.278672, 1.466677, 4.606311, 1.0 } },
        { "b97-d",   { 0.240184, 1.206988, 3.864426, 1.0 } },
        { "b-lyp",   { 0.448486, 1.875007, 3.610679, 1.0 } },
        { "b-p",     { 0.821850, 3.140281, 2.728151, 1.0 } },
        { "pbe",     { 0.012092, 0.358940, 5.938951, 1.0 } },
        { "pbe0",    { 0.007912, 0.528823, 6.162326, 1.0 } },
        { "lc-wpbe", { 0.563761, 0.906564, 3.593680, 1.0 } }
    };

    const auto entry = param_table.find(functional);
    if (entry == param_table.end()) {
        error->all(FLERR, "Functional name unknown");
    } else {
        rs6 = entry->second.rs6;
        s18 = entry->second.s18;
        rs18 = entry->second.rs18;
        s6 = entry->second.s6;
    }
}
/* ----------------------------------------------------------------------
   Dispatch to the damping-scheme-specific parameter setter, then derive
   the remaining parameters (rs8, alp6/alp8, a1/a2, s8) from the values
   the setter wrote.
------------------------------------------------------------------------- */
void PairD3::setfuncpar() {
    // damping selects the scheme: 0 = zero, 1 = BJ, 2 = zero-m, 3 = BJ-m.
    switch (damping) {
    case 0: setfuncpar_zero(); break;
    case 1: setfuncpar_bj(); break;
    case 2: setfuncpar_zerom(); break;
    case 3: setfuncpar_bjm(); break;
    }

    // Derived quantities shared by all schemes.
    rs8 = rs18;
    alp6 = alp;
    alp8 = alp + 2.0;
    // rs10 = rs18
    // alp10 = alp + 4.0;
    a1 = rs6;
    a2 = rs8;
    s8 = s18;
    // s6 is already defined
}
/* ----------------------------------------------------------------------
Coeff : read from pair_coeff (Required) -> pair_coeff * * element1 element2 ...
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Coeff : read from pair_coeff (Required) -> pair_coeff * * element1 element2 ...
   Fills the per-type r2r4 and rcov tables from the element reference data
   (indexed by atomic number, 1-based), then loads the r0ab and c6ab tables.
   atomic_numbers[i] is the atomic number of LAMMPS type i+1.
------------------------------------------------------------------------- */
void
PairD3::coeff(int *atomic_numbers)
{
    if (!allocated) allocate();
    // Per-type tables are sized ntypes+1; rebuild them if ntypes changed.
    if (atom->ntypes + 1 != np1_save) { reallocate_arrays_np1(); }
    int ntypes = atom->ntypes;
    /*
       scale r4/r2 values of the atoms by sqrt(Z)
       sqrt is also globally close to optimum
       together with the factor 1/2 this yield reasonable
       c8 for he, ne and ar. for larger Z, C8 becomes too large
       which effectively mimics higher R^n terms neglected due
       to stability reasons

       r2r4 =sqrt(0.5*r2r4(i)*dfloat(i)**0.5 ) with i=elementnumber
       the large number of digits is just to keep the results consistent
       with older versions. They should not imply any higher accuracy than
       the old values
    */
    // static: the reference data never changes, so build it once instead of
    // re-initializing 94 doubles on every pair_coeff call.
    static const double r2r4_ref[94] = {
        2.00734898,  1.56637132,  5.01986934,  3.85379032,  3.64446594,
        3.10492822,  2.71175247,  2.59361680,  2.38825250,  2.21522516,
        6.58585536,  5.46295967,  5.65216669,  4.88284902,  4.29727576,
        4.04108902,  3.72932356,  3.44677275,  7.97762753,  7.07623947,
        6.60844053,  6.28791364,  6.07728703,  5.54643096,  5.80491167,
        5.58415602,  5.41374528,  5.28497229,  5.22592821,  5.09817141,
        6.12149689,  5.54083734,  5.06696878,  4.87005108,  4.59089647,
        4.31176304,  9.55461698,  8.67396077,  7.97210197,  7.43439917,
        6.58711862,  6.19536215,  6.01517290,  5.81623410,  5.65710424,
        5.52640661,  5.44263305,  5.58285373,  7.02081898,  6.46815523,
        5.98089120,  5.81686657,  5.53321815,  5.25477007, 11.02204549,
       10.15679528,  9.35167836,  9.06926079,  8.97241155,  8.90092807,
        8.85984840,  8.81736827,  8.79317710,  7.89969626,  8.80588454,
        8.42439218,  8.54289262,  8.47583370,  8.45090888,  8.47339339,
        7.83525634,  8.20702843,  7.70559063,  7.32755997,  7.03887381,
        6.68978720,  6.05450052,  5.88752022,  5.70661499,  5.78450695,
        7.79780729,  7.26443867,  6.78151984,  6.67883169,  6.39024318,
        6.09527958, 11.79156076, 11.10997644,  9.51377795,  8.67197068,
        8.77140725,  8.65402716,  8.53923501,  8.85024712
    }; // atomic <r^2>/<r^4> values
    /*
       covalent radii (taken from Pyykko and Atsumi, Chem. Eur. J. 15, 2009, 188-197)
       values for metals decreased by 10 %
       these data are scaled with k2=4./3. and converted to a_0 via
       autoang=0.52917726d0
    */
    static const double rcov_ref[94] = {
        0.80628308, 1.15903197, 3.02356173, 2.36845659, 1.94011865,
        1.88972601, 1.78894056, 1.58736983, 1.61256616, 1.68815527,
        3.52748848, 3.14954334, 2.84718717, 2.62041997, 2.77159820,
        2.57002732, 2.49443835, 2.41884923, 4.43455700, 3.88023730,
        3.35111422, 3.07395437, 3.04875805, 2.77159820, 2.69600923,
        2.62041997, 2.51963467, 2.49443835, 2.54483100, 2.74640188,
        2.82199085, 2.74640188, 2.89757982, 2.77159820, 2.87238349,
        2.94797246, 4.76210950, 4.20778980, 3.70386304, 3.50229216,
        3.32591790, 3.12434702, 2.89757982, 2.84718717, 2.84718717,
        2.72120556, 2.89757982, 3.09915070, 3.22513231, 3.17473967,
        3.17473967, 3.09915070, 3.32591790, 3.30072128, 5.26603625,
        4.43455700, 4.08180818, 3.70386304, 3.98102289, 3.95582657,
        3.93062995, 3.90543362, 3.80464833, 3.82984466, 3.80464833,
        3.77945201, 3.75425569, 3.75425569, 3.72905937, 3.85504098,
        3.67866672, 3.45189952, 3.30072128, 3.09915070, 2.97316878,
        2.92277614, 2.79679452, 2.82199085, 2.84718717, 3.32591790,
        3.27552496, 3.27552496, 3.42670319, 3.30072128, 3.47709584,
        3.57788113, 5.06446567, 4.56053862, 4.20778980, 3.98102289,
        3.82984466, 3.85504098, 3.88023730, 3.90543362
    }; // covalent radii
    for (int i = 0; i < ntypes; i++) {
        // Validate before indexing: reference data only covers Z = 1..94,
        // anything else would read out of bounds.
        const int z = atomic_numbers[i];
        if (z < 1 || z > 94) {
            error->all(FLERR, "Invalid atomic number for DFT-D3 (supported range: 1-94)");
        }
        // LAMMPS types are 1-based, reference arrays are 0-based.
        r2r4[i + 1] = r2r4_ref[z - 1];
        rcov[i + 1] = rcov_ref[z - 1];
    }
    // set r0ab
    read_r0ab(atomic_numbers, ntypes);
    // read c6ab
    read_c6ab(atomic_numbers, ntypes);
}
/* ----------------------------------------------------------------------
Get derivative of C6 w.r.t. CN (used in PairD3::compute)
C6 = C6(CN_A, CN_B) == W(CN_A, CN_B) / Z(CN_A, CN_B)
This gives below from chain rule:
d(C6)/dr = d(C6)/d(CN_A) * d(CN_A)/dr + d(C6)/d(CN_B) * d(CN_B)/dr
So we can pre-calculate the d(C6)/d(CN_A), d(C6)/d(CN_B) part.
d(C6)/d(CN_i) = (dW/d(CN_i) * Z - W * dZ/d(CN_i)) / (W * W)
W : "denominator"
Z : "numerator"
dW/d(CN_i) : "d_denominator_i"
dZ/d(CN_j) : "d_numerator_j"
Z = Sum( L_ij(CN_A, CN_B) * C6_ref(CN_A_i, CN_B_j) ) over i, j
W = Sum( L_ij(CN_A, CN_B) ) over i, j
And the resulting derivative term is saved into
"dc6_iji_tot", "dc6_ijj_tot" array,
where we can find the value of d(C6)/d(CN_i)
by knowing the index of "iat", and "jat". ("idx_linij")
Also, c6 values will also be saved into "c6_ij_tot" array.
Here, as we only interested in *pair* of atoms, assume "iat" >= "jat".
Then "idx_linij" = "jat + (iat + 1) * iat / 2" have the order below.
idx_linij | j = 0 j = 1 j = 2 j = 3 ...
---------------------------------------------
i = 0 | 0
i = 1 | 1 2
i = 2 | 3 4 5
i = 3 | 6 7 8 9
... | ... ... ... ... ...
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   CUDA kernel: one thread per unique atom pair (iat >= jat), indexed by
   the triangular index "iter" (see the comment block above this kernel).
   Interpolates C6(CN_i, CN_j) from the c6ab reference grid and its
   derivatives w.r.t. the two coordination numbers, writing into
   c6_ij_tot / dc6_iji_tot / dc6_ijj_tot at index "iter".
   c6ab layout (as used here): c6ab[type_i][type_j][a][b][0] = C6 reference,
   [1] = CN_i reference, [2] = CN_j reference.
   NOTE(review): the weighting uses exp(K3 * r); presumably K3 < 0 so this
   is a Gaussian weight in CN-space distance — confirm against the K3
   definition in the header.
------------------------------------------------------------------------- */
__global__ void kernel_get_dC6_dCNij(
    int maxij,            // number of unique (i, j) pairs = n*(n+1)/2
    float K3,             // exponent of the CN-space weighting function
    double *cn,           // per-atom coordination numbers
    int *mxc,             // per-type count of valid reference entries
    float *****c6ab,      // C6/CN reference grid (see layout note above)
    int *type,            // per-atom LAMMPS type
    float *c6_ij_tot,     // out: interpolated C6 per pair
    float *dc6_iji_tot,   // out: d(C6)/d(CN_i) per pair
    float *dc6_ijj_tot)   // out: d(C6)/d(CN_j) per pair
{
    int iter = blockIdx.x * blockDim.x + threadIdx.x;
    if (iter < maxij) {
        int iat, jat;
        // Decode the triangular pair index into (iat, jat).
        ij_at_linij(iter, iat, jat);
        const int atomtype_i = type[iat];
        const int atomtype_j = type[jat];
        const float cni = cn[iat];
        const int mxci = mxc[atomtype_i];
        const float cnj = cn[jat];
        const int mxcj = mxc[atomtype_j];
        // Fallback: C6 of the closest reference point, used when all weights
        // underflow. NOTE(review): -1e99 overflows float range (compilers
        // typically fold it to -inf) — confirm this sentinel is intended.
        float c6mem = -1e99f;
        float r_save = 9999.0f;
        // Gaussian-weighted average accumulators (double on purpose —
        // see "must be double" notes below).
        double numerator = 0.0;
        double denominator = 0.0;
        double d_numerator_i = 0.0;
        double d_denominator_i = 0.0;
        double d_numerator_j = 0.0;
        double d_denominator_j = 0.0;
        // Loop over the valid reference CN grid points for this type pair.
        for (int a = 0; a < mxci; a++) {
            for (int b = 0; b < mxcj; b++) {
                float c6ref = c6ab[atomtype_i][atomtype_j][a][b][0];
                // Non-positive C6 marks an unused grid slot.
                if (c6ref > 0.0f) {
                    float cn_refi = c6ab[atomtype_i][atomtype_j][a][b][1];
                    float cn_refj = c6ab[atomtype_i][atomtype_j][a][b][2];
                    // Squared distance in CN-space from the reference point.
                    float r = (cn_refi - cni) * (cn_refi - cni)
                            + (cn_refj - cnj) * (cn_refj - cnj);
                    if (r < r_save) {
                        r_save = r;
                        c6mem = c6ref;
                    }
                    double expterm = exp(static_cast<double>(K3) * static_cast<double>(r)); // must be double
                    numerator += c6ref * expterm;
                    denominator += expterm;
                    // Chain-rule factor for d(exp(K3*r))/d(CN): 2*K3*(CN - CN_ref).
                    expterm *= 2.0f * K3;
                    double term = expterm * (cni - cn_refi);
                    d_numerator_i += c6ref * term;
                    d_denominator_i += term;
                    term = expterm * (cnj - cn_refj);
                    d_numerator_j += c6ref * term;
                    d_denominator_j += term;
                }
            }
        }
        if (denominator > 1e-99) {
            const double denominator_rc = 1.0 / denominator; // must be double
            // C6 = W/Z as described in the comment above the kernel.
            const double unit_frac = numerator * denominator_rc;
            c6_ij_tot[iter] = unit_frac;
            // d(C6)/d(CN) = (dW - C6 * dZ) / Z, computed with fma for accuracy.
            dc6_iji_tot[iter] = denominator_rc * fma(unit_frac, -d_denominator_i, d_numerator_i); // must be double
            dc6_ijj_tot[iter] = denominator_rc * fma(unit_frac, -d_denominator_j, d_numerator_j); // must be double
            // (A previous single-precision variant of these two updates was
            // kept here as commented-out code; removed for clarity.)
        } else {
            // All weights underflowed: fall back to the nearest reference C6
            // and a zero derivative.
            c6_ij_tot[iter] = c6mem;
            dc6_iji_tot[iter] = 0.0f;
            dc6_ijj_tot[iter] = 0.0f;
        }
    }
}
/* ----------------------------------------------------------------------
   Host-side launcher for kernel_get_dC6_dCNij: one thread per unique
   atom pair, then synchronize so results are ready for the host.
------------------------------------------------------------------------- */
void PairD3::get_dC6_dCNij() {
    // Unique (i, j) pairs with i >= j: triangular count n*(n+1)/2.
    const int natoms = atom->natoms;
    const int pair_count = natoms * (natoms + 1) / 2;
    //START_CUDA_TIMER();
    const int block_size = 128;
    const int grid_size = (pair_count + block_size - 1) / block_size;
    kernel_get_dC6_dCNij<<<grid_size, block_size>>>(
        pair_count, K3, cn, mxc, c6ab, atomtype,
        c6_ij_tot, dc6_iji_tot, dc6_ijj_tot);
    // Wait for the kernel to finish before any host-side use of the outputs.
    cudaDeviceSynchronize();
    //STOP_CUDA_TIMER("get_dC6dCNij");
}
/* ----------------------------------------------------------------------
Get lattice vectors (used in PairD3::compute)
1) Save lattice vectors into "lat_v_1", "lat_v_2", "lat_v_3"
2) Calculate repetition criteria for vdw, cn
3) precalculate tau (xyz shift due to cell repetition)
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Refresh lattice vectors from the LAMMPS domain (converted to atomic
   units), update the vdw/cn repetition counts, and (re)allocate the tau
   lookup tables whenever the repetition range changed. The *_save sizes
   are captured BEFORE the criteria update so the old tables can be freed
   with the dimensions they were allocated with.
------------------------------------------------------------------------- */
void PairD3::set_lattice_vectors() {
    // Triclinic box bounds and tilt factors from LAMMPS.
    double boxxlo = domain->boxlo[0];
    double boxxhi = domain->boxhi[0];
    double boxylo = domain->boxlo[1];
    double boxyhi = domain->boxhi[1];
    double boxzlo = domain->boxlo[2];
    double boxzhi = domain->boxhi[2];
    double xy = domain->xy;
    double xz = domain->xz;
    double yz = domain->yz;
    // Lattice vectors in Bohr (LAMMPS upper-triangular cell convention:
    // a along x, b in the xy plane, c general).
    lat_v_1[0] = (boxxhi - boxxlo) / AU_TO_ANG;
    lat_v_1[1] = 0.0;
    lat_v_1[2] = 0.0;
    lat_v_2[0] = xy / AU_TO_ANG;
    lat_v_2[1] = (boxyhi - boxylo) / AU_TO_ANG;
    lat_v_2[2] = 0.0;
    lat_v_3[0] = xz / AU_TO_ANG;
    lat_v_3[1] = yz / AU_TO_ANG;
    lat_v_3[2] = (boxzhi - boxzlo) / AU_TO_ANG;
    // Old per-axis table extents (2*rep+1) — needed to free the old tau
    // tables below, so capture them before rep_vdw/rep_cn are updated.
    int vdwrx_save = 2 * rep_vdw[0] + 1;
    int vdwry_save = 2 * rep_vdw[1] + 1;
    int vdwrz_save = 2 * rep_vdw[2] + 1;
    int cnrx_save = 2 * rep_cn[0] + 1;
    int cnry_save = 2 * rep_cn[1] + 1;
    int cnrz_save = 2 * rep_cn[2] + 1;
    // Recompute how many periodic images are needed for each cutoff.
    set_lattice_repetition_criteria(rthr, rep_vdw);
    set_lattice_repetition_criteria(cnthr, rep_cn);
    // New per-axis extents and flattened index-array size (3 ints per image).
    int vdw_range_x = 2 * rep_vdw[0] + 1;
    int vdw_range_y = 2 * rep_vdw[1] + 1;
    int vdw_range_z = 2 * rep_vdw[2] + 1;
    int tau_loop_size_vdw = vdw_range_x * vdw_range_y * vdw_range_z * 3;
    // Reallocate the vdw tau tables only when the total size changed.
    if (tau_loop_size_vdw != tau_idx_vdw_total_size) {
        if (tau_idx_vdw != nullptr) {
            // Free the old jagged tau_vdw table using the OLD extents.
            for (int i = 0; i < vdwrx_save; i++) {
                for (int j = 0; j < vdwry_save; j++) {
                    for (int k = 0; k < vdwrz_save; k++) {
                        cudaFree(tau_vdw[i][j][k]);
                    }
                    cudaFree(tau_vdw[i][j]);
                }
                cudaFree(tau_vdw[i]);
            }
            cudaFree(tau_vdw);
            cudaFree(tau_idx_vdw);
        }
        tau_idx_vdw_total_size = tau_loop_size_vdw;
        // Allocate the new jagged table: [x][y][z] -> 3 floats (xyz shift).
        cudaMallocManaged(&tau_vdw, vdw_range_x * sizeof(float ***));
        for (int i = 0; i < vdw_range_x; i++) {
            cudaMallocManaged(&tau_vdw[i], vdw_range_y * sizeof(float **));
            for (int j = 0; j < vdw_range_y; j++) {
                cudaMallocManaged(&tau_vdw[i][j], vdw_range_z * sizeof(float *));
                for (int k = 0; k < vdw_range_z; k++) {
                    cudaMallocManaged(&tau_vdw[i][j][k], 3 * sizeof(float));
                }
            }
        }
        cudaMallocManaged(&tau_idx_vdw, tau_idx_vdw_total_size * sizeof(int));
    }
    // Same procedure for the coordination-number cutoff tables.
    int cn_range_x = 2 * rep_cn[0] + 1;
    int cn_range_y = 2 * rep_cn[1] + 1;
    int cn_range_z = 2 * rep_cn[2] + 1;
    int tau_loop_size_cn = cn_range_x * cn_range_y * cn_range_z * 3;
    if (tau_loop_size_cn != tau_idx_cn_total_size) {
        if (tau_idx_cn != nullptr) {
            // Free the old jagged tau_cn table using the OLD extents.
            for (int i = 0; i < cnrx_save; i++) {
                for (int j = 0; j < cnry_save; j++) {
                    for (int k = 0; k < cnrz_save; k++) {
                        cudaFree(tau_cn[i][j][k]);
                    }
                    cudaFree(tau_cn[i][j]);
                }
                cudaFree(tau_cn[i]);
            }
            cudaFree(tau_cn);
            cudaFree(tau_idx_cn);
        }
        tau_idx_cn_total_size = tau_loop_size_cn;
        // Allocate the new jagged table: [x][y][z] -> 3 floats (xyz shift).
        cudaMallocManaged(&tau_cn, cn_range_x * sizeof(float ***));
        for (int i = 0; i < cn_range_x; i++) {
            cudaMallocManaged(&tau_cn[i], cn_range_y * sizeof(float **));
            for (int j = 0; j < cn_range_y; j++) {
                cudaMallocManaged(&tau_cn[i][j], cn_range_z * sizeof(float *));
                for (int k = 0; k < cn_range_z; k++) {
                    cudaMallocManaged(&tau_cn[i][j][k], 3 * sizeof(float));
                }
            }
        }
        cudaMallocManaged(&tau_idx_cn, tau_idx_cn_total_size * sizeof(int));
    }
}
/* ----------------------------------------------------------------------
Set repetition criteria (used in PairD3::compute)
Needed as Periodic Boundary Condition should be considered.
As the cell may *not* be orthorhombic,
the dot product should be used between x/y/z direction and
corresponding cross product vector.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Set repetition criteria (used in PairD3::compute)
   Needed as Periodic Boundary Condition should be considered.
   For a possibly non-orthorhombic cell, the perpendicular extent along
   each lattice direction is obtained by projecting the lattice vector
   onto the unit normal of the opposite face (cross product of the other
   two lattice vectors).
------------------------------------------------------------------------- */
void PairD3::set_lattice_repetition_criteria(float r_threshold, int *rep_v) {
    // r_threshold is a squared cutoff; work with the plain radius.
    const double cutoff = sqrt(r_threshold);
    // Face normals: cross products of the lattice-vector pairs.
    double normal_ab[3], normal_bc[3], normal_ca[3];
    cross3(lat_v_1, lat_v_2, normal_ab);
    cross3(lat_v_2, lat_v_3, normal_bc);
    cross3(lat_v_3, lat_v_1, normal_ca);
    // Perpendicular height of the cell along each lattice direction.
    const double height_a = dot3(normal_bc, lat_v_1) / len3(normal_bc);
    const double height_b = dot3(normal_ca, lat_v_2) / len3(normal_ca);
    const double height_c = dot3(normal_ab, lat_v_3) / len3(normal_ab);
    // Number of image cells needed to cover the cutoff in each direction
    // (+1 to round up conservatively).
    rep_v[0] = static_cast<int>(std::abs(cutoff / height_a)) + 1;
    rep_v[1] = static_cast<int>(std::abs(cutoff / height_b)) + 1;
    rep_v[2] = static_cast<int>(std::abs(cutoff / height_c)) + 1;
    // Non-periodic directions need no periodic images at all.
    if (domain->xperiodic == 0) { rep_v[0] = 0; }
    if (domain->yperiodic == 0) { rep_v[1] = 0; }
    if (domain->zperiodic == 0) { rep_v[2] = 0; }
}
/* ----------------------------------------------------------------------
Calculate Coordination Number (used in PairD3::compute)
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   CUDA kernel: one thread per unique atom pair (iat >= jat). Accumulates
   the D3 coordination-number counting function
       1 / (1 + exp(-K1 * (rcov_sum / r - 1)))
   over all periodic images within the squared cutoff cnthr, using
   atomicAdd because many pair threads write the same cn[] entries.
   For iat == jat only images of the same atom contribute and the
   zero-shift image (the atom itself) is skipped; for iat != jat the
   contribution is symmetric and added to both atoms.
------------------------------------------------------------------------- */
__global__ void kernel_get_coordination_number(
    int maxij,            // number of unique (i, j) pairs = n*(n+1)/2
    int maxtau,           // total length of tau_idx_cn (3 ints per image)
    float cnthr,          // squared CN cutoff
    float K1,             // steepness of the counting function
    float *rcov,          // per-type covalent radii
    int *rep_cn,          // per-axis repetition counts (zero-shift index)
    float ****tau_cn,     // [x][y][z] -> 3-float Cartesian image shift
    int *tau_idx_cn,      // flattened (x, y, z) index triples
    int *type,            // per-atom LAMMPS type
    float **x,            // per-atom positions (wrapped, Bohr)
    double *cn)           // out: per-atom coordination numbers
{
    int iter = blockIdx.x * blockDim.x + threadIdx.x;
    if (iter < maxij) {
        int iat, jat;
        // Decode the triangular pair index into (iat, jat).
        ij_at_linij(iter, iat, jat);
        float cn_local = 0.0f;
        if (iat == jat) {
            // Self pair: both partners share one covalent radius.
            const float rcov_sum = rcov[type[iat]] * 2.0f;
            // Walk the image list backwards, one (x, y, z) triple at a time.
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_cn[k - 2];
                const int idx2 = tau_idx_cn[k - 1];
                const int idx3 = tau_idx_cn[k];
                // Skip the zero-shift image (index == rep on each axis):
                // that is the atom interacting with itself.
                if (idx1 == rep_cn[0] && idx2 == rep_cn[1] && idx3 == rep_cn[2]) {
                    continue;
                }
                const float rx = tau_cn[idx1][idx2][idx3][0];
                const float ry = tau_cn[idx1][idx2][idx3][1];
                const float rz = tau_cn[idx1][idx2][idx3][2];
                const float r2 = rx * rx + ry * ry + rz * rz;
                if (r2 <= cnthr) {
                    // r_rc = 1/r, so rcov_sum * r_rc = rcov_sum / r.
                    const float r_rc = rsqrtf(r2);
                    const float damp = 1.0f / (1.0f + expf(-K1 * ((rcov_sum * r_rc) - 1.0f)));
                    cn_local += damp;
                }
            }
            atomicAdd(&cn[iat], cn_local);
        } else {
            // Distinct pair: counting function uses the sum of both radii.
            const float rcov_sum = rcov[type[iat]] + rcov[type[jat]];
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_cn[k - 2];
                const int idx2 = tau_idx_cn[k - 1];
                const int idx3 = tau_idx_cn[k];
                // Image-shifted separation vector j -> i.
                const float rx = x[jat][0] - x[iat][0] + tau_cn[idx1][idx2][idx3][0];
                const float ry = x[jat][1] - x[iat][1] + tau_cn[idx1][idx2][idx3][1];
                const float rz = x[jat][2] - x[iat][2] + tau_cn[idx1][idx2][idx3][2];
                const float r2 = rx * rx + ry * ry + rz * rz;
                if (r2 <= cnthr) {
                    const float r_rc = rsqrtf(r2);
                    const float damp = 1.0f / (1.0f + expf(-K1 * ((rcov_sum * r_rc) - 1.0f)));
                    cn_local += damp;
                }
            }
            // Symmetric contribution: the same sum counts for both atoms.
            atomicAdd(&cn[iat], cn_local);
            atomicAdd(&cn[jat], cn_local);
        }
    }
}
/* ----------------------------------------------------------------------
   Host-side launcher for kernel_get_coordination_number: zero the cn
   array (the kernel accumulates with atomicAdd), launch one thread per
   unique atom pair, and synchronize.
------------------------------------------------------------------------- */
void PairD3::get_coordination_number() {
    const int natoms = atom->natoms;
    const int pair_count = natoms * (natoms + 1) / 2;
    const int tau_entries = tau_idx_cn_total_size;
    // The kernel only adds, so every accumulator must start at zero.
    for (int i = 0; i < natoms; i++) {
        cn[i] = 0.0;
    }
    //START_CUDA_TIMER();
    const int block_size = 128;
    const int grid_size = (pair_count + block_size - 1) / block_size;
    kernel_get_coordination_number<<<grid_size, block_size>>>(
        pair_count, tau_entries, cnthr, K1, rcov, rep_cn,
        tau_cn, tau_idx_cn, atomtype, x, cn);
    // Wait for completion before cn[] is consumed on the host.
    cudaDeviceSynchronize();
    //STOP_CUDA_TIMER("get_coord");
}
/* ----------------------------------------------------------------------
reallocate memory if the number of atoms has changed (used in PairD3::compute)
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Rebuild every per-atom and per-pair buffer for the current atom count.
   Frees use the OLD count (n_save); allocations use the new one.
------------------------------------------------------------------------- */
void PairD3::reallocate_arrays() {
    /* -------------- Destroy previous arrays -------------- */
    const int old_n = n_save;
    // Row buffers of the jagged position/force arrays first,
    // then the pointer arrays themselves.
    for (int i = 0; i < old_n; i++) { cudaFree(x[i]); }
    for (int i = 0; i < old_n; i++) { cudaFree(f[i]); }
    cudaFree(x);
    cudaFree(f);
    cudaFree(cn);
    cudaFree(dc6i);
    cudaFree(dc6_iji_tot);
    cudaFree(dc6_ijj_tot);
    cudaFree(c6_ij_tot);
    cudaFree(atomtype);
    /* -------------- Destroy previous arrays -------------- */
    /* -------------- Create new arrays -------------- */
    const int natoms = atom->natoms;
    n_save = natoms;
    // Per-atom buffers: coordination numbers, positions (float rows),
    // C6 derivatives, forces (double rows), and types.
    cudaMallocManaged(&cn, natoms * sizeof(double));
    cudaMallocManaged(&x, natoms * sizeof(float *));
    cudaMallocManaged(&f, natoms * sizeof(double *));
    for (int i = 0; i < natoms; i++) {
        cudaMallocManaged(&x[i], 3 * sizeof(float));
        cudaMallocManaged(&f[i], 3 * sizeof(double));
    }
    cudaMallocManaged(&dc6i, natoms * sizeof(double));
    // Per-pair buffers sized by the triangular pair count n*(n+1)/2.
    const int pair_count = natoms * (natoms + 1) / 2;
    cudaMallocManaged(&dc6_iji_tot, pair_count * sizeof(float));
    cudaMallocManaged(&dc6_ijj_tot, pair_count * sizeof(float));
    cudaMallocManaged(&c6_ij_tot, pair_count * sizeof(float));
    cudaMallocManaged(&atomtype, natoms * sizeof(int));
    /* -------------- Create new arrays -------------- */
}
/* ----------------------------------------------------------------------
   Rebuild every per-type table for the current number of types.
   Tables are sized ntypes + 1 because LAMMPS types are 1-based (see the
   i + 1 indexing in PairD3::coeff); frees must use the OLD size
   (np1_save), which is updated only after everything is released.
------------------------------------------------------------------------- */
void PairD3::reallocate_arrays_np1() {
    /* -------------- Destroy previous arrays -------------- */
    cudaFree(r2r4);
    cudaFree(rcov);
    cudaFree(mxc);
    // r0ab is jagged: np1_save rows of np1_save floats.
    for (int i = 0; i < np1_save; i++) { cudaFree(r0ab[i]); };
    cudaFree(r0ab);
    // c6ab is a 5-level jagged table: [type_i][type_j][ref_a][ref_b][3].
    // Free leaf-first so no pointer is read after its parent is released.
    for (int i = 0; i < np1_save; i++) {
        for (int j = 0; j < np1_save; j++) {
            for (int k = 0; k < MAXC; k++) {
                for (int l = 0; l < MAXC; l++) {
                    cudaFree(c6ab[i][j][k][l]);
                }
                cudaFree(c6ab[i][j][k]);
            }
            cudaFree(c6ab[i][j]);
        }
        cudaFree(c6ab[i]);
    }
    cudaFree(c6ab);
    /* -------------- Destroy previous arrays -------------- */
    /* -------------- Create new arrays -------------- */
    int np1 = atom->ntypes + 1;
    np1_save = np1;
    cudaMallocManaged(&r2r4, np1 * sizeof(float));
    cudaMallocManaged(&rcov, np1 * sizeof(float));
    cudaMallocManaged(&mxc, np1 * sizeof(int));
    cudaMallocManaged(&r0ab, np1 * sizeof(float *));
    for (int i = 0; i < np1; i++) {
        cudaMallocManaged(&r0ab[i], np1 * sizeof(float));
    }
    // Rebuild the 5-level c6ab table; each innermost leaf holds
    // 3 floats (C6 reference plus the two reference CN values).
    cudaMallocManaged(&c6ab, np1 * sizeof(float ****));
    for (int i = 0; i < np1; i++) {
        cudaMallocManaged(&c6ab[i], np1 * sizeof(float ***));
        for (int j = 0; j < np1; j++) {
            cudaMallocManaged(&c6ab[i][j], MAXC * sizeof(float **));
            for (int k = 0; k < MAXC; k++) {
                cudaMallocManaged(&c6ab[i][j][k], MAXC * sizeof(float *));
                for (int l = 0; l < MAXC; l++) {
                    cudaMallocManaged(&c6ab[i][j][k][l], 3 * sizeof(float));
                }
            }
        }
    }
    /* -------------- Create new arrays -------------- */
}
/* ----------------------------------------------------------------------
Initialize atomic positions & types (used in PairD3::compute)
As the default xyz from lammps does not assure that atoms are within unit cell,
this function shifts atoms into the unit cell.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Initialize atomic positions (used in PairD3::compute)
   As the default xyz from lammps does not assure that atoms are within
   the unit cell, this function wraps atoms back into the cell:
   convert to fractional coordinates, drop the integer part, convert back.
------------------------------------------------------------------------- */
void PairD3::load_atom_info() {
    // Cell matrix by columns: lat[:][0] = a, lat[:][1] = b, lat[:][2] = c.
    double lat[3][3];
    for (int r = 0; r < 3; r++) {
        lat[r][0] = lat_v_1[r];
        lat[r][1] = lat_v_2[r];
        lat[r][2] = lat_v_3[r];
    }
    // Determinant (rule of Sarrus).
    double det = lat[0][0] * lat[1][1] * lat[2][2]
               + lat[0][1] * lat[1][2] * lat[2][0]
               + lat[0][2] * lat[1][0] * lat[2][1]
               - lat[0][2] * lat[1][1] * lat[2][0]
               - lat[0][1] * lat[1][0] * lat[2][2]
               - lat[0][0] * lat[1][2] * lat[2][1];
    // Inverse via the adjugate: lat_inv[i][j] = cofactor(j, i) / det,
    // written with cyclic index shifts instead of nine spelled-out entries.
    double lat_inv[3][3];
    for (int i = 0; i < 3; i++) {
        const int i1 = (i + 1) % 3;
        const int i2 = (i + 2) % 3;
        for (int j = 0; j < 3; j++) {
            const int j1 = (j + 1) % 3;
            const int j2 = (j + 2) % 3;
            lat_inv[i][j] = (lat[j1][i1] * lat[j2][i2] - lat[j1][i2] * lat[j2][i1]) / det;
        }
    }
    double a[3] = { 0.0 };
    for (int iat = 0; iat < atom->natoms; iat++) {
        for (int i = 0; i < 3; i++) {
            // Fractional coordinate from the Angstrom-space LAMMPS position
            // (each Cartesian component converted to Bohr on the fly).
            a[i] = lat_inv[i][0] * (atom->x)[iat][0] / AU_TO_ANG
                 + lat_inv[i][1] * (atom->x)[iat][1] / AU_TO_ANG
                 + lat_inv[i][2] * (atom->x)[iat][2] / AU_TO_ANG;
            // Wrap into [0, 1).
            a[i] -= floor(a[i]);
            // replaces the code below
            //if (a[i] > 1) { while (a[i] > 1) { a[i]--; } }
            //else if (a[i] < 0) { while (a[i] < 0) { a[i]++; } }
        }
        // Back to Cartesian coordinates, now guaranteed inside the cell.
        for (int i = 0; i < 3; i++) {
            x[iat][i] = (lat[i][0] * a[0] + lat[i][1] * a[1] + lat[i][2] * a[2]);
        }
    }
}
/* ----------------------------------------------------------------------
Precalculate tau array
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Precalculate tau array
   Fills tau[ix][iy][iz] with the Cartesian shift
   taux*a + tauy*b + tauz*c for every image offset in [-lim, lim]^3
   (stored with a +lim index offset), and records the flattened (x, y, z)
   index triples in tau_idx. The identical procedure is applied to the
   vdw tables and the cn tables; the duplicated triple loop of the
   original has been factored into one local helper.
------------------------------------------------------------------------- */
void PairD3::precalculate_tau_array() {
    // One filler for both table sets; captures `this` so the lattice
    // vectors are read directly from the member arrays.
    auto fill_tau = [this](const int *rep, float ****tau, int *tau_idx) {
        const int xlim = rep[0];
        const int ylim = rep[1];
        const int zlim = rep[2];
        int index = 0;
        for (int taux = -xlim; taux <= xlim; taux++) {
            for (int tauy = -ylim; tauy <= ylim; tauy++) {
                for (int tauz = -zlim; tauz <= zlim; tauz++) {
                    // Cartesian shift of this periodic image.
                    float *shift = tau[taux + xlim][tauy + ylim][tauz + zlim];
                    shift[0] = lat_v_1[0] * taux + lat_v_2[0] * tauy + lat_v_3[0] * tauz;
                    shift[1] = lat_v_1[1] * taux + lat_v_2[1] * tauy + lat_v_3[1] * tauz;
                    shift[2] = lat_v_1[2] * taux + lat_v_2[2] * tauy + lat_v_3[2] * tauz;
                    // Flattened index triples consumed by the kernels.
                    tau_idx[index++] = taux + xlim;
                    tau_idx[index++] = tauy + ylim;
                    tau_idx[index++] = tauz + zlim;
                }
            }
        }
    };
    fill_tau(rep_vdw, tau_vdw, tau_idx_vdw);
    fill_tau(rep_cn, tau_cn, tau_idx_cn);
}
/* ----------------------------------------------------------------------
Get forces (Zero damping)
------------------------------------------------------------------------- */
__global__
void
kernel_get_forces_without_dC6_zero
(
int
maxij
,
int
maxtau
,
float
rthr
,
float
s6
,
float
s8
,
float
a1
,
float
a2
,
float
alp6
,
float
alp8
,
float
*
r2r4
,
float
**
r0ab
,
int
*
rep_vdw
,
float
****
tau_vdw
,
int
*
tau_idx_vdw
,
int
*
type
,
float
**
x
,
float
*
c6_ij_tot
,
float
*
dc6_iji_tot
,
float
*
dc6_ijj_tot
,
double
*
dc6i
,
double
*
disp
,
double
**
f
,
double
**
sigma
)
{
int
iter
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
__shared__
float
sigma_00
[
128
];
__shared__
float
sigma_01
[
128
];
__shared__
float
sigma_02
[
128
];
__shared__
float
sigma_10
[
128
];
__shared__
float
sigma_11
[
128
];
__shared__
float
sigma_12
[
128
];
__shared__
float
sigma_20
[
128
];
__shared__
float
sigma_21
[
128
];
__shared__
float
sigma_22
[
128
];
__shared__
float
disp_shared
[
128
];
float
sigma_local_00
=
0.0
f
;
float
sigma_local_01
=
0.0
f
;
float
sigma_local_02
=
0.0
f
;
float
sigma_local_10
=
0.0
f
;
float
sigma_local_11
=
0.0
f
;
float
sigma_local_12
=
0.0
f
;
float
sigma_local_20
=
0.0
f
;
float
sigma_local_21
=
0.0
f
;
float
sigma_local_22
=
0.0
f
;
float
disp_local
=
0.0
f
;
if
(
iter
<
maxij
)
{
int
iat
,
jat
;
ij_at_linij
(
iter
,
iat
,
jat
);
float
f_local
[
3
]
=
{
0.0
f
};
float
dc6i_local_i
=
0.0
f
;
float
dc6i_local_j
=
0.0
f
;
const
float
c6
=
c6_ij_tot
[
iter
];
const
float
dc6iji
=
dc6_iji_tot
[
iter
];
const
float
dc6ijj
=
dc6_ijj_tot
[
iter
];
if
(
iat
==
jat
)
{
const
int
atomtype_i
=
type
[
iat
];
const
float
r0
=
r0ab
[
atomtype_i
][
atomtype_i
];
const
float
unit_r2r4
=
r2r4
[
atomtype_i
];
const
float
r42
=
unit_r2r4
*
unit_r2r4
;
const
float
unit_a1
=
(
a1
*
r0
);
const
float
unit_a2
=
(
a2
*
r0
);
const
float
s8r42
=
s8
*
r42
;
for
(
int
k
=
maxtau
-
1
;
k
>=
0
;
k
-=
3
)
{
const
int
idx1
=
tau_idx_vdw
[
k
-
2
];
const
int
idx2
=
tau_idx_vdw
[
k
-
1
];
const
int
idx3
=
tau_idx_vdw
[
k
];
if
(
idx1
==
rep_vdw
[
0
]
&&
idx2
==
rep_vdw
[
1
]
&&
idx3
==
rep_vdw
[
2
])
{
continue
;
}
const
float
rij
[
3
]
=
{
tau_vdw
[
idx1
][
idx2
][
idx3
][
0
],
tau_vdw
[
idx1
][
idx2
][
idx3
][
1
],
tau_vdw
[
idx1
][
idx2
][
idx3
][
2
]
};
const
float
r2
=
lensq3
(
rij
);
if
(
r2
>
rthr
)
{
continue
;
}
const
float
r_rc
=
rsqrtf
(
r2
);
float
unit_rc_a1
=
unit_a1
*
r_rc
;
float
t6
=
unit_rc_a1
*
unit_rc_a1
;
// ^2
t6
*=
unit_rc_a1
;
// ^3
t6
*=
t6
;
// ^6
t6
*=
unit_rc_a1
;
// ^7
t6
*=
t6
;
// ^14
const
float
damp6
=
1.0
f
/
fmaf
(
t6
,
6.0
f
,
1.0
f
);
float
unit_rc_a2
=
unit_a2
*
r_rc
;
float
t8
=
unit_rc_a2
*
unit_rc_a2
;
// ^2
t8
*=
t8
;
// ^4
t8
*=
t8
;
// ^8
t8
*=
t8
;
// ^16
const
float
damp8
=
1.0
f
/
fmaf
(
t8
,
6.0
f
,
1.0
f
);
const
float
r2_rc
=
r_rc
*
r_rc
;
// 1.0 / r2
const
float
r6_rc
=
r2_rc
*
r2_rc
*
r2_rc
;
const
float
r8_rc
=
r6_rc
*
r2_rc
;
const
float
x1
=
3.0
f
*
c6
*
r8_rc
*
fmaf
(
r2_rc
,
s8r42
*
damp8
*
fmaf
(
3.0
f
*
alp8
*
t8
,
damp8
,
-
4.0
f
),
s6
*
damp6
*
fmaf
(
alp6
*
t6
,
damp6
,
-
1.0
f
));
//const float x1 = 0.5 * 6.0 * c6 * r8_rc * (s6 * damp6 * (14.0 * t6 * damp6 - 1.0) + s8r42 * r2_rc * damp8 * (48.0 * t8 * damp8 - 4.0));
//3.0 * alp6 = 48.0
const
float
vec
[
3
]
=
{
x1
*
rij
[
0
],
x1
*
rij
[
1
],
x1
*
rij
[
2
]
};
sigma_local_00
+=
vec
[
0
]
*
rij
[
0
];
sigma_local_01
+=
vec
[
0
]
*
rij
[
1
];
sigma_local_02
+=
vec
[
0
]
*
rij
[
2
];
sigma_local_10
+=
vec
[
1
]
*
rij
[
0
];
sigma_local_11
+=
vec
[
1
]
*
rij
[
1
];
sigma_local_12
+=
vec
[
1
]
*
rij
[
2
];
sigma_local_20
+=
vec
[
2
]
*
rij
[
0
];
sigma_local_21
+=
vec
[
2
]
*
rij
[
1
];
sigma_local_22
+=
vec
[
2
]
*
rij
[
2
];
const
float
dc6_rest
=
0.5
f
*
r6_rc
*
fmaf
(
3.0
f
*
r2_rc
,
s8r42
*
damp8
,
s6
*
damp6
);
//const float dc6_rest = 0.5 * r6_rc * (s6 * damp6 + 3.0 * s8r42 * damp8 * r2_rc);
disp_local
-=
dc6_rest
*
c6
;
dc6i_local_i
+=
dc6_rest
*
dc6iji
;
dc6i_local_j
+=
dc6_rest
*
dc6ijj
;
}
atomicAdd
(
&
dc6i
[
iat
],
dc6i_local_i
);
atomicAdd
(
&
dc6i
[
jat
],
dc6i_local_j
);
}
else
{
const
int
atomtype_i
=
type
[
iat
];
const
int
atomtype_j
=
type
[
jat
];
const
float
r0
=
r0ab
[
atomtype_i
][
atomtype_j
];
const
float
r42
=
r2r4
[
atomtype_i
]
*
r2r4
[
atomtype_j
];
const
float
unit_a1
=
(
a1
*
r0
);
const
float
unit_a2
=
(
a2
*
r0
);
const
float
s8r42
=
s8
*
r42
;
for
(
int
k
=
maxtau
-
1
;
k
>=
0
;
k
-=
3
)
{
const
int
idx1
=
tau_idx_vdw
[
k
-
2
];
const
int
idx2
=
tau_idx_vdw
[
k
-
1
];
const
int
idx3
=
tau_idx_vdw
[
k
];
const
float
rij
[
3
]
=
{
x
[
jat
][
0
]
-
x
[
iat
][
0
]
+
tau_vdw
[
idx1
][
idx2
][
idx3
][
0
],
x
[
jat
][
1
]
-
x
[
iat
][
1
]
+
tau_vdw
[
idx1
][
idx2
][
idx3
][
1
],
x
[
jat
][
2
]
-
x
[
iat
][
2
]
+
tau_vdw
[
idx1
][
idx2
][
idx3
][
2
]
};
const
float
r2
=
lensq3
(
rij
);
if
(
r2
>
rthr
)
{
continue
;
}
const
float
r_rc
=
rsqrtf
(
r2
);
float
unit_rc_a1
=
unit_a1
*
r_rc
;
float
t6
=
unit_rc_a1
*
unit_rc_a1
;
// ^2
t6
*=
unit_rc_a1
;
// ^3
t6
*=
t6
;
// ^6
t6
*=
unit_rc_a1
;
// ^7
t6
*=
t6
;
// ^14
const
float
damp6
=
1.0
f
/
fmaf
(
t6
,
6.0
f
,
1.0
f
);
float
unit_rc_a2
=
unit_a2
*
r_rc
;
float
t8
=
unit_rc_a2
*
unit_rc_a2
;
// ^2
t8
*=
t8
;
// ^4
t8
*=
t8
;
// ^8
t8
*=
t8
;
// ^16
const
float
damp8
=
1.0
f
/
fmaf
(
t8
,
6.0
f
,
1.0
f
);
const
float
r2_rc
=
r_rc
*
r_rc
;
// 1.0 / r2
const
float
r6_rc
=
r2_rc
*
r2_rc
*
r2_rc
;
const
float
r8_rc
=
r6_rc
*
r2_rc
;
const
float
x1
=
6.0
f
*
c6
*
r8_rc
*
fmaf
(
r2_rc
,
s8r42
*
damp8
*
fmaf
(
3.0
f
*
alp8
*
t8
,
damp8
,
-
4.0
f
),
s6
*
damp6
*
fmaf
(
alp6
*
t6
,
damp6
,
-
1.0
f
));
//const float x1 = 6.0 * c6 * r8_rc * (s6 * damp6 * (14.0 * t6 * damp6 - 1.0) + s8r42 * r2_rc * damp8 * (48.0 * t8 * damp8 - 4.0));
//3.0 * alp6 = 48.0
const
float
vec
[
3
]
=
{
x1
*
rij
[
0
],
x1
*
rij
[
1
],
x1
*
rij
[
2
]
};
f_local
[
0
]
-=
vec
[
0
];
f_local
[
1
]
-=
vec
[
1
];
f_local
[
2
]
-=
vec
[
2
];
sigma_local_00
+=
vec
[
0
]
*
rij
[
0
];
sigma_local_01
+=
vec
[
0
]
*
rij
[
1
];
sigma_local_02
+=
vec
[
0
]
*
rij
[
2
];
sigma_local_10
+=
vec
[
1
]
*
rij
[
0
];
sigma_local_11
+=
vec
[
1
]
*
rij
[
1
];
sigma_local_12
+=
vec
[
1
]
*
rij
[
2
];
sigma_local_20
+=
vec
[
2
]
*
rij
[
0
];
sigma_local_21
+=
vec
[
2
]
*
rij
[
1
];
sigma_local_22
+=
vec
[
2
]
*
rij
[
2
];
const
float
dc6_rest
=
r6_rc
*
fmaf
(
3.0
f
*
r2_rc
,
s8r42
*
damp8
,
s6
*
damp6
);
//const float dc6_rest = r6_rc * (s6 * damp6 + 3.0 * s8r42 * damp8 * r2_rc);
disp_local
-=
dc6_rest
*
c6
;
dc6i_local_i
+=
dc6_rest
*
dc6iji
;
dc6i_local_j
+=
dc6_rest
*
dc6ijj
;
}
atomicAdd
(
&
dc6i
[
iat
],
dc6i_local_i
);
atomicAdd
(
&
dc6i
[
jat
],
dc6i_local_j
);
atomicAdd
(
&
f
[
iat
][
0
],
f_local
[
0
]);
atomicAdd
(
&
f
[
iat
][
1
],
f_local
[
1
]);
atomicAdd
(
&
f
[
iat
][
2
],
f_local
[
2
]);
atomicAdd
(
&
f
[
jat
][
0
],
-
f_local
[
0
]);
atomicAdd
(
&
f
[
jat
][
1
],
-
f_local
[
1
]);
atomicAdd
(
&
f
[
jat
][
2
],
-
f_local
[
2
]);
}
}
sigma_00
[
threadIdx
.
x
]
=
sigma_local_00
;
sigma_01
[
threadIdx
.
x
]
=
sigma_local_01
;
sigma_02
[
threadIdx
.
x
]
=
sigma_local_02
;
sigma_10
[
threadIdx
.
x
]
=
sigma_local_10
;
sigma_11
[
threadIdx
.
x
]
=
sigma_local_11
;
sigma_12
[
threadIdx
.
x
]
=
sigma_local_12
;
sigma_20
[
threadIdx
.
x
]
=
sigma_local_20
;
sigma_21
[
threadIdx
.
x
]
=
sigma_local_21
;
sigma_22
[
threadIdx
.
x
]
=
sigma_local_22
;
disp_shared
[
threadIdx
.
x
]
=
disp_local
;
__syncthreads
();
for
(
int
s
=
blockDim
.
x
/
2
;
s
>
0
;
s
>>=
1
)
{
if
(
threadIdx
.
x
<
s
)
{
sigma_00
[
threadIdx
.
x
]
+=
sigma_00
[
threadIdx
.
x
+
s
];
sigma_01
[
threadIdx
.
x
]
+=
sigma_01
[
threadIdx
.
x
+
s
];
sigma_02
[
threadIdx
.
x
]
+=
sigma_02
[
threadIdx
.
x
+
s
];
sigma_10
[
threadIdx
.
x
]
+=
sigma_10
[
threadIdx
.
x
+
s
];
sigma_11
[
threadIdx
.
x
]
+=
sigma_11
[
threadIdx
.
x
+
s
];
sigma_12
[
threadIdx
.
x
]
+=
sigma_12
[
threadIdx
.
x
+
s
];
sigma_20
[
threadIdx
.
x
]
+=
sigma_20
[
threadIdx
.
x
+
s
];
sigma_21
[
threadIdx
.
x
]
+=
sigma_21
[
threadIdx
.
x
+
s
];
sigma_22
[
threadIdx
.
x
]
+=
sigma_22
[
threadIdx
.
x
+
s
];
disp_shared
[
threadIdx
.
x
]
+=
disp_shared
[
threadIdx
.
x
+
s
];
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
atomicAdd
(
&
sigma
[
0
][
0
],
sigma_00
[
0
]);
atomicAdd
(
&
sigma
[
0
][
1
],
sigma_01
[
0
]);
atomicAdd
(
&
sigma
[
0
][
2
],
sigma_02
[
0
]);
atomicAdd
(
&
sigma
[
1
][
0
],
sigma_10
[
0
]);
atomicAdd
(
&
sigma
[
1
][
1
],
sigma_11
[
0
]);
atomicAdd
(
&
sigma
[
1
][
2
],
sigma_12
[
0
]);
atomicAdd
(
&
sigma
[
2
][
0
],
sigma_20
[
0
]);
atomicAdd
(
&
sigma
[
2
][
1
],
sigma_21
[
0
]);
atomicAdd
(
&
sigma
[
2
][
2
],
sigma_22
[
0
]);
atomicAdd
(
disp
,
disp_shared
[
0
]);
}
}
void
PairD3
::
get_forces_without_dC6_zero
()
{
int
n
=
atom
->
natoms
;
int
maxij
=
n
*
(
n
+
1
)
/
2
;
int
maxtau
=
tau_idx_vdw_total_size
;
*
disp
=
0.0
;
for
(
int
dim
=
0
;
dim
<
n
;
dim
++
)
{
dc6i
[
dim
]
=
0.0
;
}
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
f
[
i
][
j
]
=
0.0
;
}
}
for
(
int
ii
=
0
;
ii
<
3
;
ii
++
)
{
for
(
int
jj
=
0
;
jj
<
3
;
jj
++
)
{
sigma
[
ii
][
jj
]
=
0.0
;
}
}
//START_CUDA_TIMER();
int
threadsPerBlock
=
128
;
int
blocksPerGrid
=
(
maxij
+
threadsPerBlock
-
1
)
/
threadsPerBlock
;
kernel_get_forces_without_dC6_zero
<<<
blocksPerGrid
,
threadsPerBlock
>>>
(
maxij
,
maxtau
,
rthr
,
s6
,
s8
,
a1
,
a2
,
alp6
,
alp8
,
r2r4
,
r0ab
,
rep_vdw
,
tau_vdw
,
tau_idx_vdw
,
atomtype
,
x
,
c6_ij_tot
,
dc6_iji_tot
,
dc6_ijj_tot
,
dc6i
,
disp
,
f
,
sigma
);
cudaDeviceSynchronize
();
disp_total
=
*
disp
;
//STOP_CUDA_TIMER("get_forces_without");
}
// Two-body dispersion kernel, Becke-Johnson (BJ) damping variant.
// One thread handles one symmetric atom pair (linear index over i <= j,
// decoded by ij_at_linij) and loops over all cell translations (tau_vdw).
// Per pair it accumulates: dispersion energy (disp), per-atom dE/dCN
// prefactors (dc6i, consumed later by kernel_get_forces_with_dC6), forces
// (off-diagonal pairs only), and the 3x3 virial. Block-level shared-memory
// tree reduction for sigma/disp, then atomicAdd into globals.
// NOTE: shared buffers are sized 128, so blockDim.x must be <= 128 and a
// power of two for the reduction (host launches with 128).
__global__ void kernel_get_forces_without_dC6_bj(
    int maxij, int maxtau, float rthr, float s6, float s8, float a1, float a2,
    float* r2r4, int* rep_vdw, float**** tau_vdw, int* tau_idx_vdw, int* type,
    float** x, float* c6_ij_tot, float* dc6_iji_tot, float* dc6_ijj_tot,
    double* dc6i, double* disp, double** f, double** sigma) {
    int iter = blockIdx.x * blockDim.x + threadIdx.x;
    __shared__ float sigma_00[128];
    __shared__ float sigma_01[128];
    __shared__ float sigma_02[128];
    __shared__ float sigma_10[128];
    __shared__ float sigma_11[128];
    __shared__ float sigma_12[128];
    __shared__ float sigma_20[128];
    __shared__ float sigma_21[128];
    __shared__ float sigma_22[128];
    __shared__ float disp_shared[128];
    float sigma_local_00 = 0.0f;
    float sigma_local_01 = 0.0f;
    float sigma_local_02 = 0.0f;
    float sigma_local_10 = 0.0f;
    float sigma_local_11 = 0.0f;
    float sigma_local_12 = 0.0f;
    float sigma_local_20 = 0.0f;
    float sigma_local_21 = 0.0f;
    float sigma_local_22 = 0.0f;
    float disp_local = 0.0f;
    if (iter < maxij) {
        int iat, jat;
        ij_at_linij(iter, iat, jat);  // decode linear pair index -> (iat, jat)
        float f_local[3] = {0.0f};
        float dc6i_local_i = 0.0f;
        float dc6i_local_j = 0.0f;
        const float c6 = c6_ij_tot[iter];       // interpolated C6 for this pair
        const float dc6iji = dc6_iji_tot[iter]; // dC6/dCN(i)
        const float dc6ijj = dc6_ijj_tot[iter]; // dC6/dCN(j)
        if (iat == jat) {
            // Self-pair: interactions of an atom with its own periodic images.
            const float unit_r2r4 = r2r4[type[iat]];
            const float r42x3 = unit_r2r4 * unit_r2r4 * 3.0f;
            // BJ damping radius R0 = a1*sqrt(3*r42) + a2.
            const float R0 = fmaf(a1, sqrtf(r42x3), a2);
            const float R0_2 = R0 * R0;
            const float R0_6 = R0_2 * R0_2 * R0_2;
            const float R0_8 = R0_6 * R0_2;
            const float s8r42x3 = s8 * r42x3;
            // tau_idx_vdw stores index triples; walk them back-to-front.
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_vdw[k - 2];
                const int idx2 = tau_idx_vdw[k - 1];
                const int idx3 = tau_idx_vdw[k];
                // Skip the central (zero-translation) image: no self-interaction.
                if (idx1 == rep_vdw[0] && idx2 == rep_vdw[1] && idx3 == rep_vdw[2]) {
                    continue;
                }
                const float rij[3] = {
                    tau_vdw[idx1][idx2][idx3][0],
                    tau_vdw[idx1][idx2][idx3][1],
                    tau_vdw[idx1][idx2][idx3][2]
                };
                const float r2 = lensq3(rij);
                if (r2 > rthr) {  // squared vdW cutoff
                    continue;
                }
                const float r = sqrtf(r2);
                const float r5 = r2 * r2 * r;
                const float r7 = r5 * r2;
                // Reciprocals of the BJ-damped denominators (r^6 + R0^6 etc.).
                const float t6_rc = 1.0f / fmaf(r5, r, R0_6);
                const float t8_rc = 1.0f / fmaf(r7, r, R0_8);
                const float t6_sqrc = t6_rc * t6_rc;
                const float t8_sqrc = t8_rc * t8_rc;
                // Radial force prefactor; 0.5 self-pair factor folded into the
                // 3.0/4.0 coefficients (see commented reference form below).
                const float x1 = -c6 * fmaf(4.0f * s8r42x3 * r7, t8_sqrc, 3.0f * s6 * r5 * t6_sqrc);
                //const float x1 = 0.5 * -c6 * (6.0 * s6 * r5 * t6_sqrc + 8.0 * s8r42x3 * r7 * t8_sqrc;
                const float r_rc = 1.0f / r; // rsqrt(r2)
                const float vec[3] = {x1 * rij[0] * r_rc, x1 * rij[1] * r_rc, x1 * rij[2] * r_rc};
                // Self-pair contributes to the virial only (no net force).
                sigma_local_00 += vec[0] * rij[0];
                sigma_local_01 += vec[0] * rij[1];
                sigma_local_02 += vec[0] * rij[2];
                sigma_local_10 += vec[1] * rij[0];
                sigma_local_11 += vec[1] * rij[1];
                sigma_local_12 += vec[1] * rij[2];
                sigma_local_20 += vec[2] * rij[0];
                sigma_local_21 += vec[2] * rij[1];
                sigma_local_22 += vec[2] * rij[2];
                // Energy kernel value; 0.5 is the self-pair double-count factor.
                const float dc6_rest = 0.5f * fmaf(s8r42x3, t8_rc, s6 * t6_rc);
                //const float dc6_rest = 0.5 * s6 * t6_rc + s8r42x3 * t8_rc;
                disp_local -= dc6_rest * c6;
                dc6i_local_i += dc6_rest * dc6iji;
                dc6i_local_j += dc6_rest * dc6ijj;
            }
            atomicAdd(&dc6i[iat], dc6i_local_i);
            atomicAdd(&dc6i[jat], dc6i_local_j);
        } else {
            // Distinct atom pair: same math, no 0.5 factor, forces accumulated.
            const float r42x3 = r2r4[type[iat]] * r2r4[type[jat]] * 3.0f;
            const float R0 = fmaf(a1, sqrtf(r42x3), a2);
            const float R0_2 = R0 * R0;
            const float R0_6 = R0_2 * R0_2 * R0_2;
            const float R0_8 = R0_6 * R0_2;
            const float s8r42x3 = s8 * r42x3;
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_vdw[k - 2];
                const int idx2 = tau_idx_vdw[k - 1];
                const int idx3 = tau_idx_vdw[k];
                // Pair vector including the cell translation.
                const float rij[3] = {
                    x[jat][0] - x[iat][0] + tau_vdw[idx1][idx2][idx3][0],
                    x[jat][1] - x[iat][1] + tau_vdw[idx1][idx2][idx3][1],
                    x[jat][2] - x[iat][2] + tau_vdw[idx1][idx2][idx3][2]
                };
                const float r2 = lensq3(rij);
                if (r2 > rthr) {
                    continue;
                }
                const float r = sqrtf(r2);
                const float r5 = r2 * r2 * r;
                const float r7 = r5 * r2;
                const float t6_rc = 1.0f / fmaf(r5, r, R0_6);
                const float t8_rc = 1.0f / fmaf(r7, r, R0_8);
                const float t6_sqrc = t6_rc * t6_rc;
                const float t8_sqrc = t8_rc * t8_rc;
                const float x1 = -c6 * fmaf(8.0f * s8r42x3 * r7, t8_sqrc, 6.0f * s6 * r5 * t6_sqrc);
                //const float x1 = -c6 * (6.0 * s6 * r5 * t6_sqrc + 8.0 * s8r42x3 * r7 * t8_sqrc;
                const float r_rc = 1.0f / r; // rsqrt(r2)
                const float vec[3] = {x1 * rij[0] * r_rc, x1 * rij[1] * r_rc, x1 * rij[2] * r_rc};
                f_local[0] -= vec[0];
                f_local[1] -= vec[1];
                f_local[2] -= vec[2];
                sigma_local_00 += vec[0] * rij[0];
                sigma_local_01 += vec[0] * rij[1];
                sigma_local_02 += vec[0] * rij[2];
                sigma_local_10 += vec[1] * rij[0];
                sigma_local_11 += vec[1] * rij[1];
                sigma_local_12 += vec[1] * rij[2];
                sigma_local_20 += vec[2] * rij[0];
                sigma_local_21 += vec[2] * rij[1];
                sigma_local_22 += vec[2] * rij[2];
                const float dc6_rest = fmaf(s8r42x3, t8_rc, s6 * t6_rc);
                //const float dc6_rest = s6 * t6_rc + s8r42x3 * t8_rc;
                disp_local -= dc6_rest * c6;
                dc6i_local_i += dc6_rest * dc6iji;
                dc6i_local_j += dc6_rest * dc6ijj;
            }
            atomicAdd(&dc6i[iat], dc6i_local_i);
            atomicAdd(&dc6i[jat], dc6i_local_j);
            // Newton's third law: +f on iat, -f on jat.
            atomicAdd(&f[iat][0], f_local[0]);
            atomicAdd(&f[iat][1], f_local[1]);
            atomicAdd(&f[iat][2], f_local[2]);
            atomicAdd(&f[jat][0], -f_local[0]);
            atomicAdd(&f[jat][1], -f_local[1]);
            atomicAdd(&f[jat][2], -f_local[2]);
        }
    }
    // Block-wide tree reduction of virial and energy in shared memory.
    // Out-of-range threads contribute their zero-initialized locals.
    sigma_00[threadIdx.x] = sigma_local_00;
    sigma_01[threadIdx.x] = sigma_local_01;
    sigma_02[threadIdx.x] = sigma_local_02;
    sigma_10[threadIdx.x] = sigma_local_10;
    sigma_11[threadIdx.x] = sigma_local_11;
    sigma_12[threadIdx.x] = sigma_local_12;
    sigma_20[threadIdx.x] = sigma_local_20;
    sigma_21[threadIdx.x] = sigma_local_21;
    sigma_22[threadIdx.x] = sigma_local_22;
    disp_shared[threadIdx.x] = disp_local;
    __syncthreads();
    for (int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) {
            sigma_00[threadIdx.x] += sigma_00[threadIdx.x + s];
            sigma_01[threadIdx.x] += sigma_01[threadIdx.x + s];
            sigma_02[threadIdx.x] += sigma_02[threadIdx.x + s];
            sigma_10[threadIdx.x] += sigma_10[threadIdx.x + s];
            sigma_11[threadIdx.x] += sigma_11[threadIdx.x + s];
            sigma_12[threadIdx.x] += sigma_12[threadIdx.x + s];
            sigma_20[threadIdx.x] += sigma_20[threadIdx.x + s];
            sigma_21[threadIdx.x] += sigma_21[threadIdx.x + s];
            sigma_22[threadIdx.x] += sigma_22[threadIdx.x + s];
            disp_shared[threadIdx.x] += disp_shared[threadIdx.x + s];
        }
        __syncthreads();
    }
    // Thread 0 commits the block totals to global memory.
    if (threadIdx.x == 0) {
        atomicAdd(&sigma[0][0], sigma_00[0]);
        atomicAdd(&sigma[0][1], sigma_01[0]);
        atomicAdd(&sigma[0][2], sigma_02[0]);
        atomicAdd(&sigma[1][0], sigma_10[0]);
        atomicAdd(&sigma[1][1], sigma_11[0]);
        atomicAdd(&sigma[1][2], sigma_12[0]);
        atomicAdd(&sigma[2][0], sigma_20[0]);
        atomicAdd(&sigma[2][1], sigma_21[0]);
        atomicAdd(&sigma[2][2], sigma_22[0]);
        atomicAdd(disp, disp_shared[0]);
    }
}
void
PairD3
::
get_forces_without_dC6_bj
()
{
int
n
=
atom
->
natoms
;
int
maxij
=
n
*
(
n
+
1
)
/
2
;
int
maxtau
=
tau_idx_vdw_total_size
;
*
disp
=
0.0
;
for
(
int
dim
=
0
;
dim
<
n
;
dim
++
)
{
dc6i
[
dim
]
=
0.0
;
}
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
f
[
i
][
j
]
=
0.0
;
}
}
for
(
int
ii
=
0
;
ii
<
3
;
ii
++
)
{
for
(
int
jj
=
0
;
jj
<
3
;
jj
++
)
{
sigma
[
ii
][
jj
]
=
0.0
;
}
}
//START_CUDA_TIMER();
int
threadsPerBlock
=
128
;
int
blocksPerGrid
=
(
maxij
+
threadsPerBlock
-
1
)
/
threadsPerBlock
;
kernel_get_forces_without_dC6_bj
<<<
blocksPerGrid
,
threadsPerBlock
>>>
(
maxij
,
maxtau
,
rthr
,
s6
,
s8
,
a1
,
a2
,
r2r4
,
rep_vdw
,
tau_vdw
,
tau_idx_vdw
,
atomtype
,
x
,
c6_ij_tot
,
dc6_iji_tot
,
dc6_ijj_tot
,
dc6i
,
disp
,
f
,
sigma
);
cudaDeviceSynchronize
();
disp_total
=
*
disp
;
//STOP_CUDA_TIMER("get_forces_without");
}
// Modified zero-damping variant: intentionally empty stub in this CUDA port
// (still a valid dispatch target of get_forces_without_dC6).
void PairD3::get_forces_without_dC6_zerom() {}
// Modified BJ-damping variant: intentionally empty stub in this CUDA port
// (still a valid dispatch target of get_forces_without_dC6).
void PairD3::get_forces_without_dC6_bjm() {}
void
PairD3
::
get_forces_without_dC6
()
{
void
(
PairD3
::*
get_forces_without_dC6_damp
[
4
])()
=
{
&
PairD3
::
get_forces_without_dC6_zero
,
&
PairD3
::
get_forces_without_dC6_bj
,
&
PairD3
::
get_forces_without_dC6_zerom
,
&
PairD3
::
get_forces_without_dC6_bjm
};
(
this
->*
get_forces_without_dC6_damp
[
damping
])();
}
// Three-body-of-the-CN kind correction kernel: adds the force/virial terms
// that come from the coordination-number dependence of C6. One thread per
// symmetric atom pair; per translation it evaluates the derivative of the
// counting function dCN/dr (expterm/dcnn below) scaled by the per-atom
// dE/dCN prefactors (dc6i) produced by the two-body kernels. Forces are
// accumulated for off-diagonal pairs; the virial for all pairs. Same
// 128-wide shared-memory tree reduction as the other kernels.
__global__ void kernel_get_forces_with_dC6(
    int maxij, int maxtau, float cnthr, float K1, double* dc6i, float* rcov,
    int* rep_cn, float**** tau_cn, int* tau_idx_cn, int* type, float** x,
    double** f, double** sigma) {
    int iter = blockIdx.x * blockDim.x + threadIdx.x;
    __shared__ float sigma_00[128];
    __shared__ float sigma_01[128];
    __shared__ float sigma_02[128];
    __shared__ float sigma_10[128];
    __shared__ float sigma_11[128];
    __shared__ float sigma_12[128];
    __shared__ float sigma_20[128];
    __shared__ float sigma_21[128];
    __shared__ float sigma_22[128];
    float sigma_local_00 = 0.0f;
    float sigma_local_01 = 0.0f;
    float sigma_local_02 = 0.0f;
    float sigma_local_10 = 0.0f;
    float sigma_local_11 = 0.0f;
    float sigma_local_12 = 0.0f;
    float sigma_local_20 = 0.0f;
    float sigma_local_21 = 0.0f;
    float sigma_local_22 = 0.0f;
    float f_local[3] = {0.0f};
    if (iter < maxij) {
        int iat, jat;
        ij_at_linij(iter, iat, jat);  // decode linear pair index -> (iat, jat)
        if (iat == jat) {
            // Self-pair over periodic images: virial contribution only.
            const float rcov_sum = rcov[type[iat]] * 2.0f;
            const float dc6i_sum = dc6i[iat];
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_cn[k - 2];
                const int idx2 = tau_idx_cn[k - 1];
                const int idx3 = tau_idx_cn[k];
                // Skip the central (zero-translation) image.
                if (idx1 == rep_cn[0] && idx2 == rep_cn[1] && idx3 == rep_cn[2]) {
                    continue;
                }
                const float rij[3] = {
                    tau_cn[idx1][idx2][idx3][0],
                    tau_cn[idx1][idx2][idx3][1],
                    tau_cn[idx1][idx2][idx3][2],
                };
                const float r2 = lensq3(rij);
                if (r2 >= cnthr) {  // squared CN cutoff
                    continue;
                }
                const float r_rc = rsqrtf(r2);
                // Derivative of the CN counting function 1/(1+exp(-K1*(rcov/r-1)))
                // with respect to r, expressed via its exp term.
                const float expterm = expf(-K1 * (rcov_sum * r_rc - 1.0f));
                const float unit_rc = 1.0f / (r2 * (expterm + 1.0f) * (expterm + 1.0f));
                const float dcnn = -K1 * rcov_sum * expterm * unit_rc;
                const float x1 = dcnn * dc6i_sum;
                const float vec[3] = {x1 * rij[0] * r_rc, x1 * rij[1] * r_rc, x1 * rij[2] * r_rc};
                sigma_local_00 += vec[0] * rij[0];
                sigma_local_01 += vec[0] * rij[1];
                sigma_local_02 += vec[0] * rij[2];
                sigma_local_10 += vec[1] * rij[0];
                sigma_local_11 += vec[1] * rij[1];
                sigma_local_12 += vec[1] * rij[2];
                sigma_local_20 += vec[2] * rij[0];
                sigma_local_21 += vec[2] * rij[1];
                sigma_local_22 += vec[2] * rij[2];
            }
        } else {
            // Distinct pair: both atoms' dE/dCN prefactors contribute.
            const float rcov_sum = rcov[type[iat]] + rcov[type[jat]];
            const float dc6i_sum = dc6i[iat] + dc6i[jat];
            for (int k = maxtau - 1; k >= 0; k -= 3) {
                const int idx1 = tau_idx_cn[k - 2];
                const int idx2 = tau_idx_cn[k - 1];
                const int idx3 = tau_idx_cn[k];
                // Pair vector including the cell translation.
                const float rij[3] = {
                    x[jat][0] - x[iat][0] + tau_cn[idx1][idx2][idx3][0],
                    x[jat][1] - x[iat][1] + tau_cn[idx1][idx2][idx3][1],
                    x[jat][2] - x[iat][2] + tau_cn[idx1][idx2][idx3][2]
                };
                const float r2 = lensq3(rij);
                if (r2 >= cnthr) {
                    continue;
                }
                const float r_rc = rsqrtf(r2);
                const float expterm = expf(-K1 * (rcov_sum * r_rc - 1.0f));
                const float unit_rc = 1.0f / (r2 * (expterm + 1.0f) * (expterm + 1.0f));
                const float dcnn = -K1 * rcov_sum * expterm * unit_rc;
                const float x1 = dcnn * dc6i_sum;
                const float vec[3] = {x1 * rij[0] * r_rc, x1 * rij[1] * r_rc, x1 * rij[2] * r_rc};
                f_local[0] -= vec[0];
                f_local[1] -= vec[1];
                f_local[2] -= vec[2];
                sigma_local_00 += vec[0] * rij[0];
                sigma_local_01 += vec[0] * rij[1];
                sigma_local_02 += vec[0] * rij[2];
                sigma_local_10 += vec[1] * rij[0];
                sigma_local_11 += vec[1] * rij[1];
                sigma_local_12 += vec[1] * rij[2];
                sigma_local_20 += vec[2] * rij[0];
                sigma_local_21 += vec[2] * rij[1];
                sigma_local_22 += vec[2] * rij[2];
            }
            // Newton's third law: +f on iat, -f on jat.
            atomicAdd(&f[iat][0], f_local[0]);
            atomicAdd(&f[iat][1], f_local[1]);
            atomicAdd(&f[iat][2], f_local[2]);
            atomicAdd(&f[jat][0], -f_local[0]);
            atomicAdd(&f[jat][1], -f_local[1]);
            atomicAdd(&f[jat][2], -f_local[2]);
        }
    }
    // Block-wide tree reduction of the virial in shared memory.
    sigma_00[threadIdx.x] = sigma_local_00;
    sigma_01[threadIdx.x] = sigma_local_01;
    sigma_02[threadIdx.x] = sigma_local_02;
    sigma_10[threadIdx.x] = sigma_local_10;
    sigma_11[threadIdx.x] = sigma_local_11;
    sigma_12[threadIdx.x] = sigma_local_12;
    sigma_20[threadIdx.x] = sigma_local_20;
    sigma_21[threadIdx.x] = sigma_local_21;
    sigma_22[threadIdx.x] = sigma_local_22;
    __syncthreads();
    for (int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) {
            sigma_00[threadIdx.x] += sigma_00[threadIdx.x + s];
            sigma_01[threadIdx.x] += sigma_01[threadIdx.x + s];
            sigma_02[threadIdx.x] += sigma_02[threadIdx.x + s];
            sigma_10[threadIdx.x] += sigma_10[threadIdx.x + s];
            sigma_11[threadIdx.x] += sigma_11[threadIdx.x + s];
            sigma_12[threadIdx.x] += sigma_12[threadIdx.x + s];
            sigma_20[threadIdx.x] += sigma_20[threadIdx.x + s];
            sigma_21[threadIdx.x] += sigma_21[threadIdx.x + s];
            sigma_22[threadIdx.x] += sigma_22[threadIdx.x + s];
        }
        __syncthreads();
    }
    // Thread 0 commits the block totals to the global virial.
    if (threadIdx.x == 0) {
        atomicAdd(&sigma[0][0], sigma_00[0]);
        atomicAdd(&sigma[0][1], sigma_01[0]);
        atomicAdd(&sigma[0][2], sigma_02[0]);
        atomicAdd(&sigma[1][0], sigma_10[0]);
        atomicAdd(&sigma[1][1], sigma_11[0]);
        atomicAdd(&sigma[1][2], sigma_12[0]);
        atomicAdd(&sigma[2][0], sigma_20[0]);
        atomicAdd(&sigma[2][1], sigma_21[0]);
        atomicAdd(&sigma[2][2], sigma_22[0]);
    }
}
void
PairD3
::
get_forces_with_dC6
()
{
int
n
=
atom
->
natoms
;
int
maxij
=
n
*
(
n
+
1
)
/
2
;
int
maxtau
=
tau_idx_cn_total_size
;
//START_CUDA_TIMER();
int
threadsPerBlock
=
128
;
int
blocksPerGrid
=
(
maxij
+
threadsPerBlock
-
1
)
/
threadsPerBlock
;
kernel_get_forces_with_dC6
<<<
blocksPerGrid
,
threadsPerBlock
>>>
(
maxij
,
maxtau
,
cnthr
,
K1
,
dc6i
,
rcov
,
rep_cn
,
tau_cn
,
tau_idx_cn
,
atomtype
,
x
,
f
,
sigma
);
cudaDeviceSynchronize
();
//STOP_CUDA_TIMER("get_forces_with");
}
/* ----------------------------------------------------------------------
   Update energy, force, and stress
------------------------------------------------------------------------- */
/* Converts the accumulated atomic-unit results into the eV / eV-per-Angstrom
 * outputs exposed through result_E / result_F / result_S.
 * Improvement over the original: the loop-invariant force conversion factor
 * AU_TO_EV / AU_TO_ANG is hoisted out of the per-component loop instead of
 * being re-divided n*3 times. */
void PairD3::update() {
    const int n = atom->natoms;
    // unit: eV <- eng_vdwl (dispersion energy in Hartree, per AU_TO_EV)
    result_E = disp_total * AU_TO_EV;
    // unit: eV/Angstrom, flattened row-major for the wrapper <- f
    const double force_conv = AU_TO_EV / AU_TO_ANG;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < 3; j++) {
            result_F[i * 3 + j] = f[i][j] * force_conv;
        }
    }
    // unit: eV, virial in Voigt order xx, yy, zz, xy, xz, yz <- sigma
    result_S[0] = sigma[0][0] * AU_TO_EV;
    result_S[1] = sigma[1][1] * AU_TO_EV;
    result_S[2] = sigma[2][2] * AU_TO_EV;
    result_S[3] = sigma[0][1] * AU_TO_EV;
    result_S[4] = sigma[0][2] * AU_TO_EV;
    result_S[5] = sigma[1][2] * AU_TO_EV;
}
/* ----------------------------------------------------------------------
   Compute : energy, force, and stress (Required)
------------------------------------------------------------------------- */
// Full D3 evaluation pipeline for the current atom/domain state:
// (re)allocate if the atom count changed, refresh lattice/tau tables,
// stage positions and types, then run the CN -> C6 -> force kernels and
// convert the results to output units. CHECK_CUDA_ERROR reports any
// asynchronous kernel failure at the end.
void PairD3::compute() {
    // Reallocate device/host arrays only when the atom count changed.
    if (atom->natoms != n_save) {
        reallocate_arrays();
    }
    set_lattice_vectors();
    precalculate_tau_array();
    load_atom_info();
    // Stage per-atom types on the device for the kernels.
    cudaMemcpy(atomtype, atom->type, atom->natoms * sizeof(int), cudaMemcpyHostToDevice);
    get_coordination_number();
    get_dC6_dCNij();
    get_forces_without_dC6();   // two-body energy/forces + dE/dCN prefactors
    get_forces_with_dC6();      // CN-derivative force/virial corrections
    update();                   // atomic units -> eV, eV/Angstrom
    CHECK_CUDA_ERROR();
}
int
main
()
{
}
extern "C" {
// C wrapper for ctypes or cffi.
// Lifecycle: pair_init -> pair_set_atom / pair_set_domain ->
// pair_run_settings -> pair_run_coeff -> pair_run_compute ->
// pair_get_* -> pair_fin.

// Allocate a fresh calculator; caller owns it until pair_fin().
PairD3* pair_init() {
    return new PairD3();
}

// Attach atom data. `type` and `x_flat` stay owned by the caller (Python
// buffers); only row pointers into x_flat are built here.
// NOTE(review): the row-pointer array `x` is never freed (Atom's destructor
// is inert), and calling this twice leaks the previous Atom/result_F -
// confirm this is acceptable for the wrapper's single-setup usage.
void pair_set_atom(PairD3* pair, int natoms, int ntypes, int* type, double* x_flat) {
    double** x = new double*[natoms];
    for (int i = 0; i < natoms; i++) {
        x[i] = x_flat + i * 3;
    }
    pair->atom = new Atom(natoms, ntypes, type, x);
    pair->result_F = new double[natoms * 3];
}

// Attach the periodic cell (triclinic box: lo/hi bounds plus tilt factors).
void pair_set_domain(PairD3* pair, int xperiodic, int yperiodic, int zperiodic,
                     double* boxlo, double* boxhi, double xy, double xz, double yz) {
    pair->domain = new Domain(xperiodic, yperiodic, zperiodic, boxlo, boxhi, xy, xz, yz);
}

// Forward cutoffs and damping/functional names (converted to std::string).
void pair_run_settings(PairD3* pair, double rthr, double cnthr,
                       const char* damp_name, const char* func_name) {
    pair->settings(rthr, cnthr, damp_name, func_name);
}

// Load per-type element parameters from atomic numbers.
void pair_run_coeff(PairD3* pair, int* atomic_numbers) {
    pair->coeff(atomic_numbers);
}

// Run the full D3 evaluation for the current atoms/domain.
void pair_run_compute(PairD3* pair) {
    pair->compute();
}

// Dispersion energy in eV (valid after pair_run_compute).
double pair_get_energy(PairD3* pair) {
    return pair->result_E;
}

// Flattened forces (natoms*3, eV/Angstrom); pointer owned by the pair.
double* pair_get_force(PairD3* pair) {
    return pair->result_F;
}

// Virial in Voigt order xx, yy, zz, xy, xz, yz (eV); pointer owned by the pair.
double* pair_get_stress(PairD3* pair) {
    return pair->result_S;
}

// Destroy the calculator; ~Pair releases result_F/atom/domain/error.
void pair_fin(PairD3* pair) {
    //delete[] result_F;
    delete pair;
    //delete domain;
    //delete atom;
}
}
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_for_ase.h
0 → 100644
View file @
ca86f720
/*
This code is a skeleton of the LAMMPS pair_style d3 accelerated by CUDA.
All dependencies on LAMMPS have been removed.
The input and output variables are named based on the LAMMPS variables.
*/
#ifndef LMP_PAIR_D3
#define LMP_PAIR_D3
#include <cmath>
#include <string>
#include <vector> // for 'element_table'
#include <algorithm> // for 'atomic_number'
#include <map>
#include <unordered_map>
#include <cuda_runtime.h>
#include "pair_d3_pars.h"
// Removed dependencies to STL
// #include <stdlib.h> -> no more C style functions
// #define _USE_MATH_DEFINES -> no predefined constants
// Removed dependencies to LAMMPS
// #include "pair.h" -> removed, for construncting pair class.
// #include "utils.h" -> removed, some float converters.
// #include "atom.h" -> Atom class to replace it.
// #include "domain.h" -> Domain class to replace it.
// #include "error.h" -> Error class to replace it.
// #include "comm.h" -> already no dependency
// #include "neighbor.h" -> already no dependency
// #include "neigh_list.h" -> already no dependency
// #include "memory.h" -> already no dependency for CUDA version
// #include "math_extra.h" -> removed, dot and len3 operations.
// #include "potential_file_reader.h" -> removed, PotentialFileReader
/* --------- Fake class to replace 'LAMMPS' class --------- */
class
Atom
{
public:
int
natoms
;
int
ntypes
;
int
*
type
;
double
**
x
;
Atom
(
int
natoms
,
int
ntypes
,
int
*
type
,
double
**
x
)
:
natoms
(
natoms
),
ntypes
(
ntypes
),
type
(
type
),
x
(
x
)
{}
~
Atom
()
{
//delete[] type;
//for (int i = 0; i < natoms; i++) {
// delete[] x[i];
//}
//delete[] x;
}
};
// Minimal stand-in for LAMMPS's Domain class: periodicity flags plus a
// triclinic box (lo/hi bounds copied by value, tilt factors xy/xz/yz).
class Domain {
public:
    int xperiodic, yperiodic, zperiodic;  // 1 if periodic along that axis
    double boxlo[3], boxhi[3];            // box bounds (copied, owned)
    double xy, xz, yz;                    // triclinic tilt factors
    Domain(int xperiodic, int yperiodic, int zperiodic,
           double* boxlo, double* boxhi, double xy, double xz, double yz)
        : xperiodic(xperiodic), yperiodic(yperiodic), zperiodic(zperiodic),
          xy(xy), xz(xz), yz(yz) {
        // Copy the three bounds so the caller's arrays need not outlive us.
        for (int axis = 0; axis < 3; ++axis) {
            this->boxlo[axis] = boxlo[axis];
        }
        for (int axis = 0; axis < 3; ++axis) {
            this->boxhi[axis] = boxhi[axis];
        }
    }
    ~Domain() {}
};
// Minimal stand-in for LAMMPS's Error class. Unlike LAMMPS, all() only
// prints and returns - it does NOT abort the program.
class Error {
public:
    // Print the message to stdout. The `flerr` file/line code is accepted
    // for API compatibility with LAMMPS but ignored here.
    void all(int flerr, const char* message) {
        printf("Error: %s\n", message);
    }
    Error() {}
    ~Error() {}
};
/* ------------------------------------------------------- */
/* --------- Declaration of fake classes and variables --------- */
#define FLERR 1
//Error* error = nullptr;
//
//int allocated;
//int** setflag;
//double** cutsq;
//Atom* atom = nullptr;
//Domain* domain = nullptr;
//
//double result_E;
//double* result_F = nullptr;
//double result_S[6];
class
Pair
{
public:
int
allocated
;
Atom
*
atom
;
Domain
*
domain
;
double
result_E
;
double
*
result_F
;
double
result_S
[
6
];
Error
*
error
;
Pair
()
:
allocated
(
0
),
atom
(
nullptr
),
domain
(
nullptr
),
result_E
(
0.0
),
result_F
(
nullptr
),
error
(
nullptr
)
{
std
::
fill
(
std
::
begin
(
result_S
),
std
::
end
(
result_S
),
0.0
);
}
virtual
~
Pair
()
{
if
(
result_F
)
{
delete
[]
result_F
;
result_F
=
nullptr
;
}
if
(
atom
)
{
delete
atom
;
atom
=
nullptr
;
}
if
(
domain
)
{
delete
domain
;
domain
=
nullptr
;
}
if
(
error
)
{
delete
error
;
error
=
nullptr
;
}
}
};
/* -------------------------------------------------------------- */
// CUDA-accelerated Grimme DFT-D3 dispersion calculator, built on the fake
// Pair base so the LAMMPS-derived code compiles without LAMMPS itself.
class PairD3 : public Pair {
public:
    PairD3();
    ~PairD3();
    // rthr/cnthr are squared cutoff radii; damp_name/func_name select the
    // damping variant and the functional parameter set (see setfuncpar*).
    void settings(double rthr, double cnthr, std::string damp_name, std::string func_name);
    void coeff(int* atomic_number);
    void compute();

protected:
    virtual void allocate();

    /* ------- Read parameters ------- */
    int find_atomic_number(std::string&);
    int is_int_in_array(int*, int, int);
    void read_r0ab(int*, int);
    // void read_r0ab(class LAMMPS*, char*, int*, int);
    void get_limit_in_pars_array(int&, int&, int&, int&);
    void read_c6ab(int*, int);
    // void read_c6ab(class LAMMPS*, char*, int*, int);
    void setfuncpar_zero();
    void setfuncpar_bj();
    void setfuncpar_zerom();
    void setfuncpar_bjm();
    void setfuncpar();
    /* ------- Read parameters ------- */

    /* ------- Lattice information ------- */
    void set_lattice_repetition_criteria(float, int*);
    void set_lattice_vectors();
    /* ------- Lattice information ------- */

    /* ------- Initialize & Precalculate ------- */
    void load_atom_info();
    void precalculate_tau_array();
    /* ------- Initialize & Precalculate ------- */

    /* ------- Reallocate (when number of atoms changed) ------- */
    void reallocate_arrays();
    void reallocate_arrays_np1();
    /* ------- Reallocate (when number of atoms changed) ------- */

    /* ------- Coordination number ------- */
    void get_coordination_number();
    void get_dC6_dCNij();
    /* ------- Coordination number ------- */

    /* ------- Main workers ------- */
    void get_forces_without_dC6_zero();
    void get_forces_without_dC6_bj();
    void get_forces_without_dC6_zerom();
    void get_forces_without_dC6_bjm();
    void get_forces_without_dC6();
    void get_forces_with_dC6();
    void update();
    /* ------- Main workers ------- */

    /*--------- Constants ---------*/
    static constexpr int MAX_ELEM = 94;              // maximum of the element number
    static constexpr int MAXC = 5;                   // maximum coordination number references per element
    static constexpr double AU_TO_ANG = 0.52917726;  // conversion factors (atomic unit --> angstrom)
    static constexpr double AU_TO_EV = 27.21138505;  // conversion factors (atomic unit --> eV)
    static constexpr float K1 = 16.0;                // global ad hoc parameters
    static constexpr float K3 = -4.0;                // global ad hoc parameters
    /*--------- Constants ---------*/

    /*--------- Parameters to read ---------*/
    int damping;               // damping variant: 0 zero, 1 bj, 2 zerom, 3 bjm (dispatch in get_forces_without_dC6)
    std::string functional;    // functional name; presumably selects the D3 parameter set in setfuncpar() - confirm
    float* r2r4 = nullptr;     // scale r4/r2 values of the atoms by sqrt(Z)
    float* rcov = nullptr;     // covalent radii
    int* mxc = nullptr;        // How large the grid for c6 interpolation
    float** r0ab = nullptr;    // cut-off radii for all element pairs
    float***** c6ab = nullptr; // C6 for all element pairs
    float rthr;                // R^2 distance to cutoff for C calculation
    float cnthr;               // R^2 distance to cutoff for CN_calculation
    float s6, s8, s18, rs6, rs8, rs18, alp, alp6, alp8, a1, a2; // parameters for D3
    /*--------- Parameters to read ---------*/

    /*--------- Lattice related values ---------*/
    double* lat_v_1 = nullptr; // lattice coordination vector
    double* lat_v_2 = nullptr; // lattice coordination vector
    double* lat_v_3 = nullptr; // lattice coordination vector
    int* rep_vdw = nullptr;    // repetition of cell for calculating D3
    int* rep_cn = nullptr;     // repetition of cell for calculating
    double** sigma = nullptr;  // virial pressure on cell
    /*--------- Lattice related values ---------*/

    /*--------- Per-atom values/arrays ---------*/
    double* cn = nullptr;      // Coordination numbers
    float** x = nullptr;       // Positions
    double** f = nullptr;      // Forces
    double* dc6i = nullptr;    // dC6i(iat) saves dE_dsp/dCN(iat)
    /*--------- Per-atom values/arrays ---------*/

    /*--------- Per-pair values/arrays ---------*/
    float* c6_ij_tot = nullptr;    // interpolated C6 per symmetric pair
    float* dc6_iji_tot = nullptr;  // dC6/dCN(i) per symmetric pair
    float* dc6_ijj_tot = nullptr;  // dC6/dCN(j) per symmetric pair
    /*--------- Per-pair values/arrays ---------*/

    /*---------- Global values ---------*/
    int n_save;        // to check whether the number of atoms has changed
    int np1_save;      // to check whether the number of types has changed
    float disp_total;  // Dispersion energy
    /*---------- Global values ---------*/

    /*--------- For loop over tau (translation of cell) ---------*/
    float**** tau_vdw = nullptr;  // translation vectors within the vdW cutoff
    float**** tau_cn = nullptr;   // translation vectors within the CN cutoff
    int* tau_idx_vdw = nullptr;   // flattened (idx1, idx2, idx3) triples
    int* tau_idx_cn = nullptr;    // flattened (idx1, idx2, idx3) triples
    int tau_idx_vdw_total_size;   // length of tau_idx_vdw (multiple of 3)
    int tau_idx_cn_total_size;    // length of tau_idx_cn (multiple of 3)
    /*--------- For loop over tau (translation of cell) ---------*/

    /*--------- For cuda memory transfer (pointerized) ---------*/
    int* atomtype;   // device copy of atom->type (staged in compute())
    double* disp;    // single-value dispersion-energy accumulator
    /*--------- For cuda memory transfer (pointerized) ---------*/
};
#endif // LMP_PAIR_D3
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_d3_pars.h
0 → 100644
View file @
ca86f720
This source diff could not be displayed because it is too large. You can
view the blob
instead.
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.cpp
0 → 100644
View file @
ca86f720
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://lammps.sandia.gov/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yutack Park (SNU)
------------------------------------------------------------------------- */
#include <ATen/ops/from_blob.h>
#include <c10/core/Scalar.h>
#include <c10/core/TensorOptions.h>
#include <string>
#include <torch/script.h>
#include <torch/torch.h>
#include "atom.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "pair_e3gnn.h"
using
namespace
LAMMPS_NS
;
#define INTEGER_TYPE torch::TensorOptions().dtype(torch::kInt64)
#define FLOAT_TYPE torch::TensorOptions().dtype(torch::kFloat)
// Constructor: enable verbose output if SEVENN_PRINT_INFO is set, pick the
// torch device (CUDA when available, else CPU), and log the choice.
PairE3GNN::PairE3GNN(LAMMPS* lmp) : Pair(lmp) {
  if (std::getenv("SEVENN_PRINT_INFO") != nullptr) {
    print_info = true;
  }

  const bool cuda_available = torch::cuda::is_available();
  device = cuda_available ? torch::kCUDA : torch::kCPU;
  const std::string device_name = cuda_available ? "CUDA" : "CPU";

  if (lmp->logfile) {
    fprintf(lmp->logfile, "PairE3GNN using device : %s\n", device_name.c_str());
  }
}
// Destructor: release the LAMMPS-memory-managed per-type tables; they only
// exist once allocate() has run (allocated flag set).
PairE3GNN::~PairE3GNN() {
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(cutsq);
    memory->destroy(map);
    memory->destroy(elements);
  }
}
// Build the atomic graph for the local domain, run the TorchScript model
// once, and accumulate energy / forces / virial (and per-atom energy when
// requested) into the LAMMPS arrays.
//
// This compute function is ispired/modified from stress branch of pair-nequip
// https://github.com/mir-group/pair_nequip
void PairE3GNN::compute(int eflag, int vflag) {
  if (eflag || vflag)
    ev_setup(eflag, vflag);
  else
    evflag = vflag_fdotr = 0;
  if (vflag_atom) {
    error->all(FLERR, "atomic stress is not supported\n");
  }

  int nlocal = list->inum; // same as nlocal
  int *ilist = list->ilist;
  tagint *tag = atom->tag;

  // Map atom tag -> 1-based compact index so model inputs are densely packed.
  std::unordered_map<int, int> tag_map;
  if (atom->tag_consecutive() == 0) {
    // Tags are not consecutive: key the map by the real atom tag.
    for (int ii = 0; ii < nlocal; ii++) {
      const int i = ilist[ii];
      int itag = tag[i];
      tag_map[itag] = ii + 1;
      // printf("MODIFY setting %i => %i \n",itag, tag_map[itag] );
    }
  } else {
    // Ordered which mappling required
    // NOTE(review): this branch keys the map by ilist[ii]+1 (local index + 1)
    // while lookups below use tag[i]; this presumably relies on tags matching
    // local indices in the consecutive case — confirm.
    for (int ii = 0; ii < nlocal; ii++) {
      const int itag = ilist[ii] + 1;
      tag_map[itag] = ii + 1;
      // printf("normal setting %i => %i \n",itag, tag_map[itag] );
    }
  }

  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
  long num_atoms[1] = {nlocal};
  // NOTE(review): variable-length arrays (here and below) are a compiler
  // extension, not standard C++, and live on the stack — large systems could
  // overflow it; consider std::vector.
  int tag2i[nlocal]; // compact index (0-based) -> LAMMPS local index

  int *numneigh = list->numneigh;      // j loop cond
  int **firstneigh = list->firstneigh; // j list

  // Upper bound for the number of edges: full neighbor-list size on the
  // first call, then the adaptively grown cached bound.
  int bound;
  if (this->nedges_bound == -1) {
    bound = std::accumulate(numneigh, numneigh + nlocal, 0);
  } else {
    bound = this->nedges_bound;
  }
  const int nedges_upper_bound = bound;

  // Triclinic cell in LAMMPS (lower-triangular) convention.
  float cell[3][3];
  cell[0][0] = domain->boxhi[0] - domain->boxlo[0];
  cell[0][1] = 0.0;
  cell[0][2] = 0.0;
  cell[1][0] = domain->xy;
  cell[1][1] = domain->boxhi[1] - domain->boxlo[1];
  cell[1][2] = 0.0;
  cell[2][0] = domain->xz;
  cell[2][1] = domain->yz;
  cell[2][2] = domain->boxhi[2] - domain->boxlo[2];

  torch::Tensor inp_cell = torch::from_blob(cell, {3, 3}, FLOAT_TYPE);
  torch::Tensor inp_num_atoms = torch::from_blob(num_atoms, {1}, INTEGER_TYPE);
  torch::Tensor inp_node_type = torch::zeros({nlocal}, INTEGER_TYPE);
  torch::Tensor inp_pos = torch::zeros({nlocal, 3});
  torch::Tensor inp_cell_volume =
      torch::dot(inp_cell[0], torch::cross(inp_cell[1], inp_cell[2], 0));

  float pbc_shift_tmp[nedges_upper_bound][3];

  auto node_type = inp_node_type.accessor<long, 1>();
  auto pos = inp_pos.accessor<float, 2>();

  long edge_idx_src[nedges_upper_bound];
  long edge_idx_dst[nedges_upper_bound];
  int nedges = 0;

  // First pass: fill node features (type index) and positions, and record
  // the compact-index -> local-index mapping for force write-back.
  for (int ii = 0; ii < nlocal; ii++) {
    const int i = ilist[ii];
    int itag = tag_map[tag[i]];
    tag2i[itag - 1] = i;
    const int itype = type[i];
    node_type[itag - 1] = map[itype];
    pos[itag - 1][0] = x[i][0];
    pos[itag - 1][1] = x[i][1];
    pos[itag - 1][2] = x[i][2];
  }

  // Second pass: build edges inside the cutoff and the PBC shift of each
  // neighbor image relative to its stored (unwrapped) position.
  for (int ii = 0; ii < nlocal; ii++) {
    const int i = ilist[ii];
    int itag = tag_map[tag[i]];
    const int *jlist = firstneigh[i];
    const int jnum = numneigh[i];
    for (int jj = 0; jj < jnum; jj++) {
      int j = jlist[jj]; // atom over pbc is different atom
      int jtag = tag_map[tag[j]]; // atom over pbs is same atom (it starts from 1)
      j &= NEIGHMASK;
      const int jtype = type[j];
      const double delij[3] = {x[j][0] - x[i][0], x[j][1] - x[i][1],
                               x[j][2] - x[i][2]};
      // Squared distance; compared against the squared cutoff.
      const double Rij = delij[0] * delij[0] + delij[1] * delij[1] +
                         delij[2] * delij[2];
      if (Rij < cutoff_square) {
        edge_idx_src[nedges] = itag - 1;
        edge_idx_dst[nedges] = jtag - 1;
        // Cartesian offset between the neighbor image and its stored
        // position; converted to fractional shift below.
        pbc_shift_tmp[nedges][0] = x[j][0] - pos[jtag - 1][0];
        pbc_shift_tmp[nedges][1] = x[j][1] - pos[jtag - 1][1];
        pbc_shift_tmp[nedges][2] = x[j][2] - pos[jtag - 1][2];
        nedges++;
      }
    } // j loop end
  } // i loop end

  auto edge_idx_src_tensor = torch::from_blob(edge_idx_src, {nedges}, INTEGER_TYPE);
  auto edge_idx_dst_tensor = torch::from_blob(edge_idx_dst, {nedges}, INTEGER_TYPE);
  auto inp_edge_index = torch::stack({edge_idx_src_tensor, edge_idx_dst_tensor});

  // r' = r + {shift_tensor(integer vector of len 3)} @ cell_tensor
  // shift_tensor = (cell_tensor)^-1^T @ (r' - r)
  torch::Tensor cell_inv_tensor =
      inp_cell.inverse().transpose(0, 1).unsqueeze(0).to(device);
  torch::Tensor pbc_shift_tmp_tensor =
      torch::from_blob(pbc_shift_tmp, {nedges, 3}, FLOAT_TYPE)
          .view({nedges, 3, 1})
          .to(device);
  torch::Tensor inp_cell_shift =
      torch::bmm(cell_inv_tensor.expand({nedges, 3, 3}), pbc_shift_tmp_tensor)
          .view({nedges, 3});

  // Positions require grad so the model can back-propagate forces.
  inp_pos.set_requires_grad(true);

  c10::Dict<std::string, torch::Tensor> input_dict;
  input_dict.insert("x", inp_node_type.to(device));
  input_dict.insert("pos", inp_pos.to(device));
  input_dict.insert("edge_index", inp_edge_index.to(device));
  input_dict.insert("num_atoms", inp_num_atoms.to(device));
  input_dict.insert("cell_lattice_vectors", inp_cell.to(device));
  input_dict.insert("cell_volume", inp_cell_volume.to(device));
  input_dict.insert("pbc_shift", inp_cell_shift); // already on device

  std::vector<torch::IValue> input(1, input_dict);
  auto output = model.forward(input).toGenericDict();

  torch::Tensor total_energy_tensor =
      output.at("inferred_total_energy").toTensor().cpu();
  torch::Tensor force_tensor = output.at("inferred_force").toTensor().cpu();
  auto forces = force_tensor.accessor<float, 2>();
  eng_vdwl += total_energy_tensor.item<float>();

  // Scatter model forces back to LAMMPS per-atom force array.
  for (int itag = 0; itag < nlocal; itag++) {
    int i = tag2i[itag];
    f[i][0] += forces[itag][0];
    f[i][1] += forces[itag][1];
    f[i][2] += forces[itag][2];
  }

  if (vflag) {
    // more accurately, it is virial part of stress
    torch::Tensor stress_tensor = output.at("inferred_stress").toTensor().cpu();
    auto virial_stress_tensor = stress_tensor * inp_cell_volume;
    // xy yz zx order in vasp (voigt is xx yy zz yz xz xy)
    auto virial_stress = virial_stress_tensor.accessor<float, 1>();
    virial[0] += virial_stress[0];
    virial[1] += virial_stress[1];
    virial[2] += virial_stress[2];
    virial[3] += virial_stress[3];
    virial[4] += virial_stress[5]; // swap 4/5 to match LAMMPS virial order
    virial[5] += virial_stress[4];
  }

  if (eflag_atom) {
    torch::Tensor atomic_energy_tensor =
        output.at("atomic_energy").toTensor().cpu().squeeze();
    auto atomic_energy = atomic_energy_tensor.accessor<float, 1>();
    for (int itag = 0; itag < nlocal; itag++) {
      int i = tag2i[itag];
      eatom[i] += atomic_energy[itag];
    }
  }

  // Adapt the cached edge-count upper bound (with 20% headroom) so later
  // steps can size the edge arrays without summing the neighbor list.
  // if it was the first MD step
  if (this->nedges_bound == -1) {
    this->nedges_bound = nedges * 1.2;
  }
  // else if the nedges is too small, increase the bound
  else if (nedges > this->nedges_bound / 1.2) {
    this->nedges_bound = nedges * 1.2;
  }
}
// allocate arrays (called from coeff)
// Allocates the standard per-type Pair arrays plus the type -> model species
// index map.
void PairE3GNN::allocate() {
  allocated = 1;
  const int np1 = atom->ntypes + 1; // LAMMPS types are 1-based
  memory->create(setflag, np1, np1, "pair:setflag");
  memory->create(cutsq, np1, np1, "pair:cutsq");
  memory->create(map, np1, "pair:map");
}
// global settings for pair_style
void
PairE3GNN
::
settings
(
int
narg
,
char
**
arg
)
{
if
(
narg
!=
0
)
{
error
->
all
(
FLERR
,
"Illegal pair_style command"
);
}
}
void
PairE3GNN
::
coeff
(
int
narg
,
char
**
arg
)
{
if
(
allocated
)
{
error
->
all
(
FLERR
,
"pair_e3gnn coeff called twice"
);
}
allocate
();
if
(
strcmp
(
arg
[
0
],
"*"
)
!=
0
||
strcmp
(
arg
[
1
],
"*"
)
!=
0
)
{
error
->
all
(
FLERR
,
"e3gnn: first and second input of pair_coeff should be '*'"
);
}
// expected input : pair_coeff * * pot.pth type_name1 type_name2 ...
std
::
unordered_map
<
std
::
string
,
std
::
string
>
meta_dict
=
{
{
"chemical_symbols_to_index"
,
""
},
{
"cutoff"
,
""
},
{
"num_species"
,
""
},
{
"model_type"
,
""
},
{
"version"
,
""
},
{
"dtype"
,
""
},
{
"time"
,
""
}};
// model loading from input
try
{
model
=
torch
::
jit
::
load
(
std
::
string
(
arg
[
2
]),
device
,
meta_dict
);
}
catch
(
const
c10
::
Error
&
e
)
{
error
->
all
(
FLERR
,
"error loading the model, check the path of the model"
);
}
// model = torch::jit::freeze(model); model is already freezed
torch
::
jit
::
setGraphExecutorOptimize
(
false
);
torch
::
jit
::
FusionStrategy
strategy
;
// thing about dynamic recompile as tensor shape varies, this is default
// strategy = {{torch::jit::FusionBehavior::DYNAMIC, 3}};
strategy
=
{{
torch
::
jit
::
FusionBehavior
::
STATIC
,
0
}};
torch
::
jit
::
setFusionStrategy
(
strategy
);
cutoff
=
std
::
stod
(
meta_dict
[
"cutoff"
]);
cutoff_square
=
cutoff
*
cutoff
;
if
(
meta_dict
[
"model_type"
].
compare
(
"E3_equivariant_model"
)
!=
0
)
{
error
->
all
(
FLERR
,
"given model type is not E3_equivariant_model"
);
}
std
::
string
chem_str
=
meta_dict
[
"chemical_symbols_to_index"
];
int
ntypes
=
atom
->
ntypes
;
auto
delim
=
" "
;
char
*
tok
=
std
::
strtok
(
const_cast
<
char
*>
(
chem_str
.
c_str
()),
delim
);
std
::
vector
<
std
::
string
>
chem_vec
;
while
(
tok
!=
nullptr
)
{
chem_vec
.
push_back
(
std
::
string
(
tok
));
tok
=
std
::
strtok
(
nullptr
,
delim
);
}
bool
found_flag
=
false
;
for
(
int
i
=
3
;
i
<
narg
;
i
++
)
{
found_flag
=
false
;
for
(
int
j
=
0
;
j
<
chem_vec
.
size
();
j
++
)
{
if
(
chem_vec
[
j
].
compare
(
arg
[
i
])
==
0
)
{
map
[
i
-
2
]
=
j
;
found_flag
=
true
;
fprintf
(
lmp
->
logfile
,
"Chemical specie '%s' is assigned to type %d
\n
"
,
arg
[
i
],
i
-
2
);
break
;
}
}
if
(
!
found_flag
)
{
error
->
all
(
FLERR
,
"Unknown chemical specie is given"
);
}
}
if
(
ntypes
>
narg
-
3
)
{
error
->
all
(
FLERR
,
"Not enough chemical specie is given. Check pair_coeff "
"and types in your data/script"
);
}
for
(
int
i
=
1
;
i
<=
ntypes
;
i
++
)
{
for
(
int
j
=
1
;
j
<=
ntypes
;
j
++
)
{
if
((
map
[
i
]
>=
0
)
&&
(
map
[
j
]
>=
0
))
{
setflag
[
i
][
j
]
=
1
;
cutsq
[
i
][
j
]
=
cutoff
*
cutoff
;
}
}
}
if
(
lmp
->
logfile
)
{
fprintf
(
lmp
->
logfile
,
"from sevenn version '%s' "
,
meta_dict
[
"version"
].
c_str
());
fprintf
(
lmp
->
logfile
,
"%s precision model, deployed when: %s
\n
"
,
meta_dict
[
"dtype"
].
c_str
(),
meta_dict
[
"time"
].
c_str
());
}
}
// init specific to this pair
// Requests a full neighbor list; the commented newton check is intentionally
// disabled for the single-process pair style.
void PairE3GNN::init_style() {
  // Newton flag is irrelevant if use only one processor for simulation
  /*
  if (force->newton_pair == 0) {
    error->all(FLERR, "Pair style nn requires newton pair on");
  }
  */
  // full neighbor list (this is many-body potential)
  neighbor->add_request(this, NeighConst::REQ_FULL);
}
// Per type-pair cutoff: every pair uses the single cutoff read from the
// model metadata (i and j are intentionally unused).
double PairE3GNN::init_one(int i, int j) { return cutoff; }
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn.h
0 → 100644
View file @
ca86f720
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle
(
e3gnn
,
PairE3GNN
)
#else
#ifndef LMP_PAIR_E3GNN
#define LMP_PAIR_E3GNN
#include "pair.h"
#include <torch/torch.h>
namespace LAMMPS_NS {

// Pair style "e3gnn": serial (single-process) LAMMPS interface to a deployed
// SevenNet/E3GNN TorchScript interatomic potential.
class PairE3GNN : public Pair {
private:
  double cutoff;               // model cutoff radius (from metadata)
  double cutoff_square;        // cutoff^2, used for neighbor filtering
  torch::jit::Module model;    // deployed TorchScript model
  torch::Device device = torch::kCPU; // compute device, CUDA if available
  int nelements;               // NOTE(review): appears unused in this chunk
  bool print_info = false;     // verbose output (SEVENN_PRINT_INFO env var)
  int nedges_bound = -1;       // cached upper bound on edge count; -1 = unset

public:
  PairE3GNN(class LAMMPS *);
  ~PairE3GNN();
  // Build graph, run the model, accumulate energy/forces/virial.
  void compute(int, int);
  // pair_style arguments (none accepted).
  void settings(int, char **);
  // read Atom type string from input script & related coeff
  void coeff(int, char **);
  // Allocate per-type arrays (called from coeff).
  void allocate();
  void init_style();
  double init_one(int, int);
};
} // namespace LAMMPS_NS
#endif
#endif
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.cpp
0 → 100644
View file @
ca86f720
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://lammps.sandia.gov/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yutack Park (SNU)
------------------------------------------------------------------------- */
#include <ATen/core/Dict.h>
#include <ATen/core/ivalue_inl.h>
#include <ATen/ops/from_blob.h>
#include <c10/core/Scalar.h>
#include <c10/core/TensorOptions.h>
#include <cstdlib>
#include <filesystem>
#include <numeric>
#include <string>
#include <torch/csrc/jit/api/module.h>
#include <torch/script.h>
#include <torch/torch.h>
#include <cuda_runtime.h>
#include "atom.h"
#include "comm.h"
#include "comm_brick.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "neigh_list.h"
#include "neighbor.h"
// #include "nvToolsExt.h"
#include "pair_e3gnn_parallel.h"
#include <cassert>
#ifdef OMPI_MPI_H
#include "mpi-ext.h" //This should be included after mpi.h which is included in pair.h
#endif
using
namespace
LAMMPS_NS
;
#define INTEGER_TYPE torch::TensorOptions().dtype(torch::kInt64)
#define FLOAT_TYPE torch::TensorOptions().dtype(torch::kFloat)
// Singleton accessor: the function-local static yields exactly one buffer
// manager per process (thread-safe initialization since C++11).
DeviceBuffManager &DeviceBuffManager::getInstance() {
  static DeviceBuffManager instance;
  return instance;
}
void
DeviceBuffManager
::
get_buffer
(
int
send_size
,
int
recv_size
,
float
*&
buf_send_ptr
,
float
*&
buf_recv_ptr
)
{
if
(
send_size
>
send_buf_size
)
{
cudaFree
(
buf_send_device
);
cudaError_t
cuda_err
=
cudaMalloc
(
&
buf_send_device
,
send_size
*
sizeof
(
float
));
send_buf_size
=
send_size
;
}
if
(
recv_size
>
recv_buf_size
)
{
cudaFree
(
buf_recv_device
);
cudaError_t
cuda_err
=
cudaMalloc
(
&
buf_recv_device
,
recv_size
*
sizeof
(
float
));
recv_buf_size
=
recv_size
;
}
buf_send_ptr
=
buf_send_device
;
buf_recv_ptr
=
buf_recv_device
;
}
// Release the device-side staging buffers (cudaFree on nullptr is a no-op).
DeviceBuffManager::~DeviceBuffManager() {
  cudaFree(buf_send_device);
  cudaFree(buf_recv_device);
}
// Constructor: detect GPU and CUDA-aware MPI support, choose the compute
// device and the communication device, and report both.
//
// Fix vs. previous revision: device_comm was assigned only inside the
// `if (lmp->screen)` and `if (lmp->logfile)` logging blocks, so when neither
// stream existed (e.g. -screen none -log none) device_comm was left
// uninitialized. The decision is now made unconditionally before logging.
PairE3GNNParallel::PairE3GNNParallel(LAMMPS *lmp) : Pair(lmp) {
  const char *print_flag = std::getenv("SEVENN_PRINT_INFO");
  const char *print_both_flag = std::getenv("SEVENN_PRINT_BOTH_INFO");
  if (print_flag) {
    world_rank = comm->me;
    std::cout << "process rank: " << world_rank << " initialized" << std::endl;
    // By default only rank 0 prints; SEVENN_PRINT_BOTH_INFO enables all ranks.
    print_info = (world_rank == 0) || print_both_flag;
  }

  std::string device_name;
  const bool use_gpu = torch::cuda::is_available();
  comm_forward = 0;
  comm_reverse = 0;

  // OpenMPI detection of CUDA-aware MPI support.
#ifdef OMPI_MPI_H
#if defined(MPIX_CUDA_AWARE_SUPPORT)
  use_cuda_mpi = (1 == MPIX_Query_cuda_support());
#else
  use_cuda_mpi = false;
#endif
#else
  use_cuda_mpi = false;
#endif

  if (use_gpu) {
    device = get_cuda_device();
    device_name = "CUDA";
  } else {
    device = torch::kCPU;
    device_name = "CPU";
  }

  // Escape hatch: force host-staged communication even with CUDA-aware MPI.
  if (std::getenv("OFF_E3GNN_PARALLEL_CUDA_MPI")) {
    use_cuda_mpi = false;
  }

  // Decide the communication device unconditionally (previously this only
  // happened while logging, leaving device_comm unset without screen/logfile).
  const bool stage_via_host = use_gpu && !use_cuda_mpi;
  device_comm = stage_via_host ? torch::Device(torch::kCPU) : device;

  // Report the configuration on whichever output streams exist.
  if (lmp->screen) {
    if (stage_via_host)
      fprintf(lmp->screen,
              "cuda-aware mpi not found, communicate via host device\n");
    fprintf(lmp->screen, "PairE3GNNParallel using device : %s\n",
            device_name.c_str());
    fprintf(lmp->screen, "PairE3GNNParallel cuda-aware mpi: %s\n",
            use_cuda_mpi ? "True" : "False");
  }
  if (lmp->logfile) {
    if (stage_via_host)
      fprintf(lmp->logfile,
              "cuda-aware mpi not found, communicate via host device\n");
    fprintf(lmp->logfile, "PairE3GNNParallel using device : %s\n",
            device_name.c_str());
    fprintf(lmp->logfile, "PairE3GNNParallel cuda-aware mpi: %s\n",
            use_cuda_mpi ? "True" : "False");
  }
}
// Pick a CUDA device for this MPI rank (round-robin over the visible GPUs)
// and make it the current device for the CUDA runtime.
torch::Device PairE3GNNParallel::get_cuda_device() {
  // NOTE(review): cuda_visible is read but never used.
  char *cuda_visible = std::getenv("CUDA_VISIBLE_DEVICES");
  int num_gpus;
  int idx;
  int rank = comm->me;
  num_gpus = torch::cuda::device_count();
  // NOTE(review): divides by num_gpus with no zero check — presumably safe
  // because this is only called when torch::cuda::is_available(); confirm.
  idx = rank % num_gpus;
  if (print_info)
    std::cout << world_rank << " Available # of GPUs found: " << num_gpus
              << std::endl;
  cudaError_t cuda_err = cudaSetDevice(idx);
  if (cuda_err != cudaSuccess) {
    // Non-fatal: report and continue; torch may still select the device.
    std::cerr << "E3GNN: Failed to set CUDA device: "
              << cudaGetErrorString(cuda_err) << std::endl;
  }
  return torch::Device(torch::kCUDA, idx);
}
// Destructor: release the per-type arrays created in allocate().
PairE3GNNParallel::~PairE3GNNParallel() {
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(cutsq);
    memory->destroy(map);
  }
}
// Width of the per-atom feature vector currently staged for communication
// (set in compute() from the model's node-feature tensor).
int PairE3GNNParallel::get_x_dim() { return x_dim; }

// Whether ghost-feature MPI communication goes straight from device memory
// (CUDA-aware MPI) instead of staging through the host.
bool PairE3GNNParallel::use_cuda_mpi_() { return use_cuda_mpi; }

// True once comm_preprocess() has built the pack/unpack index lists for the
// current step.
bool PairE3GNNParallel::is_comm_preprocess_done() { return comm_preprocess_done; }
// Parallel compute: build the local+ghost atomic graph on the CPU, run the
// model as a chain of per-layer TorchScript modules with MPI exchange of
// ghost node features between layers, then back-propagate layer by layer
// (with reverse communication of gradients) to obtain forces and virial.
void PairE3GNNParallel::compute(int eflag, int vflag) {
  /*
     Graph build on cpu
  */
  if (eflag || vflag)
    ev_setup(eflag, vflag);
  else
    evflag = vflag_fdotr = 0;
  if (vflag_atom) {
    error->all(FLERR, "atomic stress is not supported\n");
  }
  if (atom->tag_consecutive() == 0) {
    error->all(FLERR, "Pair e3gnn requires consecutive atom IDs");
  }

  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
  int nlocal = list->inum; // same as nlocal
  int nghost = atom->nghost;
  int ntotal = nlocal + nghost;
  int *ilist = list->ilist;
  int inum = list->inum;

  // This pair style relies on a patched CommBrick for its custom
  // forward/reverse feature communication.
  CommBrick *comm_brick = dynamic_cast<CommBrick *>(comm);
  if (comm_brick == nullptr) {
    error->all(FLERR, "e3gnn/parallel: comm style should be brick & from "
                      "modified code of comm_brick");
  }

  bigint natoms = atom->natoms;
  // tag ignore PBC
  tagint *tag = atom->tag;

  // store graph_idx from local to known ghost atoms(ghost atoms inside cutoff)
  // NOTE(review): this is a stack VLA sized by the GLOBAL atom count — for
  // large systems this can overflow the stack; consider std::vector.
  int tag_to_graph_idx[natoms + 1]; // tag starts from 1 not 0
  std::fill_n(tag_to_graph_idx, natoms + 1, -1);
  // to access tag_to_graph_idx from comm
  tag_to_graph_idx_ptr = tag_to_graph_idx;

  // Graph indices 0..nlocal-1 are local atoms; ghosts are appended from
  // nlocal upward as they are first encountered.
  int graph_indexer = nlocal;
  int graph_index_to_i[ntotal];

  int *numneigh = list->numneigh;      // j loop cond
  int **firstneigh = list->firstneigh; // j list
  const int nedges_upper_bound =
      std::accumulate(numneigh, numneigh + nlocal, 0);

  std::vector<long> node_type;       // species of local nodes
  std::vector<long> node_type_ghost; // species of ghost nodes
  float edge_vec[nedges_upper_bound][3];
  long edge_idx_src[nedges_upper_bound];
  long edge_idx_dst[nedges_upper_bound];
  int nedges = 0;

  for (int ii = 0; ii < inum; ii++) {
    // populate tag_to_graph_idx of local atoms
    const int i = ilist[ii];
    const int itag = tag[i];
    const int itype = type[i];
    tag_to_graph_idx[itag] = ii;
    graph_index_to_i[ii] = i;
    node_type.push_back(map[itype]);
  }

  // loop over neighbors, build graph
  for (int ii = 0; ii < inum; ii++) {
    const int i = ilist[ii];
    const int i_graph_idx = ii;
    const int *jlist = firstneigh[i];
    const int jnum = numneigh[i];
    for (int jj = 0; jj < jnum; jj++) {
      int j = jlist[jj];
      const int jtag = tag[j];
      j &= NEIGHMASK;
      const int jtype = type[j];
      // we have to calculate Rij to check cutoff in lammps side
      const double delij[3] = {x[j][0] - x[i][0], x[j][1] - x[i][1],
                               x[j][2] - x[i][2]};
      const double Rij = delij[0] * delij[0] + delij[1] * delij[1] +
                         delij[2] * delij[2];
      int j_graph_idx;
      if (Rij < cutoff_square) {
        // if given j is not local atom and inside cutoff
        if (tag_to_graph_idx[jtag] == -1) {
          // if j is ghost atom inside cutoff but first seen
          tag_to_graph_idx[jtag] = graph_indexer;
          graph_index_to_i[graph_indexer] = j;
          node_type_ghost.push_back(map[jtype]);
          graph_indexer++;
        }
        j_graph_idx = tag_to_graph_idx[jtag];
        edge_idx_src[nedges] = i_graph_idx;
        edge_idx_dst[nedges] = j_graph_idx;
        edge_vec[nedges][0] = delij[0];
        edge_vec[nedges][1] = delij[1];
        edge_vec[nedges][2] = delij[2];
        nedges++;
      }
    } // j loop end
  } // i loop end

  // member variable
  graph_size = graph_indexer;
  const int ghost_node_num = graph_size - nlocal;

  // convert data to Tensor
  auto inp_node_type =
      torch::from_blob(node_type.data(), nlocal, INTEGER_TYPE);
  auto inp_node_type_ghost =
      torch::from_blob(node_type_ghost.data(), ghost_node_num, INTEGER_TYPE);
  long num_nodes[1] = {long(nlocal)};
  auto inp_num_atoms = torch::from_blob(num_nodes, {1}, INTEGER_TYPE);
  auto edge_idx_src_tensor =
      torch::from_blob(edge_idx_src, {nedges}, INTEGER_TYPE);
  auto edge_idx_dst_tensor =
      torch::from_blob(edge_idx_dst, {nedges}, INTEGER_TYPE);
  auto inp_edge_index =
      torch::stack({edge_idx_src_tensor, edge_idx_dst_tensor});
  auto inp_edge_vec = torch::from_blob(edge_vec, {nedges, 3}, FLOAT_TYPE);

  if (print_info) {
    std::cout << world_rank << " Nlocal: " << nlocal << std::endl;
    std::cout << world_rank << " Graph_size: " << graph_size << std::endl;
    std::cout << world_rank << " Ghost_node_num: " << ghost_node_num
              << std::endl;
    std::cout << world_rank << " Nedges: " << nedges << "\n" << std::endl;
  }

  // r_original requires grad True
  inp_edge_vec.set_requires_grad(true);

  torch::Dict<std::string, torch::Tensor> input_dict;
  input_dict.insert("x", inp_node_type.to(device));
  input_dict.insert("x_ghost", inp_node_type_ghost.to(device));
  input_dict.insert("edge_index", inp_edge_index.to(device));
  input_dict.insert("edge_vec", inp_edge_vec.to(device));
  input_dict.insert("num_atoms", inp_num_atoms.to(device));
  input_dict.insert("nlocal", inp_num_atoms.to(torch::kCPU));

  // Tensors saved per layer for the manual backward pass below.
  std::list<std::vector<torch::Tensor>> wrt_tensors;
  wrt_tensors.push_back({input_dict.at("edge_vec")});

  // Run the first model segment.
  auto model_part = model_list.front();
  auto output = model_part.forward({input_dict}).toGenericDict();

  comm_preprocess();
  // extra_graph_idx_map is set from comm_preprocess();
  // last one is for trash values. See pack_forward_init
  const int extra_size =
      ghost_node_num + static_cast<int>(extra_graph_idx_map.size()) + 1;
  torch::Tensor x_local;
  torch::Tensor x_ghost;

  // Remaining segments: exchange ghost node features between segments.
  for (auto it = model_list.begin(); it != model_list.end(); ++it) {
    if (it == model_list.begin())
      continue;
    model_part = *it;
    x_local = output.at("x").toTensor().detach().to(device);
    // length of per atom vector(node feature)
    x_dim = x_local.size(1);
    auto ghost_and_extra_x = torch::zeros({ghost_node_num + extra_size, x_dim},
                                          FLOAT_TYPE.device(device));
    x_comm = torch::cat({x_local, ghost_and_extra_x}, 0).to(device_comm);
    // populate x_ghost by communication
    comm_brick->forward_comm(this);
    // What we got from forward_comm (node feature of ghosts)
    x_ghost = torch::split_with_sizes(
        x_comm, {nlocal, ghost_node_num, extra_size}, 0)[1];
    x_ghost.set_requires_grad(true);
    // prepare next input (output > next input)
    output.insert_or_assign("x_ghost", x_ghost.to(device));
    // make another edge_vec to discriminate grad calculation with other
    // edge_vecs(maybe redundant?)
    output.insert_or_assign("edge_vec",
                            output.at("edge_vec").toTensor().clone());
    // save tensors for backprop
    wrt_tensors.push_back({output.at("edge_vec").toTensor(),
                           output.at("x").toTensor(),
                           output.at("self_cont_tmp").toTensor(),
                           output.at("x_ghost").toTensor()});
    output = model_part.forward({output}).toGenericDict();
  }

  torch::Tensor energy_tensor =
      output.at("inferred_total_energy").toTensor().squeeze();
  // create on device
  torch::Tensor dE_dr = torch::zeros({nedges, 3}, FLOAT_TYPE.device(device));
  // holds grad info of x_local (it loses its grad when sends to CPU)
  torch::Tensor x_local_save;
  torch::Tensor self_conn_grads;
  std::vector<torch::Tensor> grads;
  std::vector<torch::Tensor> of_tensor;

  // Manual layer-by-layer backward pass, newest layer first, with reverse
  // communication of ghost gradients between layers.
  // TODO: most values of self_conn_grads were zero because we use only scalars
  // for energy
  for (auto rit = wrt_tensors.rbegin(); rit != wrt_tensors.rend(); ++rit) {
    // edge_vec, x, x_ghost order
    auto wrt_tensor = *rit;
    if (rit == wrt_tensors.rbegin()) {
      grads = torch::autograd::grad({energy_tensor}, wrt_tensor);
    } else {
      // of wrt grads_output
      x_local_save.copy_(x_local);
      grads = torch::autograd::grad(of_tensor, wrt_tensor,
                                    {x_local_save, self_conn_grads});
    }
    // accumulate force
    dE_dr = dE_dr + grads.at(0);
    // if last iteration
    if (std::distance(rit, wrt_tensors.rend()) == 1)
      continue;
    of_tensor.clear();
    of_tensor.push_back(wrt_tensor[1]); // x
    of_tensor.push_back(wrt_tensor[2]); // self_cont_tmp
    x_local_save = grads.at(1);         // for grads_output
    x_local = x_local_save.detach();
    // grad_outputs & communication
    x_dim = x_local.size(1);
    self_conn_grads = grads.at(2); // no communication, for grads_output
    x_ghost = grads.at(3).detach(); // yes communication, not for grads_output
    auto extra_x = torch::zeros({extra_size, x_dim}, FLOAT_TYPE.device(device));
    x_comm = torch::cat({x_local, x_ghost, extra_x}, 0).to(device_comm);
    comm_brick->reverse_comm(this); // completes x_local
    // now x_local is complete (dE_dx), become next grads_output(with
    // self_conn_grads)
    x_local = torch::split_with_sizes(
        x_comm, {nlocal, ghost_node_num, extra_size}, 0)[0];
  }

  // postprocessing
  if (print_info) {
    size_t free, tot;
    cudaMemGetInfo(&free, &tot);
    std::cout << world_rank << " MEM use after backward(MB)" << std::endl;
    double Mfree = static_cast<double>(free) / (1024 * 1024);
    double Mtot = static_cast<double>(tot) / (1024 * 1024);
    std::cout << world_rank << " Total: " << Mtot << std::endl;
    std::cout << world_rank << " Free: " << Mfree << std::endl;
    std::cout << world_rank << " Used: " << Mtot - Mfree << std::endl;
    double Mused = Mtot - Mfree;
    std::cout << world_rank << " Used/Nedges: " << Mused / nedges << std::endl;
    std::cout << world_rank << " Used/Nlocal: " << Mused / nlocal << std::endl;
    std::cout << world_rank << " Used/GraphSize: " << Mused / graph_size
              << "\n" << std::endl;
  }

  // accumulate energy
  eng_vdwl += energy_tensor.item<float>();

  // Scatter dE/dr to per-node forces: +dE_dr on the source, -dE_dr on the
  // destination of each edge.
  dE_dr = dE_dr.to(torch::kCPU);
  torch::Tensor force_tensor = torch::zeros({graph_indexer, 3});
  auto _edge_idx_src_tensor =
      edge_idx_src_tensor.repeat_interleave(3).view({nedges, 3});
  auto _edge_idx_dst_tensor =
      edge_idx_dst_tensor.repeat_interleave(3).view({nedges, 3});
  force_tensor.scatter_reduce_(0, _edge_idx_src_tensor, dE_dr, "sum");
  force_tensor.scatter_reduce_(0, _edge_idx_dst_tensor, torch::neg(dE_dr),
                               "sum");
  auto forces = force_tensor.accessor<float, 2>();
  for (int graph_idx = 0; graph_idx < graph_indexer; graph_idx++) {
    int i = graph_index_to_i[graph_idx];
    f[i][0] += forces[graph_idx][0];
    f[i][1] += forces[graph_idx][1];
    f[i][2] += forces[graph_idx][2];
  }

  if (vflag) {
    // Per-edge virial contributions, accumulated per destination node then
    // summed; Voigt order with the 4/5 swap matching LAMMPS conventions.
    auto diag = inp_edge_vec * dE_dr;
    auto s12 = inp_edge_vec.select(1, 0) * dE_dr.select(1, 1);
    auto s23 = inp_edge_vec.select(1, 1) * dE_dr.select(1, 2);
    auto s31 = inp_edge_vec.select(1, 2) * dE_dr.select(1, 0);
    std::vector<torch::Tensor> voigt_list = {diag, s12.unsqueeze(-1),
                                             s23.unsqueeze(-1),
                                             s31.unsqueeze(-1)};
    auto voigt = torch::cat(voigt_list, 1);
    torch::Tensor per_atom_stress_tensor = torch::zeros({graph_indexer, 6});
    auto _edge_idx_dst6_tensor =
        edge_idx_dst_tensor.repeat_interleave(6).view({nedges, 6});
    per_atom_stress_tensor.scatter_reduce_(0, _edge_idx_dst6_tensor, voigt,
                                           "sum");
    auto virial_stress_tensor =
        torch::neg(torch::sum(per_atom_stress_tensor, 0));
    auto virial_stress = virial_stress_tensor.accessor<float, 1>();
    virial[0] += virial_stress[0];
    virial[1] += virial_stress[1];
    virial[2] += virial_stress[2];
    virial[3] += virial_stress[3];
    virial[4] += virial_stress[5];
    virial[5] += virial_stress[4];
  }

  if (eflag_atom) {
    torch::Tensor atomic_energy_tensor =
        output.at("atomic_energy").toTensor().cpu().squeeze();
    auto atomic_energy = atomic_energy_tensor.accessor<float, 1>();
    for (int graph_idx = 0; graph_idx < nlocal; graph_idx++) {
      int i = graph_index_to_i[graph_idx];
      eatom[i] += atomic_energy[graph_idx];
    }
  }

  // clean up comm preprocess variables
  comm_preprocess_done = false;
  for (int i = 0; i < 6; i++) {
    // array of vector<long>
    comm_index_pack_forward[i].clear();
    comm_index_unpack_forward[i].clear();
    comm_index_unpack_reverse[i].clear();
  }
  extra_graph_idx_map.clear();
}
// allocate arrays (called from coeff)
// Allocates the standard per-type Pair arrays plus the type -> model species
// index map.
void PairE3GNNParallel::allocate() {
  allocated = 1;
  const int np1 = atom->ntypes + 1; // LAMMPS types are 1-based
  memory->create(setflag, np1, np1, "pair:setflag");
  memory->create(cutsq, np1, np1, "pair:cutsq");
  memory->create(map, np1, "pair:map");
}
// global settings for pair_style
void
PairE3GNNParallel
::
settings
(
int
narg
,
char
**
arg
)
{
if
(
narg
!=
0
)
{
error
->
all
(
FLERR
,
"Illegal pair_style command"
);
}
}
void
PairE3GNNParallel
::
coeff
(
int
narg
,
char
**
arg
)
{
if
(
allocated
)
{
error
->
all
(
FLERR
,
"pair_e3gnn coeff called twice"
);
}
allocate
();
if
(
strcmp
(
arg
[
0
],
"*"
)
!=
0
||
strcmp
(
arg
[
1
],
"*"
)
!=
0
)
{
error
->
all
(
FLERR
,
"e3gnn: first and second input of pair_coeff should be '*'"
);
}
// expected input : pair_coeff * * pot.pth type_name1 type_name2 ...
std
::
unordered_map
<
std
::
string
,
std
::
string
>
meta_dict
=
{
{
"chemical_symbols_to_index"
,
""
},
{
"cutoff"
,
""
},
{
"num_species"
,
""
},
{
"model_type"
,
""
},
{
"version"
,
""
},
{
"dtype"
,
""
},
{
"time"
,
""
},
{
"comm_size"
,
""
}};
// model loading from input
int
n_model
=
std
::
stoi
(
arg
[
2
]);
int
chem_arg_i
=
4
;
std
::
vector
<
std
::
string
>
model_fnames
;
if
(
std
::
filesystem
::
exists
(
arg
[
3
]))
{
if
(
std
::
filesystem
::
is_directory
(
arg
[
3
]))
{
auto
headf
=
std
::
string
(
arg
[
3
]);
for
(
int
i
=
0
;
i
<
n_model
;
i
++
)
{
auto
stri
=
std
::
to_string
(
i
);
model_fnames
.
push_back
(
headf
+
"/deployed_parallel_"
+
stri
+
".pt"
);
}
}
else
if
(
std
::
filesystem
::
is_regular_file
(
arg
[
3
]))
{
for
(
int
i
=
3
;
i
<
n_model
+
3
;
i
++
)
{
model_fnames
.
push_back
(
std
::
string
(
arg
[
i
]));
}
chem_arg_i
=
n_model
+
3
;
}
else
{
error
->
all
(
FLERR
,
"No such file or directory:"
+
std
::
string
(
arg
[
3
]));
}
}
for
(
const
auto
&
modelf
:
model_fnames
)
{
if
(
!
std
::
filesystem
::
is_regular_file
(
modelf
))
{
error
->
all
(
FLERR
,
"Expected this is a regular file:"
+
modelf
);
}
model_list
.
push_back
(
torch
::
jit
::
load
(
modelf
,
device
,
meta_dict
));
}
torch
::
jit
::
setGraphExecutorOptimize
(
false
);
torch
::
jit
::
FusionStrategy
strategy
;
// strategy = {{torch::jit::FusionBehavior::DYNAMIC, 3}};
strategy
=
{{
torch
::
jit
::
FusionBehavior
::
STATIC
,
0
}};
torch
::
jit
::
setFusionStrategy
(
strategy
);
cutoff
=
std
::
stod
(
meta_dict
[
"cutoff"
]);
// maximum possible size of per atom x before last convolution
int
comm_size
=
std
::
stod
(
meta_dict
[
"comm_size"
]);
// to initialize buffer size for communication
comm_forward
=
comm_size
;
comm_reverse
=
comm_size
;
cutoff_square
=
cutoff
*
cutoff
;
if
(
meta_dict
[
"model_type"
].
compare
(
"E3_equivariant_model"
)
!=
0
)
{
error
->
all
(
FLERR
,
"given model type is not E3_equivariant_model"
);
}
std
::
string
chem_str
=
meta_dict
[
"chemical_symbols_to_index"
];
int
ntypes
=
atom
->
ntypes
;
auto
delim
=
" "
;
char
*
tok
=
std
::
strtok
(
const_cast
<
char
*>
(
chem_str
.
c_str
()),
delim
);
std
::
vector
<
std
::
string
>
chem_vec
;
while
(
tok
!=
nullptr
)
{
chem_vec
.
push_back
(
std
::
string
(
tok
));
tok
=
std
::
strtok
(
nullptr
,
delim
);
}
// what if unknown chemical specie is in arg? should I abort? is there any use
// case for that?
bool
found_flag
=
false
;
int
n_chem
=
narg
-
chem_arg_i
;
for
(
int
i
=
0
;
i
<
n_chem
;
i
++
)
{
found_flag
=
false
;
for
(
int
j
=
0
;
j
<
chem_vec
.
size
();
j
++
)
{
if
(
chem_vec
[
j
].
compare
(
arg
[
i
+
chem_arg_i
])
==
0
)
{
map
[
i
+
1
]
=
j
;
// store from 1, (not 0)
found_flag
=
true
;
if
(
lmp
->
logfile
)
{
fprintf
(
lmp
->
logfile
,
"Chemical specie '%s' is assigned to type %d
\n
"
,
arg
[
i
+
chem_arg_i
],
i
+
1
);
break
;
}
}
}
if
(
!
found_flag
)
{
error
->
all
(
FLERR
,
"Unknown chemical specie is given or the number of "
"potential files is not consistent"
);
}
}
for
(
int
i
=
1
;
i
<=
ntypes
;
i
++
)
{
for
(
int
j
=
1
;
j
<=
ntypes
;
j
++
)
{
if
((
map
[
i
]
>=
0
)
&&
(
map
[
j
]
>=
0
))
{
setflag
[
i
][
j
]
=
1
;
cutsq
[
i
][
j
]
=
cutoff
*
cutoff
;
}
}
}
if
(
lmp
->
logfile
)
{
fprintf
(
lmp
->
logfile
,
"from sevenn version '%s' "
,
meta_dict
[
"version"
].
c_str
());
fprintf
(
lmp
->
logfile
,
"%s precision model, deployed when: %s
\n
"
,
meta_dict
[
"dtype"
].
c_str
(),
meta_dict
[
"time"
].
c_str
());
}
}
// init specific to this pair
void
PairE3GNNParallel
::
init_style
()
{
// full neighbor list & newton on
if
(
force
->
newton_pair
==
0
)
{
error
->
all
(
FLERR
,
"Pair style e3gnn/parallel requires newton pair on"
);
}
neighbor
->
add_request
(
this
,
NeighConst
::
REQ_FULL
);
}
// Per type-pair setup; every pair shares the single model cutoff radius.
double PairE3GNNParallel::init_one(int /*i*/, int /*j*/) { return cutoff; }
// Record the neighbor process ranks for each of the 6 swap directions,
// as handed over by the communication layer.
void PairE3GNNParallel::notify_proc_ids(const int *sendproc,
                                        const int *recvproc) {
  for (int dir = 0; dir != 6; ++dir) {
    this->sendproc[dir] = sendproc[dir];
    this->recvproc[dir] = recvproc[dir];
  }
}
// Precompute the index maps used by the GNN forward/reverse communication.
// Runs once (guarded by comm_preprocess_done); fills comm_index_pack_forward,
// comm_index_unpack_forward (via the fake forward_comm below),
// comm_index_unpack_reverse, and — when use_cuda_mpi — their device tensors.
void PairE3GNNParallel::comm_preprocess() {
  assert(!comm_preprocess_done);
  // The communicator is expected to be (the patched) CommBrick here.
  CommBrick *comm_brick = dynamic_cast<CommBrick *>(comm);
  // fake lammps communication call to preprocess index
  // gives complete comm_index_pack, unpack_forward, and extra_graph_idx_map
  comm_brick->forward_comm(this);
  // Per destination rank: graph indices already scheduled once, so duplicates
  // can be redirected to a trash slot during unpack_reverse (which adds).
  std::map<int, std::set<int>> already_met_map;
  for (int comm_phase = 0; comm_phase < 6; comm_phase++) {
    const int n = comm_index_pack_forward[comm_phase].size();
    int sproc = this->sendproc[comm_phase];
    if (already_met_map.count(sproc) == 0) {
      already_met_map.insert({sproc, std::set<int>()});
    }
    // for unpack_reverse, Ignore duplicated index by 'already_met'
    std::vector<long> &idx_map_forward = comm_index_pack_forward[comm_phase];
    std::vector<long> &idx_map_reverse = comm_index_unpack_reverse[comm_phase];
    std::set<int> &already_met = already_met_map[sproc];
    // the last index of x_comm is used to trash unnecessary values
    const int trash_index =
        graph_size + static_cast<int>(extra_graph_idx_map.size());
    //+ 1;
    for (int i = 0; i < n; i++) {
      const int idx = idx_map_forward[i];
      if (idx < graph_size) {
        // index belongs to this process's graph
        if (already_met.count(idx) == 1) {
          // already scheduled for this rank: divert to the trash slot
          idx_map_reverse.push_back(trash_index);
        } else {
          idx_map_reverse.push_back(idx);
          already_met.insert(idx);
        }
      } else {
        // pass-through (extra) atom: keep its slot unchanged
        idx_map_reverse.push_back(idx);
      }
    }
    if (use_cuda_mpi) {
      // Device copies of the index maps for tensor gather/scatter. .to(device)
      // materializes its own storage, so the from_blob views over the host
      // vectors need not outlive this scope.
      comm_index_pack_forward_tensor[comm_phase] =
          torch::from_blob(idx_map_forward.data(), idx_map_forward.size(),
                           INTEGER_TYPE)
              .to(device);
      // NOTE(review): `upmap` copies the vector; a reference would avoid the
      // copy (harmless here since .to(device) copies anyway).
      auto upmap = comm_index_unpack_forward[comm_phase];
      comm_index_unpack_forward_tensor[comm_phase] =
          torch::from_blob(upmap.data(), upmap.size(), INTEGER_TYPE).to(device);
      comm_index_unpack_reverse_tensor[comm_phase] =
          torch::from_blob(idx_map_reverse.data(), idx_map_reverse.size(),
                           INTEGER_TYPE)
              .to(device);
    }
  }
  comm_preprocess_done = true;
}
// called from comm_brick if comm_preprocess_done is false
// Build the pack-forward index map for this comm_phase: for each atom in
// list_send, record the x_comm row its features will be read from on pack.
void PairE3GNNParallel::pack_forward_init(int n, int *list_send,
                                          int comm_phase) {
  std::vector<long> &idx_map = comm_index_pack_forward[comm_phase];
  idx_map.reserve(n);
  int i, j; // NOTE(review): j and nlocal below are unused in this function
  int nlocal = list->inum;
  tagint *tag = atom->tag;
  for (i = 0; i < n; i++) {
    int list_i = list_send[i];
    // graph index of this atom, or -1 if it is not in the local graph
    int graph_idx = tag_to_graph_idx_ptr[tag[list_i]];
    if (graph_idx != -1) {
      // known atom (local atom + ghost atom inside cutoff)
      idx_map.push_back(graph_idx);
    } else {
      // unknown atom, these are not used in computation in this process
      // instead, this process is used to hand over these atoms to other process
      // hold them in continuous manner for flexible tensor operations later
      if (extra_graph_idx_map.find(list_i) != extra_graph_idx_map.end()) {
        idx_map.push_back(extra_graph_idx_map[list_i]);
      } else {
        // unknown atom at pack forward, ghost atom outside cutoff?
        // NOTE(review): the lookup above keys on list_i but the insertion
        // below keys on the loop index i — confirm this asymmetry is
        // intentional. Also, operator[] on the left may insert before
        // .size() is read on the right (unspecified order before C++17).
        extra_graph_idx_map[i] = graph_size + extra_graph_idx_map.size();
        idx_map.push_back(extra_graph_idx_map[i]);
        // same as list_i in pack
      }
    }
  }
}
// called from comm_brick if comm_preprocess_done is false
// Build the unpack-forward index map for this comm_phase: for each incoming
// ghost atom i in [first, first + n), record the x_comm row its features are
// written to on unpack. Atoms known to the local graph reuse their graph
// index; atoms unknown here (only handed through to other processes) get a
// fresh slot past graph_size, remembered in extra_graph_idx_map.
void PairE3GNNParallel::unpack_forward_init(int n, int first, int comm_phase) {
  std::vector<long> &idx_map = comm_index_unpack_forward[comm_phase];
  idx_map.reserve(n);
  const int last = first + n;
  tagint *tag = atom->tag;
  for (int i = first; i < last; i++) {
    // graph index of this atom, or -1 if it is not in the local graph
    const int graph_idx = tag_to_graph_idx_ptr[tag[i]];
    if (graph_idx != -1) {
      // known atom (local atom + ghost atom inside cutoff)
      idx_map.push_back(graph_idx);
    } else {
      // Unknown atom: assign the next free slot after the graph. Compute the
      // slot BEFORE inserting: the original one-liner
      //   extra_graph_idx_map[i] = graph_size + extra_graph_idx_map.size();
      // relies on the RHS being evaluated first, which is unspecified before
      // C++17 (operator[] may default-insert before size() is read).
      const long new_idx =
          graph_size + static_cast<long>(extra_graph_idx_map.size());
      extra_graph_idx_map[i] = new_idx;
      idx_map.push_back(new_idx);
      // same as list_i in pack
    }
  }
}
// Pack per-atom features (rows of x_comm) selected by the precomputed
// pack-forward index map into the flat send buffer.
// Returns the number of floats written (x_dim per atom).
int PairE3GNNParallel::pack_forward_comm_gnn(float *buf, int comm_phase) {
  std::vector<long> &idx_map = comm_index_pack_forward[comm_phase];
  const int n = static_cast<int>(idx_map.size());
  if (use_cuda_mpi && n != 0) {
    // GPU path: gather rows on device, then device-to-device copy into the
    // MPI buffer (assumes buf is device memory under CUDA-aware MPI).
    torch::Tensor &idx_map_tensor = comm_index_pack_forward_tensor[comm_phase];
    auto selected = x_comm.index_select(0, idx_map_tensor);
    // its size is x_dim * n
    // NOTE(review): cuda_err is never checked; consider handling failure.
    cudaError_t cuda_err =
        cudaMemcpy(buf, selected.data_ptr<float>(), (x_dim * n) * sizeof(float),
                   cudaMemcpyDeviceToDevice);
  } else {
    // CPU path: copy each selected row into the buffer element by element.
    int i, j, m;
    m = 0;
    for (i = 0; i < n; i++) {
      const int idx = static_cast<int>(idx_map.at(i));
      float *from = x_comm[idx].data_ptr<float>();
      for (j = 0; j < x_dim; j++) {
        buf[m++] = from[j];
      }
    }
  }
  if (print_info) {
    // Debug output of communication volume for this phase.
    std::cout << world_rank << " comm_phase: " << comm_phase << std::endl;
    std::cout << world_rank << " pack_forward x_dim: " << x_dim << std::endl;
    std::cout << world_rank << " pack_forward n: " << n << std::endl;
    std::cout << world_rank << " pack_forward x_dim*n: " << x_dim * n
              << std::endl;
    double Msend = static_cast<double>(x_dim * n * 4) / (1024 * 1024);
    std::cout << world_rank << " send size(MB): " << Msend << "\n" << std::endl;
  }
  return x_dim * n;
}
// Unpack received per-atom features from the flat buffer into the x_comm rows
// given by the precomputed unpack-forward index map.
void PairE3GNNParallel::unpack_forward_comm_gnn(float *buf, int comm_phase) {
  std::vector<long> &idx_map = comm_index_unpack_forward[comm_phase];
  const int n = static_cast<int>(idx_map.size());
  if (use_cuda_mpi && n != 0) {
    // GPU path: view buf as an (n, x_dim) device tensor and scatter its rows
    // into x_comm (assumes buf is device memory under CUDA-aware MPI).
    torch::Tensor &idx_map_tensor =
        comm_index_unpack_forward_tensor[comm_phase];
    auto buf_tensor =
        torch::from_blob(buf, {n, x_dim}, FLOAT_TYPE.device(device));
    x_comm.scatter_(0,
                    idx_map_tensor.repeat_interleave(x_dim).view({n, x_dim}),
                    buf_tensor);
  } else {
    // CPU path: copy each buffered row into its destination row of x_comm.
    int i, j, m;
    m = 0;
    for (i = 0; i < n; i++) {
      const int idx = static_cast<int>(idx_map.at(i));
      float *to = x_comm[idx].data_ptr<float>();
      for (j = 0; j < x_dim; j++) {
        to[j] = buf[m++];
      }
    }
  }
}
// Pack rows of x_comm for the reverse (accumulation) communication. The
// unpack-FORWARD index map is reused here: reverse comm sends back exactly
// the rows that were received during forward comm.
// Returns the number of floats written (x_dim per atom).
int PairE3GNNParallel::pack_reverse_comm_gnn(float *buf, int comm_phase) {
  std::vector<long> &idx_map = comm_index_unpack_forward[comm_phase];
  const int n = static_cast<int>(idx_map.size());
  if (use_cuda_mpi && n != 0) {
    // GPU path: gather rows on device, device-to-device copy into buf.
    torch::Tensor &idx_map_tensor =
        comm_index_unpack_forward_tensor[comm_phase];
    auto selected = x_comm.index_select(0, idx_map_tensor);
    // NOTE(review): cuda_err is never checked; consider handling failure.
    cudaError_t cuda_err =
        cudaMemcpy(buf, selected.data_ptr<float>(), (x_dim * n) * sizeof(float),
                   cudaMemcpyDeviceToDevice);
  } else {
    // CPU path: copy each selected row into the buffer.
    int i, j, m;
    m = 0;
    for (i = 0; i < n; i++) {
      const int idx = static_cast<int>(idx_map.at(i));
      float *from = x_comm[idx].data_ptr<float>();
      for (j = 0; j < x_dim; j++) {
        buf[m++] = from[j];
      }
    }
  }
  if (print_info) {
    // Debug output of communication volume for this phase.
    std::cout << world_rank << " comm_phase: " << comm_phase << std::endl;
    std::cout << world_rank << " pack_reverse x_dim: " << x_dim << std::endl;
    std::cout << world_rank << " pack_reverse n: " << n << std::endl;
    std::cout << world_rank << " pack_reverse x_dim*n: " << x_dim * n
              << std::endl;
    // NOTE(review): Msend is computed but never printed in this function.
    double Msend = static_cast<double>(x_dim * n * 4) / (1024 * 1024);
  }
  return x_dim * n;
}
// Unpack reverse-communicated values from buf and ACCUMULATE (add) them into
// the x_comm rows given by the unpack-reverse index map. Duplicated source
// rows were redirected to a trash slot during comm_preprocess, so each real
// row is accumulated at most once per destination.
void PairE3GNNParallel::unpack_reverse_comm_gnn(float *buf, int comm_phase) {
  std::vector<long> &idx_map = comm_index_unpack_reverse[comm_phase];
  const int n = static_cast<int>(idx_map.size());
  if (use_cuda_mpi && n != 0) {
    // GPU path: scatter-add buf rows into x_comm on device.
    torch::Tensor &idx_map_tensor =
        comm_index_unpack_reverse_tensor[comm_phase];
    auto buf_tensor =
        torch::from_blob(buf, {n, x_dim}, FLOAT_TYPE.device(device));
    x_comm.scatter_(0,
                    idx_map_tensor.repeat_interleave(x_dim).view({n, x_dim}),
                    buf_tensor, "add");
  } else {
    // CPU path: add each buffered row into its destination row of x_comm.
    int i, j, m;
    m = 0;
    for (i = 0; i < n; i++) {
      const int idx = static_cast<int>(idx_map.at(i));
      if (idx == -1) {
        // NOTE(review): comm_preprocess builds these maps with trash_index,
        // not -1; presumably a legacy guard — confirm it is still reachable.
        m += x_dim;
        continue;
      }
      float *to = x_comm[idx].data_ptr<float>();
      for (j = 0; j < x_dim; j++) {
        to[j] += buf[m++];
      }
    }
  }
}
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/pair_e3gnn_parallel.h
0 → 100644
View file @
ca86f720
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
// Register this pair style under the LAMMPS name "e3gnn/parallel".
PairStyle(e3gnn/parallel, PairE3GNNParallel)
#else
#ifndef LMP_PAIR_E3GNN_PARALLEL
#define LMP_PAIR_E3GNN_PARALLEL

#include "pair.h"
#include <torch/torch.h>
#include <unordered_map> // was missing: extra_graph_idx_map is a std::unordered_map
#include <vector>

namespace LAMMPS_NS {

// Multi-GPU / multi-process pair style driving a deployed E3-equivariant GNN
// (SevenNet). Features are exchanged between processes during the layer-wise
// forward/reverse communication implemented by the patched CommBrick.
class PairE3GNNParallel : public Pair {
private:
  double cutoff;        // model cutoff radius
  double cutoff_square; // cutoff * cutoff, cached for distance tests
  std::vector<torch::jit::Module> model_list; // one module per GNN layer
  torch::Device device = torch::kCPU;         // compute device
  torch::Device device_comm = torch::kCPU;    // device used for comm buffers
  torch::Device get_cuda_device();
  bool use_cuda_mpi; // true when MPI is CUDA-aware and a GPU is used

  // for communication
  // Most of these variables for communication is temporary and valid for only
  // one MD step.
  int x_dim;      // to determine per atom data size
  int graph_size; // number of atoms in the local graph
  torch::Tensor x_comm; // x_local + x_ghost + x_comm_extra
  void comm_preprocess();
  bool comm_preprocess_done = false;

  // temporary variables holds for each compute step;
  // maps an atom index to its slot past graph_size in x_comm
  std::unordered_map<int, long> extra_graph_idx_map;
  // To use scatter, store long instead of int
  // array of vector, one entry per swap direction
  std::vector<long> comm_index_pack_forward[6];
  std::vector<long> comm_index_unpack_forward[6];
  std::vector<long> comm_index_unpack_reverse[6];
  // its size is 6 and initialized at comm_preprocess()
  torch::Tensor comm_index_pack_forward_tensor[6];
  torch::Tensor comm_index_unpack_forward_tensor[6];
  torch::Tensor comm_index_unpack_reverse_tensor[6];
  // to use tag_to_graph_idx inside comm methods
  int *tag_to_graph_idx_ptr = nullptr;
  int sendproc[6]; // neighbor ranks, set via notify_proc_ids()
  int recvproc[6];

public:
  PairE3GNNParallel(class LAMMPS *);
  ~PairE3GNNParallel();
  // TODO: keep encapsulation..
  void compute(int, int) override;
  void settings(int, char **) override;
  // read Atom type string from input script & related coeff
  void coeff(int, char **) override;
  void allocate();

  // index-map construction (first, fake comm pass) and the actual
  // per-layer feature exchange used during compute()
  void pack_forward_init(int n, int *list, int comm_phase);
  void unpack_forward_init(int n, int first, int comm_phase);
  int pack_forward_comm_gnn(float *buf, int comm_phase);
  void unpack_forward_comm_gnn(float *buf, int comm_phase);
  int pack_reverse_comm_gnn(float *buf, int comm_phase);
  void unpack_reverse_comm_gnn(float *buf, int comm_phase);

  void init_style() override;
  double init_one(int, int) override;
  int get_x_dim();
  bool use_cuda_mpi_();
  bool is_comm_preprocess_done();
  void notify_proc_ids(const int *sendproc, const int *recvproc);

  bool print_info = false;
  int world_rank;
};

// Singleton owning the device-side MPI staging buffers (grown on demand,
// freed in the destructor). Copy construction/assignment are disabled.
class DeviceBuffManager {
private:
  DeviceBuffManager() {}
  DeviceBuffManager(const DeviceBuffManager &);
  DeviceBuffManager &operator=(const DeviceBuffManager &);
  float *buf_send_device = nullptr;
  float *buf_recv_device = nullptr;
  int send_buf_size = 0;
  int recv_buf_size = 0;

public:
  static DeviceBuffManager &getInstance();
  void get_buffer(int, int, float *&, float *&);
  ~DeviceBuffManager();
};

} // namespace LAMMPS_NS
#endif
#endif
mace-bench/3rdparty/SevenNet/sevenn/pair_e3gnn/patch_lammps.sh
0 → 100644
View file @
ca86f720
#!/bin/bash
# Patch a LAMMPS source tree with the SevenNet e3gnn pair styles (and
# optionally the D3 dispersion CUDA pair style), adjusting cmake accordingly.
lammps_root=$1
cxx_standard=$2 # 14, 17
d3_support=$3 # 1, 0
SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")

###########################################
# Check if the given arguments are valid  #
###########################################
# Check the number of arguments
if [ "$#" -ne 3 ]; then
    echo "Usage: sh patch_lammps.sh {lammps_root} {cxx_standard} {d3_support}"
    echo "  {lammps_root}: Root directory of LAMMPS source"
    echo "  {cxx_standard}: C++ standard (14, 17)"
    echo "  {d3_support}: Support for pair_d3 (1, 0)"
    exit 1
fi

# Check if the lammps_root directory exists
if [ ! -d "$lammps_root" ]; then
    echo "Error: No such directory: $lammps_root"
    exit 1
fi

# Check if the given directory is the root of LAMMPS source
if [ ! -d "$lammps_root/cmake" ] && [ ! -d "$lammps_root/potentials" ]; then
    echo "Error: Given $lammps_root is not a root of LAMMPS source"
    exit 1
fi

# Check if the script is being run from the root of SevenNet
if [ ! -f "${SCRIPT_DIR}/pair_e3gnn.cpp" ]; then
    echo "Error: Script executed in a wrong directory"
    exit 1
fi

# Check if the patch is already applied
if [ -f "$lammps_root/src/pair_e3gnn.cpp" ]; then
    echo "----------------------------------------------------------"
    echo "Seems like given LAMMPS is already patched."
    echo "Try again after removing src/pair_e3gnn.cpp to force patch"
    echo "----------------------------------------------------------"
    echo "Example build commands, under LAMMPS root"
    echo "  mkdir build; cd build"
    echo "  cmake ../cmake -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)')"
    echo "  make -j 4"
    exit 0
fi

# Check if OpenMPI exists and if it is CUDA-aware
if command -v ompi_info &> /dev/null; then
    cuda_support=$(ompi_info --parsable --all | grep mpi_built_with_cuda_support:value)
    if [[ -z "$cuda_support" ]]; then
        echo "OpenMPI not found, parallel performance is not optimal"
    elif [[ "$cuda_support" == *"true"* ]]; then
        echo "OpenMPI is CUDA aware"
    else
        echo "This system's OpenMPI is not 'CUDA aware', parallel performance is not optimal"
    fi
else
    echo "OpenMPI not found, parallel performance is not optimal"
fi

# Extract LAMMPS version and update
lammps_version=$(grep "#define LAMMPS_VERSION" $lammps_root/src/version.h | awk '{print $3, $4, $5}' | tr -d '"')
# Combine version and update
detected_version="$lammps_version"
required_version="2 Aug 2023" # Example required version

# Check if the detected version is compatible (warn only, do not abort)
if [[ "$detected_version" != "$required_version" ]]; then
    echo "Warning: Detected LAMMPS version ($detected_version) may not be compatible. Required version: $required_version"
fi

###########################################
# Backup original LAMMPS source code      #
###########################################
# Create a backup directory if it doesn't exist
backup_dir="$lammps_root/_backups"
mkdir -p $backup_dir

# Copy comm_* from original LAMMPS source as backup
cp $lammps_root/src/comm_brick.cpp $backup_dir/
cp $lammps_root/src/comm_brick.h $backup_dir/

# Copy cmake/CMakeLists.txt from original source as backup
cp $lammps_root/cmake/CMakeLists.txt $backup_dir/CMakeLists.txt

###########################################
# Patch LAMMPS source code: e3gnn         #
###########################################
# 1. Copy pair_e3gnn files to LAMMPS source
cp $SCRIPT_DIR/{pair_e3gnn,pair_e3gnn_parallel,comm_brick}.cpp $lammps_root/src/
cp $SCRIPT_DIR/{pair_e3gnn,pair_e3gnn_parallel,comm_brick}.h $lammps_root/src/

# 2. Patch cmake/CMakeLists.txt: bump C++ standard and link LibTorch
sed -i "s/set(CMAKE_CXX_STANDARD 11)/set(CMAKE_CXX_STANDARD $cxx_standard)/" $lammps_root/cmake/CMakeLists.txt
cat >> $lammps_root/cmake/CMakeLists.txt << "EOF"
find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
target_link_libraries(lammps PUBLIC "${TORCH_LIBRARIES}")
EOF

###########################################
# Patch LAMMPS source code: d3            #
###########################################
if [ "$d3_support" -ne 0 ]; then
    # 1. Copy pair_d3 files to LAMMPS source
    cp $SCRIPT_DIR/pair_d3.cu $lammps_root/src/
    cp $SCRIPT_DIR/pair_d3.h $lammps_root/src/
    cp $SCRIPT_DIR/pair_d3_pars.h $lammps_root/src/

    # 2. Patch cmake/CMakeLists.txt: enable CUDA language, add .cu sources
    # to the glob, and link the CUDA runtime
    sed -i "s/project(lammps CXX)/project(lammps CXX CUDA)/" $lammps_root/cmake/CMakeLists.txt
    sed -i "s/\${LAMMPS_SOURCE_DIR}\/\[\^.\]\*\.cpp/\${LAMMPS_SOURCE_DIR}\/\[\^.\]\*\.cpp \${LAMMPS_SOURCE_DIR}\/\[\^.\]\*\.cu/" $lammps_root/cmake/CMakeLists.txt
    cat >> $lammps_root/cmake/CMakeLists.txt << "EOF"
find_package(CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fmad=false -O3")
string(REPLACE "-gencode arch=compute_50,code=sm_50" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
target_link_libraries(lammps PUBLIC ${CUDA_LIBRARIES} cuda)
EOF
fi

###########################################
# Print changes and backup file locations #
###########################################
# Print changes and backup file locations
echo "Changes made:"
echo "  - Original LAMMPS files (src/comm_brick.*, cmake/CMakeList.txt) are in {lammps_root}/_backups"
echo "  - Copied contents of pair_e3gnn to $lammps_root/src/"
echo "  - Patched CMakeLists.txt: include LibTorch, CXX_STANDARD $cxx_standard"
if [ "$d3_support" -ne 0 ]; then
    echo "  - Copied contents of pair_d3 to $lammps_root/src/"
    echo "  - Patched CMakeLists.txt: include CUDA"
fi

# Provide example cmake command to the user
echo "Example build commands, under LAMMPS root"
echo "  mkdir build; cd build"
echo "  cmake ../cmake -DCMAKE_PREFIX_PATH=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)')"
echo "  make -j 4"
exit 0
mace-bench/3rdparty/SevenNet/sevenn/presets/MF_0.yaml
0 → 100644
View file @
ca86f720
model
:
chemical_species
:
'
univ'
# Ready for 119 elements
cutoff
:
5.0
channel
:
128
is_parity
:
False
lmax
:
2
num_convolution_layer
:
5
irreps_manual
:
-
"
128x0e"
-
"
128x0e+64x1e+32x2e"
-
"
128x0e+64x1e+32x2e"
-
"
128x0e+64x1e+32x2e"
-
"
128x0e+64x1e+32x2e"
-
"
128x0e"
weight_nn_hidden_neurons
:
[
64
,
64
]
radial_basis
:
radial_basis_name
:
'
bessel'
bessel_basis_num
:
8
cutoff_function
:
cutoff_function_name
:
'
XPLOR'
cutoff_on
:
4.5
act_gate
:
{
'
e'
:
'
silu'
,
'
o'
:
'
tanh'
}
act_scalar
:
{
'
e'
:
'
silu'
,
'
o'
:
'
tanh'
}
conv_denominator
:
'
avg_num_neigh'
train_shift_scale
:
False
train_denominator
:
False
self_connection_type
:
'
linear'
# Following are used to specify which part of the model would utilize fidelity-dependent parameters for multi-fidelity training.
# For detailed architecture, please refer to https://arxiv.org/abs/2409.07947
# Parts using fidelity-dependent weights are indicated as `Modified linear` layers in Figure 1.
use_modal_node_embedding
:
False
# If true, use modified linear layer in atom-type embedding layer.
use_modal_self_inter_intro
:
True
# If true, use modified linear layers in self-interaction block before the convolution in the interaction blocks.
use_modal_self_inter_outro
:
True
# If true, use modified linear layers in self-interaction block after the convolution in the interaction blocks.
use_modal_output_block
:
True
# If true, use modified linear layer in the output block.
train
:
train_shuffle
:
True
random_seed
:
777
is_train_stress
:
True
epoch
:
200
loss
:
'
Huber'
loss_param
:
delta
:
0.01
optimizer
:
'
adam'
optim_param
:
lr
:
0.01
scheduler
:
'
linearlr'
scheduler_param
:
start_factor
:
1.0
total_iters
:
200
end_factor
:
0.0001
force_loss_weight
:
1.00
stress_loss_weight
:
0.01
error_record
:
-
[
'
Energy'
,
'
MAE'
]
-
[
'
Force'
,
'
MAE'
]
-
[
'
Stress'
,
'
MAE'
]
-
[
'
Energy'
,
'
Loss'
]
-
[
'
Force'
,
'
Loss'
]
-
[
'
Stress'
,
'
Loss'
]
-
[
'
TotalLoss'
,
'
None'
]
per_epoch
:
10
use_modality
:
True
use_weight
:
True
data
:
batch_size
:
64
shift
:
'
elemwise_reference_energies'
scale
:
1.73
use_modal_wise_shift
:
True
# If true, use different atomic energy shift for each database
use_modal_wise_scale
:
False
# If true, use different atomic energy scale for each database
load_trainset_path
:
-
data_modality
:
pbe
# Name of database
file_list
:
-
file
:
"
**path
to
PBE
database**"
# ASE readable or .pt file (graph.pt)
data_weight
:
energy
:
1.0
force
:
1.0
# This weight would be additionally multiplied to `force_loss_weight` for this database
stress
:
1.0
# This weight would be additionally multiplied to `stress_loss_weight` for this database
-
data_modality
:
r2scan
file_list
:
-
file
:
"
**path
to
r2SCAN
database**"
data_weight
:
energy
:
7.0
force
:
7.0
stress
:
7.0
load_pbe_validset_path
:
# any name starts with 'load' and ends with 'set_path'
-
data_modality
:
pbe
# modality must be given for mm valid set
file_list
:
-
file
:
"
**path
to
PBE
test
set**"
load_scan_validset_path
:
-
data_modality
:
r2scan
file_list
:
-
file
:
"
**path
to
r2SCAN
test
set**"
mace-bench/3rdparty/SevenNet/sevenn/presets/base.yaml
0 → 100644
View file @
ca86f720
# Example input.yaml for training SevenNet.
# '*' signifies default. You can check log.sevenn for defaults.
model
:
chemical_species
:
'
Auto'
# Elements model should know. [ 'Univ' | 'Auto' | manual_user_input ]
cutoff
:
5.0
# Cutoff radius in Angstroms. If two atoms are within the cutoff, they are connected.
channel
:
32
# The multiplicity(channel) of node features.
lmax
:
2
# Maximum order of irreducible representations (rotation order).
num_convolution_layer
:
3
# The number of message passing layers.
#irreps_manual: # Manually set irreps of the model in each layer
#- "128x0e"
#- "128x0e+64x1e+32x2e"
#- "128x0e+64x1e+32x2e"
#- "128x0e+64x1e+32x2e"
#- "128x0e+64x1e+32x2e"
#- "128x0e"
weight_nn_hidden_neurons
:
[
64
,
64
]
# Hidden neurons in convolution weight neural network
radial_basis
:
# Function and its parameters to encode radial distance
radial_basis_name
:
'
bessel'
# Only 'bessel' is currently supported
bessel_basis_num
:
8
cutoff_function
:
# Envelop function, multiplied to radial_basis functions to init edge features
cutoff_function_name
:
'
poly_cut'
# {'poly_cut' and 'poly_cut_p_value'} or {'XPLOR' and 'cutoff_on'}
poly_cut_p_value
:
6
act_gate
:
{
'
e'
:
'
silu'
,
'
o'
:
'
tanh'
}
# Equivalent to 'nonlinearity_gates' in nequip
act_scalar
:
{
'
e'
:
'
silu'
,
'
o'
:
'
tanh'
}
# Equivalent to 'nonlinearity_scalars' in nequip
is_parity
:
False
# Parity True (E(3) group) or False (restrict to SE(3) group)
self_connection_type
:
'
nequip'
# Default is 'nequip'. 'linear' is used for SevenNet-0. I recommend 'linear' for 'Univ' chemical_species
conv_denominator
:
"
avg_num_neigh"
# Valid options are "avg_num_neigh*", "sqrt_avg_num_neigh", or float
train_denominator
:
False
# Enable training for denominator in convolution layer
train_shift_scale
:
False
# Enable training for shift & scale in output layer
train
:
random_seed
:
1
is_train_stress
:
True
# Includes stress in the loss function
epoch
:
200
# Ends training after this number of epochs
#loss: 'Huber' # Default is 'mse' (mean squared error)
#loss_param:
#delta: 0.01
# Each optimizer and scheduler have different available parameters.
# You can refer to sevenn/train/optim.py for supporting optimizer & schedulers
optimizer
:
'
adam'
# Options available are 'sgd', 'adagrad', 'adam', 'adamw', 'radam'
optim_param
:
lr
:
0.005
scheduler
:
'
exponentiallr'
# 'steplr', 'multisteplr', 'exponentiallr', 'cosineannealinglr', 'reducelronplateau', 'linearlr'
scheduler_param
:
gamma
:
0.99
force_loss_weight
:
0.1
# Coefficient for force loss
stress_loss_weight
:
1e-06
# Coefficient for stress loss (to kbar unit)
per_epoch
:
10
# Generate checkpoints every this epoch
# ['target y', 'metric']
# Target y: TotalEnergy, Energy, Force, Stress, Stress_GPa, TotalLoss
# Metric : RMSE, MAE, or Loss
error_record
:
-
[
'
Energy'
,
'
RMSE'
]
-
[
'
Force'
,
'
RMSE'
]
-
[
'
Stress'
,
'
RMSE'
]
-
[
'
TotalLoss'
,
'
None'
]
# Continue training model from given checkpoint, or pre-trained model checkpoint for fine-tuning
#continue:
#checkpoint: 'checkpoint_best.pth' # Checkpoint of pre-trained model or a model want to continue training.
#reset_optimizer: False # Set True for fine-tuning
#reset_scheduler: False # Set True for fine-tuning
data
:
batch_size
:
4
# Per GPU batch size.
shift
:
'
per_atom_energy_mean'
# One of 'per_atom_energy_mean*', 'elemwise_reference_energies', float
scale
:
'
force_rms'
# One of 'force_rms*', 'per_atom_energy_std', float
# SevenNet automatically matches data format from its filename.
# For those not `structure_list` or `.pt` files, assumes it is ASE readable
# In this case, below arguments are directly passed to `ase.io.read`
data_format_args
:
index
:
'
:'
# see `https://wiki.fysik.dtu.dk/ase/ase/io/io.html` for more valid arguments
# validset is needed if you want '_best.pth' during training. If not, both validset and testset is optional.
load_trainset_path
:
[
'
./train_*.extxyz'
]
# Example of using ase as data_format, support multiple files and expansion(*)
load_validset_path
:
[
'
./valid.extxyz'
]
load_testset_path
:
[
'
./sevenn_data/mydata.pt'
]
# Graph can be preprocessed using `sevenn_graph_build` and accessible like this
mace-bench/3rdparty/SevenNet/sevenn/presets/fine_tune.yaml
0 → 100644
View file @
ca86f720
# Example input.yaml for fine-tuning sevennet-0
# '*' signifies default. You can check log.sevenn for defaults.
model:  # model keys should be consistent except for train_* keys
    chemical_species: 'Auto'
    cutoff: 5.0
    channel: 128
    is_parity: False
    lmax: 2
    num_convolution_layer: 5
    irreps_manual:
        - "128x0e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e"

    weight_nn_hidden_neurons: [64, 64]
    radial_basis:
        radial_basis_name: 'bessel'
        bessel_basis_num: 8
    cutoff_function:
        cutoff_function_name: 'XPLOR'
        cutoff_on: 4.5

    self_connection_type: 'linear'

    train_shift_scale: False  # customizable (True | False)
    train_denominator: False  # customizable (True | False)
train:  # Customizable
    random_seed: 1
    is_train_stress: True
    epoch: 100

    loss: 'Huber'  # keeping original loss function give better ft result
    loss_param:
        delta: 0.01

    optimizer: 'adam'
    optim_param:
        lr: 0.004
    scheduler: 'exponentiallr'
    scheduler_param:
        gamma: 0.99
    force_loss_weight: 1.0
    stress_loss_weight: 0.01

    per_epoch: 10  # Generate checkpoints every this epoch

    # ['target y', 'metric']
    # Target y: TotalEnergy, Energy, Force, Stress, Stress_GPa, TotalLoss
    # Metric  : RMSE, MAE, or Loss
    error_record:
        - ['Energy', 'RMSE']
        - ['Force', 'RMSE']
        - ['Stress', 'RMSE']
        - ['TotalLoss', 'None']

    continue:
        reset_optimizer: True
        reset_scheduler: True
        reset_epoch: True
        checkpoint: 'SevenNet-0_11July2024'
data:  # Customizable
    batch_size: 4
    data_divide_ratio: 0.1

    # SevenNet automatically matches data format from its filename.
    # For those not `structure_list` or `.pt` files, assumes it is ASE readable
    # In this case, below arguments are directly passed to `ase.io.read`
    data_format_args:
        index: ':'  # see `https://wiki.fysik.dtu.dk/ase/ase/io/io.html` for more valid arguments

    # validset is needed if you want '_best.pth' during training. If not, both validset and testset is optional.
    load_trainset_path: ['./train_*.extxyz']  # Example of using ase as data_format, support multiple files and expansion(*)
    load_validset_path: ['./valid.extxyz']
    load_testset_path: ['./sevenn_data/mydata.pt']
    # Graph can be preprocessed using `sevenn_graph_build` and accessible like this
mace-bench/3rdparty/SevenNet/sevenn/presets/fine_tune_le.yaml
0 → 100644
View file @
ca86f720
# Application of 7net-0 on liquid electrolyte system via fine-tuning
# Paper: https://arxiv.org/abs/2501.05211
model:  # parameters of SevenNet-0, should not be changed
    chemical_species: 'auto'
    cutoff: 5.0
    channel: 128
    is_parity: False
    lmax: 2
    num_convolution_layer: 5
    irreps_manual:
        - "128x0e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e"
    weight_nn_hidden_neurons: [64, 64]
    radial_basis:
        radial_basis_name: 'bessel'
        bessel_basis_num: 8
    cutoff_function:
        cutoff_function_name: 'XPLOR'
        cutoff_on: 4.5
    act_gate: {'e': 'silu', 'o': 'tanh'}
    act_scalar: {'e': 'silu', 'o': 'tanh'}
    self_connection_type: 'linear'

    # useful for fine-tuning
    train_shift_scale: True
    train_avg_num_neigh: True
train:
    random_seed: 1
    is_train_stress: True
    epoch: 100  # we went through 100 epochs and chose checkpoint at 50 epoch where the error have reached plateau.

    loss: 'Huber'
    loss_param:
        delta: 0.01

    optimizer: 'adam'
    optim_param:
        lr: 0.0001
    scheduler: 'linearlr'
    scheduler_param:
        start_factor: 1.0
        total_iters: 600
        end_factor: 0.000001

    force_loss_weight: 1.00
    stress_loss_weight: 1.00  # 7net-0 quantitatively lacked accuracy on pressure histograms compared to DFT, so we increased stress loss weight

    error_record:
        - ['Energy', 'RMSE']
        - ['Force', 'RMSE']
        - ['Stress', 'RMSE']
        - ['Energy', 'MAE']
        - ['Force', 'MAE']
        - ['Stress', 'MAE']
        - ['Energy', 'Loss']
        - ['Force', 'Loss']
        - ['Stress', 'Loss']
        - ['TotalLoss', 'None']

    per_epoch: 10  # Generate epoch every this number of times

    continue:
        use_statistic_values_of_checkpoint: True
        checkpoint: '7net-0'  # fine-tuning from 7net-0
        reset_optimizer: True
        reset_scheduler: True
data:
    batch_size: 1  # our fine-tuning dataset had ~360 atoms per structure, so we used batch size of 1 to avoid GPU OOM error.
    shift: 'elemwise_reference_energies'
    scale: 1.858
    data_format: 'ase'
    data_divide_ratio: 0.05
    load_dataset_path: ["./data/total.extxyz"]
mace-bench/3rdparty/SevenNet/sevenn/presets/multi_modal.yaml
0 → 100644
View file @
ca86f720
model:
    chemical_species: 'univ'  # Ready for 119 elements
    cutoff: 6.0
    channel: 128
    is_parity: False
    lmax: 3
    num_convolution_layer: 3
    irreps_manual:
        - "128x0e"
        - "128x0e+64x1e+32x2e+16x3e"
        - "128x0e+64x1e+32x2e+16x3e"
        - "128x0e"
    weight_nn_hidden_neurons: [64, 64]
    radial_basis:
        radial_basis_name: 'bessel'
        bessel_basis_num: 8
    cutoff_function:
        cutoff_function_name: 'XPLOR'
        cutoff_on: 5.5
    act_gate: {'e': 'silu', 'o': 'tanh'}
    act_scalar: {'e': 'silu', 'o': 'tanh'}
    conv_denominator: 'avg_num_neigh'
    train_shift_scale: True
    train_denominator: False
    self_connection_type: 'linear'

    # Following are used to specify which part of the model would utilize fidelity-dependent parameters for multi-fidelity training.
    # For detailed architecture, please refer to https://arxiv.org/abs/2409.07947
    # Parts using fidelity-dependent weights are indicated as `Modified linear` layers in Figure 1.
    use_modal_node_embedding: False  # If true, use modified linear layer in atom-type embedding layer.
    use_modal_self_inter_intro: True  # If true, use modified linear layers in self-interaction block before the convolution in the interaction blocks.
    use_modal_self_inter_outro: True  # If true, use modified linear layers in self-interaction block after the convolution in the interaction blocks.
    use_modal_output_block: True  # If true, use modified linear layer in the output block.
train:
    train_shuffle: True
    random_seed: 777
    is_train_stress: True
    epoch: 200
    loss: 'Huber'
    loss_param:
        delta: 0.01
    optimizer: 'adam'
    optim_param:
        lr: 0.01
    scheduler: 'linearlr'
    scheduler_param:
        start_factor: 1.0
        total_iters: 200
        end_factor: 0.0001
    force_loss_weight: 1.00
    stress_loss_weight: 0.01
    error_record:
        - ['Energy', 'MAE']
        - ['Force', 'MAE']
        - ['Stress', 'MAE']
        - ['Energy', 'Loss']
        - ['Force', 'Loss']
        - ['Stress', 'Loss']
        - ['TotalLoss', 'None']
    per_epoch: 10
    use_modality: True
    use_weight: True
data:
    batch_size: 16
    shift: 'elemwise_reference_energies'
    scale: 'force_rms'
    use_modal_wise_shift: True  # If true, use different atomic energy shift for each database
    use_modal_wise_scale: False  # If true, use different atomic energy scale for each database
    load_trainset_path:
        - data_modality: pbe  # Name of database
          file_list:
              - file: "path to pbe dataset"  # ASE readable or .pt file (graph.pt)
          data_weight:
              energy: 1.0
              force: 0.1  # This weight would be additionally multiplied to `force_loss_weight` for this database
              stress: 1.0  # This weight would be additionally multiplied to `stress_loss_weight` for this database
        - data_modality: scan
          file_list:
              - file: "path to scan dataset"
          data_weight:
              energy: 1.0
              force: 10.0
              stress: 1.0
    load_pbe_validset_path:  # any name starts with 'load' and ends with 'set_path'
        - data_modality: pbe  # modality must be given for mm valid set
          file_list:
              - file: "path to pbe validset"
    load_scan_validset_path:
        - data_modality: scan
          file_list:
              - file: "path to scan validset"
mace-bench/3rdparty/SevenNet/sevenn/presets/sevennet-0.yaml
0 → 100644
View file @
ca86f720
# SevenNet-0, should be run with `sevenn -m train_v1` as it uses old routine
model:
    chemical_species: 'auto'
    cutoff: 5.0
    channel: 128
    is_parity: False
    lmax: 2
    num_convolution_layer: 5
    irreps_manual:
        - "128x0e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e+64x1e+32x2e"
        - "128x0e"
    weight_nn_hidden_neurons: [64, 64]
    radial_basis:
        radial_basis_name: 'bessel'
        bessel_basis_num: 8
    cutoff_function:
        cutoff_function_name: 'XPLOR'
        cutoff_on: 4.5
    act_gate: {'e': 'silu', 'o': 'tanh'}
    act_scalar: {'e': 'silu', 'o': 'tanh'}
    conv_denominator: 'avg_num_neigh'
    train_shift_scale: False
    train_denominator: False
    self_connection_type: 'linear'
train:
    train_shuffle: False
    random_seed: 1
    is_train_stress: True
    epoch: 600
    loss: 'Huber'
    loss_param:
        delta: 0.01
    optimizer: 'adam'
    optim_param:
        lr: 0.01
    scheduler: 'linearlr'
    scheduler_param:
        start_factor: 1.0
        total_iters: 600
        end_factor: 0.0001
    force_loss_weight: 1.00
    stress_loss_weight: 0.01
    error_record:
        - ['Energy', 'RMSE']
        - ['Force', 'RMSE']
        - ['Stress', 'RMSE']
        - ['Energy', 'MAE']
        - ['Force', 'MAE']
        - ['Stress', 'MAE']
        - ['Energy', 'Loss']
        - ['Force', 'Loss']
        - ['Stress', 'Loss']
        - ['TotalLoss', 'None']
    per_epoch: 10
    # continue:
    #     checkpoint: './checkpoint_last.pth'
    #     reset_optimizer: False
    #     reset_scheduler: False
data:
    batch_size: 128  # per GPU batch size, as the model trained with 32 GPUs, the effective batch size equals 4096.
    scale: 'per_atom_energy_std'
    shift: 'elemwise_reference_energies'
    data_format: 'ase'
    save_by_train_valid: False
    load_dataset_path: ["path_to_MPtrj_total.sevenn_data"]
    load_validset_path: ["validaset.sevenn_data"]
mace-bench/3rdparty/SevenNet/sevenn/presets/sevennet-l3i5.yaml
0 → 100644
View file @
ca86f720
model:
    chemical_species: auto
    cutoff: 5.0
    irreps_manual:
        - 128x0e
        - 128x0e+64x1e+32x2e+32x3e
        - 128x0e+64x1e+32x2e+32x3e
        - 128x0e+64x1e+32x2e+32x3e
        - 128x0e+64x1e+32x2e+32x3e
        - 128x0e
    channel: 128
    lmax: 3
    num_convolution_layer: 5
    is_parity: false
    radial_basis:
        radial_basis_name: bessel
        bessel_basis_num: 8
    cutoff_function:
        cutoff_function_name: poly_cut
        poly_cut_p_value: 6
    act_radial: silu
    weight_nn_hidden_neurons:
        - 64
        - 64
    act_scalar:
        e: silu
        o: tanh
    act_gate:
        e: silu
        o: tanh
    train_denominator: false
    train_shift_scale: false
    use_bias_in_linear: false
    readout_as_fcn: false
    self_connection_type: linear
    interaction_type: nequip
train:
    random_seed: 1
    epoch: 600
    loss: Huber
    loss_param:
        delta: 0.01
    optimizer: adam
    optim_param:
        lr: 0.01
    scheduler: linearlr
    scheduler_param:
        start_factor: 1.0
        total_iters: 600
        end_factor: 0.0001
    force_loss_weight: 1.0
    stress_loss_weight: 0.01
    per_epoch: 10
    is_train_stress: true
    train_shuffle: true
    error_record:
        - - Energy
          - MAE
        - - Energy
          - RMSE
        - - Force
          - MAE
        - - Force
          - RMSE
        - - Stress
          - MAE
        - - Stress
          - RMSE
        - - Energy
          - Loss
        - - Force
          - Loss
        - - Stress
          - Loss
        - - TotalLoss
          - None
    best_metric: TotalLoss
data:
    data_format: ase
    data_format_args: {}
    batch_size: 1024  # global batch size, should be divided by the number of GPUs
    load_trainset_path: '**path_to_trainset**'
    load_validset_path: '**path_to_validset**'
    shift: 'elemwise_reference_energies'
    scale: 'force_rms'
mace-bench/3rdparty/SevenNet/sevenn/scripts/__init__.py
0 → 100644
View file @
ca86f720
mace-bench/3rdparty/SevenNet/sevenn/scripts/__pycache__/__init__.cpython-310.pyc
0 → 100644
View file @
ca86f720
File added
mace-bench/3rdparty/SevenNet/sevenn/scripts/__pycache__/backward_compatibility.cpython-310.pyc
0 → 100644
View file @
ca86f720
File added
mace-bench/3rdparty/SevenNet/sevenn/scripts/backward_compatibility.py
0 → 100644
View file @
ca86f720
"""
Debt
keep old pre-trained checkpoints unchanged.
"""
import
copy
import
torch
import
sevenn._keys
as
KEY
def version_tuple(v1):
    """Parse a dotted version string (e.g. '0.10.1') into a tuple of ints.

    Tuples compare element-wise, so e.g. (0, 9) < (0, 10) — correct
    version ordering, unlike lexicographic string comparison.
    """
    parts = v1.split('.')
    return tuple(int(p) for p in parts)
def patch_old_config(config):
    """Patch a checkpoint config written by an old SevenNet version in place.

    Raises ValueError if the config carries no 'version' entry, or if it used
    the no-longer-supported `optimize_by_reduce: False` option.
    Returns the (mutated) config.
    """
    version = config.get('version', None)
    if not version:
        raise ValueError('No version found in config')
    # Only major.minor are compared; patch/suffix components are ignored here.
    major, minor, _ = version.split('.')[:3]
    major, minor = int(major), int(minor)
    # NOTE(review): the following patches were reconstructed as nested under the
    # version gate (apply only for <= 0.9.x) — confirm against upstream source.
    if major == 0 and minor <= 9:
        # poly_cut_p_value is meaningless for XPLOR cutoff; drop the stale key.
        if config[KEY.CUTOFF_FUNCTION][KEY.CUTOFF_FUNCTION_NAME] == 'XPLOR':
            config[KEY.CUTOFF_FUNCTION].pop('poly_cut_p_value', None)
        # Old name 'train_avg_num_neigh' was renamed to the denominator key.
        if KEY.TRAIN_DENOMINTAOR not in config:
            config[KEY.TRAIN_DENOMINTAOR] = config.pop('train_avg_num_neigh', False)
        _opt = config.pop('optimize_by_reduce', None)
        if _opt is False:
            raise ValueError(
                'This checkpoint(optimize_by_reduce: False) is no longer supported'
            )
        # Provide defaults for keys that did not exist in old configs.
        if KEY.CONV_DENOMINATOR not in config:
            config[KEY.CONV_DENOMINATOR] = 0.0
        if KEY._NORMALIZE_SPH not in config:
            config[KEY._NORMALIZE_SPH] = False
    return config
def map_old_model(old_model_state_dict):
    """
    For compatibility with old namings (before 'correct' branch merged 2404XX)
    Map old model's module names to new model's module names
    """
    # Fixed (non-layer-indexed) module renames.
    rename = {
        'EdgeEmbedding': 'edge_embedding',
        'reducing nn input to hidden': 'reduce_input_to_hidden',
        'reducing nn hidden to energy': 'reduce_hidden_to_energy',
        'rescale atomic energy': 'rescale_atomic_energy',
    }
    # Per-layer renames: '<i> some name' -> '<i>_some_name' (layers 0..9).
    for layer in range(10):
        rename[f'{layer} self connection intro'] = f'{layer}_self_connection_intro'
        rename[f'{layer} convolution'] = f'{layer}_convolution'
        rename[f'{layer} self interaction 2'] = f'{layer}_self_interaction_2'
        rename[f'{layer} equivariant gate'] = f'{layer}_equivariant_gate'

    patched = {}
    for full_key, tensor in old_model_state_dict.items():
        # First dotted component is the module name; the rest addresses
        # a parameter/buffer inside it.
        head, _, tail = full_key.partition('.')
        if head in rename:
            # Also fix the old 'denumerator' typo inside renamed modules.
            tail = tail.replace('denumerator', 'denominator')
            patched[rename[head] + '.' + tail] = tensor
        else:
            # Unknown modules pass through untouched (original key kept).
            patched[full_key] = tensor
    return patched
def sort_old_convolution(model_now, state_dict):
    from e3nn.o3 import wigner_3j

    """
    Reason1: we have to sort instructions of convolution to be compatible with
    cuEquivariance. (therefore, sort weight)
    Reason2: some of old convolution module's w3j coeff has flipped sign. This also
    has to be fixed to be compatible with cuEquivarinace.
    """

    def patch(stct):
        # Instructions as they were before sorting; keep only (in1, in2, out).
        inst_old = copy.copy(conv._instructions_before_sort)
        inst_old = [(inst[0], inst[1], inst[2]) for inst in inst_old]
        # One-shot attribute: remove so the module is not patched twice.
        del conv._instructions_before_sort

        conv_args = conv.convolution_kwargs
        irreps_in1 = conv_args['irreps_in1']
        irreps_in2 = conv_args['irreps_in2']
        # Output irreps key name differs between module versions.
        irreps_out = conv_args.get('irreps_out', conv_args.get('filter_irreps_out'))

        # New (cuEquivariance-compatible) order: sorted by output index.
        inst_sorted = sorted(inst_old, key=lambda x: x[2])
        inst_sorted = [
            # in1, in2, out, weights
            (inst[0], inst[1], inst[2], irreps_in1[inst[0]].mul)
            for inst in inst_sorted
        ]

        # Last weight layer of the radial weight-NN holds per-instruction
        # weight columns in the OLD instruction order.
        n = len(weight_nn.hs) - 2
        ww_key = f'{conv_key}.weight_nn.layer{n}.weight'
        ww = stct[ww_key]
        ww_sorted = [None] * len(inst_old)
        _prev_idx = 0
        for ist_src in inst_old:
            for j, ist_dst in enumerate(inst_sorted):
                # Find where this old instruction landed after sorting.
                if not all(ist_src[ii] == ist_dst[ii] for ii in range(3)):
                    continue
                numel = ist_dst[3]  # weight num
                # Slice this instruction's weight columns from the old layout.
                ww_src = ww[:, _prev_idx : _prev_idx + numel]
                l1, l2, l3 = (
                    irreps_in1[ist_src[0]].ir.l,
                    irreps_in2[ist_src[1]].ir.l,
                    irreps_out[ist_src[2]].ir.l,
                )
                if l1 > 0 and l2 > 0 and l3 > 0:
                    # Check stored Wigner-3j coeffs against freshly computed
                    # ones; old checkpoints may have them with flipped sign.
                    w3j_key = f'_w3j_{l1}_{l2}_{l3}'
                    conv_w3j_key = (
                        f'{conv_key}.convolution._compiled_main_left_right.{w3j_key}'
                    )
                    w3j_old = stct[conv_w3j_key]
                    w3j_now = wigner_3j(l1, l2, l3)
                    if not torch.allclose(w3j_old.to(w3j_now.device), w3j_now):
                        # Only an exact sign flip is tolerated; anything else
                        # means the checkpoint is inconsistent.
                        assert torch.allclose(w3j_old.to(w3j_now.device), -1 * w3j_now)
                        # Flip both the weights and the stored coeffs so the
                        # product is unchanged.
                        ww_src = -1 * ww_src
                        stct[conv_w3j_key] *= -1  # stct updated
                _prev_idx += numel
                ww_sorted[j] = ww_src
        ww_sorted = torch.cat(ww_sorted, dim=1)  # type: ignore
        stct[ww_key] = ww_sorted.clone()  # stct updated

    # Group state-dict entries by their convolution module ('<i>_convolution').
    # NOTE(review): assumes every top-level module name contains '_'; a name
    # without one would raise IndexError here — confirm against real keys.
    conv_dicts = {}
    for k, v in state_dict.items():
        key_name = k.split('.')[0]
        if key_name.split('_')[1] == 'convolution':
            if key_name not in conv_dicts:
                conv_dicts[key_name] = {}
            conv_dicts[key_name].update({k: v})

    new_state_dict = {}
    new_state_dict.update(state_dict)
    for conv_key, conv_state_dict in conv_dicts.items():
        # `patch` closes over conv / weight_nn / conv_key set here.
        conv = model_now._modules[conv_key]
        weight_nn = conv.weight_nn
        patch(conv_state_dict)
        new_state_dict.update(conv_state_dict)
    return new_state_dict
def patch_state_dict_if_old(state_dict, config_cp, now_model):
    """Apply legacy-checkpoint patches to `state_dict` when `config_cp`
    records an old SevenNet version.

    Raises ValueError if the config carries no 'version' entry.
    Returns the (possibly replaced) state dict.
    """
    version = config_cp.get('version', None)
    if not version:
        raise ValueError('No version found in config')

    parts = version.split('.')
    vsuffix = ''
    if len(parts) == 4:
        # e.g. '0.11.0.dev0': separate the numeric triple from the suffix.
        vsuffix = parts[-1]
        parts = parts[:3]
    vs = version_tuple('.'.join(parts))

    # Module names changed before 0.10.0.
    if vs < version_tuple('0.10.0'):
        state_dict = map_old_model(state_dict)

    # TODO: change version criteria before release!!!
    # it causes problem if model is sorted but this function is called
    # ... more robust way? idk
    is_0110_dev0 = vs == version_tuple('0.11.0') and vsuffix == 'dev0'
    if vs < version_tuple('0.11.0') or is_0110_dev0:
        state_dict = sort_old_convolution(now_model, state_dict)
    return state_dict
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment