Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
nivren
ICT-CSP
Commits
1be78103
"docs/design/feature/ray_based_execution.md" did not exist on "356077823ea8569ff15218e51228c1b3d50792a9"
Unverified
Commit
1be78103
authored
Aug 24, 2025
by
zcxzcx1
Committed by
GitHub
Aug 24, 2025
Browse files
Add files via upload
parent
f675ef76
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
6212 additions
and
0 deletions
+6212
-0
mace-bench/3rdparty/mace/tests/test_benchmark.py
mace-bench/3rdparty/mace/tests/test_benchmark.py
+121
-0
mace-bench/3rdparty/mace/tests/test_calculator.py
mace-bench/3rdparty/mace/tests/test_calculator.py
+689
-0
mace-bench/3rdparty/mace/tests/test_cg.py
mace-bench/3rdparty/mace/tests/test_cg.py
+12
-0
mace-bench/3rdparty/mace/tests/test_compile.py
mace-bench/3rdparty/mace/tests/test_compile.py
+154
-0
mace-bench/3rdparty/mace/tests/test_cueq.py
mace-bench/3rdparty/mace/tests/test_cueq.py
+181
-0
mace-bench/3rdparty/mace/tests/test_data.py
mace-bench/3rdparty/mace/tests/test_data.py
+213
-0
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
+164
-0
mace-bench/3rdparty/mace/tests/test_foundations.py
mace-bench/3rdparty/mace/tests/test_foundations.py
+512
-0
mace-bench/3rdparty/mace/tests/test_hessian.py
mace-bench/3rdparty/mace/tests/test_hessian.py
+54
-0
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
+134
-0
mace-bench/3rdparty/mace/tests/test_models.py
mace-bench/3rdparty/mace/tests/test_models.py
+374
-0
mace-bench/3rdparty/mace/tests/test_modules.py
mace-bench/3rdparty/mace/tests/test_modules.py
+268
-0
mace-bench/3rdparty/mace/tests/test_multifiles.py
mace-bench/3rdparty/mace/tests/test_multifiles.py
+1029
-0
mace-bench/3rdparty/mace/tests/test_preprocess.py
mace-bench/3rdparty/mace/tests/test_preprocess.py
+206
-0
mace-bench/3rdparty/mace/tests/test_run_train.py
mace-bench/3rdparty/mace/tests/test_run_train.py
+1458
-0
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
+468
-0
mace-bench/3rdparty/mace/tests/test_schedulefree.py
mace-bench/3rdparty/mace/tests/test_schedulefree.py
+127
-0
mace-bench/3rdparty/mace/tests/test_tools.py
mace-bench/3rdparty/mace/tests/test_tools.py
+48
-0
No files found.
mace-bench/3rdparty/mace/tests/test_benchmark.py
0 → 100644
View file @
1be78103
import
json
import
os
from
pathlib
import
Path
from
typing
import
List
,
Optional
import
pandas
as
pd
import
pytest
import
torch
from
ase
import
build
from
mace
import
data
as
mace_data
from
mace.calculators.foundations_models
import
mace_mp
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
,
torch_tools
def is_mace_full_bench():
    """Return True when the full benchmark suite was requested.

    Controlled by the MACE_FULL_BENCH environment variable; anything other
    than the exact string "1" (including unset) counts as disabled.
    """
    flag = os.environ.get("MACE_FULL_BENCH", "0")
    return flag == "1"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
@pytest.mark.benchmark(warmup=True, warmup_iterations=4, min_rounds=8)
@pytest.mark.parametrize("size", (3, 5, 7, 9))
@pytest.mark.parametrize("dtype", ["float32", "float64"])
@pytest.mark.parametrize("compile_mode", [None, "default"])
def test_inference(
    benchmark,
    size: int,
    dtype: str,
    compile_mode: Optional[str],
    device: str = "cuda",
):
    """Benchmark a single forward pass of the medium MACE-MP model.

    The compiled variants are long-running and only execute when
    MACE_FULL_BENCH=1 is set in the environment.
    """
    if compile_mode is not None and not is_mace_full_bench():
        pytest.skip("Skipping long running benchmark, set MACE_FULL_BENCH=1 to execute")

    with torch_tools.default_dtype(dtype):
        model = load_mace_mp_medium(dtype, compile_mode, device)
        batch = create_batch(size, model, device)
        log_bench_info(benchmark, dtype, compile_mode, batch)

        def func():
            # Synchronize so each timed round measures completed GPU work.
            torch.cuda.synchronize()
            model(batch, training=compile_mode is not None, compute_force=True)

        torch.cuda.empty_cache()
        benchmark(func)
def load_mace_mp_medium(dtype, compile_mode, device):
    """Build the "medium" MACE-MP calculator and return its first model on *device*."""
    calc = mace_mp(
        model="medium",
        default_dtype=dtype,
        device=device,
        compile_mode=compile_mode,
        fullgraph=False,
    )
    # The calculator may hold several models; benchmarking uses only the first.
    return calc.models[0].to(device)
def create_batch(size: int, model: torch.nn.Module, device: str) -> dict:
    """Build one input batch: a diamond-carbon supercell repeated size^3 times.

    Cutoff and element table are taken from *model* so the batch matches the
    model's own configuration. Returns the batch as a plain dict on *device*.
    """
    cutoff = model.r_max.item()
    z_table = AtomicNumberTable([int(z) for z in model.atomic_numbers])

    atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms = atoms.repeat((size, size, size))
    config = mace_data.config_from_atoms(atoms)

    dataset = [mace_data.AtomicData.from_config(config, z_table=z_table, cutoff=cutoff)]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )

    batch = next(iter(loader))
    batch.to(device)
    return batch.to_dict()
def log_bench_info(benchmark, dtype, compile_mode, batch):
    """Attach run metadata (system size, dtype, device) to the benchmark record."""
    info = benchmark.extra_info
    info["num_atoms"] = int(batch["positions"].shape[0])
    info["num_edges"] = int(batch["edge_index"].shape[1])
    info["dtype"] = dtype
    info["is_compiled"] = compile_mode is not None
    info["device_name"] = torch.cuda.get_device_name()
def process_benchmark_file(bench_file: Path) -> pd.DataFrame:
    """Load one pytest-benchmark JSON file into a tidy DataFrame.

    Each benchmark entry contributes one row merging its ``extra_info``
    metadata with its timing ``stats``. Two derived throughput columns are
    added: steps per day (from ops/second) and ns/day assuming 1 fs per step
    (86400 s/day * 1e-6 ns-per-fs-step / median seconds = 0.0864 / median).

    Raises:
        KeyError: if the file lacks the expected pytest-benchmark schema.
    """
    with open(bench_file, "r", encoding="utf-8") as f:
        bench_data = json.load(f)

    # One record per benchmark; idiomatic comprehension instead of append loop.
    records = [
        {**bench["extra_info"], **bench["stats"]}
        for bench in bench_data["benchmarks"]
    ]

    result_df = pd.DataFrame(records)
    result_df["ns/day (1 fs/step)"] = 0.086400 / result_df["median"]
    result_df["Steps per day"] = result_df["ops"] * 86400

    columns = [
        "num_atoms",
        "num_edges",
        "dtype",
        "is_compiled",
        "device_name",
        "median",
        "Steps per day",
        "ns/day (1 fs/step)",
    ]
    return result_df[columns]
def read_bench_results(result_files: List[str]) -> pd.DataFrame:
    """Concatenate the per-file benchmark tables for every path in *result_files*."""
    frames = (process_benchmark_file(Path(name)) for name in result_files)
    return pd.concat(frames)
if __name__ == "__main__":
    # Print to stdout a csv of the benchmark metrics collected by
    # pytest-benchmark ("pytest-benchmark list" prints one file path per line).
    import subprocess

    result = subprocess.run(
        ["pytest-benchmark", "list"],
        capture_output=True,
        text=True,
        check=True,
    )
    # splitlines() handles \r\n and yields no entries for empty output,
    # whereas split("\n") on an empty string would yield [""].
    bench_files = result.stdout.strip().splitlines()
    bench_results = read_bench_results(bench_files)
    print(bench_results.to_csv(index=False))
mace-bench/3rdparty/mace/tests/test_calculator.py
0 → 100644
View file @
1be78103
import os
import subprocess
import sys
from pathlib import Path

import ase.io
import numpy as np
import pytest
import torch
from ase import build
from ase.atoms import Atoms
from ase.calculators.test import gradient_test
from ase.constraints import ExpCellFilter

from mace.calculators import mace_mp, mace_off
from mace.calculators.mace import MACECalculator
from mace.modules.models import ScaleShiftMACE

# cuequivariance is optional: tests that need it are skipped when absent.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

# Repository root (two levels above this test file) and the training CLI
# script invoked as a subprocess by the fixtures below.
pytest_mace_dir = Path(__file__).parent.parent
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
@pytest.fixture(scope="module", name="fitting_configs")
def fitting_configs_fixture():
    """Build a small synthetic fitting set: two isolated atoms plus 20
    randomly rattled water molecules with random reference labels."""
    water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )

    # Isolated-atom references for O and H set the per-element E0s.
    fit_configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    fit_configs[0].info["REF_energy"] = 1.0
    fit_configs[0].info["config_type"] = "IsolatedAtom"
    fit_configs[1].info["REF_energy"] = -0.5
    fit_configs[1].info["config_type"] = "IsolatedAtom"

    np.random.seed(5)
    for _ in range(20):
        rattled = water.copy()
        rattled.positions += np.random.normal(0.1, size=rattled.positions.shape)
        rattled.info["REF_energy"] = np.random.normal(0.1)
        rattled.info["REF_dipole"] = np.random.normal(0.1, size=3)
        rattled.new_array("REF_forces", np.random.normal(0.1, size=rattled.positions.shape))
        rattled.new_array("Qs", np.random.normal(0.1, size=rattled.positions.shape[0]))
        rattled.info["REF_stress"] = np.random.normal(0.1, size=6)
        fit_configs.append(rattled)

    return fit_configs
@pytest.fixture(scope="module", name="trained_model")
def trained_model_fixture(tmp_path_factory, fitting_configs):
    """Train a small invariant MACE model via the run_train CLI and return a
    CPU MACECalculator wrapping it."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "128x0e",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    # None-valued params become bare flags; everything else is --key=value.
    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(model_paths=run_dir / "MACE.model", device="cpu")
@pytest.fixture(scope="module", name="trained_equivariant_model")
def trained_model_equivariant_fixture(tmp_path_factory, fitting_configs):
    """Train a small equivariant (16x0e+16x1o) MACE model via the run_train
    CLI and return a CPU MACECalculator wrapping it."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "16x0e+16x1o",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(model_paths=run_dir / "MACE.model", device="cpu")
@pytest.fixture(scope="module", name="trained_equivariant_model_cueq")
def trained_model_equivariant_fixture_cueq(tmp_path_factory, fitting_configs):
    """Same as the equivariant fixture, but the returned calculator loads the
    model with cuequivariance acceleration enabled."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "16x0e+16x1o",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", enable_cueq=True
    )
@pytest.fixture(scope="module", name="trained_dipole_model")
def trained_dipole_fixture(tmp_path_factory, fitting_configs):
    """Train a small AtomicDipolesMACE model on the dipole labels and return a
    CPU MACECalculator of model_type "DipoleMACE"."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "AtomicDipolesMACE",
        "num_channels": 8,
        "max_L": 2,
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "dipole",
        "energy_key": "",
        "forces_key": "",
        "stress_key": "",
        "dipole_key": "REF_dipole",
        "error_table": "DipoleRMSE",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", model_type="DipoleMACE"
    )
@pytest.fixture(scope="module", name="trained_energy_dipole_model")
def trained_energy_dipole_fixture(tmp_path_factory, fitting_configs):
    """Train a small EnergyDipolesMACE model (energy + dipole targets) and
    return a CPU MACECalculator of model_type "EnergyDipoleMACE"."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "EnergyDipolesMACE",
        "num_channels": 32,
        "max_L": 1,
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "energy_forces_dipole",
        "energy_key": "REF_energy",
        "forces_key": "",
        "stress_key": "",
        "dipole_key": "REF_dipole",
        "error_table": "EnergyDipoleRMSE",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", model_type="EnergyDipoleMACE"
    )
@pytest.fixture(scope="module", name="trained_committee")
def trained_committee_fixture(tmp_path_factory, fitting_configs):
    """Train three small MACE models with different seeds and return a single
    committee MACECalculator over all of them."""
    _seeds = [5, 6, 7]
    _model_paths = []
    for seed in _seeds:
        _mace_params = {
            "name": f"MACE{seed}",
            "valid_fraction": 0.05,
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "stress_weight": 1.0,
            "model": "MACE",
            "hidden_irreps": "16x0e",
            "r_max": 3.5,
            "batch_size": 5,
            "max_num_epochs": 10,
            "swa": None,
            "start_swa": 5,
            "ema": None,
            "ema_decay": 0.99,
            "amsgrad": None,
            "restart_latest": None,
            "device": "cpu",
            "seed": seed,
            "loss": "stress",
            "energy_key": "REF_energy",
            "forces_key": "REF_forces",
            "stress_key": "REF_stress",
            "eval_interval": 2,
        }

        run_dir = tmp_path_factory.mktemp(f"run{seed}_")
        ase.io.write(run_dir / "fit.xyz", fitting_configs)

        mace_params = _mace_params.copy()
        mace_params["checkpoints_dir"] = str(run_dir)
        mace_params["model_dir"] = str(run_dir)
        mace_params["train_file"] = run_dir / "fit.xyz"

        # make sure run_train.py is using the mace that is currently being tested
        run_env = os.environ.copy()
        sys.path.insert(0, str(Path(__file__).parent.parent))
        run_env["PYTHONPATH"] = ":".join(sys.path)
        print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

        flags = [
            f"--{key}={value}" if value is not None else f"--{key}"
            for key, value in mace_params.items()
        ]
        cmd = " ".join([sys.executable, str(run_train)] + flags)

        completed = subprocess.run(cmd.split(), env=run_env, check=True)
        assert completed.returncode == 0

        _model_paths.append(run_dir / f"MACE{seed}.model")

    return MACECalculator(model_paths=_model_paths, device="cpu")
def test_calculator_node_energy(fitting_configs, trained_model):
    """Per-atom energies plus per-atom E0s must sum to the total energy."""
    for at in fitting_configs:
        trained_model.calculate(at)
        node_energies = trained_model.results["node_energy"]
        batch = trained_model._atoms_to_batch(at)  # pylint: disable=protected-access
        node_heads = batch["head"][batch["batch"]]
        atom_indices = torch.arange(batch["positions"].shape[0])
        node_e0 = (
            trained_model.models[0].atomic_energies_fn(batch["node_attrs"]).detach()
        )
        node_e0 = node_e0[atom_indices, node_heads].cpu().numpy()
        energy_via_nodes = np.sum(node_energies + node_e0)
        energy = trained_model.results["energy"]
        np.testing.assert_allclose(energy, energy_via_nodes, atol=1e-6)
def test_calculator_forces(fitting_configs, trained_model):
    """Analytic forces must match numerical gradients."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_model
    # test just forces
    gradients = gradient_test(atoms)
    assert np.allclose(gradients[0], gradients[1])
def test_calculator_stress(fitting_configs, trained_model):
    """Forces and stress (via ExpCellFilter) must match numerical gradients."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_model
    # test forces and stress
    wrapped = ExpCellFilter(atoms)
    gradients = gradient_test(wrapped)
    assert np.allclose(gradients[0], gradients[1])
def test_calculator_committee(fitting_configs, trained_committee):
    """Committee calculator: force gradients check out, and the reported mean
    energy / variances are consistent with the member results."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_committee
    # test just forces
    gradients = gradient_test(atoms)
    assert np.allclose(gradients[0], gradients[1])

    total_energy = atoms.get_potential_energy()
    member_energies = atoms.calc.results["energies"]
    reported_energy_var = atoms.calc.results["energy_var"]
    member_forces_var = np.var(atoms.calc.results["forces_comm"], axis=0)

    assert np.allclose(total_energy, np.mean(member_energies))
    assert np.allclose(reported_energy_var, np.var(member_energies))
    assert member_forces_var.shape == atoms.calc.results["forces"].shape
def test_calculator_from_model(fitting_configs, trained_committee):
    """Calculators built directly from model objects behave like the
    path-loaded ones, both as a single model and as a committee."""
    # test single model
    test_calculator_forces(
        fitting_configs,
        trained_model=MACECalculator(models=trained_committee.models[0], device="cpu"),
    )
    # test committee model
    test_calculator_committee(
        fitting_configs,
        trained_committee=MACECalculator(models=trained_committee.models, device="cpu"),
    )
def test_calculator_dipole(fitting_configs, trained_dipole_model):
    """The dipole-only model must report a 3-component dipole moment."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_dipole_model
    dipole = atoms.get_dipole_moment()
    assert len(dipole) == 3
def test_calculator_energy_dipole(fitting_configs, trained_energy_dipole_model):
    """The joint energy/dipole model passes the gradient test and reports a
    3-component dipole moment."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_energy_dipole_model
    gradients = gradient_test(atoms)
    dipole = atoms.get_dipole_moment()
    assert np.allclose(gradients[0], gradients[1])
    assert len(dipole) == 3
def test_calculator_descriptor(fitting_configs, trained_equivariant_model):
    """Invariant descriptors are unchanged by rotation; equivariant ones are
    not. Also checks per-layer slicing of the descriptor vector."""
    at = fitting_configs[2].copy()
    at_rotated = fitting_configs[2].copy()
    at_rotated.rotate(90, "x")
    calc = trained_equivariant_model

    desc_invariant = calc.get_descriptors(at, invariants_only=True)
    desc_invariant_rotated = calc.get_descriptors(at_rotated, invariants_only=True)
    desc_invariant_single_layer = calc.get_descriptors(at, invariants_only=True, num_layers=1)
    desc_invariant_single_layer_rotated = calc.get_descriptors(
        at_rotated, invariants_only=True, num_layers=1
    )
    desc = calc.get_descriptors(at, invariants_only=False)
    desc_single_layer = calc.get_descriptors(at, invariants_only=False, num_layers=1)
    desc_rotated = calc.get_descriptors(at_rotated, invariants_only=False)
    desc_rotated_single_layer = calc.get_descriptors(
        at_rotated, invariants_only=False, num_layers=1
    )

    # Shape checks: 3 atoms, widths fixed by the 16x0e+16x1o architecture.
    assert desc_invariant.shape[0] == 3
    assert desc_invariant.shape[1] == 32
    assert desc_invariant_single_layer.shape[0] == 3
    assert desc_invariant_single_layer.shape[1] == 16
    assert desc.shape[0] == 3
    assert desc.shape[1] == 80
    assert desc_single_layer.shape[0] == 3
    assert desc_single_layer.shape[1] == 16 * 4
    assert desc_rotated_single_layer.shape[0] == 3
    assert desc_rotated_single_layer.shape[1] == 16 * 4

    # Invariant parts agree across rotation; equivariant parts must differ.
    np.testing.assert_allclose(desc_invariant, desc_invariant_rotated, atol=1e-6)
    np.testing.assert_allclose(desc_invariant_single_layer, desc_invariant[:, :16], atol=1e-6)
    np.testing.assert_allclose(
        desc_invariant_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
    )
    np.testing.assert_allclose(
        desc_single_layer[:, :16], desc_rotated_single_layer[:, :16], atol=1e-6
    )
    assert not np.allclose(
        desc_single_layer[:, 16:], desc_rotated_single_layer[:, 16:], atol=1e-6
    )
    assert not np.allclose(desc, desc_rotated, atol=1e-6)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_calculator_descriptor_cueq(fitting_configs, trained_equivariant_model_cueq):
    """Same rotation-invariance checks as test_calculator_descriptor, but on
    the cuequivariance-enabled calculator."""
    at = fitting_configs[2].copy()
    at_rotated = fitting_configs[2].copy()
    at_rotated.rotate(90, "x")
    calc = trained_equivariant_model_cueq

    desc_invariant = calc.get_descriptors(at, invariants_only=True)
    desc_invariant_rotated = calc.get_descriptors(at_rotated, invariants_only=True)
    desc_invariant_single_layer = calc.get_descriptors(at, invariants_only=True, num_layers=1)
    desc_invariant_single_layer_rotated = calc.get_descriptors(
        at_rotated, invariants_only=True, num_layers=1
    )
    desc = calc.get_descriptors(at, invariants_only=False)
    desc_single_layer = calc.get_descriptors(at, invariants_only=False, num_layers=1)
    desc_rotated = calc.get_descriptors(at_rotated, invariants_only=False)
    desc_rotated_single_layer = calc.get_descriptors(
        at_rotated, invariants_only=False, num_layers=1
    )

    assert desc_invariant.shape[0] == 3
    assert desc_invariant.shape[1] == 32
    assert desc_invariant_single_layer.shape[0] == 3
    assert desc_invariant_single_layer.shape[1] == 16
    assert desc.shape[0] == 3
    assert desc.shape[1] == 80
    assert desc_single_layer.shape[0] == 3
    assert desc_single_layer.shape[1] == 16 * 4
    assert desc_rotated_single_layer.shape[0] == 3
    assert desc_rotated_single_layer.shape[1] == 16 * 4

    np.testing.assert_allclose(desc_invariant, desc_invariant_rotated, atol=1e-6)
    np.testing.assert_allclose(desc_invariant_single_layer, desc_invariant[:, :16], atol=1e-6)
    np.testing.assert_allclose(
        desc_invariant_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
    )
    np.testing.assert_allclose(
        desc_single_layer[:, :16], desc_rotated_single_layer[:, :16], atol=1e-6
    )
    assert not np.allclose(
        desc_single_layer[:, 16:], desc_rotated_single_layer[:, 16:], atol=1e-6
    )
    assert not np.allclose(desc, desc_rotated, atol=1e-6)
def
test_mace_mp
(
capsys
:
pytest
.
CaptureFixture
):
mp_mace
=
mace_mp
()
assert
isinstance
(
mp_mace
,
MACECalculator
)
assert
mp_mace
.
model_type
==
"MACE"
assert
len
(
mp_mace
.
models
)
==
1
assert
isinstance
(
mp_mace
.
models
[
0
],
ScaleShiftMACE
)
_
,
stderr
=
capsys
.
readouterr
()
assert
stderr
==
""
def test_mace_off():
    """mace_off small model loads on CPU and reproduces the reference H2O energy."""
    mace_off_model = mace_off(model="small", device="cpu")
    assert isinstance(mace_off_model, MACECalculator)
    assert mace_off_model.model_type == "MACE"
    assert len(mace_off_model.models) == 1
    assert isinstance(mace_off_model.models[0], ScaleShiftMACE)

    atoms = build.molecule("H2O")
    atoms.calc = mace_off_model
    energy = atoms.get_potential_energy()
    assert np.allclose(energy, -2081.116128586803, atol=1e-9)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_mace_off_cueq(model="medium", device="cpu"):
    """mace_off with cuequivariance enabled matches the reference H2O energy."""
    mace_off_model = mace_off(model=model, device=device, enable_cueq=True)
    assert isinstance(mace_off_model, MACECalculator)
    assert mace_off_model.model_type == "MACE"
    assert len(mace_off_model.models) == 1
    assert isinstance(mace_off_model.models[0], ScaleShiftMACE)

    atoms = build.molecule("H2O")
    atoms.calc = mace_off_model
    energy = atoms.get_potential_energy()
    assert np.allclose(energy, -2081.116128586803, atol=1e-9)
def test_mace_mp_stresses(model="medium", device="cpu"):
    """Atomic stresses must sum to the total stress for a 32-atom Al supercell.

    Uses mace_mp with compute_atomic_stresses=True; shapes are (6,) for the
    Voigt total stress and (32, 6) per-atom.
    """
    atoms = build.bulk("Al", "fcc", a=4.05, cubic=True)
    atoms = atoms.repeat((2, 2, 2))
    mace_mp_model = mace_mp(model=model, device=device, compute_atomic_stresses=True)
    # atoms.set_calculator() is deprecated in ASE; assign the calc attribute
    # directly, consistent with the other tests in this file.
    atoms.calc = mace_mp_model
    stress = atoms.get_stress()
    stresses = atoms.get_stresses()
    assert stress.shape == (6,)
    assert stresses.shape == (32, 6)
    assert np.allclose(stress, stresses.sum(axis=0), atol=1e-6)
mace-bench/3rdparty/mace/tests/test_cg.py
0 → 100644
View file @
1be78103
from
e3nn
import
o3
from
mace.tools
import
cg
def
test_U_matrix
():
irreps_in
=
o3
.
Irreps
(
"1x0e + 1x1o + 1x2e"
)
irreps_out
=
o3
.
Irreps
(
"1x0e + 1x1o"
)
u_matrix
=
cg
.
U_matrix_real
(
irreps_in
=
irreps_in
,
irreps_out
=
irreps_out
,
correlation
=
3
)[
-
1
]
assert
u_matrix
.
shape
==
(
3
,
9
,
9
,
9
,
21
)
mace-bench/3rdparty/mace/tests/test_compile.py
0 → 100644
View file @
1be78103
import
os
from
functools
import
wraps
from
typing
import
Callable
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
torch.testing
import
assert_close
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
compile
as
mace_compile
from
mace.tools
import
torch_geometric
# Shared fixtures for building a minimal single-element (carbon, Z=6) MACE
# model: the element table, the per-element reference energies, and the
# radial cutoff (Angstrom) used by both create_mace and create_batch below.
table = tools.AtomicNumberTable([6])
atomic_energies = np.array([1.0], dtype=float)
cutoff = 5.0
def create_mace(device: str, seed: int = 1702):
    """Construct a deterministic two-layer ScaleShiftMACE model on *device*.

    Seeding goes through torch_geometric so repeated calls with the same seed
    produce identical weights.
    """
    torch_geometric.seed_everything(seed)

    residual_block = modules.interaction_classes["RealAgnosticResidualInteractionBlock"]
    model_config = {
        "r_max": cutoff,
        "num_bessel": 8,
        "num_polynomial_cutoff": 6,
        "max_ell": 3,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 1,
        "hidden_irreps": o3.Irreps("128x0e + 128x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": F.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 8,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "bessel",
        "atomic_inter_scale": 1.0,
        "atomic_inter_shift": 0.0,
    }
    model = modules.ScaleShiftMACE(**model_config)
    return model.to(device)
def create_batch(device: str):
    """Build one diamond-carbon batch (2x2x2 supercell) as a dict on *device*."""
    from ase import build

    size = 2
    atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms_list = [atoms.repeat((size, size, size))]
    print("Number of atoms", len(atoms_list[0]))

    configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[
            data.AtomicData.from_config(config, z_table=table, cutoff=cutoff)
            for config in configs
        ],
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    return batch.to(device).to_dict()
def time_func(func: Callable):
    """Wrap *func* so each call is bracketed for accurate CUDA timing:
    mark a cudagraph step before, synchronize the device after."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        torch._inductor.cudagraph_mark_step_begin()  # pylint: disable=W0212
        outputs = func(*args, **kwargs)
        torch.cuda.synchronize()
        return outputs

    return wrapper
@pytest.fixture(params=[torch.float32, torch.float64], ids=["fp32", "fp64"])
def default_dtype(request):
    """Run the test under each torch default dtype, restoring it afterwards."""
    with tools.torch_tools.default_dtype(request.param):
        yield torch.get_default_dtype()
# skip if on windows
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_mace(device, default_dtype):  # pylint: disable=W0621
    """Compiled model output must match the eager model bit-for-bit-close."""
    print(f"using default dtype = {default_dtype}")
    if device == "cuda" and not torch.cuda.is_available():
        pytest.skip(reason="cuda is not available")

    model_defaults = create_mace(device)
    prepared = mace_compile.prepare(create_mace)(device)
    model_compiled = torch.compile(prepared, mode="default")

    batch = create_batch(device)
    eager_out = model_defaults(batch, training=True)
    compiled_out = model_compiled(batch, training=True)
    assert_close(eager_out["energy"], compiled_out["energy"])
    assert_close(eager_out["forces"], compiled_out["forces"])
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_eager_benchmark(benchmark, default_dtype):  # pylint: disable=W0621
    """Baseline: benchmark the uncompiled (eager) model on CUDA."""
    print(f"using default dtype = {default_dtype}")
    batch = create_batch("cuda")
    model = time_func(create_mace("cuda"))
    benchmark(model, batch, training=True)
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
@pytest.mark.parametrize("compile_mode", ["default", "reduce-overhead", "max-autotune"])
@pytest.mark.parametrize("enable_amp", [False, True], ids=["fp32", "mixed"])
def test_compile_benchmark(benchmark, compile_mode, enable_amp):
    """Benchmark torch.compile'd models across compile modes (fp32 only for now)."""
    if enable_amp:
        # Known failure under autocast; see the compiler assertion below.
        pytest.skip(reason="autocast compiler assertion aten.slice_scatter.default")

    with tools.torch_tools.default_dtype(torch.float32):
        batch = create_batch("cuda")
        torch.compiler.reset()
        prepared = mace_compile.prepare(create_mace)("cuda")
        compiled = torch.compile(prepared, mode=compile_mode)
        timed = time_func(compiled)

        with torch.autocast("cuda", enabled=enable_amp):
            benchmark(timed, batch, training=True)
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_graph_breaks():
    """Assert that dynamo traces the prepared model with zero graph breaks."""
    import torch._dynamo as dynamo

    batch = create_batch("cuda")
    model = mace_compile.prepare(create_mace)("cuda")
    explanation = dynamo.explain(model)(batch, training=False)
    # these clutter the output but might be useful for investigating graph breaks
    explanation.ops_per_graph = None
    explanation.out_guards = None
    print(explanation)
    assert explanation.graph_break_count == 0
mace-bench/3rdparty/mace/tests/test_cueq.py
0 → 100644
View file @
1be78103
# pylint: disable=wrong-import-position
import
os
from
copy
import
deepcopy
from
typing
import
Any
,
Dict
os
.
environ
[
"TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"
]
=
"1"
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
mace
import
data
,
modules
,
tools
from
mace.cli.convert_cueq_e3nn
import
run
as
run_cueq_to_e3nn
from
mace.cli.convert_e3nn_cueq
import
run
as
run_e3nn_to_cueq
from
mace.tools
import
torch_geometric
# Feature flags: detect optional cuequivariance support and CUDA hardware
# at import time so tests can be skipped/parametrized accordingly.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

CUDA_AVAILABLE = torch.cuda.is_available()
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
class TestCueq:
    """Round-trip conversion tests between e3nn and cuequivariance MACE models.

    Verifies that E3nn -> CuEq -> E3nn conversion preserves forward outputs
    (energy, forces, stress) and backward gradients at each stage.
    """

    @pytest.fixture
    def model_config(self, interaction_cls_first, hidden_irreps) -> Dict[str, Any]:
        """Build a small single-element (carbon) ScaleShiftMACE config."""
        table = tools.AtomicNumberTable([6])
        return {
            "r_max": 5.0,
            "num_bessel": 8,
            "num_polynomial_cutoff": 6,
            "max_ell": 3,
            "interaction_cls": modules.interaction_classes[
                "RealAgnosticResidualInteractionBlock"
            ],
            "interaction_cls_first": interaction_cls_first,
            "num_interactions": 2,
            "num_elements": 1,
            "hidden_irreps": hidden_irreps,
            "MLP_irreps": o3.Irreps("16x0e"),
            "gate": F.silu,
            "atomic_energies": torch.tensor([1.0]),
            "avg_num_neighbors": 8,
            "atomic_numbers": table.zs,
            "correlation": 3,
            "radial_type": "bessel",
            "atomic_inter_scale": 1.0,
            "atomic_inter_shift": 0.0,
        }

    @pytest.fixture
    def batch(self, device: str, default_dtype: torch.dtype) -> Dict[str, torch.Tensor]:
        """Create a single randomly-perturbed diamond supercell batch on `device`."""
        from ase import build

        torch.set_default_dtype(default_dtype)
        table = tools.AtomicNumberTable([6])
        atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
        import numpy as np

        # small random displacement breaks symmetry so forces are non-zero
        displacement = np.random.uniform(-0.1, 0.1, size=atoms.positions.shape)
        atoms.positions += displacement
        atoms_list = [atoms.repeat((2, 2, 2))]
        configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[
                data.AtomicData.from_config(config, z_table=table, cutoff=5.0)
                for config in configs
            ],
            batch_size=1,
            shuffle=False,
            drop_last=False,
        )
        batch = next(iter(data_loader))
        return batch.to(device).to_dict()

    @pytest.mark.parametrize(
        "device",
        ["cpu"] + (["cuda"] if CUDA_AVAILABLE else []),
    )
    @pytest.mark.parametrize(
        "interaction_cls_first",
        [
            modules.interaction_classes["RealAgnosticResidualInteractionBlock"],
            modules.interaction_classes["RealAgnosticInteractionBlock"],
            modules.interaction_classes["RealAgnosticDensityInteractionBlock"],
        ],
    )
    @pytest.mark.parametrize(
        "hidden_irreps",
        [
            o3.Irreps("32x0e + 32x1o"),
            o3.Irreps("32x0e + 32x1o + 32x2e"),
            o3.Irreps("32x0e"),
        ],
    )
    @pytest.mark.parametrize("default_dtype", [torch.float32, torch.float64])
    def test_bidirectional_conversion(
        self,
        model_config: Dict[str, Any],
        batch: Dict[str, torch.Tensor],
        device: str,
        default_dtype: torch.dtype,
    ):
        """Check E3nn<->CuEq conversion preserves outputs and gradients."""
        if device == "cuda" and not CUDA_AVAILABLE:
            pytest.skip("CUDA not available")

        torch.manual_seed(42)

        # Create original E3nn model
        model_e3nn = modules.ScaleShiftMACE(**model_config).to(device)

        # Convert E3nn to CuEq
        model_cueq = run_e3nn_to_cueq(model_e3nn).to(device)

        # Convert CuEq back to E3nn
        model_e3nn_back = run_cueq_to_e3nn(model_cueq).to(device)

        # Test forward pass equivalence
        out_e3nn = model_e3nn(deepcopy(batch), training=True, compute_stress=True)
        out_cueq = model_cueq(deepcopy(batch), training=True, compute_stress=True)
        out_e3nn_back = model_e3nn_back(
            deepcopy(batch), training=True, compute_stress=True
        )

        # Check outputs match for both conversions
        torch.testing.assert_close(out_e3nn["energy"], out_cueq["energy"])
        torch.testing.assert_close(out_cueq["energy"], out_e3nn_back["energy"])
        torch.testing.assert_close(out_e3nn["forces"], out_cueq["forces"])
        torch.testing.assert_close(out_cueq["forces"], out_e3nn_back["forces"])
        torch.testing.assert_close(out_e3nn["stress"], out_cueq["stress"])
        torch.testing.assert_close(out_cueq["stress"], out_e3nn_back["stress"])

        # Test backward pass equivalence
        loss_e3nn = out_e3nn["energy"].sum()
        loss_cueq = out_cueq["energy"].sum()
        loss_e3nn_back = out_e3nn_back["energy"].sum()

        loss_e3nn.backward()
        loss_cueq.backward()
        loss_e3nn_back.backward()

        # Compare gradients for all conversions; fp32 needs a looser tolerance
        tol = 1e-4 if default_dtype == torch.float32 else 1e-7

        def print_gradient_diff(name1, p1, name2, p2, conv_type):
            # Only compare parameters whose qualified-name prefixes line up;
            # conversion can reorder/rename deeper submodules.
            if p1.grad is not None and p1.grad.shape == p2.grad.shape:
                if name1.split(".", 2)[:2] == name2.split(".", 2)[:2]:
                    error = torch.abs(p1.grad - p2.grad)
                    print(
                        f"{conv_type} - Parameter {name1}/{name2}, Max error: {error.max()}"
                    )
                    torch.testing.assert_close(p1.grad, p2.grad, atol=tol, rtol=tol)

        # E3nn to CuEq gradients
        for (name_e3nn, p_e3nn), (name_cueq, p_cueq) in zip(
            model_e3nn.named_parameters(), model_cueq.named_parameters()
        ):
            print_gradient_diff(name_e3nn, p_e3nn, name_cueq, p_cueq, "E3nn->CuEq")

        # CuEq to E3nn gradients
        for (name_cueq, p_cueq), (name_e3nn_back, p_e3nn_back) in zip(
            model_cueq.named_parameters(), model_e3nn_back.named_parameters()
        ):
            print_gradient_diff(
                name_cueq, p_cueq, name_e3nn_back, p_e3nn_back, "CuEq->E3nn"
            )

        # Full circle comparison (E3nn -> E3nn)
        for (name_e3nn, p_e3nn), (name_e3nn_back, p_e3nn_back) in zip(
            model_e3nn.named_parameters(), model_e3nn_back.named_parameters()
        ):
            print_gradient_diff(
                name_e3nn, p_e3nn, name_e3nn_back, p_e3nn_back, "Full circle"
            )
mace-bench/3rdparty/mace/tests/test_data.py
0 → 100644
View file @
1be78103
from
copy
import
deepcopy
from
pathlib
import
Path
import
ase.build
import
h5py
import
numpy
as
np
import
torch
from
mace.data
import
(
AtomicData
,
Configuration
,
HDF5Dataset
,
config_from_atoms
,
get_neighborhood
,
save_configurations_as_HDF5
,
)
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
# Repository root (two levels above this test file); used below to build
# the path of the temporary HDF5 test artifact.
mace_path = Path(__file__).parent.parent
class TestAtomicData:
    """Tests for AtomicData construction, batching, and HDF5-backed loading.

    Uses a small 3-atom water-like configuration (O, H, H) with known
    neighbor counts at cutoff 3.0 so batch shapes are deterministic.
    """

    # Base configuration: one O at the bottom, two H above it.
    config = Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.0, -2.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 1.0, 0.0],
            ]
        ),
        properties={
            "forces": np.array(
                [
                    [0.0, -1.3, 0.0],
                    [1.0, 0.2, 0.0],
                    [0.0, 1.1, 0.3],
                ]
            ),
            "energy": -1.5,
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
        },
    )
    # Slightly displaced copy; same topology at cutoff 3.0.
    config_2 = deepcopy(config)
    config_2.positions = config.positions + 0.01
    table = AtomicNumberTable([1, 8])

    def test_atomic_data(self):
        """A single config yields expected edge/force/one-hot shapes."""
        data = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        assert data.edge_index.shape == (2, 4)
        assert data.forces.shape == (3, 3)
        assert data.node_attrs.shape == (3, 2)

    def test_data_loader(self):
        """Two identical configs batch into doubled node/edge counts."""
        data1 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)
        data2 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[data1, data2],
            batch_size=2,
            shuffle=True,
            drop_last=False,
        )

        for batch in data_loader:
            assert batch.batch.shape == (6,)
            assert batch.edge_index.shape == (2, 8)
            assert batch.shifts.shape == (8, 3)
            assert batch.positions.shape == (6, 3)
            assert batch.node_attrs.shape == (6, 2)
            assert batch.energy.shape == (2,)
            assert batch.forces.shape == (6, 3)

    def test_to_atomic_data_dict(self):
        """Batch.to_dict() exposes the same tensors/shapes as attribute access."""
        data1 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)
        data2 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[data1, data2],
            batch_size=2,
            shuffle=True,
            drop_last=False,
        )

        for batch in data_loader:
            batch_dict = batch.to_dict()
            assert batch_dict["batch"].shape == (6,)
            assert batch_dict["edge_index"].shape == (2, 8)
            assert batch_dict["shifts"].shape == (8, 3)
            assert batch_dict["positions"].shape == (6, 3)
            assert batch_dict["node_attrs"].shape == (6, 2)
            assert batch_dict["energy"].shape == (2,)
            assert batch_dict["forces"].shape == (6, 3)

    def test_hdf5_dataloader(self):
        """HDF5-backed dataset batches identically to an in-memory dataset."""
        datasets = [self.config, self.config_2] * 5
        # get path of the mace package
        # NOTE(review): `str(mace_path) + "test.h5"` has no path separator, so
        # this writes a sibling file like "<parent>/macetest.h5" rather than a
        # file inside the repo, and it is never cleaned up — presumably
        # unintended; confirm before changing, as both writer and reader use
        # the same (consistent) path.
        with h5py.File(str(mace_path) + "test.h5", "w") as f:
            save_configurations_as_HDF5(datasets, 0, f)
        train_dataset = HDF5Dataset(
            str(mace_path) + "test.h5", z_table=self.table, r_max=3.0
        )
        train_loader = torch_geometric.dataloader.DataLoader(
            dataset=train_dataset,
            batch_size=2,
            shuffle=False,
            drop_last=False,
        )
        batch_count = 0
        for batch in train_loader:
            batch_count += 1
            assert batch.batch.shape == (6,)
            assert batch.edge_index.shape == (2, 8)
            assert batch.shifts.shape == (8, 3)
            assert batch.positions.shape == (6, 3)
            assert batch.node_attrs.shape == (6, 2)
            assert batch.energy.shape == (2,)
            assert batch.forces.shape == (6, 3)
        print(batch_count, len(train_loader), len(train_dataset))
        assert batch_count == len(train_loader) == len(train_dataset) / 2

        # Cross-check: HDF5-loaded batches must match batches built directly
        # from the in-memory configurations.
        train_loader_direct = torch_geometric.dataloader.DataLoader(
            dataset=[
                AtomicData.from_config(config, z_table=self.table, cutoff=3.0)
                for config in datasets
            ],
            batch_size=2,
            shuffle=False,
            drop_last=False,
        )
        for batch_direct, batch in zip(train_loader_direct, train_loader):
            assert torch.all(batch_direct.edge_index == batch.edge_index)
            assert torch.all(batch_direct.shifts == batch.shifts)
            assert torch.all(batch_direct.positions == batch.positions)
            assert torch.all(batch_direct.node_attrs == batch.node_attrs)
            assert torch.all(batch_direct.energy == batch.energy)
            assert torch.all(batch_direct.forces == batch.forces)
class TestNeighborhood:
    """Shape checks for get_neighborhood on open and partially periodic systems."""

    def test_basic(self):
        """Three collinear atoms spaced 1.0 apart -> 4 directed edges at cutoff 1.5."""
        coords = np.array(
            [[-1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]
        )
        edges, cell_shifts, cell_units, _ = get_neighborhood(coords, cutoff=1.5)
        assert edges.shape == (2, 4)
        assert cell_shifts.shape == (4, 3)
        assert cell_units.shape == (4, 3)

    def test_signs(self):
        """PBC along x only: two atoms in a 2x1x1 cell give 10 edges at cutoff 3.5."""
        coords = np.array([[0.5, 0.5, 0.0], [1.0, 1.0, 0.0]])
        lattice = np.diag([2.0, 1.0, 1.0])
        edges, cell_shifts, cell_units, _ = get_neighborhood(
            coords, cutoff=3.5, pbc=(True, False, False), cell=lattice
        )
        expected_edges = 10
        assert edges.shape == (2, expected_edges)
        assert cell_shifts.shape == (expected_edges, 3)
        assert cell_units.shape == (expected_edges, 3)
# Based on mir-group/nequip
def test_periodic_edge():
    """Fully periodic fcc Cu: every atom has 12 nearest neighbors at distance `dist`."""
    atoms = ase.build.bulk("Cu", "fcc")
    dist = np.linalg.norm(atoms.cell[0]).item()
    config = config_from_atoms(atoms)
    edge_index, shifts, _, _ = get_neighborhood(
        config.positions,
        cutoff=1.05 * dist,
        pbc=(True, True, True),
        cell=config.cell,
    )
    src, dst = edge_index
    # Edge vectors include the periodic shift term.  [n_edges, 3]
    vectors = config.positions[dst] - config.positions[src] + shifts
    # 12 neighbors in close-packed bulk
    assert vectors.shape == (12, 3)
    lengths = np.linalg.norm(vectors, axis=-1)
    assert np.allclose(lengths, dist)
def test_half_periodic():
    """A 3x3x1 Al(111) slab periodic in x/y only: 6 in-plane neighbors, no z edges."""
    slab = ase.build.fcc111("Al", size=(3, 3, 1), vacuum=0.0)
    assert all(slab.pbc == (True, True, False))
    config = config_from_atoms(slab)
    # first shell dist is 2.864A
    edge_index, shifts, _, _ = get_neighborhood(
        config.positions, cutoff=2.9, pbc=(True, True, False), cell=config.cell
    )
    src, dst = edge_index
    vectors = config.positions[dst] - config.positions[src] + shifts  # [n_edges, 3]
    # Check number of neighbors:
    _, counts = np.unique(edge_index[0], return_counts=True)
    assert (counts == 6).all()  # 6 neighbors
    # Check not periodic in z
    z_components = vectors[:, 2]
    assert np.allclose(z_components, np.zeros(vectors.shape[0]))
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
0 → 100644
View file @
1be78103
import
ase.io
as
aio
import
numpy
as
np
import
pytest
from
ase
import
Atoms
from
ase.build
import
molecule
from
mace.cli.fine_tuning_select
import
(
FilteringType
,
SelectionSettings
,
SubselectType
,
_filter_pretraining_data
,
_load_descriptors
,
_maybe_save_descriptors
,
filter_atoms
,
select_samples
,
)
@pytest.fixture(name="train_atoms_fixture")
def train_atoms():
    """Provide a small mixed set of molecules/clusters for filtering tests."""
    structures = [
        molecule("H2O"),
        molecule("CH4"),
        Atoms("Fe2O3"),
        Atoms("C"),
        Atoms("FeON"),
        Atoms("Fe"),
    ]
    return structures
@pytest.fixture(name="train_atom_descriptors_fixture")
def train_atom_descriptors(train_atoms_fixture):
    """Fake per-element descriptors: structure i maps each symbol to a constant-i vector."""
    descriptors = []
    for idx, structure in enumerate(train_atoms_fixture):
        per_element = {sym: np.zeros(5) + idx for sym in structure.symbols}
        descriptors.append(per_element)
    return descriptors
@pytest.mark.parametrize(
    "filtering_type, passes_filter, element_sublist",
    [
        (FilteringType.NONE, [True] * 6, []),
        (FilteringType.NONE, [True] * 6, ["C", "U", "Anything really"]),
        (
            FilteringType.COMBINATIONS,
            [False, False, True, False, False, True],
            ["O", "Fe"],
        ),
        (
            FilteringType.INCLUSIVE,
            [False, False, True, False, True, False],
            ["O", "Fe"],
        ),
        (
            FilteringType.EXCLUSIVE,
            [False, False, True, False, False, False],
            ["O", "Fe"],
        ),
    ],
)
def test_filter_data(train_atoms_fixture, filtering_type, passes_filter, element_sublist):
    """Each FilteringType selects the expected subset of the 6 fixture structures."""
    filtered, _, passes = _filter_pretraining_data(
        train_atoms_fixture, filtering_type, element_sublist
    )
    assert passes == passes_filter
    assert len(filtered) == sum(passes_filter)
@pytest.mark.parametrize(
    "passes_filter", [[True] * 6, [False, True, False, True, False, True]]
)
def test_load_descriptors(
    train_atoms_fixture, train_atom_descriptors_fixture, passes_filter, tmp_path
):
    """Saving descriptors strips them from atoms.info; reloading restores the
    correct per-structure descriptors for the filtered subset."""
    for i, atoms in enumerate(train_atoms_fixture):
        atoms.info["mace_descriptors"] = train_atom_descriptors_fixture[i]
    save_path = tmp_path / "test.xyz"
    _maybe_save_descriptors(train_atoms_fixture, save_path.as_posix())
    # Descriptors are moved into a side file, not kept on the atoms objects.
    assert all(
        not "mace_descriptors" in atoms.info for atoms in train_atoms_fixture
    )
    filtered_atoms = [
        x for x, passes in zip(train_atoms_fixture, passes_filter) if passes
    ]
    descriptors_path = save_path.as_posix().replace(".xyz", "_descriptors.npy")
    _load_descriptors(
        filtered_atoms,
        passes_filter,
        descriptors_path=descriptors_path,
        calc=None,
        full_data_length=len(train_atoms_fixture),
    )
    expected_descriptors = [
        train_atom_descriptors_fixture[i]
        for i, passes in enumerate(passes_filter)
        if passes
    ]
    for i, atoms in enumerate(filtered_atoms):
        assert "mace_descriptors" in atoms.info
        for key, value in expected_descriptors[i].items():
            assert np.allclose(atoms.info["mace_descriptors"][key], value)
def test_select_samples_random(train_atoms_fixture, tmp_path):
    """Random subselection with no filtering writes the requested number of
    samples plus an identical 'combined' file (no fine-tuning data given)."""
    input_file_path = tmp_path / "input.xyz"
    aio.write(input_file_path, train_atoms_fixture, format="extxyz")
    output_file_path = tmp_path / "output.xyz"
    settings = SelectionSettings(
        configs_pt=input_file_path.as_posix(),
        output=output_file_path.as_posix(),
        num_samples=2,
        subselect=SubselectType.RANDOM,
        filtering_type=FilteringType.NONE,
    )
    select_samples(settings)

    # Check if output file is created
    assert output_file_path.exists()
    combined_output_file_path = tmp_path / "output_combined.xyz"
    assert combined_output_file_path.exists()

    output_atoms = aio.read(output_file_path, index=":")
    assert isinstance(output_atoms, list)
    assert len(output_atoms) == 2

    combined_output_atoms = aio.read(combined_output_file_path, index=":")
    assert isinstance(combined_output_atoms, list)
    assert (
        len(combined_output_atoms) == 2
    )  # combined same as output since no FT data provided
def test_select_samples_ft_provided(train_atoms_fixture, tmp_path):
    """With fine-tuning data supplied, the default filtering restricts the
    selection to the FT element set and the combined file appends the FT data."""
    input_file_path = tmp_path / "input.xyz"
    aio.write(input_file_path, train_atoms_fixture, format="extxyz")
    output_file_path = tmp_path / "output.xyz"
    ft_file_path = tmp_path / "ft_data.xyz"
    ft_data = [Atoms("FeO")]
    aio.write(ft_file_path.as_posix(), ft_data, format="extxyz")
    settings = SelectionSettings(
        configs_pt=input_file_path.as_posix(),
        output=output_file_path.as_posix(),
        num_samples=2,
        subselect=SubselectType.RANDOM,
        configs_ft=ft_file_path.as_posix(),
    )
    select_samples(settings)

    # Check if output file is created
    assert output_file_path.exists()
    combined_output_file_path = tmp_path / "output_combined.xyz"
    assert combined_output_file_path.exists()

    output_atoms = aio.read(output_file_path, index=":")
    assert isinstance(output_atoms, list)
    assert len(output_atoms) == 2
    # Every selected structure is built only from the FT elements (Fe, O).
    assert all(filter_atoms(x, ["Fe", "O"]) for x in output_atoms)

    combined_atoms = aio.read(combined_output_file_path, index=":")
    assert isinstance(combined_atoms, list)
    assert len(combined_atoms) == len(output_atoms) + len(ft_data)
mace-bench/3rdparty/mace/tests/test_foundations.py
0 → 100644
View file @
1be78103
from
pathlib
import
Path
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
from
ase.build
import
molecule
from
e3nn
import
o3
from
e3nn.util
import
jit
from
scipy.spatial.transform
import
Rotation
as
R
from
mace
import
data
,
modules
,
tools
from
mace.calculators
import
mace_mp
,
mace_off
from
mace.tools
import
torch_geometric
from
mace.tools.finetuning_utils
import
load_foundations_elements
from
mace.tools.scripts_utils
import
extract_config_mace_model
,
remove_pt_head
from
mace.tools.utils
import
AtomicNumberTable
# Bundled foundation-model checkpoint shipped with the package, used by the
# parametrized tests below as a file-path model source.
MODEL_PATH = (
    Path(__file__).parent.parent
    / "mace"
    / "calculators"
    / "foundations_models"
    / "2023-12-03-mace-mp.model"
)

# Foundation models are float64; set the global default to match.
torch.set_default_dtype(torch.float64)
# NOTE(review): using `pytest.skip(..., allow_module_level=True)` as a
# *decorator* calls pytest.skip at import time, which skips the ENTIRE module
# (every test below), not just this function. If only this test should be
# skipped, `@pytest.mark.skip(reason=...)` is the usual form — confirm intent.
@pytest.skip("Problem with the float type", allow_module_level=True)
def test_foundations():
    """Load foundation-model weights into a fresh model and check the forces
    it predicts match the foundation calculator's own model."""
    # Create MACE model
    config = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    # Created the rotated environment
    rot = R.from_euler("z", 60, degrees=True).as_matrix()
    positions_rotated = np.array(rot @ config.positions.T).T
    config_rotated = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=positions_rotated,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    table = tools.AtomicNumberTable([1, 6, 8])
    atomic_energies = np.array([0.0, 0.0, 0.0], dtype=float)

    model_config = dict(
        r_max=6,
        num_bessel=10,
        num_polynomial_cutoff=5,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=3,
        hidden_irreps=o3.Irreps("128x0e + 128x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies,
        avg_num_neighbors=3,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
        atomic_inter_scale=0.1,
        atomic_inter_shift=0.0,
    )
    model = modules.ScaleShiftMACE(**model_config)
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    model_loaded = load_foundations_elements(
        model,
        calc_foundation.models[0],
        table=table,
        load_readout=True,
        use_shift=False,
        max_L=1,
    )
    atomic_data = data.AtomicData.from_config(config, z_table=table, cutoff=6.0)
    atomic_data2 = data.AtomicData.from_config(
        config_rotated, z_table=table, cutoff=6.0
    )

    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data2],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    forces_loaded = model_loaded(batch.to_dict())["forces"]
    forces = model(batch.to_dict())["forces"]
    assert torch.allclose(forces, forces_loaded)
def test_multi_reference():
    """Multihead model with foundation weights loaded reproduces the
    foundation calculator's forces on the head-labelled molecule."""
    config_multi = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
        head="MP2",
    )
    table_multi = tools.AtomicNumberTable([1, 6, 8])
    # One atomic-energy row per head.
    atomic_energies_multi = np.array(
        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], dtype=float
    )
    table = tools.AtomicNumberTable([1, 6, 8])

    # Create MACE model
    model_config = dict(
        r_max=6,
        num_bessel=10,
        num_polynomial_cutoff=5,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=3,
        hidden_irreps=o3.Irreps("128x0e + 128x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies_multi,
        avg_num_neighbors=61,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
        atomic_inter_scale=[1.0, 1.0],
        atomic_inter_shift=[0.0, 0.0],
        heads=["MP2", "DFT"],
    )
    model = modules.ScaleShiftMACE(**model_config)
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    model_loaded = load_foundations_elements(
        model,
        calc_foundation.models[0],
        table=table,
        load_readout=True,
        use_shift=False,
        max_L=1,
    )
    atomic_data = data.AtomicData.from_config(
        config_multi, z_table=table_multi, cutoff=6.0, heads=["MP2", "DFT"]
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    forces_loaded = model_loaded(batch.to_dict())["forces"]
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    atoms = molecule("H2COH")
    atoms.info["head"] = "MP2"
    atoms.calc = calc_foundation
    forces = atoms.get_forces()
    # Batch holds two copies of the 5-atom molecule; compare the first copy.
    assert np.allclose(
        forces, forces_loaded.detach().numpy()[:5, :], atol=1e-5, rtol=1e-5
    )
# NOTE: the parametrize list instantiates calculators (and may download model
# files) at collection time.
@pytest.mark.parametrize(
    "calc",
    [
        mace_mp(device="cpu", default_dtype="float64"),
        mace_mp(model="small", device="cpu", default_dtype="float64"),
        mace_mp(model="medium", device="cpu", default_dtype="float64"),
        mace_mp(model="large", device="cpu", default_dtype="float64"),
        mace_mp(model=MODEL_PATH, device="cpu", default_dtype="float64"),
        mace_off(model="small", device="cpu", default_dtype="float64"),
        mace_off(model="medium", device="cpu", default_dtype="float64"),
        mace_off(model="large", device="cpu", default_dtype="float64"),
        mace_off(model=MODEL_PATH, device="cpu", default_dtype="float64"),
    ],
)
def test_compile_foundation(calc):
    """A TorchScript-compiled foundation model matches the eager model's
    tensor outputs on a perturbed CH4 molecule."""
    model = calc.models[0]
    atoms = molecule("CH4")
    # random perturbation so outputs are non-degenerate
    atoms.positions += np.random.randn(*atoms.positions.shape) * 0.1
    batch = calc._atoms_to_batch(atoms)  # pylint: disable=protected-access
    output_1 = model(batch.to_dict())
    model_compiled = jit.compile(model)
    output = model_compiled(batch.to_dict())
    for key in output_1.keys():
        if isinstance(output_1[key], torch.Tensor):
            assert torch.allclose(output_1[key], output[key], atol=1e-5)
@pytest.mark.parametrize(
    "model",
    [
        mace_mp(model="small", device="cpu", default_dtype="float64").models[0],
        mace_mp(model="medium", device="cpu", default_dtype="float64").models[0],
        mace_mp(model="large", device="cpu", default_dtype="float64").models[0],
        mace_mp(model=MODEL_PATH, device="cpu", default_dtype="float64").models[0],
        mace_off(model="small", device="cpu", default_dtype="float64").models[0],
        mace_off(model="medium", device="cpu", default_dtype="float64").models[0],
        mace_off(model="large", device="cpu", default_dtype="float64").models[0],
        mace_off(model=MODEL_PATH, device="cpu", default_dtype="float64").models[0],
    ],
)
def test_extract_config(model):
    """A model rebuilt from extract_config_mace_model + state_dict reproduces
    the original model's outputs on a test batch."""
    assert isinstance(model, modules.ScaleShiftMACE)
    config = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    model_copy = modules.ScaleShiftMACE(**extract_config_mace_model(model))
    model_copy.load_state_dict(model.state_dict())
    z_table = AtomicNumberTable([int(z) for z in model.atomic_numbers])
    atomic_data = data.AtomicData.from_config(config, z_table=z_table, cutoff=6.0)
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    output = model(batch.to_dict())
    output_copy = model_copy(batch.to_dict())
    # assert all items of the output dicts are equal
    for key in output.keys():
        if isinstance(output[key], torch.Tensor):
            assert torch.allclose(output[key], output_copy[key], atol=1e-5)
def test_remove_pt_head():
    """Stripping the pretraining head from a 2-head model keeps the DFT head's
    structure and reproduces its energy/forces exactly."""
    # Set up test data
    torch.manual_seed(42)
    atomic_energies_pt_head = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=float)
    z_table = AtomicNumberTable([1, 8])  # H and O

    # Create multihead model
    model_config = {
        "r_max": 5.0,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "interaction_cls_first": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "num_interactions": 2,
        "num_elements": len(z_table),
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies_pt_head,
        "avg_num_neighbors": 8,
        "atomic_numbers": z_table.zs,
        "correlation": 3,
        "heads": ["pt_head", "DFT"],
        "atomic_inter_scale": [1.0, 1.0],
        "atomic_inter_shift": [0.0, 0.1],
    }
    model = modules.ScaleShiftMACE(**model_config)

    # Create test molecule
    mol = molecule("H2O")
    config_pt_head = data.Configuration(
        atomic_numbers=mol.numbers,
        positions=mol.positions,
        properties={"energy": 1.0, "forces": np.random.randn(len(mol), 3)},
        property_weights={"forces": 1.0, "energy": 1.0},
        head="DFT",
    )
    atomic_data = data.AtomicData.from_config(
        config_pt_head, z_table=z_table, cutoff=5.0, heads=["pt_head", "DFT"]
    )
    dataloader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data], batch_size=1, shuffle=False
    )
    batch = next(iter(dataloader))

    # Test original mode
    output_orig = model(batch.to_dict())

    # Convert to single head model
    new_model = remove_pt_head(model, head_to_keep="DFT")

    # Basic structure tests
    assert len(new_model.heads) == 1
    assert new_model.heads[0] == "DFT"
    assert new_model.atomic_energies_fn.atomic_energies.shape[0] == 1
    assert len(torch.atleast_1d(new_model.scale_shift.scale)) == 1
    assert len(torch.atleast_1d(new_model.scale_shift.shift)) == 1

    # Test output consistency: rebuild the batch with only the kept head so
    # one-hot head indices match the single-head model.
    atomic_data = data.AtomicData.from_config(
        config_pt_head, z_table=z_table, cutoff=5.0, heads=["DFT"]
    )
    dataloader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data], batch_size=1, shuffle=False
    )
    batch = next(iter(dataloader))
    output_new = new_model(batch.to_dict())
    torch.testing.assert_close(
        output_orig["energy"], output_new["energy"], rtol=1e-5, atol=1e-5
    )
    torch.testing.assert_close(
        output_orig["forces"], output_new["forces"], rtol=1e-5, atol=1e-5
    )
def test_remove_pt_head_multihead():
    """For a 4-head model, removing all heads but one must preserve that head's
    scale/shift and outputs, raise on unknown heads, and default to the first
    non-pretraining head."""
    # Set up test data
    torch.manual_seed(42)
    atomic_energies_pt_head = np.array(
        [
            [1.0, 2.0],  # H energies for each head
            [3.0, 4.0],  # O energies for each head
        ]
        * 2
    )
    z_table = AtomicNumberTable([1, 8])  # H and O

    # Create multihead model
    model_config = {
        "r_max": 5.0,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "interaction_cls_first": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "num_interactions": 2,
        "num_elements": len(z_table),
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies_pt_head,
        "avg_num_neighbors": 8,
        "atomic_numbers": z_table.zs,
        "correlation": 3,
        "heads": ["pt_head", "DFT", "MP2", "CCSD"],
        "atomic_inter_scale": [1.0, 1.0, 1.0, 1.0],
        "atomic_inter_shift": [0.0, 0.1, 0.2, 0.3],
    }
    model = modules.ScaleShiftMACE(**model_config)

    # Create test configurations for each head
    mol = molecule("H2O")
    configs = {}
    atomic_datas = {}
    dataloaders = {}
    original_outputs = {}

    # First get outputs from original model for each head
    for head in model.heads:
        config_pt_head = data.Configuration(
            atomic_numbers=mol.numbers,
            positions=mol.positions,
            properties={"energy": 1.0, "forces": np.random.randn(len(mol), 3)},
            property_weights={"forces": 1.0, "energy": 1.0},
            head=head,
        )
        configs[head] = config_pt_head
        atomic_data = data.AtomicData.from_config(
            config_pt_head, z_table=z_table, cutoff=5.0, heads=model.heads
        )
        atomic_datas[head] = atomic_data
        dataloader = torch_geometric.dataloader.DataLoader(
            dataset=[atomic_data], batch_size=1, shuffle=False
        )
        dataloaders[head] = dataloader
        batch = next(iter(dataloader))
        output = model(batch.to_dict())
        original_outputs[head] = output

    # Now test each head separately
    for i, head in enumerate(model.heads):
        # Convert to single head model
        new_model = remove_pt_head(model, head_to_keep=head)

        # Basic structure tests
        assert len(new_model.heads) == 1, f"Failed for head {head}"
        assert new_model.heads[0] == head, f"Failed for head {head}"
        assert (
            new_model.atomic_energies_fn.atomic_energies.shape[0] == 1
        ), f"Failed for head {head}"
        assert (
            len(torch.atleast_1d(new_model.scale_shift.scale)) == 1
        ), f"Failed for head {head}"
        assert (
            len(torch.atleast_1d(new_model.scale_shift.shift)) == 1
        ), f"Failed for head {head}"

        # Verify scale and shift values
        assert torch.allclose(
            new_model.scale_shift.scale, model.scale_shift.scale[i : i + 1]
        ), f"Failed for head {head}"
        assert torch.allclose(
            new_model.scale_shift.shift, model.scale_shift.shift[i : i + 1]
        ), f"Failed for head {head}"

        # Test output consistency
        single_head_data = data.AtomicData.from_config(
            configs[head], z_table=z_table, cutoff=5.0, heads=[head]
        )
        single_head_loader = torch_geometric.dataloader.DataLoader(
            dataset=[single_head_data], batch_size=1, shuffle=False
        )
        batch = next(iter(single_head_loader))
        new_output = new_model(batch.to_dict())
        # Compare outputs
        print(
            original_outputs[head]["energy"],
            new_output["energy"],
        )
        torch.testing.assert_close(
            original_outputs[head]["energy"],
            new_output["energy"],
            rtol=1e-5,
            atol=1e-5,
            msg=f"Energy mismatch for head {head}",
        )
        torch.testing.assert_close(
            original_outputs[head]["forces"],
            new_output["forces"],
            rtol=1e-5,
            atol=1e-5,
            msg=f"Forces mismatch for head {head}",
        )

    # Test error cases
    with pytest.raises(ValueError, match="Head non_existent not found in model"):
        remove_pt_head(model, head_to_keep="non_existent")

    # Test default behavior (first non-PT head)
    default_model = remove_pt_head(model)
    assert default_model.heads[0] == "DFT"

    # Additional test: check if each model's computation graph is independent
    models = {head: remove_pt_head(model, head_to_keep=head) for head in model.heads}
    results = {}
    for head, head_model in models.items():
        single_head_data = data.AtomicData.from_config(
            configs[head], z_table=z_table, cutoff=5.0, heads=[head]
        )
        single_head_loader = torch_geometric.dataloader.DataLoader(
            dataset=[single_head_data], batch_size=1, shuffle=False
        )
        batch = next(iter(single_head_loader))
        results[head] = head_model(batch.to_dict())

    # Verify each model produces different outputs
    energies = torch.stack([results[head]["energy"] for head in model.heads])
    assert not torch.allclose(
        energies[0], energies[1], rtol=1e-3
    ), "Different heads should produce different outputs"
mace-bench/3rdparty/mace/tests/test_hessian.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
pytest
from
ase.build
import
fcc111
from
mace.calculators
import
mace_mp
@pytest.fixture(name="setup_calculator_")
def setup_calculator():
    """Provide a CPU, float64 MACE-MP "medium" calculator without dispersion."""
    return mace_mp(
        model="medium",
        dispersion=False,
        default_dtype="float64",
        device="cpu",
    )
@pytest.fixture(name="setup_structure_")
def setup_structure(setup_calculator_):
    """Build a 4x4x1 orthogonal Pt(111) slab and attach the MACE calculator."""
    slab = fcc111("Pt", size=(4, 4, 1), vacuum=10.0, orthogonal=True)
    slab.calc = setup_calculator_
    return slab
def test_potential_energy_and_hessian(setup_structure_):
    """The autograd Hessian must come back with shape (3N, N, 3) for N atoms."""
    atoms = setup_structure_
    n_atoms = len(atoms)
    hessian = atoms.calc.get_hessian(atoms=atoms)
    assert hessian.shape == (3 * n_atoms, n_atoms, 3)
def test_finite_difference_hessian(setup_structure_):
    """Central-difference Hessian of the forces must match the autograd Hessian.

    Each Cartesian degree of freedom is displaced by +/- delta; the force
    difference builds one Hessian column (H = -dF/dx).  The assembled matrix
    is reshaped to the (3N, N, 3) layout of ``calc.get_hessian`` and compared
    to 1e-6 absolute tolerance.
    """
    initial = setup_structure_
    calc = initial.calc  # reuse one calculator for every displaced copy
    indices = list(range(len(initial)))  # fixed: was misspelled "indicies"
    delta, ndim = 1e-4, 3
    hessian = np.zeros((len(indices) * ndim, len(indices) * ndim))
    atoms_h = initial.copy()
    for i, index in enumerate(indices):
        for j in range(ndim):
            # Forward displacement of atom `index` along axis `j`.
            atoms_plus = atoms_h.copy()
            atoms_plus.positions[index, j] += delta
            atoms_plus.calc = calc
            forces_plus = atoms_plus.get_forces()
            # Backward displacement.
            atoms_minus = atoms_h.copy()
            atoms_minus.positions[index, j] -= delta
            atoms_minus.calc = calc
            forces_minus = atoms_minus.get_forces()
            # Column of the Hessian via central differences: H = -dF/dx.
            hessian[:, i * ndim + j] = (
                -(forces_plus - forces_minus)[indices].flatten() / (2 * delta)
            )
    hessian = hessian.reshape((-1, len(initial), 3))
    h_autograd = calc.get_hessian(atoms=initial)
    assert np.allclose(h_autograd, hessian, atol=1e-6)
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
0 → 100644
View file @
1be78103
import
os
import
tempfile
import
numpy
as
np
import
torch
from
ase.build
import
molecule
from
ase.calculators.singlepoint
import
SinglePointCalculator
from
mace.data.lmdb_dataset
import
LMDBDataset
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
from
mace.tools.fairchem_dataset.lmdb_dataset_tools
import
LMDBDatabase
def test_lmdb_dataset():
    """Test the LMDBDataset by creating a fake database and verifying batch creation."""
    # Set default dtype to match typical MACE usage
    torch.set_default_dtype(torch.float64)
    # Set random seed for reproducibility (draw order below matters)
    np.random.seed(42)
    # Create temporary directories for the databases
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create 3 folders for databases; LMDBDataset accepts a colon-joined
        # list of folders and discovers the *.aselmdb files inside each.
        db_paths = []
        for i in range(3):
            folder_path = os.path.join(tmpdir, f"folder_{i}")
            os.makedirs(folder_path, exist_ok=True)
            # Create LMDB database files in each folder (2 per folder)
            for j in range(2):
                db_path = os.path.join(folder_path, f"data_{j}.aselmdb")
                db = LMDBDatabase(db_path, readonly=False)
                # Add 2 configurations to each database
                for _ in range(2):
                    # Create a water molecule using ASE's build functionality
                    atoms = molecule("H2O")
                    # Apply small random displacements to the positions
                    displacement = np.random.rand(*atoms.positions.shape) * 0.1
                    atoms.positions += displacement
                    # Set cell and PBC
                    atoms.set_cell(np.eye(3) * 5.0)
                    atoms.set_pbc(True)
                    # Add random energy, forces, and stress
                    energy = np.random.uniform(-15.0, -5.0)  # Random energy between -15 and -5 eV
                    forces = (
                        np.random.randn(*atoms.positions.shape) * 0.5
                    )  # Random forces
                    stress = np.random.randn(6) * 0.2  # Random stress in Voigt notation
                    # Add calculator to atoms with results
                    calc = SinglePointCalculator(
                        atoms, energy=energy, forces=forces, stress=stress
                    )
                    atoms.calc = calc
                    # Store in database
                    db.write(atoms)
                db.close()
            # Add folder path to our list
            db_paths.append(folder_path)
        # Create the dataset using paths joined with colons
        paths_str = ":".join(db_paths)
        z_table = AtomicNumberTable([1, 8])  # H and O
        dataset = LMDBDataset(file_path=paths_str, r_max=5.0, z_table=z_table)
        # Check dataset size (3 folders * 2 files * 2 configs = 12 entries)
        assert len(dataset) == 12
        # Test retrieving a single item
        item = dataset[0]
        print(item)
        assert item.positions.shape == (3, 3)  # 3 atoms, 3 coordinates
        assert hasattr(item, "energy")
        assert hasattr(item, "forces")
        assert hasattr(item, "stress")
        # Create a dataloader
        dataloader = torch_geometric.dataloader.DataLoader(
            dataset=dataset,
            batch_size=4,
            shuffle=False,
            drop_last=False,
        )
        # Get a batch and validate it
        batch = next(iter(dataloader))
        # Verify batch properties - should have 12 atoms (4 configs * 3 atoms per water)
        assert batch.positions.shape == (12, 3)  # 12 atoms, 3 coordinates
        assert batch.energy.shape[0] == 4  # 4 energies (one per config)
        assert batch.forces.shape == (12, 3)  # Forces for each atom
        print(batch.stress.shape)
        assert batch.stress.shape == (4, 3, 3)  # Stress for each config
        # Check batch has required attributes for MACE model processing
        assert hasattr(batch, "batch")  # Batch indices
        assert batch.batch.shape[0] == 12  # One index per atom
        assert hasattr(batch, "ptr")  # Pointer for batch processing
        assert batch.ptr.shape[0] == 5  # One pointer per config + 1
        # Check that batch indices are correctly assigned
        # First 3 atoms should be from config 0, next 3 from config 1, etc.
        expected_batch = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3])
        assert torch.all(batch.batch == expected_batch)
        # Check ptr correctly points to start of each configuration
        assert batch.ptr.tolist() == [0, 3, 6, 9, 12]
        # Create a batch dictionary that can be passed to a MACE model
        batch_dict = batch.to_dict()
        assert "positions" in batch_dict
        assert "energy" in batch_dict
        assert "forces" in batch_dict
        assert "stress" in batch_dict
        assert "batch" in batch_dict
        assert "ptr" in batch_dict
        # Verify additional properties required by MACE
        assert hasattr(batch, "edge_index")  # Connectivity information
        assert hasattr(batch, "shifts")  # For periodic boundary conditions
        assert hasattr(batch, "cell")  # Unit cell information
        # Test that a full batch can be processed (without errors)
        all_batches = list(dataloader)
        assert (
            len(all_batches) == 3
        )  # Should have 3 batches (12 configs with batch size 4)
mace-bench/3rdparty/mace/tests/test_models.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
torch
import
torch.nn.functional
from
ase
import
build
from
e3nn
import
o3
from
e3nn.util
import
jit
from
scipy.spatial.transform
import
Rotation
as
R
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
torch_geometric
torch.set_default_dtype(torch.float64)
# Reference water-like configuration (O at origin-ish, two H) with labelled
# forces, energy, charges and dipole; shared by all tests in this module.
config = data.Configuration(
    atomic_numbers=np.array([8, 1, 1]),
    positions=np.array(
        [
            [0.0, -2.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]
    ),
    properties={
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "charges": np.array([-2.0, 1.0, 1.0]),
        "dipole": np.array([-1.5, 1.5, 2.0]),
    },
    property_weights={
        "forces": 1.0,
        "energy": 1.0,
        "charges": 1.0,
        "dipole": 1.0,
    },
)
# Created the rotated environment: same structure rotated 60 degrees about z,
# used to probe invariance (energy) and equivariance (dipole) of the models.
rot = R.from_euler("z", 60, degrees=True).as_matrix()
positions_rotated = np.array(rot @ config.positions.T).T
# NOTE: the labels (forces/dipole) are deliberately NOT rotated here; the
# tests only compare model outputs, not these reference properties.
config_rotated = data.Configuration(
    atomic_numbers=np.array([8, 1, 1]),
    positions=positions_rotated,
    properties={
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "charges": np.array([-2.0, 1.0, 1.0]),
        "dipole": np.array([-1.5, 1.5, 2.0]),
    },
    property_weights={
        "forces": 1.0,
        "energy": 1.0,
        "charges": 1.0,
        "dipole": 1.0,
    },
)
# Element table (H, O) and per-element reference energies used by the models.
table = tools.AtomicNumberTable([1, 8])
atomic_energies = np.array([1.0, 3.0], dtype=float)
def test_mace():
    """MACE and its TorchScript-compiled twin agree; energy is rotation invariant."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 6,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 5,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 8,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "bessel",
    }
    mace_model = modules.MACE(**settings)
    scripted_model = jit.compile(mace_model)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    eager_out = mace_model(batch.to_dict(), training=True)
    scripted_out = scripted_model(batch.to_dict(), training=True)
    # Eager and TorchScript-compiled models must agree on the same batch.
    assert torch.allclose(eager_out["energy"][0], scripted_out["energy"][0])
    # The rotated copy of the same molecule must have the same energy.
    assert torch.allclose(scripted_out["energy"][0], scripted_out["energy"][1])
def test_dipole_mace():
    """AtomicDipolesMACE predicts dipoles of the right shape that co-rotate with the input."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("16x0e + 16x1o + 16x2e"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": None,
        "avg_num_neighbors": 3,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "gaussian",
    }
    dipole_model = modules.AtomicDipolesMACE(**settings)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    prediction = dipole_model(
        batch,
        training=True,
    )
    # sanity check of dipoles being the right shape
    assert prediction["dipole"][0].unsqueeze(0).shape == graphs[0].dipole.shape
    # test equivariance of output dipoles: rotating the structure must rotate
    # the predicted dipole by the same matrix.
    rotated_reference = np.array(rot @ prediction["dipole"][0].detach().numpy())
    assert np.allclose(
        rotated_reference,
        prediction["dipole"][1].detach().numpy(),
    )
def test_energy_dipole_mace():
    """EnergyDipolesMACE: invariant energy and equivariant dipoles on a rotated copy."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("16x0e + 16x1o + 16x2e"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 3,
        "atomic_numbers": table.zs,
        "correlation": 3,
    }
    combined_model = modules.EnergyDipolesMACE(**settings)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    prediction = combined_model(
        batch,
        training=True,
    )
    # sanity check of dipoles being the right shape
    assert prediction["dipole"][0].unsqueeze(0).shape == graphs[0].dipole.shape
    # test energy is invariant under the rotation
    assert torch.allclose(prediction["energy"][0], prediction["energy"][1])
    # test equivariance of output dipoles
    rotated_reference = np.array(rot @ prediction["dipole"][0].detach().numpy())
    assert np.allclose(
        rotated_reference,
        prediction["dipole"][1].detach().numpy(),
    )
def test_mace_multi_reference():
    """Two-head ScaleShiftMACE: compiled model matches eager, one energy per graph."""
    # One row of E0s per head: head 0 ("Default") uses [1, 3], head 1 ("dft") zeros.
    atomic_energies_multi = np.array([[1.0, 3.0], [0.0, 0.0]], dtype=float)
    model_config = dict(
        r_max=5,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=2,
        hidden_irreps=o3.Irreps("96x0e + 96x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies_multi,
        avg_num_neighbors=8,
        atomic_numbers=table.zs,
        distance_transform=True,
        pair_repulsion=True,
        correlation=3,
        heads=["Default", "dft"],
        # radial_type="chebyshev",
        # Per-head scale/shift pairs, matching the two heads above.
        atomic_inter_scale=[1.0, 1.0],
        atomic_inter_shift=[0.0, 0.1],
    )
    model = modules.ScaleShiftMACE(**model_config)
    model_compiled = jit.compile(model)
    # NOTE(review): this mutates the module-level `config`/`config_rotated`
    # objects in place; tests running after this one see head labels set.
    config.head = "Default"
    config_rotated.head = "dft"
    atomic_data = data.AtomicData.from_config(
        config, z_table=table, cutoff=3.0, heads=["Default", "dft"]
    )
    atomic_data2 = data.AtomicData.from_config(
        config_rotated, z_table=table, cutoff=3.0, heads=["Default", "dft"]
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data2],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    output1 = model(batch.to_dict(), training=True)
    output2 = model_compiled(batch.to_dict(), training=True)
    # Eager and TorchScript-compiled models must agree.
    assert torch.allclose(output1["energy"][0], output2["energy"][0])
    # One energy per configuration in the batch.
    assert output2["energy"].shape[0] == 2
def test_atomic_virials_stresses():
    """
    Test that atomic virials and stresses sum to the total virials and stress.
    """
    # Set default dtype for reproducibility
    torch.set_default_dtype(torch.float64)
    # Create a periodic cell with ASE
    atoms = build.bulk("Si", "diamond", a=5.43)
    # Apply strain to ensure non-zero stress
    strain_tensor = np.eye(3) * 1.02  # 2% strain
    atoms.set_cell(np.dot(atoms.get_cell(), strain_tensor), scale_atoms=True)
    # Add forces and energy for completeness
    # NOTE(review): np.random is used unseeded here; values are placeholders
    # and never asserted against, so this does not affect determinism of the
    # actual checks below.
    atoms.arrays["REF_forces"] = np.random.normal(0, 0.1, size=atoms.positions.shape)
    atoms.info["REF_energy"] = np.random.normal(0, 1)
    atoms.info["REF_stress"] = np.random.normal(0, 0.1, size=6)
    # Setup MACE model configuration
    stress_z_table = tools.AtomicNumberTable([14])  # Silicon
    stress_atomic_energies = np.array([0.0])
    model_config = dict(
        r_max=5.0,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=2,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=3,
        num_elements=1,
        hidden_irreps=o3.Irreps("32x0e + 32x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=stress_atomic_energies,
        avg_num_neighbors=4.0,
        # NOTE(review): this passes the module-level H/O table's zs while the
        # data uses stress_z_table ([14]) and num_elements=1 — looks
        # inconsistent; confirm whether atomic_numbers should be
        # stress_z_table.zs here.
        atomic_numbers=table.zs,
        correlation=3,
        atomic_inter_scale=1.0,
        atomic_inter_shift=0.0,
    )
    # Create the model
    model = modules.ScaleShiftMACE(**model_config)
    # Create atomic data
    atomic_data = data.AtomicData.from_config(
        data.config_from_atoms(
            atoms, key_specification=data.KeySpecification.from_defaults()
        ),
        z_table=stress_z_table,
        cutoff=5.0,
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    batch_dict = batch.to_dict()
    # Run the model with compute_atomic_stresses=True
    output = model(
        batch_dict,
        compute_force=True,
        compute_virials=True,
        compute_stress=True,
        compute_atomic_stresses=True,
    )
    # Get total virials/stress and atomic virials/stresses
    total_virials = output["virials"]
    atomic_virials = output["atomic_virials"]
    total_stress = output["stress"]
    atomic_stresses = output["atomic_stresses"]
    # Test that atomic values are not None
    assert atomic_virials is not None, "Atomic virials were not computed"
    assert atomic_stresses is not None, "Atomic stresses were not computed"
    # Test shape of atomic values
    assert atomic_virials.shape[0] == len(atoms), "Wrong shape for atomic virials"
    assert atomic_virials.shape[1:] == (3, 3), "Atomic virials should be 3x3 matrices"
    assert atomic_stresses.shape[0] == len(atoms), "Wrong shape for atomic stresses"
    assert atomic_stresses.shape[1:] == (3, 3), "Atomic stresses should be 3x3 matrices"
    # Compute sum of atomic values
    summed_atomic_virials = torch.sum(atomic_virials, dim=0)
    summed_atomic_stresses = torch.sum(atomic_stresses, dim=0)
    # Test that sums match total values
    assert torch.allclose(
        summed_atomic_virials, total_virials.squeeze(0), atol=1e-6
    ), f"Sum of atomic virials {summed_atomic_virials} does not match total virials {total_virials.squeeze(0)}"
    assert torch.allclose(
        summed_atomic_stresses, total_stress.squeeze(0), atol=1e-6
    ), f"Sum of atomic stresses (normalized by volume) {summed_atomic_stresses} does not match total stress {total_stress.squeeze(0)}"
mace-bench/3rdparty/mace/tests/test_modules.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
from
e3nn
import
o3
from
mace.data
import
AtomicData
,
Configuration
from
mace.modules
import
(
AtomicEnergiesBlock
,
BesselBasis
,
PolynomialCutoff
,
SymmetricContraction
,
WeightedEnergyForcesLoss
,
WeightedHuberEnergyForcesStressLoss
,
compute_mean_rms_energy_forces
,
compute_statistics
,
)
from
mace.tools
import
AtomicNumberTable
,
scatter
,
to_numpy
,
torch_geometric
from
mace.tools.scripts_utils
import
dict_to_array
@pytest.fixture(name="config")
def _config():
    """Water-like configuration with forces, energy and (Voigt) stress labels."""
    numbers = np.array([8, 1, 1])
    coords = np.array(
        [
            [0.0, -2.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]
    )
    labels = {
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "stress": np.array([1.0, 0.0, 0.5, 0.0, -1.0, 0.0]),
    }
    weights = {"forces": 1.0, "energy": 1.0, "stress": 1.0}
    return Configuration(
        atomic_numbers=numbers,
        positions=coords,
        properties=labels,
        property_weights=weights,
    )
@pytest.fixture(name="table")
def _table():
    """Atomic-number table covering hydrogen and oxygen."""
    return AtomicNumberTable([1, 8])
@pytest.fixture(name="config1")
def _config1():
    """Water-like configuration labelled with the "DFT" head."""
    labels = {
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
    }
    return Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.0, -2.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 1.0, 0.0],
            ]
        ),
        properties=labels,
        property_weights={"forces": 1.0, "energy": 1.0},
        head="DFT",
    )
@pytest.fixture(name="config2")
def _config2():
    """Slightly displaced water-like configuration labelled with the "MP2" head."""
    labels = {
        "forces": np.array(
            [
                [0.1, -1.2, 0.1],
                [1.1, 0.3, 0.1],
                [0.1, 1.2, 0.4],
            ]
        ),
        "energy": -1.4,
    }
    return Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.1, -1.9, 0.1],
                [1.1, 0.1, 0.1],
                [0.1, 1.1, 0.1],
            ]
        ),
        properties=labels,
        property_weights={"forces": 1.0, "energy": 1.0},
        head="MP2",
    )
@pytest.fixture(name="atomic_data")
def _atomic_data(config1, config2, table):
    """Both head-labelled configurations converted to AtomicData graphs."""
    return [
        AtomicData.from_config(cfg, z_table=table, cutoff=3.0, heads=["DFT", "MP2"])
        for cfg in (config1, config2)
    ]
@pytest.fixture(name="data_loader")
def _data_loader(atomic_data):
    """Single-batch loader over the two AtomicData graphs (no shuffling)."""
    loader_kwargs = {
        "dataset": atomic_data,
        "batch_size": 2,
        "shuffle": False,
        "drop_last": False,
    }
    return torch_geometric.dataloader.DataLoader(**loader_kwargs)
@pytest.fixture(name="atomic_energies")
def _atomic_energies():
    """Per-head E0s stacked in head order ("DFT" first, then "MP2")."""
    per_head = {
        "DFT": np.array([0.0, 0.0]),
        "MP2": np.array([0.1, 0.1]),
    }
    return dict_to_array(per_head, ["DFT", "MP2"])
@pytest.fixture(autouse=True)
def _set_torch_default_dtype():
    """Force float64 as the torch default for every test in this module."""
    torch.set_default_dtype(torch.float64)
def test_weighted_loss(config, table):
    """Both weighted losses must vanish when predictions equal the labels."""
    graph = AtomicData.from_config(config, z_table=table, cutoff=3.0)
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    # Use the reference labels themselves as the "prediction".
    perfect_prediction = {
        "energy": batch.energy,
        "forces": batch.forces,
        "stress": batch.stress,
    }
    for loss_fn in (
        WeightedEnergyForcesLoss(energy_weight=1, forces_weight=10),
        WeightedHuberEnergyForcesStressLoss(energy_weight=1, forces_weight=10),
    ):
        assert loss_fn(batch, perfect_prediction) == 0.0
def test_symmetric_contraction():
    """SymmetricContraction output and internal weight shapes are as expected."""
    contraction = SymmetricContraction(
        irreps_in=o3.Irreps("16x0e + 16x1o + 16x2e"),
        irreps_out=o3.Irreps("16x0e + 16x1o"),
        correlation=3,
        num_elements=2,
    )
    torch.manual_seed(123)
    node_feats = torch.randn(30, 16, 9)
    # Alternate the two element types across the 30 nodes.
    element_one_hot = torch.nn.functional.one_hot(torch.arange(0, 30) % 2).to(
        torch.get_default_dtype()
    )
    result = contraction(node_feats, element_one_hot)
    assert result.shape == (30, 64)
    assert contraction.contractions[0].weights_max.shape == (2, 11, 16)
def test_bessel_basis():
    """BesselBasis maps 10 distances to 10 rows of 5 basis values."""
    distances = torch.linspace(start=0.5, end=5.5, steps=10)
    basis = BesselBasis(r_max=6.0, num_basis=5)
    values = basis(distances.unsqueeze(-1))
    assert values.shape == (10, 5)
def test_polynomial_cutoff():
    """PolynomialCutoff preserves the shape of its 1-D distance input."""
    distances = torch.linspace(start=0.5, end=5.5, steps=10)
    envelope = PolynomialCutoff(r_max=5.0)
    values = envelope(distances)
    assert values.shape == (10,)
def test_atomic_energies(config, table):
    """Summed per-atom E0s give 1 + 1 + 3 = 5 for each water-like graph."""
    e0_block = AtomicEnergiesBlock(atomic_energies=np.array([1.0, 3.0]))
    graph = AtomicData.from_config(config, z_table=table, cutoff=3.0)
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    node_e0 = e0_block(batch.node_attrs).squeeze(-1)
    per_graph = scatter.scatter_sum(
        src=node_e0, index=batch.batch, dim=-1, reduce="sum"
    )
    assert np.allclose(to_numpy(per_graph), np.array([5.0, 5.0]))
def test_atomic_energies_multireference(config, table):
    """With two heads, the MP2 row (2, 4) must be selected: 2 + 2 + 4 = 8."""
    e0_block = AtomicEnergiesBlock(
        atomic_energies=np.array([[1.0, 3.0], [2.0, 4.0]])
    )
    # Relabel the fixture configuration so the second (MP2) head row is used.
    config.head = "MP2"
    graph = AtomicData.from_config(
        config, z_table=table, cutoff=3.0, heads=["DFT", "MP2"]
    )
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    atom_indices = torch.arange(batch["positions"].shape[0])
    # Broadcast each graph's head index onto its atoms (default head 0).
    if "head" in batch:
        node_heads = batch["head"][batch["batch"]]
    else:
        node_heads = torch.zeros_like(batch["batch"])
    node_e0 = e0_block(batch.node_attrs).squeeze(-1)
    node_e0 = node_e0[atom_indices, node_heads]
    per_graph = scatter.scatter_sum(
        src=node_e0, index=batch.batch, dim=-1, reduce="sum"
    )
    assert np.allclose(to_numpy(per_graph), np.array([8.0, 8.0]))
def test_compute_mean_rms_energy_forces_multi_head(data_loader, atomic_energies):
    """Per-head mean/RMS arrays have one entry per head and differ between heads."""
    mean, rms = compute_mean_rms_energy_forces(data_loader, atomic_energies)
    for stat in (mean, rms):
        assert isinstance(stat, np.ndarray)
        assert stat.shape == (2,)
    assert np.all(rms >= 0)
    # The two heads carry different data, so their RMS must differ.
    assert rms[0] != rms[1]
def test_compute_statistics(data_loader, atomic_energies):
    """compute_statistics returns a neighbor count plus per-head mean/std arrays."""
    avg_num_neighbors, mean, std = compute_statistics(data_loader, atomic_energies)
    assert isinstance(avg_num_neighbors, float)
    assert avg_num_neighbors > 0
    for stat in (mean, std):
        assert isinstance(stat, np.ndarray)
        assert stat.shape == (2,)
    assert np.all(mean != 0)
    assert np.all(std > 0)
    # Heads hold different data, so their statistics must differ.
    assert mean[0] != mean[1]
    assert std[0] != std[1]
mace-bench/3rdparty/mace/tests/test_multifiles.py
0 → 100644
View file @
1be78103
import
json
import
os
import
shutil
import
subprocess
import
sys
import
tempfile
import
zlib
from
pathlib
import
Path
import
lmdb
import
numpy
as
np
import
orjson
import
pytest
import
torch
import
yaml
from
ase.atoms
import
Atoms
from
ase.calculators.singlepoint
import
SinglePointCalculator
from
mace.calculators
import
MACECalculator
def create_test_atoms(num_atoms=5, seed=42):
    """Build a random periodic structure with energy/forces/stress attached.

    The RNG draw order (positions, numbers, energy, forces, stress) is fixed
    so a given seed always yields the same structure.
    """
    rng = np.random.RandomState(seed)
    coords = rng.rand(num_atoms, 3) * 5.0
    species = rng.choice([1, 6, 7, 8], size=num_atoms)  # H, C, N, O
    structure = Atoms(
        numbers=species,
        positions=coords,
        cell=np.eye(3) * 10.0,  # 10 Å periodic box
        pbc=True,
    )
    # Synthetic reference labels.
    ref_energy = float(rng.uniform(-15.0, -5.0))
    ref_forces = rng.rand(num_atoms, 3) * 0.5 - 0.25  # in [-0.25, 0.25) eV/Å
    ref_stress = rng.rand(6) * 0.2 - 0.1  # Voigt notation
    structure.calc = SinglePointCalculator(
        structure, energy=ref_energy, forces=ref_forces, stress=ref_stress
    )
    # A single atom is tagged so MACE treats it as an isolated-atom reference.
    if num_atoms == 1:
        structure.info["config_type"] = "IsolatedAtom"
    return structure
def create_xyz_file(atoms_list, filename):
    """Write the given atoms to *filename* in extended-xyz format; return the path."""
    from ase.io import write

    write(filename, atoms_list, format="extxyz")
    return filename
def create_e0s_file(e0s_dict, filename):
    """Write isolated-atom energies to *filename* as JSON; return the path.

    Keys are normalised to integers before dumping because MACE expects
    atomic numbers (json serialises them back to string keys).
    """
    normalised = {int(z): energy for z, energy in e0s_dict.items()}
    with open(filename, "w", encoding="utf-8") as handle:
        json.dump(normalised, handle)
    return filename
def create_h5_dataset(xyz_file, output_dir, e0s_file=None, r_max=5.0, seed=42):
    """
    Run MACE's preprocess_data.py script to convert an xyz file to h5 format.
    Args:
        xyz_file: Path to the input xyz file
        output_dir: Directory to store the preprocessed h5 files
        e0s_file: Path to the E0s file with isolated atom energies
        r_max: Cutoff radius
        seed: Random seed
    Returns:
        The output directory containing the h5 files
    """
    # Make sure output directory exists
    os.makedirs(output_dir, exist_ok=True)
    # Find the path to the preprocess_data.py script (relative to this test file)
    preprocess_script = (
        Path(__file__).parent.parent / "mace" / "cli" / "preprocess_data.py"
    )
    # Set up command to run preprocess_data.py
    cmd = [
        sys.executable,
        str(preprocess_script),
        f"--train_file={xyz_file}",
        f"--r_max={r_max}",
        f"--h5_prefix={output_dir}/",
        f"--seed={seed}",
        "--compute_statistics",  # Generate statistics file
        "--num_process=2",  # Create 2 files for testing sharded loading
    ]
    # Add E0s file if provided
    if e0s_file:
        cmd.append(f"--E0s={e0s_file}")
    # Set up environment so the subprocess imports the in-repo mace package.
    # NOTE(review): ":" as the path separator is POSIX-only; use os.pathsep
    # if this ever needs to run on Windows.
    env = os.environ.copy()
    env["PYTHONPATH"] = (
        str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
    )
    # Run the script
    print(f"Running preprocess command: {' '.join(cmd)}")
    try:
        process = subprocess.run(
            cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
        )
        # Print output for debugging
        print("Preprocess stdout:", process.stdout.decode())
        print("Preprocess stderr:", process.stderr.decode())
    except subprocess.CalledProcessError as e:
        # Surface the subprocess output before re-raising so failures are debuggable.
        print("Preprocess failed with error:", e)
        print("Stdout:", e.stdout.decode() if e.stdout else "")
        print("Stderr:", e.stderr.decode() if e.stderr else "")
        raise
    return output_dir
def create_lmdb_dataset(atoms_list, folder_path, head_name="Default"):
    """Create an LMDB dataset from a list of atoms objects that MACE can read.

    Writes a single ``data.aselmdb`` file inside *folder_path*, mimicking the
    on-disk layout of ase's LMDB-backed database: zlib-compressed orjson
    payloads keyed by "metadata", "nextid", "deleted_ids" and 1-based row ids.
    Returns *folder_path*.
    """
    # Create the folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)
    # Create the LMDB database file
    db_path = os.path.join(folder_path, "data.aselmdb")
    # Initialize LMDB environment (single-file database, not a subdirectory)
    env = lmdb.open(
        db_path,
        map_size=1099511627776,  # 1TB
        subdir=False,
        meminit=False,
        map_async=True,
    )
    # Open a transaction
    with env.begin(write=True) as txn:
        # Store metadata
        metadata = {"format_version": 1}
        txn.put(
            "metadata".encode("ascii"),
            zlib.compress(orjson.dumps(metadata, option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store nextid (ids are 1-based, so next free id is len + 1)
        nextid = len(atoms_list) + 1
        txn.put(
            "nextid".encode("ascii"),
            zlib.compress(orjson.dumps(nextid, option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store deleted_ids (empty)
        txn.put(
            "deleted_ids".encode("ascii"),
            zlib.compress(orjson.dumps([], option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store each atom
        for i, atoms in enumerate(atoms_list):
            id_num = i + 1  # Start from 1
            # Convert atoms to dictionary
            positions = atoms.get_positions()
            cell = atoms.get_cell()
            # Create a dictionary with all necessary fields
            dct = {
                "numbers": atoms.get_atomic_numbers().tolist(),
                "positions": positions.tolist(),
                "cell": cell.tolist(),
                "pbc": atoms.get_pbc().tolist(),
                "ctime": 0.0,  # Creation time
                "mtime": 0.0,  # Modification time
                "user": "test",
                # Labels are read from the attached SinglePointCalculator.
                "energy": atoms.calc.results["energy"],
                "forces": atoms.calc.results["forces"].tolist(),
                "stress": atoms.calc.results["stress"].tolist(),
                "key_value_pairs": {
                    "config_type": atoms.info.get("config_type", "Default"),
                    "head": head_name,
                },
            }
            # Store the atom in LMDB
            txn.put(
                f"{id_num}".encode("ascii"),
                zlib.compress(orjson.dumps(dct, option=orjson.OPT_SERIALIZE_NUMPY)),
            )
    # Close the environment
    env.close()
    return folder_path
@pytest.mark.slow
def test_multifile_training():
    """Test training with multiple file formats per head.

    Builds one head fed by an LMDB folder plus an XYZ file and a second head
    fed by an HDF5 folder plus an XYZ file, runs ``run_train.py`` in a
    subprocess on a generated YAML config, then checks that the trained model
    loads and produces finite energies/forces through ``MACECalculator``.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths
        xyz_file1 = os.path.join(temp_dir, "data1.xyz")
        xyz_file2 = os.path.join(temp_dir, "data2.xyz")
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        h5_folder = os.path.join(temp_dir, "h5_data")
        lmdb_folder1 = os.path.join(
            temp_dir, "lmdb_data1_lmdb"
        )  # Add _lmdb suffix for LMDB recognition
        lmdb_folder2 = os.path.join(
            temp_dir, "lmdb_data2_lmdb"
        )  # Add _lmdb suffix for LMDB recognition
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=5)
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            atom.info["REF_energy"] = energy  # Make sure energy is in the right place
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create 10 atoms for each dataset
        xyz_atoms1 = [
            create_test_atoms(num_atoms=5, seed=seeds[0] + i) for i in range(10)
        ]
        xyz_atoms2 = [
            create_test_atoms(num_atoms=5, seed=seeds[1] + i) for i in range(10)
        ]
        # Create h5 data directly - first convert the xyz file to a format with REF_ keys
        for atom in xyz_atoms1:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
        for atom in xyz_atoms2:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
        # Save isolated atoms to xyz files first, then create the h5 datasets
        create_xyz_file(xyz_atoms1, xyz_file1)
        create_xyz_file(xyz_atoms2, xyz_file2)
        # Create h5 data from xyz file, using both isolated atoms and real data
        all_atoms_for_h5 = isolated_atoms + xyz_atoms2
        all_atoms_xyz = os.path.join(temp_dir, "all_atoms_for_h5.xyz")
        create_xyz_file(all_atoms_for_h5, all_atoms_xyz)
        create_h5_dataset(all_atoms_xyz, h5_folder)
        # Create LMDB datasets
        lmdb_atoms1 = [
            create_test_atoms(num_atoms=5, seed=seeds[3] + i) for i in range(10)
        ]
        lmdb_atoms2 = [
            create_test_atoms(num_atoms=5, seed=seeds[4] + i) for i in range(10)
        ]
        create_lmdb_dataset(lmdb_atoms1, lmdb_folder1, head_name="head1")
        create_lmdb_dataset(lmdb_atoms2, lmdb_folder2, head_name="head2")
        # Create config.yaml for training with proper format specification.
        # Each head mixes two file formats in its train_file list.
        config = {
            "name": "multifile_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "head1": {
                    "train_file": [lmdb_folder1, xyz_file1],
                    "valid_file": xyz_file1,
                    "energy_key": "REF_energy",
                    "forces_key": "REF_forces",
                    "stress_key": "REF_stress",
                },
                "head2": {
                    "train_file": [h5_folder + "/train", xyz_file2],
                    "valid_file": xyz_file2,
                    "energy_key": "REF_energy",
                    "forces_key": "REF_forces",
                    "stress_key": "REF_stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,  # Don't raise exception on non-zero exit, we'll check manually
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multifile_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(model_paths=model_path, device="cpu", head="head1")
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_multiple_xyz_per_head():
    """Test training with multiple XYZ files per head for train, valid and test sets.

    Generates three train / two valid / two test XYZ files, points a single
    head at the lists of files via the YAML config, runs ``run_train.py`` in a
    subprocess, and verifies the resulting model loads and predicts finite
    energies/forces.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths - create multiple xyz files for each dataset
        train_xyz_files = [
            os.path.join(temp_dir, f"train_data{i}.xyz") for i in range(1, 4)
        ]  # 3 train files
        valid_xyz_files = [
            os.path.join(temp_dir, f"valid_data{i}.xyz") for i in range(1, 3)
        ]  # 2 valid files
        test_xyz_files = [
            os.path.join(temp_dir, f"test_data{i}.xyz") for i in range(1, 3)
        ]  # 2 test files
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=10)  # More seeds for multiple files
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create atoms for each train dataset - use different seeds for variety
        train_datasets = []
        for i, file in enumerate(train_xyz_files):
            # Create atoms with different seeds
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i] + j) for j in range(5)
            ]
            create_xyz_file(atoms, file)
            train_datasets.append(atoms)
        # Create atoms for validation datasets
        valid_datasets = []
        for i, file in enumerate(valid_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 3] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            valid_datasets.append(atoms)
        # Create atoms for test datasets
        test_datasets = []
        for i, file in enumerate(test_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 5] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            test_datasets.append(atoms)
        # Create config.yaml for training with multiple xyz files per dataset
        config = {
            "name": "multi_xyz_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "multi_xyz_head": {
                    # Using lists of multiple xyz files for each dataset
                    "train_file": train_xyz_files,
                    "valid_file": valid_xyz_files,
                    "test_file": test_xyz_files,
                    "energy_key": "energy",
                    "forces_key": "forces",
                    "stress_key": "stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multi_xyz_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(
            model_paths=model_path, device="cpu", head="multi_xyz_head"
        )
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_single_xyz_per_head():
    """Test training with a single XYZ file per head for train, valid and test sets.

    Same flow as ``test_multiple_xyz_per_head`` but every ``range(1, 2)``
    yields exactly one file per dataset, exercising the single-file code path
    through the same list-valued config keys.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths - one xyz file per dataset (lists of length 1)
        train_xyz_files = [
            os.path.join(temp_dir, f"train_data{i}.xyz") for i in range(1, 2)
        ]  # 1 train file
        valid_xyz_files = [
            os.path.join(temp_dir, f"valid_data{i}.xyz") for i in range(1, 2)
        ]  # 1 valid file
        test_xyz_files = [
            os.path.join(temp_dir, f"test_data{i}.xyz") for i in range(1, 2)
        ]  # 1 test file
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=10)  # More seeds for multiple files
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create atoms for each train dataset - use different seeds for variety
        train_datasets = []
        for i, file in enumerate(train_xyz_files):
            # Create atoms with different seeds
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i] + j) for j in range(5)
            ]
            create_xyz_file(atoms, file)
            train_datasets.append(atoms)
        # Create atoms for validation datasets
        valid_datasets = []
        for i, file in enumerate(valid_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 3] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            valid_datasets.append(atoms)
        # Create atoms for test datasets
        test_datasets = []
        for i, file in enumerate(test_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 5] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            test_datasets.append(atoms)
        # Create config.yaml for training; dataset keys still carry lists
        config = {
            "name": "multi_xyz_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "multi_xyz_head": {
                    # Using (single-element) lists of xyz files for each dataset
                    "train_file": train_xyz_files,
                    "valid_file": valid_xyz_files,
                    "test_file": test_xyz_files,
                    "energy_key": "energy",
                    "forces_key": "forces",
                    "stress_key": "stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multi_xyz_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(
            model_paths=model_path, device="cpu", head="multi_xyz_head"
        )
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_multihead_finetuning_different_formats():
    """Test multihead finetuning with different file formats for each head.

    One head trains from an XYZ file, the other from an HDF5 folder; both are
    finetuned on top of the "small" foundation model via ``run_train.py`` in a
    subprocess. Afterwards the test checks that the model exposes the two
    custom heads plus the pretraining ``pt_head`` and that both heads predict
    finite energies/forces.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths
        xyz_file = os.path.join(temp_dir, "finetuning_xyz.xyz")
        h5_folder = os.path.join(temp_dir, "h5_data")
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data with different seeds
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=3)
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            atom.info["REF_energy"] = energy  # Make sure energy is in the right place
            atom.arrays["REF_forces"] = forces
            atom.info["REF_stress"] = stress
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create XYZ data for xyz_head
        xyz_atoms = [
            create_test_atoms(num_atoms=5, seed=seeds[0] + i) for i in range(30)
        ]
        # Add REF_ properties
        for atom in xyz_atoms:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
            atom.info["head"] = "xyz_head"  # Assign head
        create_xyz_file(xyz_atoms, xyz_file)
        # Create H5 data for h5_head
        h5_atoms = [
            create_test_atoms(num_atoms=5, seed=seeds[1] + i) for i in range(30)
        ]
        # Add REF_ properties
        for atom in h5_atoms:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
            atom.info["head"] = "h5_head"  # Assign head
        h5_atoms_xyz = os.path.join(temp_dir, "h5_atoms.xyz")
        create_xyz_file(h5_atoms, h5_atoms_xyz)
        # Include isolated atoms for E0s in the h5 dataset
        all_atoms_for_h5 = h5_atoms + isolated_atoms
        all_atoms_h5_xyz = os.path.join(temp_dir, "all_atoms_for_h5.xyz")
        create_xyz_file(all_atoms_for_h5, all_atoms_h5_xyz)
        create_h5_dataset(all_atoms_h5_xyz, h5_folder)
        # Create config.yaml for multihead finetuning
        heads = {
            "xyz_head": {
                "train_file": xyz_file,
                "valid_fraction": 0.2,
                "energy_key": "REF_energy",
                "forces_key": "REF_forces",
                "stress_key": "REF_stress",
                "E0s": e0s_file,
            },
            "h5_head": {
                "train_file": os.path.join(h5_folder, "train"),
                "valid_file": os.path.join(h5_folder, "val"),
                "energy_key": "REF_energy",
                "forces_key": "REF_forces",
                "stress_key": "REF_stress",
                "E0s": e0s_file,
            },
        }
        # Hand-rolled YAML serialization of the heads mapping (two-level indent).
        yaml_str = "heads:\n"
        for key, value in heads.items():
            yaml_str += f"  {key}:\n"
            for sub_key, sub_value in value.items():
                yaml_str += f"    {sub_key}: {sub_value}\n"
        with open(config_path, "w", encoding="utf-8") as f:
            f.write(yaml_str)
        # Now perform multihead finetuning
        finetuning_params = {
            "name": "multihead_finetuned",
            "config": config_path,
            "foundation_model": "small",  # Use the small foundation model
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "model": "MACE",
            "hidden_irreps": "128x0e",  # Match foundation model
            "r_max": 5.0,
            "batch_size": 2,
            "max_num_epochs": 2,  # Just do a quick finetuning for test
            "device": "cpu",
            "seed": 42,
            "loss": "weighted",
            "default_dtype": "float64",
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "results_dir": results_dir,
            "atomic_numbers": "[" + ",".join(map(str, z_table_elements)) + "]",
            "multiheads_finetuning": True,
            "filter_type_pt": "combinations",
            "subselect_pt": "random",
            "num_samples_pt": 10,  # Small number for testing
            "force_mh_ft_lr": True,  # Force using specified learning rate
        }
        # Run finetuning
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        cmd = [sys.executable, str(run_train_script)]
        # None-valued params become bare flags; everything else is --key=value.
        for k, v in finetuning_params.items():
            if v is None:
                cmd.append(f"--{k}")
            else:
                cmd.append(f"--{k}={v}")
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Finetuning failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multihead_finetuned.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Load model and verify it has the expected heads
        model = torch.load(model_path, map_location="cpu")
        assert hasattr(model, "heads"), "Model does not have heads attribute"
        assert set(["xyz_head", "h5_head", "pt_head"]).issubset(
            set(model.heads)
        ), "Expected heads not found in model"
        # Try to run the model with both heads
        # For xyz_head
        calc_xyz = MACECalculator(
            model_paths=model_path,
            device="cpu",
            head="xyz_head",
            default_dtype="float64",
        )
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc_xyz
        energy_xyz = test_atom.get_potential_energy()
        forces_xyz = test_atom.get_forces()
        # For h5_head
        calc_h5 = MACECalculator(
            model_paths=model_path,
            device="cpu",
            head="h5_head",
            default_dtype="float64",
        )
        test_atom.calc = calc_h5
        energy_h5 = test_atom.get_potential_energy()
        forces_h5 = test_atom.get_forces()
        # Verify results
        assert np.isfinite(energy_xyz), "xyz_head produced non-finite energy"
        assert np.all(np.isfinite(forces_xyz)), "xyz_head produced non-finite forces"
        assert np.isfinite(energy_h5), "h5_head produced non-finite energy"
        assert np.all(np.isfinite(forces_h5)), "h5_head produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
mace-bench/3rdparty/mace/tests/test_preprocess.py
0 → 100644
View file @
1be78103
import
os
import
subprocess
import
sys
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
import
yaml
from
ase.atoms
import
Atoms
# Repository root: one level above this tests/ directory.
pytest_mace_dir = Path(__file__).parent.parent
# CLI entry point exercised by the tests in this module.
preprocess_data = pytest_mace_dir / "mace" / "cli" / "preprocess_data.py"
@pytest.fixture(name="sample_configs")
def fixture_sample_configs():
    """Provide two isolated-atom configs plus ten randomly rattled waters.

    The first two entries are O and H isolated atoms tagged as
    ``IsolatedAtom`` with zero reference energy; the remaining ten are copies
    of a periodic water molecule with seeded random perturbations and random
    REF_energy / REF_forces / REF_stress labels.
    """
    base_water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    for iso in configs:
        iso.info["REF_energy"] = 0.0
        iso.info["config_type"] = "IsolatedAtom"
    np.random.seed(5)  # fixed seed -> deterministic fixture data
    for _ in range(10):
        rattled = base_water.copy()
        rattled.positions += np.random.normal(0.1, size=rattled.positions.shape)
        rattled.info["REF_energy"] = np.random.normal(0.1)
        rattled.new_array(
            "REF_forces", np.random.normal(0.1, size=rattled.positions.shape)
        )
        rattled.info["REF_stress"] = np.random.normal(0.1, size=6)
        configs.append(rattled)
    return configs
def test_preprocess_data(tmp_path, sample_configs):
    """Run preprocess_data.py on an XYZ sample via CLI flags and validate its HDF5/statistics output."""
    ase.io.write(tmp_path / "sample.xyz", sample_configs)
    # CLI parameters; a value of None becomes a bare --flag below.
    preprocess_params = {
        "train_file": tmp_path / "sample.xyz",
        "r_max": 5.0,
        "config_type_weights": "{'Default':1.0}",
        "num_process": 2,
        "valid_fraction": 0.1,
        "h5_prefix": tmp_path / "preprocessed_",
        "compute_statistics": None,
        "seed": 42,
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
    }
    # Propagate the repo root to the subprocess so it imports the local mace.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(preprocess_data)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in preprocess_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Check if the output files are created
    assert (tmp_path / "preprocessed_train").is_dir()
    assert (tmp_path / "preprocessed_val").is_dir()
    assert (tmp_path / "preprocessed_statistics.json").is_file()
    # Check if the correct number of files are created (one shard per process)
    train_files = list((tmp_path / "preprocessed_train").glob("*.h5"))
    val_files = list((tmp_path / "preprocessed_val").glob("*.h5"))
    assert len(train_files) == preprocess_params["num_process"]
    assert len(val_files) == preprocess_params["num_process"]
    # Example of checking statistics file content:
    import json

    with open(
        tmp_path / "preprocessed_statistics.json", "r", encoding="utf-8"
    ) as f:
        statistics = json.load(f)
    assert "atomic_energies" in statistics
    assert "avg_num_neighbors" in statistics
    assert "mean" in statistics
    assert "std" in statistics
    assert "atomic_numbers" in statistics
    assert "r_max" in statistics
    # Example of checking H5 file content:
    import h5py

    with h5py.File(train_files[0], "r") as f:
        assert "config_batch_0" in f
        config = f["config_batch_0"]["config_0"]
        assert "atomic_numbers" in config
        assert "positions" in config
        assert "energy" in config["properties"]
        assert "forces" in config["properties"]
    # Reference labels from the input (skip the two IsolatedAtom entries).
    original_energies = [
        config.info["REF_energy"]
        for config in sample_configs[2:]
        if "REF_energy" in config.info
    ]
    original_forces = [
        config.arrays["REF_forces"]
        for config in sample_configs[2:]
        if "REF_forces" in config.arrays
    ]
    # Collect everything written to train and val shards.
    h5_energies = []
    h5_forces = []
    for train_file in train_files:
        with h5py.File(train_file, "r") as f:
            for _, batch in f.items():
                for config_key in batch.keys():
                    config = batch[config_key]
                    assert "atomic_numbers" in config
                    assert "positions" in config
                    assert "energy" in config["properties"]
                    assert "forces" in config["properties"]
                    h5_energies.append(config["properties"]["energy"][()])
                    h5_forces.append(config["properties"]["forces"][()])
    for val_file in val_files:
        with h5py.File(val_file, "r") as f:
            for _, batch in f.items():
                for config_key in batch.keys():
                    config = batch[config_key]
                    h5_energies.append(config["properties"]["energy"][()])
                    h5_forces.append(config["properties"]["forces"][()])
    print("Original energies", original_energies)
    print("H5 energies", h5_energies)
    print("Original forces", original_forces)
    print("H5 forces", h5_forces)
    # Sort both sides so the comparison ignores shard/split ordering.
    original_energies.sort()
    h5_energies.sort()
    original_forces = np.concatenate(original_forces).flatten()
    h5_forces = np.concatenate(h5_forces).flatten()
    original_forces.sort()
    h5_forces.sort()
    # Compare energies and forces
    np.testing.assert_allclose(original_energies, h5_energies, rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(original_forces, h5_forces, rtol=1e-5, atol=1e-8)
    print("All checks passed successfully!")
def test_preprocess_config(tmp_path, sample_configs):
    """Run preprocess_data.py driven by a YAML config file instead of CLI flags.

    Writes the sample configurations to XYZ, dumps the preprocessing
    parameters to ``config.yaml``, and checks the CLI subprocess exits
    cleanly when pointed at that config.
    """
    ase.io.write(tmp_path / "sample.xyz", sample_configs)
    preprocess_params = {
        "train_file": str(tmp_path / "sample.xyz"),
        "r_max": 5.0,
        "config_type_weights": "{'Default':1.0}",
        "num_process": 2,
        "valid_fraction": 0.1,
        "h5_prefix": str(tmp_path / "preprocessed_"),
        "compute_statistics": None,
        "seed": 42,
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
    }
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        yaml.dump(preprocess_params, file)
    # Propagate the repo root so the subprocess imports the local mace package.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    pieces = [sys.executable, str(preprocess_data), "--config", str(filename)]
    cmd = " ".join(pieces)
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
mace-bench/3rdparty/mace/tests/test_run_train.py
0 → 100644
View file @
1be78103
import
json
import
os
import
subprocess
import
sys
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
import
torch
from
ase.atoms
import
Atoms
from
mace.calculators
import
MACECalculator
,
mace_mp
# Optional dependency probe: some tests need cuequivariance; flag its presence
# so they can be skipped/branched when it is not installed.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

# Path to the training CLI script, resolved relative to this test file.
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
@pytest.fixture(name="fitting_configs")
def fixture_fitting_configs():
    """Build a small deterministic fitting set: 2 isolated atoms + 20 perturbed waters.

    NOTE: the exact sequence of np.random calls under seed(5) determines the
    configurations, and the hard-coded ``ref_Es`` values in the tests below
    were produced from exactly this data — do not reorder the RNG calls.
    """
    water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    # Isolated O and H atoms supply the E0 reference energies.
    fit_configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    fit_configs[0].info["REF_energy"] = 0.0
    fit_configs[0].info["config_type"] = "IsolatedAtom"
    fit_configs[1].info["REF_energy"] = 0.0
    fit_configs[1].info["config_type"] = "IsolatedAtom"
    np.random.seed(5)
    for _ in range(20):
        c = water.copy()
        # Randomly displace atoms and attach synthetic reference labels.
        c.positions += np.random.normal(0.1, size=c.positions.shape)
        c.info["REF_energy"] = np.random.normal(0.1)
        print(c.info["REF_energy"])
        c.new_array("REF_forces", np.random.normal(0.1, size=c.positions.shape))
        c.info["REF_stress"] = np.random.normal(0.1, size=6)
        fit_configs.append(c)
    return fit_configs
@pytest.fixture(name="pretraining_configs")
def fixture_pretraining_configs():
    """Build a pretraining set: 10 random waters plus isolated O and H atoms.

    NOTE(review): unlike fixture_fitting_configs, no np.random.seed is set
    here, so this fixture is nondeterministic across runs — presumably
    acceptable because the replay-finetuning test only makes loose
    assertions, but confirm before relying on reproducibility.
    """
    configs = []
    for _ in range(10):
        atoms = Atoms(
            numbers=[8, 1, 1],
            positions=np.random.rand(3, 3) * 3,
            cell=[5, 5, 5],
            pbc=[True] * 3,
        )
        # Synthetic reference labels for energy, forces and stress.
        atoms.info["REF_energy"] = np.random.normal(0, 1)
        atoms.arrays["REF_forces"] = np.random.normal(0, 1, size=(3, 3))
        atoms.info["REF_stress"] = np.random.normal(0, 1, size=6)
        configs.append(atoms)
    # Isolated atoms provide the E0s for the pretraining head.
    configs.append(
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3, pbc=[True] * 3),
    )
    configs.append(
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3, pbc=[True] * 3)
    )
    configs[-2].info["REF_energy"] = -2.0
    configs[-2].info["config_type"] = "IsolatedAtom"
    configs[-1].info["REF_energy"] = -4.0
    configs[-1].info["config_type"] = "IsolatedAtom"
    return configs
# Baseline CLI arguments shared by the run_train tests.  Each test copies
# and tweaks this dict, then renders it to "--key=value" arguments; a value
# of None is rendered as a bare "--key" flag.
_mace_params = {
    "name": "MACE",
    "valid_fraction": 0.05,
    "energy_weight": 1.0,
    "forces_weight": 10.0,
    "stress_weight": 1.0,
    "model": "MACE",
    "hidden_irreps": "128x0e",
    "r_max": 3.5,
    "batch_size": 5,
    "max_num_epochs": 10,
    "swa": None,
    "start_swa": 5,
    "ema": None,
    "ema_decay": 0.99,
    "amsgrad": None,
    "restart_latest": None,
    "device": "cpu",
    "seed": 5,
    "loss": "stress",
    "energy_key": "REF_energy",
    "forces_key": "REF_forces",
    "stress_key": "REF_stress",
    "eval_interval": 2,
}
def test_run_train(tmp_path, fitting_configs):
    """Train a small MACE model via the CLI and check predicted energies
    against hard-coded reference values from a known-good run."""
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    # Build argv as a list instead of a string + .split(): splitting on
    # whitespace would break if tmp_path or the interpreter path contained
    # spaces.  None values become bare flags.
    cmd = [sys.executable, str(run_train)] + [
        (f"--{k}={v}" if v is not None else f"--{k}")
        for k, v in mace_params.items()
    ]
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        0.0,
        0.0,
        -0.039181344585828524,
        -0.0915223395136733,
        -0.14953484236456582,
        -0.06662480820063998,
        -0.09983737353050133,
        0.12477442296789745,
        -0.06486086271762856,
        -0.1460607988519944,
        0.12886334908465508,
        -0.14000990081920373,
        -0.05319886578958313,
        0.07780520158391,
        -0.08895480281886901,
        -0.15474719614734422,
        0.007756765146527644,
        -0.044879267197498685,
        -0.036065736712447574,
        -0.24413743841886623,
        -0.0838104612106429,
        -0.14751978636626545,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_missing_data(tmp_path, fitting_configs):
    """Same as test_run_train, but with some labels deliberately removed to
    check that training tolerates configurations missing energy, forces or
    stress."""
    # Remove one label of each kind from different configurations.
    del fitting_configs[5].info["REF_energy"]
    del fitting_configs[6].arrays["REF_forces"]
    del fitting_configs[7].info["REF_stress"]
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the (partially unlabelled) fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        0.0,
        0.0,
        -0.05464025113696155,
        -0.11272131295940478,
        0.039200919331076826,
        -0.07517990972827505,
        -0.13504202474582666,
        0.0292022872055344,
        -0.06541099574579018,
        -0.1497824717832886,
        0.19397709360828813,
        -0.13587609467143014,
        -0.05242956276828463,
        -0.0504862057364953,
        -0.07095795959430119,
        -0.2463753796753703,
        -0.002031543147676121,
        -0.03864918790300681,
        -0.13680153117705554,
        -0.23418951968636786,
        -0.11790833839379238,
        -0.14930562311066484,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_no_stress(tmp_path, fitting_configs):
    """Like test_run_train_missing_data but with the 'weighted' loss, which
    does not use stress labels."""
    # Remove one label of each kind from different configurations.
    del fitting_configs[5].info["REF_energy"]
    del fitting_configs[6].arrays["REF_forces"]
    del fitting_configs[7].info["REF_stress"]
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    # Override the baseline "stress" loss with a stress-free loss.
    mace_params["loss"] = "weighted"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 28/03/2023 on main 88d49f9ed6925dec07d1777043a36e1fe4872ff3
    ref_Es = [
        0.0,
        0.0,
        -0.05450093218377135,
        -0.11235475232750518,
        0.03914558031854152,
        -0.07500839914816063,
        -0.13469160624431492,
        0.029384214243251838,
        -0.06521819204166135,
        -0.14944896282001804,
        0.19413948083049481,
        -0.13543541860473626,
        -0.05235495076237124,
        -0.049556206595684105,
        -0.07080758913030646,
        -0.24571898386301153,
        -0.002070636306950905,
        -0.03863113401320783,
        -0.13620291339913712,
        -0.23383074855679695,
        -0.11776449630199368,
        -0.1489441490225184,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_multihead(tmp_path, fitting_configs):
    """Train a three-head (DFT/MP2/CCD) model from a YAML heads config and
    check energies predicted by the CCD head against reference values."""
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    fitting_configs_ccd = []
    # Duplicate every configuration into each head's training set, tagging
    # the copies with the head name via info["head"].
    for _, c in enumerate(fitting_configs):
        c_dft = c.copy()
        c_dft.info["head"] = "DFT"
        fitting_configs_dft.append(c_dft)
        c_mp2 = c.copy()
        c_mp2.info["head"] = "MP2"
        fitting_configs_mp2.append(c_mp2)
        c_ccd = c.copy()
        c_ccd.info["head"] = "CCD"
        fitting_configs_ccd.append(c_ccd)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)
    ase.io.write(tmp_path / "fit_multihead_ccd.xyz", fitting_configs_ccd)

    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
        "CCD": {"train_file": f"{str(tmp_path)}/fit_multihead_ccd.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)

    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["loss"] = "weighted"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["batch_size"] = 2
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate using the CCD head only.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cpu",
        default_dtype="float64",
        head="CCD",
    )

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 02/09/2024 on develop branch
    ref_Es = [
        0.0,
        0.0,
        0.10637113905361611,
        -0.012499594026624754,
        0.08983077108171753,
        0.21071322543112597,
        -0.028921849222784398,
        -0.02423359575741567,
        0.022923252188079057,
        -0.02048334610058991,
        0.4349711162741364,
        -0.04455577015569887,
        -0.09765806785570091,
        0.16013134616829822,
        0.0758442928017698,
        -0.05931856557011721,
        0.33964473532953265,
        0.134338442158641,
        0.18024119757783053,
        -0.18914740992058765,
        -0.06503477155294624,
        0.03436649147415213,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_foundation(tmp_path, fitting_configs):
    """Fine-tune the 'small' foundation model (single head) on the fitting
    set and check predicted energies against reference values."""
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    mace_params["loss"] = "weighted"
    # "small" selects a published foundation checkpoint by alias.
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    # Disable multihead replay: plain single-head fine-tuning.
    mace_params["multiheads_finetuning"] = False
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
    )

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 28/03/2023 on repulsion a63434aaab70c84ee016e13e4aca8d57297a0f26
    ref_Es = [
        1.6780993938446045,
        0.8916864395141602,
        0.7290308475494385,
        0.6194742918014526,
        0.6697757840156555,
        0.7025266289710999,
        0.5818213224411011,
        0.7897703647613525,
        0.6558921337127686,
        0.5071806907653809,
        3.581131935119629,
        0.691562294960022,
        0.6257331967353821,
        0.9560437202453613,
        0.7716934680938721,
        0.6730310916900635,
        0.8297463655471802,
        0.8053972721099854,
        0.8337507247924805,
        0.4107491970062256,
        0.6019601821899414,
        0.7301387786865234,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_foundation_multihead(tmp_path, fitting_configs):
    """Fine-tune the 'small' foundation model with two heads (DFT/MP2) and
    check per-head energy predictions against loose reference values.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    # Isolated atoms (indices 0, 1) go to both heads; the rest alternate.
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            fitting_configs_dft.append(c)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        elif i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # CLI expects a bracketed, comma-separated list, e.g. "[1,8]".
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # Evaluate each configuration with the calculator bound to its head.
    Es = []
    for at in fitting_configs:
        config_head = at.info.get("head", "MP2")
        calc = MACECalculator(
            model_paths=tmp_path / "MACE.model",
            device="cpu",
            default_dtype="float64",
            head=config_head,
        )
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_foundation_multihead_json(tmp_path, fitting_configs):
    """Variant of the multihead foundation test where per-head E0s come from
    JSON files instead of isolated-atom configurations.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            continue  # skip isolated atoms, as energies specified by json files below
        if i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    # write E0s to json files
    E0s = {1: 0.0, 8: 0.0}
    with open(tmp_path / "fit_multihead_dft.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    with open(tmp_path / "fit_multihead_mp2.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)

    heads = {
        "DFT": {
            "train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_dft.json",
        },
        "MP2": {
            "train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_mp2.json",
        },
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # CLI expects a bracketed, comma-separated list, e.g. "[1,8]".
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # Evaluate each configuration with the calculator bound to its head.
    Es = []
    for at in fitting_configs:
        config_head = at.info.get("head", "MP2")
        calc = MACECalculator(
            model_paths=tmp_path / "MACE.model",
            device="cpu",
            default_dtype="float64",
            head=config_head,
        )
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_multihead_replay_custum_finetuning(
    tmp_path, fitting_configs, pretraining_configs
):
    """Two-stage flow: pretrain a small 'foundation' model from scratch, then
    fine-tune it with multihead replay (DFT/MP2 heads plus a replayed
    pt_head), and sanity-check the fine-tuned model's predictions.

    NOTE(review): "custum" in the test name looks like a typo for "custom";
    left unchanged since pytest selects tests by name.
    """
    # Step 1: Pretrain a small foundation model from scratch.
    ase.io.write(tmp_path / "pretrain.xyz", pretraining_configs)
    foundation_params = {
        "name": "foundation",
        "train_file": os.path.join(tmp_path, "pretrain.xyz"),
        "valid_fraction": 0.2,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "32x0e",
        "r_max": 5.0,
        "batch_size": 2,
        "max_num_epochs": 5,
        "swa": None,
        "start_swa": 3,
        "device": "cpu",
        "seed": 42,
        "loss": "weighted",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "default_dtype": "float64",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
    }
    # Step 2: Run pretraining with the repo-under-test on PYTHONPATH.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    cmd = [sys.executable, str(run_train)]
    for k, v in foundation_params.items():
        if v is None:
            cmd.append(f"--{k}")
        else:
            cmd.append(f"--{k}={v}")
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0
    # Step 3: Create finetuning set
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            # Isolated atoms go to both heads (DFT copy + untagged + MP2 copy).
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            fitting_configs_dft.append(c)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        elif i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    # Step 4: Finetune the pretrained model with multihead replay
    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    finetuning_params = {
        "name": "finetuned",
        "valid_fraction": 0.1,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "32x0e",
        "r_max": 5.0,
        "batch_size": 2,
        "max_num_epochs": 5,
        "device": "cpu",
        "seed": 42,
        "loss": "weighted",
        "default_dtype": "float64",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "foundation_model": os.path.join(tmp_path, "foundation.model"),
        "config": os.path.join(tmp_path, "config.yaml"),
        "pt_train_file": os.path.join(tmp_path, "pretrain.xyz"),
        "num_samples_pt": 3,
        "subselect_pt": "random",
        "force_mh_ft_lr": True,
    }
    cmd = [sys.executable, str(run_train)]
    for k, v in finetuning_params.items():
        if v is None:
            cmd.append(f"--{k}")
        else:
            cmd.append(f"--{k}={v}")
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0
    # Load and test the finetuned model
    calc = MACECalculator(
        model_paths=tmp_path / "finetuned.model",
        device="cpu",
        default_dtype="float64",
        head="pt_head",
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Energies:", Es)
    # Add some basic checks
    assert len(Es) == len(fitting_configs)
    assert all(isinstance(E, float) for E in Es)
    assert len(set(Es)) > 1  # Ens
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_run_train_cueq(tmp_path, fitting_configs):
    """Train with cuequivariance enabled and check that CUDA evaluation,
    CPU+cueq evaluation, and the hard-coded reference energies agree.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    torch.set_default_dtype(torch.float64)
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    mace_params["enable_cueq"] = True
    mace_params["default_dtype"] = "float64"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # NOTE(review): this evaluation requests device="cuda" even though
    # training ran on CPU — presumably the test is meant for GPU runners;
    # confirm before running on CPU-only CI.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cuda")
    Es = []
    # Skip the two isolated-atom configurations (indices 0 and 1).
    for at in fitting_configs[2:]:
        at.calc = calc
        Es.append(at.get_potential_energy())

    # Re-evaluate on CPU with the cueq-accelerated kernels enabled.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", enable_cueq=True
    )
    Es_cueq = []
    for at in fitting_configs[2:]:
        at.calc = calc
        Es_cueq.append(at.get_potential_energy())

    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        -0.039181344585828524,
        -0.0915223395136733,
        -0.14953484236456582,
        -0.06662480820063998,
        -0.09983737353050133,
        0.12477442296789745,
        -0.06486086271762856,
        -0.1460607988519944,
        0.12886334908465508,
        -0.14000990081920373,
        -0.05319886578958313,
        0.07780520158391,
        -0.08895480281886901,
        -0.15474719614734422,
        0.007756765146527644,
        -0.044879267197498685,
        -0.036065736712447574,
        -0.24413743841886623,
        -0.0838104612106429,
        -0.14751978636626545,
    ]
    assert np.allclose(Es, ref_Es)
    assert np.allclose(ref_Es, Es_cueq)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_run_train_foundation_multihead_json_cueq(tmp_path, fitting_configs):
    """End-to-end multihead fine-tuning (DFT + MP2 heads) with cuequivariance
    enabled, run through the CLI as a subprocess and checked against stored
    reference energies.

    The head definitions and per-head E0s are supplied via a YAML config and
    JSON files rather than on the command line. Runs on CUDA.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    # All distinct elements in the fitting set, passed to --atomic_numbers.
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    # Alternate the remaining configs between the two heads.
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            continue  # skip isolated atoms, as energies specified by json files below
        if i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)
    # write E0s to json files
    E0s = {1: 0.0, 8: 0.0}
    with open(tmp_path / "fit_multihead_dft.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    with open(tmp_path / "fit_multihead_mp2.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    # Per-head train files and E0s, serialised to a minimal YAML config below.
    heads = {
        "DFT": {
            "train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_dft.json",
        },
        "MP2": {
            "train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_mp2.json",
        },
    }
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    mace_params["enable_cueq"] = True
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["device"] = "cuda"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    # Serialise params to "--key=value" flags; None-valued keys become bare flags.
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise e
    assert completed_process.returncode == 0
    # Evaluate the fine-tuned model's DFT head on every fitting config.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cuda",
        default_dtype="float64",
        head="DFT",
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    # Loose tolerance: training is stochastic across environments.
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_lbfgs(tmp_path, fitting_configs):
    """Short training run with the --lbfgs flag enabled, checked against
    reference energies recorded from a previous run.
    """
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)
    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    # None value -> emitted as the bare "--lbfgs" flag below.
    mace_params["lbfgs"] = None
    mace_params["max_num_epochs"] = 2
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Es", Es)
    # from a run on 14/03/2025
    ref_Es = [
        0.0,
        0.0,
        -0.1874197850340979,
        -0.25991775038059006,
        0.18263492399322268,
        -0.15026829765490662,
        -0.2403061362015996,
        0.1689257170630718,
        -0.2095568077455055,
        -0.2957758160829075,
        -0.0035370913684985364,
        -0.2195416610745775,
        -0.25405549447739517,
        -0.06201390990366806,
        -0.13332219494388334,
        -0.19633181702040337,
        0.013014932630445699,
        -0.08808335967147174,
        -0.06664444189210728,
        -0.4230467426992034,
        -0.2348250569553676,
        -0.17593904833220647,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-2)
def test_run_train_foundation_elements(tmp_path, fitting_configs):
    """Fine-tuning element filtering: by default the fine-tuned model keeps
    only the elements present in the training data ({H, O}), while passing
    --foundation_model_elements preserves the foundation model's full element
    set. Both resulting models must still produce finite energies.
    """
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)
    base_params = {
        "name": "MACE",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "train_file": tmp_path / "fit.xyz",
        "loss": "weighted",
        "foundation_model": "small",
        "hidden_irreps": "128x0e",
        "r_max": 6.0,
        "default_dtype": "float64",
        "max_num_epochs": 5,
        "num_radial_basis": 10,
        "interaction_first": "RealAgnosticResidualInteractionBlock",
        "multiheads_finetuning": False,
    }
    # Run environment setup
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    # First run: without foundation_model_elements (default behavior)
    mace_params = base_params.copy()
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_filtered = torch.load(tmp_path / "MACE.model", map_location="cpu")
    filtered_elements = set(int(z) for z in model_filtered.atomic_numbers)
    assert filtered_elements == {1, 8}  # Only H and O should be present
    # Second run: with foundation_model_elements
    mace_params = base_params.copy()
    mace_params["name"] = "MACE_all_elements"
    mace_params["foundation_model_elements"] = True  # Flag-only argument
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_all = torch.load(tmp_path / "MACE_all_elements.model", map_location="cpu")
    all_elements = set(int(z) for z in model_all.atomic_numbers)
    # Get elements from foundation model for comparison
    calc = mace_mp(model="small", device="cpu")
    foundation_elements = set(int(z) for z in calc.models[0].atomic_numbers)
    # Check that all foundation model elements are preserved
    assert all_elements == foundation_elements
    assert len(all_elements) > len(filtered_elements)
    # Check that both models can make predictions
    at = fitting_configs[2].copy()
    # Test filtered model
    calc_filtered = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
    )
    at.calc = calc_filtered
    e1 = at.get_potential_energy()
    # Test all-elements model
    calc_all = MACECalculator(
        model_paths=tmp_path / "MACE_all_elements.model",
        device="cpu",
        default_dtype="float64",
    )
    at.calc = calc_all
    e2 = at.get_potential_energy()
    # Energies should be different since the models are trained differently,
    # but both should give reasonable results
    assert np.isfinite(e1)
    assert np.isfinite(e2)
def test_run_train_foundation_elements_multihead(tmp_path, fitting_configs):
    """Same element-filtering check as test_run_train_foundation_elements, but
    for multihead fine-tuning with DFT and MP2 heads defined in a YAML config.

    Both runs must end with three heads (pt_head + DFT + MP2); the second run
    (--foundation_model_elements) must keep the foundation model's element set.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    for i, c in enumerate(fitting_configs):
        # Isolated atoms (first two configs) are added to BOTH heads so each
        # head has E0 reference structures.
        if i in (0, 1):
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        # Every config is additionally assigned to one head by parity.
        if i % 2 == 0:
            c_copy = c.copy()
            c_copy.info["head"] = "DFT"
            fitting_configs_dft.append(c_copy)
        else:
            c_copy = c.copy()
            c_copy.info["head"] = "MP2"
            fitting_configs_mp2.append(c_copy)
    ase.io.write(tmp_path / "fit_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_mp2.xyz", fitting_configs_mp2)
    # Create multihead configuration
    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_mp2.xyz"},
    }
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    config_file = tmp_path / "config.yaml"
    with open(config_file, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    base_params = {
        "name": "MACE",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "config": str(config_file),
        "loss": "weighted",
        "foundation_model": "small",
        "hidden_irreps": "128x0e",
        "r_max": 6.0,
        "default_dtype": "float64",
        "max_num_epochs": 5,
        "num_radial_basis": 10,
        "interaction_first": "RealAgnosticResidualInteractionBlock",
        "force_mh_ft_lr": True,
        "batch_size": 1,
        "num_samples_pt": 50,
        "subselect_pt": "random",
        "atomic_numbers": "[" + ",".join(map(str, atomic_numbers)) + "]",
        "filter_type_pt": "combinations",
        "valid_fraction": 0.1,
        "valid_batch_size": 1,
    }
    # Run environment setup
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    # First run: without foundation_model_elements (default behavior)
    mace_params = base_params.copy()
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise e
    assert completed_process.returncode == 0
    # Load model and check elements
    model_filtered = torch.load(tmp_path / "MACE.model", map_location="cpu")
    filtered_elements = set(int(z) for z in model_filtered.atomic_numbers)
    assert filtered_elements == {1, 8}  # Only H and O should be present
    assert len(model_filtered.heads) == 3  # pt_head + DFT + MP2
    # Second run: with foundation_model_elements
    mace_params = base_params.copy()
    mace_params["name"] = "MACE_all_elements"
    mace_params["foundation_model_elements"] = True
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_all = torch.load(tmp_path / "MACE_all_elements.model", map_location="cpu")
    all_elements = set(int(z) for z in model_all.atomic_numbers)
    # Get elements from foundation model for comparison
    calc = mace_mp(model="small", device="cpu")
    foundation_elements = set(int(z) for z in calc.models[0].atomic_numbers)
    # Check that all foundation model elements are preserved
    assert all_elements == foundation_elements
    assert len(all_elements) > len(filtered_elements)
    assert len(model_all.heads) == 3  # pt_head + DFT + MP2
    # Check that both models can make predictions
    at = fitting_configs_dft[2].copy()
    # Test filtered model
    calc_filtered = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cpu",
        default_dtype="float64",
        head="DFT",
    )
    at.calc = calc_filtered
    e1 = at.get_potential_energy()
    # Test all-elements model
    calc_all = MACECalculator(
        model_paths=tmp_path / "MACE_all_elements.model",
        device="cpu",
        default_dtype="float64",
        head="DFT",
    )
    at.calc = calc_all
    e2 = at.get_potential_energy()
    assert np.isfinite(e1)
    assert np.isfinite(e2)
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
0 → 100644
View file @
1be78103
import
os
import
subprocess
import
sys
from
copy
import
deepcopy
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
from
ase.atoms
import
Atoms
from
mace.calculators.mace
import
MACECalculator
from
mace.cli.run_train
import
run
as
run_mace_train
from
mace.data.utils
import
KeySpecification
from
mace.tools
import
build_default_arg_parser
# Path to the CLI training entry point; the tests below invoke it as a subprocess.
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"

# Baseline CLI arguments shared by the tests in this module. Tests copy this
# dict, override/delete entries, then serialise it to "--key=value" flags
# (None-valued entries become bare "--key" flags).
_mace_params = {
    "name": "MACE",
    "valid_fraction": 0.05,
    "energy_weight": 1.0,
    "forces_weight": 10.0,
    "stress_weight": 1.0,
    "model": "MACE",
    "hidden_irreps": "128x0e",
    "max_num_epochs": 10,
    "swa": None,
    "start_swa": 5,
    "ema": None,
    "ema_decay": 0.99,
    "amsgrad": None,
    "device": "cpu",
    "seed": 5,
    "loss": "weighted",
    "energy_key": "REF_energy",
    "forces_key": "REF_forces",
    "stress_key": "REF_stress",
    "interaction_first": "RealAgnosticResidualInteractionBlock",
    "batch_size": 1,
    "valid_batch_size": 1,
    "num_samples_pt": 50,
    "subselect_pt": "random",
    "eval_interval": 2,
    "num_radial_basis": 10,
    "r_max": 6.0,
    "default_dtype": "float64",
}
def configs_numbered_keys():
    """Build 15 randomly perturbed water cells whose energy/force data are
    stored under a mix of property keys.

    The first config uses "REF_energy"/"REF_forces"; the next two use "2_*",
    then three "3_*", four "4_*" and five "5_*" keys, so different key
    specifications select different subsets of the reference data.
    """
    np.random.seed(0)
    base_water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    # Draw all reference data up front so the np.random call sequence (and
    # therefore every generated value) stays identical.
    energies = list(np.random.normal(0.1, size=15))
    forces = list(np.random.normal(0.1, size=(15, 3, 3)))
    # some keys present, some not: 1x REF, then n copies of "<n>_*" for n=2..5
    energy_keys = ["REF_energy"] + [
        f"{n}_energy" for n in range(2, 6) for _ in range(n)
    ]
    force_keys = ["REF_forces"] + [
        f"{n}_forces" for n in range(2, 6) for _ in range(n)
    ]
    configurations = []
    for idx in range(15):
        cfg = deepcopy(base_water)
        cfg.info[energy_keys[idx]] = energies[idx]
        cfg.arrays[force_keys[idx]] = forces[idx]
        # Perturb positions after the data draws, matching the original RNG order.
        cfg.positions += np.random.normal(0.1, size=(3, 3))
        configurations.append(cfg)
    return configurations
def trial_yamls_and_and_expected():
    """Enumerate key-specification test cases for test_key_specification_methods.

    Returns a list of (yaml_contents, (arg_set, case_name), expected_energies)
    triples: each YAML dict describes head/key configuration, optionally merged
    with command-line key overrides, and the expected energies are regression
    values for a 1-epoch training run (see make_output below).
    """
    yamls = {}
    # Key overrides passed on the command line in the "with_command_line" cases.
    command_line_kwargs = {"energy_key": "2_energy", "forces_key": "2_forces"}
    yamls["no_heads"] = {}
    yamls["one_head_no_dicts"] = {
        "heads": {
            "Default": {
                "energy_key": "3_energy",
            }
        }
    }
    yamls["one_head_with_dicts"] = {
        "heads": {
            "Default": {
                "info_keys": {
                    "energy": "3_energy",
                },
                "arrays_keys": {
                    "forces": "3_forces",
                },
            }
        }
    }
    yamls["two_heads_no_dicts"] = {
        "heads": {
            "dft": {
                "train_file": "fit_multihead_dft.xyz",
                "energy_key": "3_energy",
            },
            "mp2": {
                "train_file": "fit_multihead_mp2.xyz",
                "energy_key": "4_energy",
            },
        }
    }
    yamls["two_heads_mixed"] = {
        "heads": {
            "dft": {
                "train_file": "fit_multihead_dft.xyz",
                "info_keys": {
                    "energy": "3_energy",
                },
                "arrays_keys": {
                    "forces": "3_forces",
                },
                "forces_key": "4_forces",
            },
            "mp2": {
                "train_file": "fit_multihead_mp2.xyz",
                "energy_key": "4_energy",
            },
        }
    }
    # Each YAML case is tried both with and without the command-line overrides.
    all_arg_sets = {
        "with_command_line": {
            key: {**command_line_kwargs, **value} for key, value in yamls.items()
        },
        "without_command_line": yamls,
    }
    # Regression energies, 15 per case (one per config from configs_numbered_keys).
    all_expected_outputs = {
        "with_command_line": {
            "no_heads": [
                1.0037831178668188,
                1.0183291323603265,
                1.0120784084221528,
                0.9935695881012243,
                1.0021641561865526,
                0.9999135609205868,
                0.9809440616323108,
                1.0025784765050076,
                1.0017901145495376,
                1.0136913185404515,
                1.006798563238269,
                1.0187758397828384,
                1.0180201540775071,
                1.0132368725061702,
                0.9998734173248169,
            ],
            "one_head_no_dicts": [
                1.0028437510688613,
                1.0514693378041775,
                1.059933403321331,
                1.034719940573569,
                1.0438040675561824,
                1.019719477728329,
                0.9841759692947915,
                1.0435266573857496,
                1.0339501989779065,
                1.0501795448530264,
                1.0402594216704781,
                1.0604998765679152,
                1.0633411200246015,
                1.0539071190201297,
                1.0393496428177804,
            ],
            "one_head_with_dicts": [
                0.8638341551096959,
                1.0078341354784144,
                1.0149701178418595,
                0.9945723048460148,
                1.0184158011731292,
                0.9992135295205004,
                0.8943420783639198,
                1.0327920054084088,
                0.9905731198078909,
                0.9838325204450648,
                1.0018725575620482,
                1.007263052421034,
                1.0335213929231966,
                1.0033503312511205,
                1.0174433894759563,
            ],
            "two_heads_no_dicts": [
                0.9836377578288774,
                1.0196844186291318,
                1.0151628222871238,
                0.957307281711648,
                0.985574141310865,
                0.9629670134047853,
                0.9242583185138095,
                0.9807770070311039,
                0.9973679440479541,
                1.0221127246963275,
                1.0031807967874216,
                1.0358701219543687,
                1.0434208761164758,
                1.0235606028124515,
                0.9797494630655053,
            ],
            "two_heads_mixed": [
                0.8664108574741868,
                0.9907166576278023,
                1.0051969372365164,
                0.978702477000018,
                1.025500166764692,
                0.9940095566375018,
                0.9034029726954119,
                1.0391739502744488,
                0.9717327061183668,
                0.972292103670355,
                1.0012510461663253,
                0.9978051155885286,
                1.0378611651753475,
                1.0003207628186224,
                1.0209509292189651,
            ],
        },
        "without_command_line": {
            "no_heads": [
                0.9352605307451007,
                0.991084559389268,
                0.9940350095024881,
                0.9953849198103668,
                0.9954705498032904,
                0.9964815693808411,
                0.9663142667436776,
                0.9947223808739147,
                0.9897776682803257,
                0.989027769690667,
                0.9910280920241263,
                0.992067980667518,
                0.9917276132506404,
                0.9902848752169671,
                0.9928585982942544,
            ],
            "one_head_no_dicts": [
                0.9425342207393083,
                1.0149788456087416,
                1.0249228965652788,
                1.0247924743285792,
                1.02732103964481,
                1.0168852937950326,
                0.9771283495170653,
                1.0261776335561517,
                1.0130461033368028,
                1.0162619153561783,
                1.019995179866916,
                1.0209512298344965,
                1.0219971755636952,
                1.0195791901659124,
                1.0234662527729408,
            ],
            "one_head_with_dicts": [
                0.8638341551096959,
                1.0078341354784144,
                1.0149701178418595,
                0.9945723048460148,
                1.0184158011731292,
                0.9992135295205004,
                0.8943420783639198,
                1.0327920054084088,
                0.9905731198078909,
                0.9838325204450648,
                1.0018725575620482,
                1.007263052421034,
                1.0335213929231966,
                1.0033503312511205,
                1.0174433894759563,
            ],
            "two_heads_no_dicts": [
                0.9933763730233168,
                0.9986480398559268,
                1.0042486164355315,
                1.0025568793877726,
                1.0032598081704625,
                0.9926714183717912,
                0.9920385249670881,
                1.0020278841030676,
                1.0012474150830537,
                1.0039289677261019,
                1.0022718878661814,
                1.003586385624809,
                1.003436450009097,
                1.003805673887942,
                1.001450261102316,
            ],
            "two_heads_mixed": [
                0.8781767864616707,
                0.9843563603794138,
                1.0145197579049248,
                0.9835060778675391,
                1.0419060462994596,
                0.9917393978520056,
                0.9091521032773944,
                1.0605463095070453,
                0.9685381713826684,
                0.9866493058823766,
                1.00305061187164,
                1.0051273128414386,
                1.037964258398104,
                1.0106663924241408,
                1.0274351814133602,
            ],
        },
    }
    # Flatten into (yaml, (arg_set, case), expected) triples for parametrize.
    list_of_all = []
    for key, value in all_arg_sets.items():
        for key2, value2 in value.items():
            list_of_all.append(
                (value2, (key, key2), np.asarray(all_expected_outputs[key][key2]))
            )
    return list_of_all
def dict_to_yaml_str(data, indent=0):
    """Serialise a nested dict as minimal YAML text.

    Nested dicts become indented mappings (two extra spaces per level);
    scalar values are rendered with ``str``.
    """
    pieces = []
    for key, value in data.items():
        prefix = " " * indent + str(key) + ":"
        if isinstance(value, dict):
            # Mapping: key on its own line, children rendered one level deeper.
            pieces.append(prefix + "\n" + dict_to_yaml_str(value, indent + 2))
        else:
            pieces.append(prefix + " " + str(value) + "\n")
    return "".join(pieces)
# Materialised once at import time so the case list can be handed to
# pytest.mark.parametrize below.
_trial_yamls_and_and_expected = trial_yamls_and_and_expected()
@pytest.mark.parametrize(
    "yaml_contents, name, expected_value", _trial_yamls_and_and_expected
)
def test_key_specification_methods(tmp_path, yaml_contents, name, expected_value):
    """Train for one epoch with a given head/key configuration (YAML and/or
    command-line key overrides) and compare the resulting model's energies
    against the regression values from trial_yamls_and_and_expected.
    """
    fitting_configs = configs_numbered_keys()
    # The same data serves as train file for both heads and as valid file.
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs)
    ase.io.write(tmp_path / "duplicated_fit_multihead_dft.xyz", fitting_configs)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = "fit_multihead_dft.xyz"
    mace_params["E0s"] = "{1:0.0,8:1.0}"
    mace_params["valid_file"] = "duplicated_fit_multihead_dft.xyz"
    # An explicit valid_file replaces the fraction-based split.
    del mace_params["valid_fraction"]
    mace_params["max_num_epochs"] = 1  # many tests to do
    # Key selection comes entirely from the parametrised case, not the defaults.
    del mace_params["energy_key"]
    del mace_params["forces_key"]
    del mace_params["stress_key"]
    mace_params["name"] = "MACE_"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(dict_to_yaml_str(yaml_contents))
    # Only pass --config when the case actually defines YAML contents.
    if len(yaml_contents) > 0:
        mace_params["config"] = str(tmp_path / "config.yaml")
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    # cwd=tmp_path because the train/valid files are given as relative paths.
    p = subprocess.run(cmd.split(), env=run_env, cwd=tmp_path, check=True)
    assert p.returncode == 0
    # Evaluate with the first declared head (or "Default" when none declared).
    if "heads" in yaml_contents:
        headname = list(yaml_contents["heads"].keys())[0]
    else:
        headname = "Default"
    calc = MACECalculator(
        tmp_path / "MACE_.model", device="cpu", default_dtype="float64", head=headname
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print(name)
    print("Es", Es)
    assert np.allclose(
        np.asarray(Es), expected_value, rtol=1e-8, atol=1e-8
    ), f"Expected {expected_value} but got {Es} with error {np.max(np.abs(Es - expected_value))}"
def test_multihead_finetuning_does_not_modify_default_keyspec(tmp_path):
    """A fine-tuning dry run must leave args.key_specification equal to the
    defaults plus only the explicitly requested energy_key override."""
    fitting_configs = configs_numbered_keys()
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs)
    cli_argv = [
        "--name",
        "_MACE_",
        "--train_file",
        str(tmp_path / "fit_multihead_dft.xyz"),
        "--foundation_model",
        "small",
        "--device",
        "cpu",
        "--E0s",
        "{1:0.0,8:1.0}",
        "--energy_key",
        "2_energy",
        "--dry_run",
    ]
    args = build_default_arg_parser().parse_args(cli_argv)
    # Expected spec: defaults with just the energy key swapped out.
    expected_spec = KeySpecification.from_defaults()
    expected_spec.info_keys["energy"] = "2_energy"
    run_mace_train(args)
    assert args.key_specification == expected_spec
# for creating values
def make_output():
    """Regenerate the expected-energy tables consumed by
    trial_yamls_and_and_expected by running every parametrised case.

    Developer utility (not collected by pytest): prints a dict keyed by
    arg-set name, then case name.
    """
    outputs = {}
    for yaml_contents, name, expected_value in _trial_yamls_and_and_expected:
        # name is an (arg_set, case_name) pair; group results by arg_set.
        outputs.setdefault(name[0], {})
        # Bug fix: test_key_specification_methods has no ``debug_test``
        # parameter, so the previous call raised TypeError before any case ran.
        expected = test_key_specification_methods(
            Path("."), yaml_contents, name, expected_value
        )
        outputs[name[0]][name[1]] = expected
    print(outputs)
mace-bench/3rdparty/mace/tests/test_schedulefree.py
0 → 100644
View file @
1be78103
import
tempfile
from
unittest.mock
import
MagicMock
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
scripts_utils
,
torch_geometric
try:
    import schedulefree
except ImportError:
    # This module only exercises the optional schedule-free optimizer; skip
    # the whole file when the dependency is missing.
    pytest.skip("Skipping schedulefree tests due to ImportError", allow_module_level=True)

torch.set_default_dtype(torch.float64)

# Shared fixtures: a single-element (carbon) table, its reference atomic
# energy, and the cutoff used by both the model and the neighbour lists.
table = tools.AtomicNumberTable([6])
atomic_energies = np.array([1.0], dtype=float)
cutoff = 5.0
def create_mace(device: str, seed: int = 1702):
    """Build a small, deterministically seeded two-interaction MACE model and
    move it to ``device``."""
    torch_geometric.seed_everything(seed)
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    config = dict(
        r_max=cutoff,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=3,
        interaction_cls=residual_block,
        interaction_cls_first=residual_block,
        num_interactions=2,
        num_elements=1,
        hidden_irreps=o3.Irreps("8x0e + 8x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=F.silu,
        atomic_energies=atomic_energies,
        avg_num_neighbors=8,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
    )
    return modules.MACE(**config).to(device)
def create_batch(device: str):
    """Return a single-graph batch, as a plain dict on ``device``, built from
    a repeated cubic diamond carbon cell."""
    from ase import build

    size = 2
    primitive = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms_list = [primitive.repeat((size, size, size))]
    print("Number of atoms", len(atoms_list[0]))
    configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
    dataset = [
        data.AtomicData.from_config(config, z_table=table, cutoff=cutoff)
        for config in configs
    ]
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )
    first_batch = next(iter(data_loader))
    return first_batch.to(device).to_dict()
def do_optimization_step(
    model,
    optimizer,
    device,
):
    """Run one forward/backward/step cycle on a fresh batch, then put both
    model and optimizer back into eval mode.

    The optimizer exposes train()/eval() of its own and is kept in sync with
    the model's mode throughout.
    """
    batch = create_batch(device)
    model.train()
    optimizer.train()
    optimizer.zero_grad()
    energies = model(batch, training=True, compute_force=False)["energy"]
    loss = energies.mean()
    loss.backward()
    optimizer.step()
    model.eval()
    optimizer.eval()
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_can_load_checkpoint(device):
    """Saving and reloading a checkpoint with a schedule-free optimizer must
    leave the model's predictions unchanged."""
    model = create_mace(device)
    optimizer = schedulefree.adamw_schedulefree.AdamWScheduleFree(model.parameters())
    # Minimal stand-in for the CLI args object consumed by LRScheduler.
    args = MagicMock()
    args.optimizer = "schedulefree"
    args.scheduler = "ExponentialLR"
    args.lr_scheduler_gamma = 0.9
    lr_scheduler = scripts_utils.LRScheduler(optimizer, args)
    with tempfile.TemporaryDirectory() as d:
        checkpoint_handler = tools.CheckpointHandler(
            directory=d, keep=False, tag="schedulefree"
        )
        # Train a little so the optimizer holds non-trivial state.
        for _ in range(10):
            do_optimization_step(model, optimizer, device)
        batch = create_batch(device)
        output = model(batch)
        energy = output["energy"].detach().cpu().numpy()
        state = tools.CheckpointState(
            model=model, optimizer=optimizer, lr_scheduler=lr_scheduler
        )
        checkpoint_handler.save(state, epochs=0, keep_last=False)
        checkpoint_handler.load_latest(
            state=tools.CheckpointState(model, optimizer, lr_scheduler),
            swa=False,
        )
        # Same batch through the reloaded model must give the same energies.
        batch = create_batch(device)
        output = model(batch)
        new_energy = output["energy"].detach().cpu().numpy()
        assert np.allclose(energy, new_energy, atol=1e-9)
mace-bench/3rdparty/mace/tests/test_tools.py
0 → 100644
View file @
1be78103
import
tempfile
import
numpy
as
np
import
torch
import
torch.nn.functional
from
torch
import
nn
,
optim
from
mace.tools
import
(
AtomicNumberTable
,
CheckpointHandler
,
CheckpointState
,
atomic_numbers_to_indices
,
)
def test_atomic_number_table():
    """Atomic numbers are mapped to their positions within the table's z-list."""
    z_table = AtomicNumberTable(zs=[1, 8])
    numbers = np.array([8, 8, 1])
    result = atomic_numbers_to_indices(numbers, z_table=z_table)
    # O (index 1 in the table) twice, then H (index 0).
    assert np.allclose(np.array([1, 1, 0], dtype=int), result)
class MyModel(nn.Module):
    """Minimal linear + ReLU module used as a checkpointing fixture."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 4)

    def forward(self, x):
        return nn.functional.relu(self.linear(x))
def test_save_load():
    """Loading the latest checkpoint restores the optimizer's learning rate."""
    net = MyModel()
    lr0 = 0.001
    opt = optim.SGD(net.parameters(), lr=lr0, momentum=0.9)
    sched = optim.lr_scheduler.ExponentialLR(optimizer=opt, gamma=0.99)
    with tempfile.TemporaryDirectory() as ckpt_dir:
        ckpt = CheckpointHandler(directory=ckpt_dir, tag="test", keep=True)
        ckpt.save(state=CheckpointState(net, opt, sched), epochs=50)
        # Step once so the exponential schedule moves the LR away from lr0 ...
        opt.step()
        sched.step()
        assert not np.isclose(opt.param_groups[0]["lr"], lr0)
        # ... then restoring the saved state must bring the LR back.
        ckpt.load_latest(state=CheckpointState(net, opt, sched))
        assert np.isclose(opt.param_groups[0]["lr"], lr0)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment