Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
nivren
ICT-CSP
Commits
fa84b16c
Unverified
Commit
fa84b16c
authored
Aug 24, 2025
by
zcxzcx1
Committed by
GitHub
Aug 24, 2025
Browse files
Add files via upload
parent
09624897
Changes
52
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1028 additions
and
0 deletions
+1028
-0
mace-bench/reproduce/init_7net.sh
mace-bench/reproduce/init_7net.sh
+11
-0
mace-bench/reproduce/init_mace.sh
mace-bench/reproduce/init_mace.sh
+14
-0
mace-bench/reproduce/mace_opt_new.py
mace-bench/reproduce/mace_opt_new.py
+300
-0
mace-bench/reproduce/mace_opt_origin.py
mace-bench/reproduce/mace_opt_origin.py
+297
-0
mace-bench/reproduce/perf_v2_base/run_mace.sh
mace-bench/reproduce/perf_v2_base/run_mace.sh
+5
-0
mace-bench/reproduce/perf_v2_batch/opt.sh
mace-bench/reproduce/perf_v2_batch/opt.sh
+6
-0
mace-bench/reproduce/subtest.sh
mace-bench/reproduce/subtest.sh
+25
-0
mace-bench/reproduce/subtest_baseline.sh
mace-bench/reproduce/subtest_baseline.sh
+24
-0
mace-bench/requirements.txt
mace-bench/requirements.txt
+137
-0
mace-bench/scripts/mace_opt_batch.py
mace-bench/scripts/mace_opt_batch.py
+111
-0
mace-bench/setup.py
mace-bench/setup.py
+23
-0
mace-bench/src/BOMLIP_CSP.egg-info/PKG-INFO
mace-bench/src/BOMLIP_CSP.egg-info/PKG-INFO
+23
-0
mace-bench/src/BOMLIP_CSP.egg-info/SOURCES.txt
mace-bench/src/BOMLIP_CSP.egg-info/SOURCES.txt
+20
-0
mace-bench/src/BOMLIP_CSP.egg-info/dependency_links.txt
mace-bench/src/BOMLIP_CSP.egg-info/dependency_links.txt
+1
-0
mace-bench/src/BOMLIP_CSP.egg-info/top_level.txt
mace-bench/src/BOMLIP_CSP.egg-info/top_level.txt
+1
-0
mace-bench/src/batchopt/__init__.py
mace-bench/src/batchopt/__init__.py
+30
-0
mace-bench/src/batchopt/__pycache__/__init__.cpython-310.pyc
mace-bench/src/batchopt/__pycache__/__init__.cpython-310.pyc
+0
-0
mace-bench/src/batchopt/__pycache__/atoms_to_graphs.cpython-310.pyc
.../src/batchopt/__pycache__/atoms_to_graphs.cpython-310.pyc
+0
-0
mace-bench/src/batchopt/__pycache__/baseline.cpython-310.pyc
mace-bench/src/batchopt/__pycache__/baseline.cpython-310.pyc
+0
-0
mace-bench/src/batchopt/__pycache__/pbc_graph.cpython-310.pyc
...-bench/src/batchopt/__pycache__/pbc_graph.cpython-310.pyc
+0
-0
No files found.
mace-bench/reproduce/init_7net.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Environment bootstrap for the SevenNet benchmark.
# Installs the PyG sparse-ops wheels matching torch 2.4 + CUDA 12.1, the
# pinned project requirements, the vendored SevenNet checkout, and this
# package itself in editable mode.
pip install torch_scatter==2.1.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_sparse==0.6.18+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_spline_conv==1.2.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install -r requirements.txt
pip install -e 3rdparty/SevenNet
pip install -e .
pip install ase==3.23.0
pip install ninja
pip install rdkit==2024.3.5
\ No newline at end of file
mace-bench/reproduce/init_mace.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Environment bootstrap for the MACE benchmark.
# Same PyG wheel pins as init_7net.sh, but installs the vendored MACE
# checkout plus the e3nn version MACE expects.
pip install torch_scatter==2.1.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_sparse==0.6.18+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_spline_conv==1.2.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install -r requirements.txt
pip install -e 3rdparty/mace
pip install -e .
pip install e3nn==0.4.4
pip install ase==3.23.0
pip install ninja
# for python_CSP
pip install rdkit-pypi
mace-bench/reproduce/mace_opt_new.py
0 → 100644
View file @
fa84b16c
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import
os
os
.
environ
[
'OMP_NUM_THREADS'
]
=
'1'
os
.
environ
[
'MKL_NUM_THREADS'
]
=
'1'
os
.
environ
[
'OPENBLAS_NUM_THREADS'
]
=
'1'
import
sys
# sys.path.append('/home/jiangj1group/zcxzcx1/volatile/mace')
from
mace.calculators
import
mace_off
,
mace_mp
from
ase.io
import
read
,
write
from
ase.optimize
import
BFGS
,
LBFGS
,
FIRE
,
GPMin
,
MDMin
,
QuasiNewton
from
ase.filters
import
UnitCellFilter
,
ExpCellFilter
,
FrechetCellFilter
import
re
import
io
from
contextlib
import
redirect_stdout
import
os
import
pandas
as
pd
from
joblib
import
Parallel
,
delayed
import
json
import
torch
import
numpy
as
np
import
random
import
argparse
import
time
import
pathlib
import
logging
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
'%(asctime)s - %(levelname)s - %(message)s'
,
force
=
True
)
#####################################################################
os
.
environ
[
'PYTHONHASHSEED'
]
=
'1'
torch
.
manual_seed
(
1
)
np
.
random
.
seed
(
1
)
random
.
seed
(
1
)
torch
.
cuda
.
manual_seed
(
1
)
torch
.
cuda
.
manual_seed_all
(
1
)
#####################################################################
# n_jobs=32
# # n_jobs=2
# path = './'
# molecule_single = 64
# target_folder = "/data_raw/"
#####################################################################
def calculate_density(crystal):
    """Return the mass density of *crystal* in g/cm^3.

    ASE reports atomic masses in amu (``get_masses``) and the cell volume
    in cubic Angstrom (``get_volume``); the amu total is converted to grams
    via Avogadro's number and the volume to cm^3 via 1 A^3 == 1e-24 cm^3.
    """
    mass_amu = sum(crystal.get_masses())
    volume_cm3 = crystal.get_volume() * (10 ** -24)
    avogadro = 6.022140857 * (10 ** 23)
    return mass_amu / volume_cm3 / avogadro
def run_calculation_one(path, file, target_folder, molecule_single, idx):
    """Run the two-stage geometry optimization for one CIF file.

    Stage 1 relaxes under a small scalar pressure (0.0006) and writes
    ``cif_result_press/<name>_press.cif``; stage 2 re-reads that file,
    relaxes without pressure and writes ``cif_result_final/<name>_opt.cif``
    plus a JSON result row.  Relies on module-level globals set in
    ``__main__``: reproduce, multithread, n_gpus, gpu_offset, model_path,
    cueq, filter1/filter2, optimizer_type1/optimizer_type2, use_cuda_eigh,
    use_nsys, use_torch_profiler, max_steps.
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['MKL_NUM_THREADS'] = '1'
    # os.environ['OPENBLAS_NUM_THREADS'] = '1'
    if reproduce:
        print("Reproducing deterministic results.")
        torch.use_deterministic_algorithms(True)
        # Required by cuBLAS for deterministic kernels.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
        np.set_printoptions(precision=17, suppress=False)
        torch.set_printoptions(precision=17, sci_mode=False, linewidth=200)
    if multithread and (not reproduce):
        print("Using OMP and MKL multithreads will introduce non-deterministic results.")
    else:
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OPENBLAS_NUM_THREADS'] = '1'
    # Round-robin assignment of joblib workers to GPUs.
    os.environ["CUDA_VISIBLE_DEVICES"] = str((idx % n_gpus) + gpu_offset)
    # Capture the optimizer's stdout log so the final energy can be parsed
    # out of it afterwards.
    with io.StringIO() as buf, redirect_stdout(buf):
        crystal = read(path + target_folder + file)
        if molecule_single < 0:
            # Atom count per molecule is encoded in the file name as the
            # final "_<n>" component before the extension.
            molecule_single = int(file.split('_')[-1].split('.')[0])
        molecule_count = len(crystal.get_atomic_numbers()) / molecule_single
        calc = mace_off(model=model_path, dispersion=True, device='cuda',
                        enable_cueq=cueq)
        crystal.calc = calc
        # ---- stage 1: relax cell + positions under a small pressure ----
        if filter1 == "UnitCellFilter":
            sf = UnitCellFilter(crystal, scalar_pressure=0.0006)
        elif filter1 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal, scalar_pressure=0.0006)
        else:
            raise ValueError(
                f"Unrecognized filter type '{filter1}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type1 == "BFGS":
            if use_cuda_eigh:
                # use_cuda_eigh is a non-upstream BFGS extension
                # (custom ASE fork) -- TODO confirm.
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type1 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type1 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(
                f"Unrecognized optimizer type '{optimizer_type1}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_nsys or use_torch_profiler:
            # warmup for profiling
            optimizer.run(fmax=0.01, steps=100)
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[torch.profiler.ProfilerActivity.CPU,
                            torch.profiler.ProfilerActivity.CUDA],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True)
            profiler.start()
        start_time1 = time.time()
        optimizer.run(fmax=0.01, steps=max_steps)
        end_time1 = time.time()
        if use_torch_profiler:
            profiler.stop()
        crystal.write(path + 'cif_result_press/' + file[:-4] + "_press.cif")
        output_1 = buf.getvalue()
        # step_used_1 = float(re.split("\\s+", output_1.split('\n')[-2])[1][:])
        step_used_1 = optimizer.nsteps
        if use_nsys or use_torch_profiler:
            # Discount the 100 warmup steps from the measured count.
            step_used_1 = step_used_1 - 100
        total_time1 = end_time1 - start_time1
        avg_time1 = total_time1 / step_used_1 if step_used_1 != 0 else 0
        # ---- stage 2: re-read stage-1 output, relax without pressure ----
        crystal = read(path + 'cif_result_press/' + file[:-4] + "_press.cif")
        crystal.calc = calc
        if filter2 == "UnitCellFilter":
            sf = UnitCellFilter(crystal)
        elif filter2 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal)
        else:
            raise ValueError(
                f"Unrecognized filter type '{filter2}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type2 == "BFGS":
            if use_cuda_eigh:
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type2 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type2 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(
                f"Unrecognized optimizer type '{optimizer_type2}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[torch.profiler.ProfilerActivity.CPU,
                            torch.profiler.ProfilerActivity.CUDA],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True)
            profiler.start()
        start_time2 = time.time()
        optimizer.run(fmax=0.01, steps=max_steps)
        end_time2 = time.time()
        if use_torch_profiler:
            profiler.stop()
        density = calculate_density(crystal)
        crystal.write(path + 'cif_result_final/' + file[:-4] + "_opt.cif")
        output_2 = buf.getvalue()
        # Parse the final energy (4th whitespace-separated field of the
        # last optimizer log line) from the captured stdout.
        energy = float(re.split("\\s+", output_2.split('\n')[-2])[3][:])
        # step_used_2 = float(re.split("\\s+", output_2.split('\n')[-2])[1][:])
        step_used_2 = optimizer.nsteps
        # eV/molecule -> kJ/mol (1 eV = 96.485 kJ/mol).
        energy_per_mol = energy / molecule_count * 96.485
        total_time2 = end_time2 - start_time2
        avg_time2 = total_time2 / step_used_2 if step_used_2 != 0 else 0
        new_row = {'name': file[:-4], 'density': density,
                   'energy_kj': energy_per_mol,
                   'step_used_1': step_used_1, 'step_used_2': step_used_2,
                   'total_time1_s': total_time1, 'avg_time1_s': avg_time1,
                   'total_time2_s': total_time2, 'avg_time2_s': avg_time2}
    # NOTE(review): statement placement after the redirect_stdout block is
    # reconstructed; the print below is only visible outside the redirect.
    print(f'output_2: {output_2}')
    with open(path + 'json_result/' + file[:-4] + ".json", 'w') as json_file:
        json.dump(new_row, json_file, indent=4)
    return new_row
def already_have_calculation_one(path, file, target_folder, molecule_single, idx):
    """Load a previously computed result row from its cached JSON file.

    The signature mirrors ``run_calculation_one`` so both can be dispatched
    interchangeably from the same joblib call site; ``target_folder``,
    ``molecule_single`` and ``idx`` are unused here.

    Args:
        path: Base project path (must end with a path separator).
        file: Structure file name, e.g. ``name.cif``; the cached row is
            read from ``<path>json_result/<name>.json``.
    Returns:
        The deserialized result dict.
    """
    logging.info(f"reading on structure {file}")
    print(f"reading on structure {file}")
    json_path = path + 'json_result/' + file[:-4] + ".json"
    # Fix: the original bound the open file object to ``file``, shadowing
    # the parameter of the same name inside the with-block.
    with open(json_path, 'r') as json_file:
        old_row = json.load(json_file)
    return old_row
def run():
    """Optimize every structure under ``path + target_folder`` and write
    one CSV row per structure to ``result.csv``.

    Structures whose JSON result already exists in ``json_result/`` are
    only re-loaded (``already_have_calculation_one``); the rest are
    computed in parallel with joblib.  Uses module-level globals set in
    ``__main__``: path, target_folder, n_jobs, molecule_single.
    """
    df = pd.DataFrame(columns=['name', 'density', 'energy_kj',
                               'step_used_1', 'step_used_2',
                               'total_time1_s', 'avg_time1_s',
                               'total_time2_s', 'avg_time2_s'])
    for root, dirs, files in os.walk(path + target_folder):
        # Re-load rows that were already computed in a previous run.
        old_row = Parallel(n_jobs=n_jobs)(
            delayed(already_have_calculation_one)(
                path, file, target_folder, molecule_single, idx)
            for idx, file in enumerate(files)
            if os.path.exists(path + 'json_result/' + file[:-4] + ".json"))
        # Everything without a cached JSON result still needs computing.
        filtered_files = [file for file in files
                          if not os.path.exists(
                              path + 'json_result/' + file[:-4] + ".json")]
        new_row = Parallel(n_jobs=n_jobs)(
            delayed(run_calculation_one)(
                path, file, target_folder, molecule_single, idx)
            for idx, file in enumerate(filtered_files))
        # show the length of new_row
        print(f'new_row length: {len(new_row)}')
        print(f'root: {root}\ndirs: {dirs}\nfiles: {files}')
        for row in new_row:
            df = pd.concat([df, pd.DataFrame([row])],
                           ignore_index=True, axis=0)
        for row in old_row:
            df = pd.concat([df, pd.DataFrame([row])],
                           ignore_index=True, axis=0)
        # NOTE(review): df accumulates across os.walk iterations, so
        # rewriting the CSV each iteration still ends with the full table.
        df.to_csv(path + '/result.csv')
if __name__ == '__main__':
    # CLI entry point: parse options, expose them as module globals used by
    # run_calculation_one()/run(), then retry run() until it succeeds.
    parser = argparse.ArgumentParser(description="Run parallel calculations on molecular crystals.")
    parser.add_argument("--n_jobs", type=int, default=32,
                        help="Number of parallel jobs to run (default: 32)")
    parser.add_argument("--target_folder", type=str, required=True,
                        help="Path to the target folder containing input files")
    parser.add_argument("--path", type=str, default='./',
                        help="Base path for the project (default: './')")
    parser.add_argument("--molecule_single", type=int, default=-1,
                        help="Number of atoms per molecule (default: 64)")
    parser.add_argument("--n_gpus", type=int, default=2,
                        help="Number of GPUs to use (default: 2)")
    parser.add_argument("--cueq", action='store_true',
                        help="Whether to use cuEquivariance Library (default: False)")
    parser.add_argument("--max_steps", type=int, default=3000,
                        help="Number of max steps to run the optimization (default: 3000)")
    parser.add_argument("--use_torch_profiler", action='store_true',
                        help="Whether to use torch profiler (default: False)")
    parser.add_argument("--use_nsys", action='store_true',
                        help="Whether to use nsys profiler (default: False)")
    parser.add_argument("--model", type=str, default="small",
                        help="Model to use for the calculation (default: 'small')")
    parser.add_argument("--optimizer", type=str, default="BFGS",
                        help="Optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--use_cuda_eigh", action='store_true',
                        help="Whether to use CUDA for eigh (default: False)")
    parser.add_argument("--gpu_offset", type=int, default=0,
                        help="GPU offset to use for the calculation (default: 0)")
    parser.add_argument("--multithread", action='store_true',
                        help="Whether to use multithread (default: False)")
    parser.add_argument("--reproduce", action='store_true',
                        help="Whether to reproduce deterministic results (default: False)")
    parser.add_argument("--filter1", type=str, default="UnitCellFilter",
                        help="1st filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--filter2", type=str, default="UnitCellFilter",
                        help="2nd filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--optimizer1", type=str, default="BFGS",
                        help="1st optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--optimizer2", type=str, default="BFGS",
                        help="2nd optimizer to use for the calculation (default: 'BFGS')")
    args = parser.parse_args()

    # Publish the options as module globals consumed by the worker
    # functions (they are pickled to joblib subprocesses with the module).
    n_jobs = args.n_jobs
    target_folder = args.target_folder
    path = args.path
    molecule_single = args.molecule_single
    n_gpus = args.n_gpus
    cueq = args.cueq
    max_steps = args.max_steps
    use_torch_profiler = args.use_torch_profiler
    use_nsys = args.use_nsys
    model_path = args.model
    optimizer_type = args.optimizer
    use_cuda_eigh = args.use_cuda_eigh
    gpu_offset = args.gpu_offset
    multithread = args.multithread
    reproduce = args.reproduce
    filter1 = args.filter1
    filter2 = args.filter2
    optimizer_type1 = args.optimizer1
    optimizer_type2 = args.optimizer2

    # Fix: replace bare ``try: os.mkdir(...) except: pass`` with
    # makedirs(exist_ok=True) -- the bare except also swallowed real
    # failures such as permission errors, and it skipped creating
    # cif_result_final when cif_result_press already existed.
    os.makedirs("./cif_result_press", exist_ok=True)
    os.makedirs("./cif_result_final", exist_ok=True)
    os.makedirs("./json_result", exist_ok=True)

    start_time_all = time.time()
    # Fix: renamed ``iter`` -> ``attempts`` (shadowed the builtin).
    attempts = 0
    while attempts < 100:
        attempts += 1
        try:
            run()
            break
        except Exception as e:
            # Best-effort retry loop: report and back off, then retry.
            print(f"Error occurred: {e}")
            print("Retrying...")
            time.sleep(10)
    end_time_all = time.time()
    total_time_all = end_time_all - start_time_all
    print('dataset,total_time_all_s,attempts')
    print(f"{pathlib.Path(target_folder).name},{total_time_all},{attempts}")
    with open(path + 'timing.csv', 'w') as f:
        f.write('dataset,total_time_all_s,attempts\n')
        f.write(f"{pathlib.Path(target_folder).name},{total_time_all},{attempts}\n")
\ No newline at end of file
mace-bench/reproduce/mace_opt_origin.py
0 → 100644
View file @
fa84b16c
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import
os
import
sys
# sys.path.append('/home/jiangj1group/zcxzcx1/volatile/mace')
from
mace.calculators
import
mace_off
,
mace_mp
from
ase.io
import
read
,
write
from
ase.optimize
import
BFGS
,
LBFGS
,
FIRE
,
GPMin
,
MDMin
,
QuasiNewton
from
ase.filters
import
UnitCellFilter
,
ExpCellFilter
,
FrechetCellFilter
import
re
import
io
from
contextlib
import
redirect_stdout
import
os
import
pandas
as
pd
from
joblib
import
Parallel
,
delayed
import
json
import
torch
import
numpy
as
np
import
random
import
argparse
import
time
import
pathlib
import
logging
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
'%(asctime)s - %(levelname)s - %(message)s'
,
force
=
True
)
#####################################################################
os
.
environ
[
'PYTHONHASHSEED'
]
=
'1'
torch
.
manual_seed
(
1
)
np
.
random
.
seed
(
1
)
random
.
seed
(
1
)
torch
.
cuda
.
manual_seed
(
1
)
torch
.
cuda
.
manual_seed_all
(
1
)
#####################################################################
# n_jobs=32
# # n_jobs=2
# path = './'
# molecule_single = 64
# target_folder = "/data_raw/"
#####################################################################
def calculate_density(crystal):
    """Return the mass density of *crystal* in g/cm^3.

    ASE reports atomic masses in amu (``get_masses``) and the cell volume
    in cubic Angstrom (``get_volume``); the amu total is converted to grams
    via Avogadro's number and the volume to cm^3 via 1 A^3 == 1e-24 cm^3.
    """
    mass_amu = sum(crystal.get_masses())
    volume_cm3 = crystal.get_volume() * (10 ** -24)
    avogadro = 6.022140857 * (10 ** 23)
    return mass_amu / volume_cm3 / avogadro
def run_calculation_one(path, file, target_folder, molecule_single, idx):
    """Run the two-stage geometry optimization for one CIF file.

    Stage 1 relaxes under a small scalar pressure (0.0006) and writes
    ``cif_result_press/<name>_press.cif``; stage 2 re-reads that file,
    relaxes without pressure and writes ``cif_result_final/<name>_opt.cif``
    plus a JSON result row.  Relies on module-level globals set in
    ``__main__``: reproduce, multithread, n_gpus, gpu_offset, model_path,
    cueq, filter1/filter2, optimizer_type1/optimizer_type2, use_cuda_eigh,
    use_nsys, use_torch_profiler, max_steps.
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['MKL_NUM_THREADS'] = '1'
    # os.environ['OPENBLAS_NUM_THREADS'] = '1'
    if reproduce:
        print("Reproducing deterministic results.")
        torch.use_deterministic_algorithms(True)
        # Required by cuBLAS for deterministic kernels.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
        np.set_printoptions(precision=17, suppress=False)
        torch.set_printoptions(precision=17, sci_mode=False, linewidth=200)
    if multithread and (not reproduce):
        print("Using OMP and MKL multithreads will introduce non-deterministic results.")
    else:
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OPENBLAS_NUM_THREADS'] = '1'
    # Round-robin assignment of joblib workers to GPUs.
    os.environ["CUDA_VISIBLE_DEVICES"] = str((idx % n_gpus) + gpu_offset)
    # Capture the optimizer's stdout log so the final energy can be parsed
    # out of it afterwards.
    with io.StringIO() as buf, redirect_stdout(buf):
        crystal = read(path + target_folder + file)
        if molecule_single < 0:
            # Atom count per molecule is encoded in the file name as the
            # final "_<n>" component before the extension.
            molecule_single = int(file.split('_')[-1].split('.')[0])
        molecule_count = len(crystal.get_atomic_numbers()) / molecule_single
        calc = mace_off(model=model_path, dispersion=True, device='cuda',
                        enable_cueq=cueq)
        crystal.calc = calc
        # ---- stage 1: relax cell + positions under a small pressure ----
        if filter1 == "UnitCellFilter":
            sf = UnitCellFilter(crystal, scalar_pressure=0.0006)
        elif filter1 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal, scalar_pressure=0.0006)
        else:
            raise ValueError(
                f"Unrecognized filter type '{filter1}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type1 == "BFGS":
            if use_cuda_eigh:
                # use_cuda_eigh is a non-upstream BFGS extension
                # (custom ASE fork) -- TODO confirm.
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type1 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type1 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(
                f"Unrecognized optimizer type '{optimizer_type1}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_nsys or use_torch_profiler:
            # warmup for profiling
            optimizer.run(fmax=0.01, steps=100)
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[torch.profiler.ProfilerActivity.CPU,
                            torch.profiler.ProfilerActivity.CUDA],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True)
            profiler.start()
        start_time1 = time.time()
        optimizer.run(fmax=0.01, steps=max_steps)
        end_time1 = time.time()
        if use_torch_profiler:
            profiler.stop()
        crystal.write(path + 'cif_result_press/' + file[:-4] + "_press.cif")
        output_1 = buf.getvalue()
        # step_used_1 = float(re.split("\\s+", output_1.split('\n')[-2])[1][:])
        step_used_1 = optimizer.nsteps
        if use_nsys or use_torch_profiler:
            # Discount the 100 warmup steps from the measured count.
            step_used_1 = step_used_1 - 100
        total_time1 = end_time1 - start_time1
        avg_time1 = total_time1 / step_used_1 if step_used_1 != 0 else 0
        # ---- stage 2: re-read stage-1 output, relax without pressure ----
        crystal = read(path + 'cif_result_press/' + file[:-4] + "_press.cif")
        crystal.calc = calc
        if filter2 == "UnitCellFilter":
            sf = UnitCellFilter(crystal)
        elif filter2 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal)
        else:
            raise ValueError(
                f"Unrecognized filter type '{filter2}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type2 == "BFGS":
            if use_cuda_eigh:
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type2 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type2 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(
                f"Unrecognized optimizer type '{optimizer_type2}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[torch.profiler.ProfilerActivity.CPU,
                            torch.profiler.ProfilerActivity.CUDA],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True)
            profiler.start()
        start_time2 = time.time()
        optimizer.run(fmax=0.01, steps=max_steps)
        end_time2 = time.time()
        if use_torch_profiler:
            profiler.stop()
        density = calculate_density(crystal)
        crystal.write(path + 'cif_result_final/' + file[:-4] + "_opt.cif")
        output_2 = buf.getvalue()
        # Parse the final energy (4th whitespace-separated field of the
        # last optimizer log line) from the captured stdout.
        energy = float(re.split("\\s+", output_2.split('\n')[-2])[3][:])
        # step_used_2 = float(re.split("\\s+", output_2.split('\n')[-2])[1][:])
        step_used_2 = optimizer.nsteps
        # eV/molecule -> kJ/mol (1 eV = 96.485 kJ/mol).
        energy_per_mol = energy / molecule_count * 96.485
        total_time2 = end_time2 - start_time2
        avg_time2 = total_time2 / step_used_2 if step_used_2 != 0 else 0
        new_row = {'name': file[:-4], 'density': density,
                   'energy_kj': energy_per_mol,
                   'step_used_1': step_used_1, 'step_used_2': step_used_2,
                   'total_time1_s': total_time1, 'avg_time1_s': avg_time1,
                   'total_time2_s': total_time2, 'avg_time2_s': avg_time2}
    # NOTE(review): statement placement after the redirect_stdout block is
    # reconstructed; the print below is only visible outside the redirect.
    print(f'output_2: {output_2}')
    with open(path + 'json_result/' + file[:-4] + ".json", 'w') as json_file:
        json.dump(new_row, json_file, indent=4)
    return new_row
def already_have_calculation_one(path, file, target_folder, molecule_single, idx):
    """Load a previously computed result row from its cached JSON file.

    The signature mirrors ``run_calculation_one`` so both can be dispatched
    interchangeably from the same joblib call site; ``target_folder``,
    ``molecule_single`` and ``idx`` are unused here.

    Args:
        path: Base project path (must end with a path separator).
        file: Structure file name, e.g. ``name.cif``; the cached row is
            read from ``<path>json_result/<name>.json``.
    Returns:
        The deserialized result dict.
    """
    logging.info(f"reading on structure {file}")
    print(f"reading on structure {file}")
    json_path = path + 'json_result/' + file[:-4] + ".json"
    # Fix: the original bound the open file object to ``file``, shadowing
    # the parameter of the same name inside the with-block.
    with open(json_path, 'r') as json_file:
        old_row = json.load(json_file)
    return old_row
def run():
    """Optimize every structure under ``path + target_folder`` and write
    one CSV row per structure to ``result.csv``.

    Structures whose JSON result already exists in ``json_result/`` are
    only re-loaded (``already_have_calculation_one``); the rest are
    computed in parallel with joblib.  Uses module-level globals set in
    ``__main__``: path, target_folder, n_jobs, molecule_single.
    """
    df = pd.DataFrame(columns=['name', 'density', 'energy_kj',
                               'step_used_1', 'step_used_2',
                               'total_time1_s', 'avg_time1_s',
                               'total_time2_s', 'avg_time2_s'])
    for root, dirs, files in os.walk(path + target_folder):
        # Re-load rows that were already computed in a previous run.
        old_row = Parallel(n_jobs=n_jobs)(
            delayed(already_have_calculation_one)(
                path, file, target_folder, molecule_single, idx)
            for idx, file in enumerate(files)
            if os.path.exists(path + 'json_result/' + file[:-4] + ".json"))
        # Everything without a cached JSON result still needs computing.
        filtered_files = [file for file in files
                          if not os.path.exists(
                              path + 'json_result/' + file[:-4] + ".json")]
        new_row = Parallel(n_jobs=n_jobs)(
            delayed(run_calculation_one)(
                path, file, target_folder, molecule_single, idx)
            for idx, file in enumerate(filtered_files))
        # show the length of new_row
        print(f'new_row length: {len(new_row)}')
        print(f'root: {root}\ndirs: {dirs}\nfiles: {files}')
        for row in new_row:
            df = pd.concat([df, pd.DataFrame([row])],
                           ignore_index=True, axis=0)
        for row in old_row:
            df = pd.concat([df, pd.DataFrame([row])],
                           ignore_index=True, axis=0)
        # NOTE(review): df accumulates across os.walk iterations, so
        # rewriting the CSV each iteration still ends with the full table.
        df.to_csv(path + '/result.csv')
if __name__ == '__main__':
    # CLI entry point: parse options, expose them as module globals used by
    # run_calculation_one()/run(), then retry run() until it succeeds.
    parser = argparse.ArgumentParser(description="Run parallel calculations on molecular crystals.")
    parser.add_argument("--n_jobs", type=int, default=32,
                        help="Number of parallel jobs to run (default: 32)")
    parser.add_argument("--target_folder", type=str, required=True,
                        help="Path to the target folder containing input files")
    parser.add_argument("--path", type=str, default='./',
                        help="Base path for the project (default: './')")
    parser.add_argument("--molecule_single", type=int, default=-1,
                        help="Number of atoms per molecule (default: 64)")
    parser.add_argument("--n_gpus", type=int, default=2,
                        help="Number of GPUs to use (default: 2)")
    parser.add_argument("--cueq", action='store_true',
                        help="Whether to use cuEquivariance Library (default: False)")
    parser.add_argument("--max_steps", type=int, default=3000,
                        help="Number of max steps to run the optimization (default: 3000)")
    parser.add_argument("--use_torch_profiler", action='store_true',
                        help="Whether to use torch profiler (default: False)")
    parser.add_argument("--use_nsys", action='store_true',
                        help="Whether to use nsys profiler (default: False)")
    parser.add_argument("--model", type=str, default="small",
                        help="Model to use for the calculation (default: 'small')")
    parser.add_argument("--optimizer", type=str, default="BFGS",
                        help="Optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--use_cuda_eigh", action='store_true',
                        help="Whether to use CUDA for eigh (default: False)")
    parser.add_argument("--gpu_offset", type=int, default=0,
                        help="GPU offset to use for the calculation (default: 0)")
    parser.add_argument("--multithread", action='store_true',
                        help="Whether to use multithread (default: False)")
    parser.add_argument("--reproduce", action='store_true',
                        help="Whether to reproduce deterministic results (default: False)")
    parser.add_argument("--filter1", type=str, default="UnitCellFilter",
                        help="1st filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--filter2", type=str, default="UnitCellFilter",
                        help="2nd filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--optimizer1", type=str, default="BFGS",
                        help="1st optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--optimizer2", type=str, default="BFGS",
                        help="2nd optimizer to use for the calculation (default: 'BFGS')")
    args = parser.parse_args()

    # Publish the options as module globals consumed by the worker
    # functions (they are pickled to joblib subprocesses with the module).
    n_jobs = args.n_jobs
    target_folder = args.target_folder
    path = args.path
    molecule_single = args.molecule_single
    n_gpus = args.n_gpus
    cueq = args.cueq
    max_steps = args.max_steps
    use_torch_profiler = args.use_torch_profiler
    use_nsys = args.use_nsys
    model_path = args.model
    optimizer_type = args.optimizer
    use_cuda_eigh = args.use_cuda_eigh
    gpu_offset = args.gpu_offset
    multithread = args.multithread
    reproduce = args.reproduce
    filter1 = args.filter1
    filter2 = args.filter2
    optimizer_type1 = args.optimizer1
    optimizer_type2 = args.optimizer2

    # Fix: replace bare ``try: os.mkdir(...) except: pass`` with
    # makedirs(exist_ok=True) -- the bare except also swallowed real
    # failures such as permission errors, and it skipped creating
    # cif_result_final when cif_result_press already existed.
    os.makedirs("./cif_result_press", exist_ok=True)
    os.makedirs("./cif_result_final", exist_ok=True)
    os.makedirs("./json_result", exist_ok=True)

    start_time_all = time.time()
    # Fix: renamed ``iter`` -> ``attempts`` (shadowed the builtin).
    attempts = 0
    while attempts < 100:
        attempts += 1
        try:
            run()
            break
        except Exception as e:
            # Best-effort retry loop: report and back off, then retry.
            print(f"Error occurred: {e}")
            print("Retrying...")
            time.sleep(10)
    end_time_all = time.time()
    total_time_all = end_time_all - start_time_all
    print('dataset,total_time_all_s,attempts')
    print(f"{pathlib.Path(target_folder).name},{total_time_all},{attempts}")
    with open(path + 'timing.csv', 'w') as f:
        f.write('dataset,total_time_all_s,attempts\n')
        f.write(f"{pathlib.Path(target_folder).name},{total_time_all},{attempts}\n")
\ No newline at end of file
mace-bench/reproduce/perf_v2_base/run_mace.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Baseline MACE run over the perf_v2 dataset: 64 joblib workers spread
# across 4 GPUs, 46 atoms per molecule, QuasiNewton for stage 1.
python ../mace_opt_new.py --n_jobs 64 --molecule_single 46 \
    --target_folder ../../data/perf_v2/ --model small --n_gpus 4 --gpu_offset 0 \
    --optimizer1 QuasiNewton --filter1 UnitCellFilter --filter2 UnitCellFilter
\ No newline at end of file
mace-bench/reproduce/perf_v2_batch/opt.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Batched-optimizer run over perf_v2. Clears previous result directories
# first (the glob matches e.g. cif_result_press / json_result outputs).
rm -r *_result_*
python ../../scripts/mace_opt_batch.py --target_folder "../../data/perf_v2" \
    --molecule_single 46 --gpu_offset 0 --n_gpus 4 --num_workers 40 --batch_size 0 \
    --max_steps 6000 --filter1 UnitCellFilter --filter2 UnitCellFilter \
    --optimizer1 BFGSFusedLS --optimizer2 BFGS --num_threads 2 \
    --cueq true --use_ordered_files true
\ No newline at end of file
mace-bench/reproduce/subtest.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Batched-optimizer sweep over three system sizes. Each entry is
# "<natoms> <num_workers> <batch_size>"; one output directory per config.
top_dir=$(pwd)
natoms_nw_bs=(
    "92 48 25"
    "184 40 12"
    "368 40 5"
)
for config in "${natoms_nw_bs[@]}"; do
    read natoms nw bs <<< "$config"
    dir="$top_dir/subtest_BATCH_${natoms}_g4_j${nw}_bs${bs}_cueq_cupbc"
    mkdir -p "$dir"
    cd "$dir" || continue
    pwd
    python ../../scripts/mace_opt_batch.py \
        --target_folder "../../data/perf_v2_sorted/perf_v2_${natoms}" \
        --molecule_single 46 --gpu_offset 0 --n_gpus 4 --num_workers ${nw} --batch_size ${bs} \
        --max_steps 6000 --filter1 UnitCellFilter --filter2 UnitCellFilter \
        --optimizer1 BFGSFusedLS --optimizer2 BFGS --num_threads 2 \
        --use_ordered_files true --cueq true > opt.log 2>&1
done
\ No newline at end of file
mace-bench/reproduce/subtest_baseline.sh
0 → 100644
View file @
fa84b16c
#!/bin/bash
# Baseline (non-batched) sweep over three system sizes. Each entry is
# "<natoms> <n_jobs>"; one output directory per config.
top_dir=$(pwd)
natoms_nw_bs=(
    "92 64"
    "184 64"
    "368 64"
)
for config in "${natoms_nw_bs[@]}"; do
    read natoms nw <<< "$config"
    dir="$top_dir/subtest_BASE_${natoms}_g4_j${nw}"
    mkdir -p "$dir"
    cd "$dir" || continue
    pwd
    python ../mace_opt_new.py --n_jobs ${nw} --molecule_single 46 \
        --target_folder ../../data/perf_v2_sorted/perf_v2_${natoms}/ --model small --n_gpus 4 \
        --gpu_offset 0 --optimizer1 QuasiNewton --filter1 UnitCellFilter \
        --filter2 UnitCellFilter --max_steps 3000 > opt.log 2>&1
done
\ No newline at end of file
mace-bench/requirements.txt
0 → 100644
View file @
fa84b16c
--extra-index-url https://download.pytorch.org/whl/cu121
absl-py==2.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
# -e git+https://gitlab.com/ase/ase.git@72c50c76bac2396c7d58385b231c65bd07458279#egg=ase&subdirectory=../../../3rdparty/ase
async-timeout==5.0.1
attrs==24.3.0
certifi==2024.8.30
cfgv==3.4.0
charset-normalizer==3.4.0
click==8.1.8
cloudpickle==3.1.0
ConfigArgParse==1.7
contourpy==1.3.1
coverage==7.6.9
cuequivariance==0.4.0
cuequivariance-ops-torch-cu12==0.4.0
cuequivariance-ops-cu12==0.4.0
cuequivariance-torch==0.4.0
cycler==0.12.1
distlib==0.3.9
docker-pycreds==0.4.0
e3nn==0.4.4
exceptiongroup==1.2.2
# -e git+https://github.com/mazhaojia123/fairchem.git@f50db9d5b29debdfb265d9c3fad394f18e16cab8#egg=fairchem_core&subdirectory=../../../3rdparty/fairchem/packages/fairchem-core
filelock==3.13.1
fonttools==4.55.1
frozenlist==1.5.0
fsspec==2024.2.0
gitdb==4.0.11
GitPython==3.1.43
grpcio==1.68.1
h5py==3.12.1
hydra-core==1.3.2
identify==2.6.3
idna==3.10
iniconfig==2.0.0
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.7
latexcodec==3.0.0
lightning-utilities==0.11.9
llvmlite==0.43.0
lmdb==1.5.1
# -e git+https://github.com/mazhaojia123/mace.git@edd6b479f4974d0b8162712872ad2eed1aa2fb75#egg=mace_torch&subdirectory=../../../3rdparty/mace
Markdown==3.7
MarkupSafe==2.1.5
matplotlib==3.9.3
matscipy==1.1.1
monty==2024.10.21
mpmath==1.3.0
multidict==6.1.0
networkx==3.2.1
nodeenv==1.9.1
numba==0.60.0
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.1.105
nvidia-nvtx-cu12==12.1.105
omegaconf==2.3.0
opt-einsum-fx==0.1.4
opt_einsum==3.4.0
orjson==3.10.12
packaging==24.2
palettable==3.3.3
pandas==2.2.3
pillow==11.0.0
platformdirs==4.3.6
plotly==5.24.1
pluggy==1.5.0
pre_commit==4.0.1
prettytable==3.12.0
propcache==0.2.1
protobuf==5.29.2
psutil==6.1.1
pybtex==0.24.0
pydantic==2.10.4
pydantic_core==2.27.2
pymatgen==2024.11.13
pyparsing==3.2.0
pytest==8.3.4
pytest-cov==6.0.0
python-dateutil==2.9.0.post0
python-hostlist==2.0.0
pytz==2024.2
PyYAML==6.0.2
requests==2.32.3
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.12
ruff==0.5.1
scipy==1.14.1
sentry-sdk==2.19.2
setproctitle==1.3.4
six==1.16.0
smmap==5.0.1
spglib==2.5.0
submitit==1.5.2
sympy==1.13.1
syrupy==4.8.0
tabulate==0.9.0
tenacity==9.0.0
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tomli==2.2.1
torch==2.4.1+cu121
# ./torch-2.4.1+cu121-cp310-cp310-linux_x86_64.whl
torch-dftd==0.5.1
torch-ema==0.3
torch-geometric==2.6.1
# torch_scatter==2.1.2+pt24cu121
# torch_sparse==0.6.18+pt24cu121
# torch_spline_conv==1.2.2+pt24cu121
torchmetrics==1.6.0
tqdm==4.67.1
triton==3.0.0
typing_extensions==4.12.2
tzdata==2024.2
uncertainties==3.2.2
urllib3==2.2.3
virtualenv==20.28.0
wandb==0.19.1
wcwidth==0.2.13
Werkzeug==3.1.3
yarl==1.18.3
torch-tb-profiler==0.4.3
\ No newline at end of file
mace-bench/scripts/mace_opt_batch.py
0 → 100644
View file @
fa84b16c
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import
os
import
argparse
parser
=
argparse
.
ArgumentParser
(
description
=
"Run batch optimization on molecular crystals."
)
parser
.
add_argument
(
"--target_folder"
,
type
=
str
,
required
=
True
,
help
=
"Target folder containing crystal files"
)
parser
.
add_argument
(
"--num_workers"
,
type
=
int
,
default
=
4
,
help
=
"Number of workers to distribute the files to"
)
parser
.
add_argument
(
"--n_gpus"
,
type
=
int
,
default
=
1
,
help
=
"Number of GPUs to use for the optimization"
)
parser
.
add_argument
(
"--gpu_offset"
,
type
=
int
,
default
=
0
,
help
=
"Offset for GPU numbering"
)
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
4
,
help
=
"Number of files to process in a single batch"
)
parser
.
add_argument
(
"--run_baseline"
,
type
=
bool
,
default
=
False
,
help
=
"Run baseline optimization using LBFGS from ase.optimize"
)
parser
.
add_argument
(
"--max_steps"
,
type
=
int
,
default
=
100
,
help
=
"Number of max steps to run the optimization (default: 100)"
)
parser
.
add_argument
(
"--filter1"
,
type
=
str
,
default
=
None
,
choices
=
[
None
,
"UnitCellFilter"
],
help
=
"Type of cell filter to use in first optimization"
)
parser
.
add_argument
(
"--filter2"
,
type
=
str
,
default
=
None
,
choices
=
[
None
,
"UnitCellFilter"
],
help
=
"Type of cell filter to use in second optimization"
)
parser
.
add_argument
(
"--optimizer1"
,
type
=
str
,
default
=
"LBFGS"
,
choices
=
[
"LBFGS"
,
"QuasiNewton"
,
"BFGS"
,
"BFGSLineSearch"
,
"BFGSFusedLS"
],
help
=
"First optimizer to use (default: LBFGS)"
)
parser
.
add_argument
(
"--optimizer2"
,
type
=
str
,
default
=
"LBFGS"
,
choices
=
[
"LBFGS"
,
"QuasiNewton"
,
"BFGS"
,
"BFGSLineSearch"
,
"BFGSFusedLS"
],
help
=
"Second optimizer to use (default: LBFGS)"
)
parser
.
add_argument
(
"--skip_second_stage"
,
type
=
bool
,
default
=
False
,
help
=
"Skip the second optimization stage"
)
parser
.
add_argument
(
"--scalar_pressure"
,
type
=
float
,
default
=
0.0006
,
help
=
"Scalar pressure for cell optimization (default: 0.0006)"
)
parser
.
add_argument
(
"--compile_mode"
,
type
=
str
,
default
=
None
,
choices
=
[
None
,
"default"
,
"reduce-overhead"
,
"max-autotune"
,
"max-autotune-no-cudagraphs"
],
help
=
"Compile mode for MACE calculator"
)
parser
.
add_argument
(
"--profile"
,
type
=
str
,
default
=
"False"
,
help
=
"Enable profiling. Set to 'True' for basic profiling or provide a JSON string with profiler config options for wait, warmup, active, and repeat"
)
parser
.
add_argument
(
"--num_threads"
,
type
=
int
,
default
=
16
,
help
=
"Number of cpu threads per process to use while running the optimization"
)
parser
.
add_argument
(
"--bind_cores"
,
type
=
str
,
default
=
None
,
help
=
(
"Specify a comma-separated list of core ranges (e.g., '0-15,16-31,...') for each worker. The number of ranges must equal --num_workers."
))
parser
.
add_argument
(
"--cueq"
,
type
=
bool
,
default
=
False
,
help
=
"Whether to use cuEquivariance Library (default: False)"
)
parser
.
add_argument
(
"--molecule_single"
,
type
=
int
,
default
=
64
,
help
=
"Number of atoms per molecule (default: 64)"
)
parser
.
add_argument
(
"--output_path"
,
type
=
str
,
default
=
"./"
,
help
=
"Absolute path for output files"
)
parser
.
add_argument
(
"--model"
,
type
=
str
,
default
=
"mace"
,
choices
=
[
"mace"
,
"chgnet"
,
"sevennet"
],
help
=
"Model to use for optimization"
)
parser
.
add_argument
(
"--use_ordered_files"
,
type
=
bool
,
default
=
False
,
help
=
"Whether to sort files by atomic number in descending order before optimization"
)
args
=
parser
.
parse_args
()
os
.
environ
[
'OMP_NUM_THREADS'
]
=
str
(
args
.
num_threads
)
os
.
environ
[
'MKL_NUM_THREADS'
]
=
str
(
args
.
num_threads
)
import
pathlib
import
logging
from
batchopt
import
Scheduler
,
ensure_directory
,
run_baseline
,
count_atoms_cif
logging
.
basicConfig
(
level
=
logging
.
WARNING
,
format
=
'%(asctime)s - %(process)d - %(levelname)s - %(message)s'
,
datefmt
=
'%H:%M:%S'
,
force
=
True
)
if
__name__
==
'__main__'
:
target_folder
=
pathlib
.
Path
(
args
.
target_folder
)
files
=
[
str
(
file
)
for
file
in
target_folder
.
glob
(
"*.cif"
)]
devices
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
args
.
gpu_offset
,
args
.
gpu_offset
+
args
.
n_gpus
)]
logging
.
info
(
"Starting batch optimization."
)
logging
.
info
(
f
"Use devices:
{
devices
}
"
)
logging
.
info
(
f
"files:
{
files
}
"
)
output_path
=
args
.
output_path
if
not
os
.
path
.
isabs
(
output_path
):
output_path
=
os
.
path
.
abspath
(
output_path
)
logging
.
info
(
f
"Output path:
{
output_path
}
"
)
for
output_dir
in
[
"cif_result_press"
,
"cif_result_final"
,
"json_result_press"
,
"json_result_final"
,
"worker_results"
,
"log"
]:
dir_path
=
os
.
path
.
join
(
output_path
,
output_dir
)
ensure_directory
(
dir_path
)
import
time
start_time
=
time
.
perf_counter
()
use_ordered_files
=
args
.
use_ordered_files
if
use_ordered_files
:
logging
.
info
(
f
"Use ordered files."
)
if
files
[
0
].
endswith
(
"cif"
):
files
=
sorted
(
files
,
key
=
count_atoms_cif
,
reverse
=
True
)
else
:
logging
.
error
(
f
"No support for the file type in
{
target_folder
}
."
)
end_time
=
time
.
perf_counter
()
logging
.
info
(
f
"atomic sorting time:
{
end_time
-
start_time
:.
4
f
}
seconds."
)
if
args
.
run_baseline
:
run_baseline
(
files
,
args
.
num_workers
,
devices
,
args
.
max_steps
,
args
.
filter1
,
args
.
filter2
,
args
.
skip_second_stage
,
args
.
scalar_pressure
,
args
.
optimizer1
,
args
.
optimizer2
,
output_path
=
output_path
)
else
:
scheduler
=
Scheduler
(
files
=
files
,
num_workers
=
args
.
num_workers
,
devices
=
devices
,
batch_size
=
args
.
batch_size
,
max_steps
=
args
.
max_steps
,
filter1
=
args
.
filter1
,
filter2
=
args
.
filter2
,
skip_second_stage
=
args
.
skip_second_stage
,
scalar_pressure
=
args
.
scalar_pressure
,
optimizer1
=
args
.
optimizer1
,
optimizer2
=
args
.
optimizer2
,
compile_mode
=
args
.
compile_mode
,
profile
=
args
.
profile
,
num_threads
=
args
.
num_threads
,
bind_cores
=
args
.
bind_cores
,
cueq
=
args
.
cueq
,
molecule_single
=
args
.
molecule_single
,
output_path
=
output_path
,
model
=
args
.
model
)
scheduler
.
run
()
logging
.
info
(
"Batch optimization completed."
)
mace-bench/setup.py
0 → 100644
View file @
fa84b16c
from setuptools import setup, find_packages

# Package metadata collected in one mapping, then handed to setup() in bulk.
_METADATA = {
    'name': 'BOMLIP-CSP',
    'version': '0.1',
    'author': 'Chengxi Zhao, Zhaojia Ma, Dingrui Fan',
    'author_email': 'chengxi_zhao@ustc.edu.cn, zhaojia_ma@foxmail.com',
    'description': 'Integrating machine learning interatomic potentials with batched optimization for crystal structure prediction',
    'url': 'https://github.com/pic-ai-robotic-chemistry/BOMLIP-CSP',
    'license': 'MIT',
    'classifiers': [
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.10',
        'Topic :: Scientific/Engineering :: Chemistry',
        'Topic :: Scientific/Engineering :: Physics',
    ],
    'python_requires': '>=3.10',
    # Source layout: importable packages live under src/.
    'package_dir': {'': 'src'},
    'packages': find_packages('src'),
}

setup(**_METADATA)
\ No newline at end of file
mace-bench/src/BOMLIP_CSP.egg-info/PKG-INFO
0 → 100644
View file @
fa84b16c
Metadata-Version: 2.4
Name: BOMLIP-CSP
Version: 0.1
Summary: Integrating machine learning interatomic potentials with batched optimization for crystal structure prediction
Home-page: https://github.com/pic-ai-robotic-chemistry/BOMLIP-CSP
Author: Chengxi Zhao, Zhaojia Ma, Dingrui Fan
Author-email: chengxi_zhao@ustc.edu.cn, zhaojia_ma@foxmail.com
License: MIT
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Scientific/Engineering :: Chemistry
Classifier: Topic :: Scientific/Engineering :: Physics
Requires-Python: >=3.10
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: home-page
Dynamic: license
Dynamic: requires-python
Dynamic: summary
mace-bench/src/BOMLIP_CSP.egg-info/SOURCES.txt
0 → 100644
View file @
fa84b16c
setup.py
src/BOMLIP_CSP.egg-info/PKG-INFO
src/BOMLIP_CSP.egg-info/SOURCES.txt
src/BOMLIP_CSP.egg-info/dependency_links.txt
src/BOMLIP_CSP.egg-info/top_level.txt
src/batchopt/__init__.py
src/batchopt/atoms_to_graphs.py
src/batchopt/baseline.py
src/batchopt/pbc_graph.py
src/batchopt/pbc_graph_legacy.py
src/batchopt/relaxengine.py
src/batchopt/utils.py
src/batchopt/extensions/__init__.py
src/batchopt/extensions/cuda_ops/__init__.py
src/batchopt/relaxation/__init__.py
src/batchopt/relaxation/ase_utils.py
src/batchopt/relaxation/optimizable.py
src/batchopt/relaxation/optimizers/__init__.py
src/batchopt/relaxation/optimizers/bfgs_torch.py
src/batchopt/relaxation/optimizers/bfgsfusedls.py
\ No newline at end of file
mace-bench/src/BOMLIP_CSP.egg-info/dependency_links.txt
0 → 100644
View file @
fa84b16c
mace-bench/src/BOMLIP_CSP.egg-info/top_level.txt
0 → 100644
View file @
fa84b16c
batchopt
mace-bench/src/batchopt/__init__.py
0 → 100644
View file @
fa84b16c
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
from
.relaxengine
import
Scheduler
,
Worker
from
.baseline
import
ensure_directory
,
run_baseline
from
.utils
import
count_atoms_cif
from
.pbc_graph
import
radius_graph_pbc_cuda
try
:
from
.
import
extensions
_extensions_available
=
True
except
ImportError
as
e
:
import
warnings
warnings
.
warn
(
f
"Extensions not available:
{
e
}
. Falling back to PyTorch implementations."
)
extensions
=
None
_extensions_available
=
False
__all__
=
[
"Scheduler"
,
"ensure_directory"
,
"run_baseline"
,
"count_atoms_cif"
,
"Worker"
,
"extensions"
,
"radius_graph_pbc_cuda"
,
]
\ No newline at end of file
mace-bench/src/batchopt/__pycache__/__init__.cpython-310.pyc
0 → 100644
View file @
fa84b16c
File added
mace-bench/src/batchopt/__pycache__/atoms_to_graphs.cpython-310.pyc
0 → 100644
View file @
fa84b16c
File added
mace-bench/src/batchopt/__pycache__/baseline.cpython-310.pyc
0 → 100644
View file @
fa84b16c
File added
mace-bench/src/batchopt/__pycache__/pbc_graph.cpython-310.pyc
0 → 100644
View file @
fa84b16c
File added
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment