Unverified Commit fa84b16c authored by zcxzcx1's avatar zcxzcx1 Committed by GitHub
Browse files

Add files via upload

parent 09624897
#!/bin/bash
# Environment setup for the SevenNet backend.
# The PyG extension wheels are pinned builds that must match torch 2.4.0 + CUDA 12.1 exactly.
pip install torch_scatter==2.1.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_sparse==0.6.18+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_spline_conv==1.2.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install -r requirements.txt
# Editable installs: the bundled SevenNet fork, then this project itself.
pip install -e 3rdparty/SevenNet
pip install -e .
pip install ase==3.23.0
pip install ninja
pip install rdkit==2024.3.5
\ No newline at end of file
#!/bin/bash
# Environment setup for the MACE backend.
# The PyG extension wheels are pinned builds that must match torch 2.4.0 + CUDA 12.1 exactly.
pip install torch_scatter==2.1.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_sparse==0.6.18+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install torch_spline_conv==1.2.2+pt24cu121 -f https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
pip install -r requirements.txt
# Editable installs: the bundled mace fork, then this project itself.
pip install -e 3rdparty/mace
pip install -e .
pip install e3nn==0.4.4
pip install ase==3.23.0
pip install ninja
# for python_CSP
pip install rdkit-pypi
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import os
# Cap BLAS/OpenMP thread pools to a single thread per process *before*
# numpy/torch are imported, so each joblib worker stays single-threaded.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
import sys
# sys.path.append('/home/jiangj1group/zcxzcx1/volatile/mace')
from mace.calculators import mace_off, mace_mp
from ase.io import read, write
from ase.optimize import BFGS,LBFGS,FIRE,GPMin,MDMin, QuasiNewton
from ase.filters import UnitCellFilter, ExpCellFilter, FrechetCellFilter
import re
import io
from contextlib import redirect_stdout
import os
import pandas as pd
from joblib import Parallel, delayed
import json
import torch
import numpy as np
import random
import argparse
import time
import pathlib
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
#####################################################################
# Seed every RNG source so repeated runs start from the same state.
# NOTE(review): setting PYTHONHASHSEED here only affects child processes;
# str hashing of the current interpreter is fixed at startup -- confirm intent.
os.environ['PYTHONHASHSEED'] = '1'
torch.manual_seed(1)
np.random.seed(1)
random.seed(1)
torch.cuda.manual_seed(1)
torch.cuda.manual_seed_all(1)
#####################################################################
# Former hard-coded configuration, now supplied via argparse in __main__:
# n_jobs=32
# # n_jobs=2
# path = './'
# molecule_single = 64
# target_folder = "/data_raw/"
#####################################################################
def calculate_density(crystal):
    """Return the mass density of *crystal* in g/cm^3.

    The total atomic mass (amu) is converted to grams via Avogadro's
    number, and the cell volume (Å^3) to cm^3 (1 Å^3 = 1e-24 cm^3).
    """
    AVOGADRO = 6.022140857 * 10**23   # atoms per mole
    ANG3_PER_CM3 = 10**-24            # 1 Å^3 expressed in cm^3
    mass_amu = sum(crystal.get_masses())
    volume_cm3 = crystal.get_volume() * ANG3_PER_CM3
    return mass_amu / volume_cm3 / AVOGADRO
def run_calculation_one(path,file,target_folder,molecule_single,idx):
    """Run the two-stage MACE geometry optimization for one structure.

    Stage 1 relaxes atoms + cell under a small scalar pressure and writes
    cif_result_press/<name>_press.cif; stage 2 re-relaxes that result with
    no applied pressure and writes cif_result_final/<name>_opt.cif.  A dict
    with density, per-molecule energy, step counts and timings is returned
    and also dumped to json_result/<name>.json.

    Reads many module-level globals set in __main__ (reproduce, multithread,
    n_gpus, gpu_offset, model_path, cueq, filter1/filter2,
    optimizer_type1/optimizer_type2, use_cuda_eigh, use_nsys,
    use_torch_profiler, max_steps).

    path            -- base directory for inputs and outputs
    file            -- structure file name; extension assumed 4 chars (".cif")
    target_folder   -- sub-directory (relative to path) with input structures
    molecule_single -- atoms per molecule; if < 0, parsed from the file name
    idx             -- worker index used to round-robin over available GPUs
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['MKL_NUM_THREADS'] = '1'
    # os.environ['OPENBLAS_NUM_THREADS'] = '1'
    if reproduce:
        print("Reproducing deterministic results.")
        torch.use_deterministic_algorithms(True)
        # Required by cuBLAS for deterministic matmul results.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
        np.set_printoptions(precision=17, suppress=False)
        torch.set_printoptions(precision=17, sci_mode=False, linewidth=200)
    if multithread and (not reproduce):
        print("Using OMP and MKL multithreads will introduce non-deterministic results.")
    else:
        # Keep each worker single-threaded for reproducibility.
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OPENBLAS_NUM_THREADS'] = '1'
    # Round-robin this worker's structure onto one visible GPU.
    os.environ["CUDA_VISIBLE_DEVICES"]=str((idx%n_gpus)+gpu_offset)
    # Capture optimizer stdout: the final energy is parsed from it below.
    with io.StringIO() as buf, redirect_stdout(buf):
        crystal = read(path+target_folder+file)
        if molecule_single < 0:
            # File names are assumed to end in "_<atoms_per_molecule>.<ext>".
            molecule_single = int(file.split('_')[-1].split('.')[0])
        molecule_count = len(crystal.get_atomic_numbers())/molecule_single
        calc = mace_off(model=model_path,dispersion=True, device='cuda', enable_cueq=cueq)
        crystal.calc = calc
        # ---- stage 1: cell relaxation under scalar_pressure=0.0006 ----
        if filter1 == "UnitCellFilter":
            sf = UnitCellFilter(crystal,scalar_pressure=0.0006)
        elif filter1 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal,scalar_pressure=0.0006)
        else:
            raise ValueError(f"Unrecognized filter type '{filter1}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type1 == "BFGS":
            if use_cuda_eigh:
                # NOTE(review): use_cuda_eigh is not a stock ASE BFGS argument;
                # presumably provided by the bundled ASE fork -- confirm.
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type1 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type1 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(f"Unrecognized optimizer type '{optimizer_type1}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_nsys or use_torch_profiler : # warmup for profiling
            optimizer.run(fmax=0.01,steps=100)
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                    torch.profiler.ProfilerActivity.CUDA
                ],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True
            )
            profiler.start()
        start_time1 = time.time()
        optimizer.run(fmax=0.01,steps=max_steps)
        end_time1 = time.time()
        if use_torch_profiler:
            profiler.stop()
        crystal.write(path+'cif_result_press/'+file[:-4]+"_press.cif")
        output_1 = buf.getvalue()
        # step_used_1 = float(re.split("\\s+", output_1.split('\n')[-2])[1][:])
        step_used_1 = optimizer.nsteps
        if use_nsys or use_torch_profiler :
            # Subtract the 100 warmup steps performed before the timed run.
            step_used_1 = step_used_1 - 100
        total_time1 = end_time1 - start_time1
        avg_time1 = total_time1 / step_used_1 if step_used_1 != 0 else 0
        # ---- stage 2: re-relax the stage-1 structure without pressure ----
        crystal = read(path+'cif_result_press/'+file[:-4]+"_press.cif")
        crystal.calc = calc
        if filter2 == "UnitCellFilter":
            sf = UnitCellFilter(crystal)
        elif filter2 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal)
        else:
            raise ValueError(f"Unrecognized filter type '{filter2}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type2 == "BFGS":
            if use_cuda_eigh:
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type2 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type2 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(f"Unrecognized optimizer type '{optimizer_type2}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                    torch.profiler.ProfilerActivity.CUDA
                ],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True
            )
            profiler.start()
        start_time2 = time.time()
        optimizer.run(fmax=0.01,steps=max_steps)
        end_time2 = time.time()
        if use_torch_profiler:
            profiler.stop()
        density = calculate_density(crystal)
        crystal.write(path+'cif_result_final/'+file[:-4]+"_opt.cif")
        output_2 = buf.getvalue()
        # Parse the final energy from the captured ASE optimizer log line.
        energy = float(re.split("\\s+", output_2.split('\n')[-2])[3][:])
        # step_used_2 = float(re.split("\\s+", output_2.split('\n')[-2])[1][:])
        step_used_2 = optimizer.nsteps
        # eV per cell -> kJ/mol per molecule (1 eV = 96.485 kJ/mol).
        energy_per_mol = energy / molecule_count * 96.485
        total_time2 = end_time2 - start_time2
        avg_time2 = total_time2 / step_used_2 if step_used_2 != 0 else 0
        new_row = {
            'name': file[:-4], 'density': density, 'energy_kj': energy_per_mol,
            'step_used_1': step_used_1, 'step_used_2': step_used_2,
            'total_time1_s': total_time1, 'avg_time1_s': avg_time1,
            'total_time2_s': total_time2, 'avg_time2_s': avg_time2
        }
        print(f'output_2: {output_2}')
        with open(path+'json_result/'+file[:-4]+".json", 'w') as json_file:
            json.dump(new_row, json_file, indent=4)
        return new_row
def already_have_calculation_one(path, file, target_folder, molecule_single, idx):
    """Load a previously computed result row from json_result/<name>.json.

    The signature mirrors run_calculation_one so both can be dispatched the
    same way; target_folder, molecule_single and idx are unused here.
    Assumes *file* has a 4-character extension (e.g. ".cif").
    """
    logging.info(f"reading on structure {file}")
    print(f"reading on structure {file}")
    # Fix: the original reused the name `file` for the open handle, shadowing
    # the parameter; use a distinct name for the handle instead.
    with open(path + 'json_result/' + file[:-4] + ".json", 'r') as fh:
        old_row = json.load(fh)
    return old_row
def run():
    """Walk path+target_folder and build result.csv.

    Structures that already have a json_result/<name>.json are only re-read
    (cheap), everything else is optimized via run_calculation_one; both sets
    of rows are concatenated into one DataFrame and written to result.csv.
    Uses module-level globals: path, target_folder, n_jobs, molecule_single.
    """
    df = pd.DataFrame(columns=['name', 'density', 'energy_kj', 'step_used_1', 'step_used_2', 'total_time1_s', 'avg_time1_s', 'total_time2_s', 'avg_time2_s'])
    for root, dirs, files in os.walk(path + target_folder):
        # Cached rows: JSON already exists, just load it in parallel.
        old_row = Parallel(n_jobs=n_jobs)(
            delayed(already_have_calculation_one)(path, file, target_folder, molecule_single, idx) for idx, file in
            enumerate(files) if os.path.exists(path + 'json_result/' + file[:-4] + ".json"))
        # Remaining structures still need the full optimization.
        filtered_files = [file for file in files if not os.path.exists(path + 'json_result/' + file[:-4] + ".json")]
        new_row = Parallel(n_jobs=n_jobs)(
            delayed(run_calculation_one)(path, file, target_folder, molecule_single, idx) for idx, file in
            enumerate(filtered_files))
        # show the length of new_row
        print(f'new_row length: {len(new_row)}')
        print(f'root: {root}\ndirs: {dirs}\nfiles: {files}')
        for row in new_row:
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True, axis=0)
        for row in old_row:
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True, axis=0)
    df.to_csv(path + '/result.csv')
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Run parallel calculations on molecular crystals.")
    parser.add_argument("--n_jobs", type=int, default=32, help="Number of parallel jobs to run (default: 32)")
    parser.add_argument("--target_folder", type=str, required=True, help="Path to the target folder containing input files")
    parser.add_argument("--path", type=str, default='./', help="Base path for the project (default: './')")
    parser.add_argument("--molecule_single", type=int, default=-1, help="Number of atoms per molecule (default: 64)")
    parser.add_argument("--n_gpus", type=int, default=2, help="Number of GPUs to use (default: 2)")
    parser.add_argument("--cueq", action='store_true', help="Whether to use cuEquivariance Library (default: False)")
    parser.add_argument("--max_steps", type=int, default=3000, help="Number of max steps to run the optimization (default: 3000)")
    parser.add_argument("--use_torch_profiler", action='store_true', help="Whether to use torch profiler (default: False)")
    parser.add_argument("--use_nsys", action='store_true', help="Whether to use nsys profiler (default: False)")
    parser.add_argument("--model", type=str, default="small", help="Model to use for the calculation (default: 'small')")
    parser.add_argument("--optimizer", type=str, default="BFGS", help="Optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--use_cuda_eigh", action='store_true', help="Whether to use CUDA for eigh (default: False)")
    parser.add_argument("--gpu_offset", type=int, default=0, help="GPU offset to use for the calculation (default: 0)")
    parser.add_argument("--multithread", action='store_true', help="Whether to use multithread (default: False)")
    parser.add_argument("--reproduce", action='store_true', help="Whether to reproduce deterministic results (default: False)")
    parser.add_argument("--filter1", type=str, default="UnitCellFilter", help="1st filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--filter2", type=str, default="UnitCellFilter", help="2nd filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--optimizer1", type=str, default="BFGS", help="1st optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--optimizer2", type=str, default="BFGS", help="2nd optimizer to use for the calculation (default: 'BFGS')")
    args = parser.parse_args()
    # Exported as module-level globals; the worker functions read them directly.
    n_jobs = args.n_jobs
    target_folder = args.target_folder
    path = args.path
    molecule_single = args.molecule_single
    n_gpus = args.n_gpus
    cueq = args.cueq
    max_steps = args.max_steps
    use_torch_profiler = args.use_torch_profiler
    use_nsys = args.use_nsys
    model_path = args.model
    # NOTE(review): optimizer_type (--optimizer) is never read; only
    # optimizer_type1/optimizer_type2 are used downstream -- confirm.
    optimizer_type = args.optimizer
    use_cuda_eigh = args.use_cuda_eigh
    gpu_offset = args.gpu_offset
    multithread = args.multithread
    reproduce = args.reproduce
    filter1 = args.filter1
    filter2 = args.filter2
    optimizer_type1 = args.optimizer1
    optimizer_type2 = args.optimizer2
    # NOTE(review): bare except hides any mkdir failure, not just
    # "already exists"; os.makedirs(..., exist_ok=True) would be safer.
    try:
        os.mkdir("./cif_result_press")
        os.mkdir("./cif_result_final")
    except:
        pass
    try:
        os.mkdir("./json_result")
    except:
        pass
    start_time_all = time.time()
    # Retry the whole run up to 100 times; finished structures are skipped on
    # retry because their JSON results already exist on disk.
    iter = 0
    while iter < 100:
        iter += 1
        try:
            run()
            break
        except Exception as e:
            print(f"Error occurred: {e}")
            print("Retrying...")
            time.sleep(10)
    end_time_all = time.time()
    total_time_all = end_time_all - start_time_all
    print('dataset,total_time_all_s,attempts')
    print(f"{pathlib.Path(target_folder).name},{total_time_all},{iter}")
    with open(path + 'timing.csv', 'w') as f:
        f.write('dataset,total_time_all_s,attempts\n')
        f.write(f"{pathlib.Path(target_folder).name},{total_time_all},{iter}\n")
\ No newline at end of file
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import sys
# sys.path.append('/home/jiangj1group/zcxzcx1/volatile/mace')
from mace.calculators import mace_off, mace_mp
from ase.io import read, write
from ase.optimize import BFGS,LBFGS,FIRE,GPMin,MDMin, QuasiNewton
from ase.filters import UnitCellFilter, ExpCellFilter, FrechetCellFilter
import re
import io
from contextlib import redirect_stdout
import os
import pandas as pd
from joblib import Parallel, delayed
import json
import torch
import numpy as np
import random
import argparse
import time
import pathlib
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
#####################################################################
# Seed every RNG source so repeated runs start from the same state.
# NOTE(review): setting PYTHONHASHSEED here only affects child processes;
# str hashing of the current interpreter is fixed at startup -- confirm intent.
os.environ['PYTHONHASHSEED'] = '1'
torch.manual_seed(1)
np.random.seed(1)
random.seed(1)
torch.cuda.manual_seed(1)
torch.cuda.manual_seed_all(1)
#####################################################################
# Former hard-coded configuration, now supplied via argparse in __main__:
# n_jobs=32
# # n_jobs=2
# path = './'
# molecule_single = 64
# target_folder = "/data_raw/"
#####################################################################
def calculate_density(crystal):
    """Return the mass density of *crystal* in g/cm^3.

    The total atomic mass (amu) is converted to grams via Avogadro's
    number, and the cell volume (Å^3) to cm^3 (1 Å^3 = 1e-24 cm^3).
    """
    AVOGADRO = 6.022140857 * 10**23   # atoms per mole
    ANG3_PER_CM3 = 10**-24            # 1 Å^3 expressed in cm^3
    mass_amu = sum(crystal.get_masses())
    volume_cm3 = crystal.get_volume() * ANG3_PER_CM3
    return mass_amu / volume_cm3 / AVOGADRO
def run_calculation_one(path,file,target_folder,molecule_single,idx):
    """Run the two-stage MACE geometry optimization for one structure.

    Stage 1 relaxes atoms + cell under a small scalar pressure and writes
    cif_result_press/<name>_press.cif; stage 2 re-relaxes that result with
    no applied pressure and writes cif_result_final/<name>_opt.cif.  A dict
    with density, per-molecule energy, step counts and timings is returned
    and also dumped to json_result/<name>.json.

    Reads many module-level globals set in __main__ (reproduce, multithread,
    n_gpus, gpu_offset, model_path, cueq, filter1/filter2,
    optimizer_type1/optimizer_type2, use_cuda_eigh, use_nsys,
    use_torch_profiler, max_steps).

    path            -- base directory for inputs and outputs
    file            -- structure file name; extension assumed 4 chars (".cif")
    target_folder   -- sub-directory (relative to path) with input structures
    molecule_single -- atoms per molecule; if < 0, parsed from the file name
    idx             -- worker index used to round-robin over available GPUs
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['MKL_NUM_THREADS'] = '1'
    # os.environ['OPENBLAS_NUM_THREADS'] = '1'
    if reproduce:
        print("Reproducing deterministic results.")
        torch.use_deterministic_algorithms(True)
        # Required by cuBLAS for deterministic matmul results.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
        np.set_printoptions(precision=17, suppress=False)
        torch.set_printoptions(precision=17, sci_mode=False, linewidth=200)
    if multithread and (not reproduce):
        print("Using OMP and MKL multithreads will introduce non-deterministic results.")
    else:
        # Keep each worker single-threaded for reproducibility.
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OPENBLAS_NUM_THREADS'] = '1'
    # Round-robin this worker's structure onto one visible GPU.
    os.environ["CUDA_VISIBLE_DEVICES"]=str((idx%n_gpus)+gpu_offset)
    # Capture optimizer stdout: the final energy is parsed from it below.
    with io.StringIO() as buf, redirect_stdout(buf):
        crystal = read(path+target_folder+file)
        if molecule_single < 0:
            # File names are assumed to end in "_<atoms_per_molecule>.<ext>".
            molecule_single = int(file.split('_')[-1].split('.')[0])
        molecule_count = len(crystal.get_atomic_numbers())/molecule_single
        calc = mace_off(model=model_path,dispersion=True, device='cuda', enable_cueq=cueq)
        crystal.calc = calc
        # ---- stage 1: cell relaxation under scalar_pressure=0.0006 ----
        if filter1 == "UnitCellFilter":
            sf = UnitCellFilter(crystal,scalar_pressure=0.0006)
        elif filter1 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal,scalar_pressure=0.0006)
        else:
            raise ValueError(f"Unrecognized filter type '{filter1}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type1 == "BFGS":
            if use_cuda_eigh:
                # NOTE(review): use_cuda_eigh is not a stock ASE BFGS argument;
                # presumably provided by the bundled ASE fork -- confirm.
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type1 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type1 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(f"Unrecognized optimizer type '{optimizer_type1}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_nsys or use_torch_profiler : # warmup for profiling
            optimizer.run(fmax=0.01,steps=100)
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                    torch.profiler.ProfilerActivity.CUDA
                ],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True
            )
            profiler.start()
        start_time1 = time.time()
        optimizer.run(fmax=0.01,steps=max_steps)
        end_time1 = time.time()
        if use_torch_profiler:
            profiler.stop()
        crystal.write(path+'cif_result_press/'+file[:-4]+"_press.cif")
        output_1 = buf.getvalue()
        # step_used_1 = float(re.split("\\s+", output_1.split('\n')[-2])[1][:])
        step_used_1 = optimizer.nsteps
        if use_nsys or use_torch_profiler :
            # Subtract the 100 warmup steps performed before the timed run.
            step_used_1 = step_used_1 - 100
        total_time1 = end_time1 - start_time1
        avg_time1 = total_time1 / step_used_1 if step_used_1 != 0 else 0
        # ---- stage 2: re-relax the stage-1 structure without pressure ----
        crystal = read(path+'cif_result_press/'+file[:-4]+"_press.cif")
        crystal.calc = calc
        if filter2 == "UnitCellFilter":
            sf = UnitCellFilter(crystal)
        elif filter2 == "FrechetCellFilter":
            sf = FrechetCellFilter(crystal)
        else:
            raise ValueError(f"Unrecognized filter type '{filter2}'. "
                "Supported types are 'UnitCellFilter' and 'FrechetCellFilter'.")
        if optimizer_type2 == "BFGS":
            if use_cuda_eigh:
                optimizer = BFGS(sf, use_cuda_eigh=True)
            else:
                optimizer = BFGS(sf)
        elif optimizer_type2 == "LBFGS":
            optimizer = LBFGS(sf)
        elif optimizer_type2 == "QuasiNewton":
            optimizer = QuasiNewton(sf)
        else:
            raise ValueError(f"Unrecognized optimizer type '{optimizer_type2}'. "
                "Supported types are 'BFGS' and 'LBFGS'.")
        if use_torch_profiler:
            profiler = torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                    torch.profiler.ProfilerActivity.CUDA
                ],
                # schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
                on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
                with_stack=True
            )
            profiler.start()
        start_time2 = time.time()
        optimizer.run(fmax=0.01,steps=max_steps)
        end_time2 = time.time()
        if use_torch_profiler:
            profiler.stop()
        density = calculate_density(crystal)
        crystal.write(path+'cif_result_final/'+file[:-4]+"_opt.cif")
        output_2 = buf.getvalue()
        # Parse the final energy from the captured ASE optimizer log line.
        energy = float(re.split("\\s+", output_2.split('\n')[-2])[3][:])
        # step_used_2 = float(re.split("\\s+", output_2.split('\n')[-2])[1][:])
        step_used_2 = optimizer.nsteps
        # eV per cell -> kJ/mol per molecule (1 eV = 96.485 kJ/mol).
        energy_per_mol = energy / molecule_count * 96.485
        total_time2 = end_time2 - start_time2
        avg_time2 = total_time2 / step_used_2 if step_used_2 != 0 else 0
        new_row = {
            'name': file[:-4], 'density': density, 'energy_kj': energy_per_mol,
            'step_used_1': step_used_1, 'step_used_2': step_used_2,
            'total_time1_s': total_time1, 'avg_time1_s': avg_time1,
            'total_time2_s': total_time2, 'avg_time2_s': avg_time2
        }
        print(f'output_2: {output_2}')
        with open(path+'json_result/'+file[:-4]+".json", 'w') as json_file:
            json.dump(new_row, json_file, indent=4)
        return new_row
def already_have_calculation_one(path, file, target_folder, molecule_single, idx):
    """Load a previously computed result row from json_result/<name>.json.

    The signature mirrors run_calculation_one so both can be dispatched the
    same way; target_folder, molecule_single and idx are unused here.
    Assumes *file* has a 4-character extension (e.g. ".cif").
    """
    logging.info(f"reading on structure {file}")
    print(f"reading on structure {file}")
    # Fix: the original reused the name `file` for the open handle, shadowing
    # the parameter; use a distinct name for the handle instead.
    with open(path + 'json_result/' + file[:-4] + ".json", 'r') as fh:
        old_row = json.load(fh)
    return old_row
def run():
    """Walk path+target_folder and build result.csv.

    Structures that already have a json_result/<name>.json are only re-read
    (cheap), everything else is optimized via run_calculation_one; both sets
    of rows are concatenated into one DataFrame and written to result.csv.
    Uses module-level globals: path, target_folder, n_jobs, molecule_single.
    """
    df = pd.DataFrame(columns=['name', 'density', 'energy_kj', 'step_used_1', 'step_used_2', 'total_time1_s', 'avg_time1_s', 'total_time2_s', 'avg_time2_s'])
    for root, dirs, files in os.walk(path + target_folder):
        # Cached rows: JSON already exists, just load it in parallel.
        old_row = Parallel(n_jobs=n_jobs)(
            delayed(already_have_calculation_one)(path, file, target_folder, molecule_single, idx) for idx, file in
            enumerate(files) if os.path.exists(path + 'json_result/' + file[:-4] + ".json"))
        # Remaining structures still need the full optimization.
        filtered_files = [file for file in files if not os.path.exists(path + 'json_result/' + file[:-4] + ".json")]
        new_row = Parallel(n_jobs=n_jobs)(
            delayed(run_calculation_one)(path, file, target_folder, molecule_single, idx) for idx, file in
            enumerate(filtered_files))
        # show the length of new_row
        print(f'new_row length: {len(new_row)}')
        print(f'root: {root}\ndirs: {dirs}\nfiles: {files}')
        for row in new_row:
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True, axis=0)
        for row in old_row:
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True, axis=0)
    df.to_csv(path + '/result.csv')
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Run parallel calculations on molecular crystals.")
    parser.add_argument("--n_jobs", type=int, default=32, help="Number of parallel jobs to run (default: 32)")
    parser.add_argument("--target_folder", type=str, required=True, help="Path to the target folder containing input files")
    parser.add_argument("--path", type=str, default='./', help="Base path for the project (default: './')")
    parser.add_argument("--molecule_single", type=int, default=-1, help="Number of atoms per molecule (default: 64)")
    parser.add_argument("--n_gpus", type=int, default=2, help="Number of GPUs to use (default: 2)")
    parser.add_argument("--cueq", action='store_true', help="Whether to use cuEquivariance Library (default: False)")
    parser.add_argument("--max_steps", type=int, default=3000, help="Number of max steps to run the optimization (default: 3000)")
    parser.add_argument("--use_torch_profiler", action='store_true', help="Whether to use torch profiler (default: False)")
    parser.add_argument("--use_nsys", action='store_true', help="Whether to use nsys profiler (default: False)")
    parser.add_argument("--model", type=str, default="small", help="Model to use for the calculation (default: 'small')")
    parser.add_argument("--optimizer", type=str, default="BFGS", help="Optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--use_cuda_eigh", action='store_true', help="Whether to use CUDA for eigh (default: False)")
    parser.add_argument("--gpu_offset", type=int, default=0, help="GPU offset to use for the calculation (default: 0)")
    parser.add_argument("--multithread", action='store_true', help="Whether to use multithread (default: False)")
    parser.add_argument("--reproduce", action='store_true', help="Whether to reproduce deterministic results (default: False)")
    parser.add_argument("--filter1", type=str, default="UnitCellFilter", help="1st filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--filter2", type=str, default="UnitCellFilter", help="2nd filter to use for the calculation (default: 'UnitCellFilter')")
    parser.add_argument("--optimizer1", type=str, default="BFGS", help="1st optimizer to use for the calculation (default: 'BFGS')")
    parser.add_argument("--optimizer2", type=str, default="BFGS", help="2nd optimizer to use for the calculation (default: 'BFGS')")
    args = parser.parse_args()
    # Exported as module-level globals; the worker functions read them directly.
    n_jobs = args.n_jobs
    target_folder = args.target_folder
    path = args.path
    molecule_single = args.molecule_single
    n_gpus = args.n_gpus
    cueq = args.cueq
    max_steps = args.max_steps
    use_torch_profiler = args.use_torch_profiler
    use_nsys = args.use_nsys
    model_path = args.model
    # NOTE(review): optimizer_type (--optimizer) is never read; only
    # optimizer_type1/optimizer_type2 are used downstream -- confirm.
    optimizer_type = args.optimizer
    use_cuda_eigh = args.use_cuda_eigh
    gpu_offset = args.gpu_offset
    multithread = args.multithread
    reproduce = args.reproduce
    filter1 = args.filter1
    filter2 = args.filter2
    optimizer_type1 = args.optimizer1
    optimizer_type2 = args.optimizer2
    # NOTE(review): bare except hides any mkdir failure, not just
    # "already exists"; os.makedirs(..., exist_ok=True) would be safer.
    try:
        os.mkdir("./cif_result_press")
        os.mkdir("./cif_result_final")
    except:
        pass
    try:
        os.mkdir("./json_result")
    except:
        pass
    start_time_all = time.time()
    # Retry the whole run up to 100 times; finished structures are skipped on
    # retry because their JSON results already exist on disk.
    iter = 0
    while iter < 100:
        iter += 1
        try:
            run()
            break
        except Exception as e:
            print(f"Error occurred: {e}")
            print("Retrying...")
            time.sleep(10)
    end_time_all = time.time()
    total_time_all = end_time_all - start_time_all
    print('dataset,total_time_all_s,attempts')
    print(f"{pathlib.Path(target_folder).name},{total_time_all},{iter}")
    with open(path + 'timing.csv', 'w') as f:
        f.write('dataset,total_time_all_s,attempts\n')
        f.write(f"{pathlib.Path(target_folder).name},{total_time_all},{iter}\n")
\ No newline at end of file
#!/bin/bash
# Baseline (non-batched) optimization of the perf_v2 dataset with mace_opt_new.py,
# 64 joblib workers round-robined over 4 GPUs.
python ../mace_opt_new.py --n_jobs 64 --molecule_single 46 \
--target_folder ../../data/perf_v2/ --model small --n_gpus 4 --gpu_offset 0 \
--optimizer1 QuasiNewton --filter1 UnitCellFilter --filter2 UnitCellFilter
\ No newline at end of file
#!/bin/bash
# Batched optimization of the perf_v2 dataset with mace_opt_batch.py.
# Removes previous result directories first so the run starts clean.
rm -r *_result_*
python ../../scripts/mace_opt_batch.py --target_folder "../../data/perf_v2" --molecule_single 46 --gpu_offset 0 --n_gpus 4 --num_workers 40 --batch_size 0 \
--max_steps 6000 --filter1 UnitCellFilter --filter2 UnitCellFilter --optimizer1 BFGSFusedLS --optimizer2 BFGS --num_threads 2 --cueq true --use_ordered_files true
\ No newline at end of file
#!/bin/bash
# Sweep the batched optimizer over three system sizes.
# Each entry is "<atoms per cell> <num_workers> <batch_size>".
top_dir=$(pwd)
natoms_nw_bs=(
"92 48 25"
"184 40 12"
"368 40 5"
)
for config in "${natoms_nw_bs[@]}"; do
read natoms nw bs <<< "$config"
# One working directory per configuration; all output lands there.
dir="$top_dir/subtest_BATCH_${natoms}_g4_j${nw}_bs${bs}_cueq_cupbc"
mkdir -p "$dir"
cd "$dir" || continue
pwd
python ../../scripts/mace_opt_batch.py \
--target_folder "../../data/perf_v2_sorted/perf_v2_${natoms}" \
--molecule_single 46 --gpu_offset 0 --n_gpus 4 --num_workers ${nw} --batch_size ${bs} \
--max_steps 6000 --filter1 UnitCellFilter --filter2 UnitCellFilter \
--optimizer1 BFGSFusedLS --optimizer2 BFGS --num_threads 2 \
--use_ordered_files true --cueq true > opt.log 2>&1
done
\ No newline at end of file
#!/bin/bash
# Sweep the baseline (non-batched) optimizer over three system sizes.
# Each entry is "<atoms per cell> <num_workers>".
top_dir=$(pwd)
natoms_nw_bs=(
"92 64"
"184 64"
"368 64"
)
for config in "${natoms_nw_bs[@]}"; do
read natoms nw <<< "$config"
# One working directory per configuration; all output lands there.
dir="$top_dir/subtest_BASE_${natoms}_g4_j${nw}"
mkdir -p "$dir"
cd "$dir" || continue
pwd
python ../mace_opt_new.py --n_jobs ${nw} --molecule_single 46 \
--target_folder ../../data/perf_v2_sorted/perf_v2_${natoms}/ --model small --n_gpus 4 \
--gpu_offset 0 --optimizer1 QuasiNewton --filter1 UnitCellFilter \
--filter2 UnitCellFilter --max_steps 3000 > opt.log 2>&1
done
\ No newline at end of file
--extra-index-url https://download.pytorch.org/whl/cu121
absl-py==2.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
# -e git+https://gitlab.com/ase/ase.git@72c50c76bac2396c7d58385b231c65bd07458279#egg=ase&subdirectory=../../../3rdparty/ase
async-timeout==5.0.1
attrs==24.3.0
certifi==2024.8.30
cfgv==3.4.0
charset-normalizer==3.4.0
click==8.1.8
cloudpickle==3.1.0
ConfigArgParse==1.7
contourpy==1.3.1
coverage==7.6.9
cuequivariance==0.4.0
cuequivariance-ops-torch-cu12==0.4.0
cuequivariance-ops-cu12==0.4.0
cuequivariance-torch==0.4.0
cycler==0.12.1
distlib==0.3.9
docker-pycreds==0.4.0
e3nn==0.4.4
exceptiongroup==1.2.2
# -e git+https://github.com/mazhaojia123/fairchem.git@f50db9d5b29debdfb265d9c3fad394f18e16cab8#egg=fairchem_core&subdirectory=../../../3rdparty/fairchem/packages/fairchem-core
filelock==3.13.1
fonttools==4.55.1
frozenlist==1.5.0
fsspec==2024.2.0
gitdb==4.0.11
GitPython==3.1.43
grpcio==1.68.1
h5py==3.12.1
hydra-core==1.3.2
identify==2.6.3
idna==3.10
iniconfig==2.0.0
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.7
latexcodec==3.0.0
lightning-utilities==0.11.9
llvmlite==0.43.0
lmdb==1.5.1
# -e git+https://github.com/mazhaojia123/mace.git@edd6b479f4974d0b8162712872ad2eed1aa2fb75#egg=mace_torch&subdirectory=../../../3rdparty/mace
Markdown==3.7
MarkupSafe==2.1.5
matplotlib==3.9.3
matscipy==1.1.1
monty==2024.10.21
mpmath==1.3.0
multidict==6.1.0
networkx==3.2.1
nodeenv==1.9.1
numba==0.60.0
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.1.105
nvidia-nvtx-cu12==12.1.105
omegaconf==2.3.0
opt-einsum-fx==0.1.4
opt_einsum==3.4.0
orjson==3.10.12
packaging==24.2
palettable==3.3.3
pandas==2.2.3
pillow==11.0.0
platformdirs==4.3.6
plotly==5.24.1
pluggy==1.5.0
pre_commit==4.0.1
prettytable==3.12.0
propcache==0.2.1
protobuf==5.29.2
psutil==6.1.1
pybtex==0.24.0
pydantic==2.10.4
pydantic_core==2.27.2
pymatgen==2024.11.13
pyparsing==3.2.0
pytest==8.3.4
pytest-cov==6.0.0
python-dateutil==2.9.0.post0
python-hostlist==2.0.0
pytz==2024.2
PyYAML==6.0.2
requests==2.32.3
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.12
ruff==0.5.1
scipy==1.14.1
sentry-sdk==2.19.2
setproctitle==1.3.4
six==1.16.0
smmap==5.0.1
spglib==2.5.0
submitit==1.5.2
sympy==1.13.1
syrupy==4.8.0
tabulate==0.9.0
tenacity==9.0.0
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tomli==2.2.1
torch==2.4.1+cu121
# ./torch-2.4.1+cu121-cp310-cp310-linux_x86_64.whl
torch-dftd==0.5.1
torch-ema==0.3
torch-geometric==2.6.1
# torch_scatter==2.1.2+pt24cu121
# torch_sparse==0.6.18+pt24cu121
# torch_spline_conv==1.2.2+pt24cu121
torchmetrics==1.6.0
tqdm==4.67.1
triton==3.0.0
typing_extensions==4.12.2
tzdata==2024.2
uncertainties==3.2.2
urllib3==2.2.3
virtualenv==20.28.0
wandb==0.19.1
wcwidth==0.2.13
Werkzeug==3.1.3
yarl==1.18.3
torch-tb-profiler==0.4.3
\ No newline at end of file
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import argparse


def _str2bool(value):
    """Parse a CLI boolean: accepts true/false, 1/0, yes/no (case-insensitive).

    argparse's bare ``type=bool`` is a trap: bool("false") is True, so every
    non-empty string -- including "false" -- enabled the flag.  This parser
    keeps the existing ``--flag true`` call sites working while making
    ``--flag false`` actually disable the option.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "1", "yes", "y"):
        return True
    if lowered in ("false", "0", "no", "n"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser(description="Run batch optimization on molecular crystals.")
parser.add_argument("--target_folder", type=str, required=True, help="Target folder containing crystal files")
parser.add_argument("--num_workers", type=int, default=4, help="Number of workers to distribute the files to")
parser.add_argument("--n_gpus", type=int, default=1, help="Number of GPUs to use for the optimization")
parser.add_argument("--gpu_offset", type=int, default=0, help="Offset for GPU numbering")
parser.add_argument("--batch_size", type=int, default=4, help="Number of files to process in a single batch")
# _str2bool (not bool) so that "--run_baseline false" actually disables it.
parser.add_argument("--run_baseline", type=_str2bool, default=False, help="Run baseline optimization using LBFGS from ase.optimize")
parser.add_argument("--max_steps", type=int, default=100, help="Number of max steps to run the optimization (default: 100)")
parser.add_argument("--filter1", type=str, default=None,
                    choices=[None, "UnitCellFilter"],
                    help="Type of cell filter to use in first optimization")
parser.add_argument("--filter2", type=str, default=None,
                    choices=[None, "UnitCellFilter"],
                    help="Type of cell filter to use in second optimization")
parser.add_argument("--optimizer1", type=str, default="LBFGS",
                    choices=["LBFGS", "QuasiNewton", "BFGS", "BFGSLineSearch", "BFGSFusedLS"],
                    help="First optimizer to use (default: LBFGS)")
parser.add_argument("--optimizer2", type=str, default="LBFGS",
                    choices=["LBFGS", "QuasiNewton", "BFGS", "BFGSLineSearch", "BFGSFusedLS"],
                    help="Second optimizer to use (default: LBFGS)")
parser.add_argument("--skip_second_stage", type=_str2bool, default=False, help="Skip the second optimization stage")
parser.add_argument("--scalar_pressure", type=float, default=0.0006,
                    help="Scalar pressure for cell optimization (default: 0.0006)")
parser.add_argument("--compile_mode", type=str, default=None,
                    choices=[None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"],
                    help="Compile mode for MACE calculator")
parser.add_argument("--profile", type=str, default="False",
                    help="Enable profiling. Set to 'True' for basic profiling or provide a JSON string with profiler config options for wait, warmup, active, and repeat")
parser.add_argument("--num_threads", type=int, default=16, help="Number of cpu threads per process to use while running the optimization")
parser.add_argument("--bind_cores", type=str, default=None,
                    help=("Specify a comma-separated list of core ranges (e.g., '0-15,16-31,...') for each worker. The number of ranges must equal --num_workers."))
parser.add_argument("--cueq", type=_str2bool, default=False, help="Whether to use cuEquivariance Library (default: False)")
parser.add_argument("--molecule_single", type=int, default=64, help="Number of atoms per molecule (default: 64)")
parser.add_argument("--output_path", type=str, default="./", help="Absolute path for output files")
parser.add_argument("--model", type=str, default="mace", choices=["mace", "chgnet", "sevennet"], help="Model to use for optimization")
parser.add_argument("--use_ordered_files", type=_str2bool, default=False,
                    help="Whether to sort files by atomic number in descending order before optimization")
args = parser.parse_args()
# Thread caps must be exported before numpy/torch are pulled in by batchopt.
os.environ['OMP_NUM_THREADS'] = str(args.num_threads)
os.environ['MKL_NUM_THREADS'] = str(args.num_threads)
import pathlib
import logging
from batchopt import Scheduler, ensure_directory, run_baseline, count_atoms_cif
logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s - %(process)d - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
    force=True
)
if __name__ == '__main__':
    # Collect input structures and the CUDA devices the workers will use.
    target_folder = pathlib.Path(args.target_folder)
    files = [str(file) for file in target_folder.glob("*.cif")]
    devices = [f"cuda:{i}" for i in range(args.gpu_offset, args.gpu_offset + args.n_gpus)]
    logging.info("Starting batch optimization.")
    logging.info(f"Use devices: {devices}")
    logging.info(f"files: {files}")
    # Normalize the output path to an absolute path and create the directory
    # layout expected by the workers.
    output_path = args.output_path
    if not os.path.isabs(output_path):
        output_path = os.path.abspath(output_path)
    logging.info(f"Output path: {output_path}")
    for output_dir in ["cif_result_press", "cif_result_final", "json_result_press", "json_result_final", "worker_results", "log"]:
        dir_path = os.path.join(output_path, output_dir)
        ensure_directory(dir_path)
    start_time = time.perf_counter()
    use_ordered_files = args.use_ordered_files
    if use_ordered_files:
        logging.info("Use ordered files.")
        # Guard against an empty folder: the original files[0] access raised
        # IndexError when no .cif files matched the glob.
        if not files:
            logging.warning(f"No .cif files found in {target_folder}; nothing to sort.")
        elif files[0].endswith("cif"):
            # Largest structures first so the slowest jobs start earliest.
            files = sorted(files, key=count_atoms_cif, reverse=True)
        else:
            logging.error(f"No support for the file type in {target_folder}.")
    end_time = time.perf_counter()
    logging.info(f"atomic sorting time: {end_time - start_time:.4f} seconds.")
    if args.run_baseline:
        # Baseline path: sequential/reference optimization without the batched scheduler.
        run_baseline(files, args.num_workers, devices, args.max_steps,
                     args.filter1, args.filter2, args.skip_second_stage,
                     args.scalar_pressure, args.optimizer1, args.optimizer2,
                     output_path=output_path)
    else:
        scheduler = Scheduler(files=files, num_workers=args.num_workers, devices=devices,
                              batch_size=args.batch_size, max_steps=args.max_steps,
                              filter1=args.filter1, filter2=args.filter2,
                              skip_second_stage=args.skip_second_stage,
                              scalar_pressure=args.scalar_pressure, optimizer1=args.optimizer1, optimizer2=args.optimizer2,
                              compile_mode=args.compile_mode, profile=args.profile,
                              num_threads=args.num_threads, bind_cores=args.bind_cores,
                              cueq=args.cueq, molecule_single=args.molecule_single,
                              output_path=output_path, model=args.model)
        scheduler.run()
    logging.info("Batch optimization completed.")
from setuptools import setup, find_packages

# Trove classifiers describing the project's maturity, audience and targets.
_CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.10',
    'Topic :: Scientific/Engineering :: Chemistry',
    'Topic :: Scientific/Engineering :: Physics',
]

# Packaging metadata for BOMLIP-CSP; code lives under src/ (src-layout).
setup(
    name='BOMLIP-CSP',
    version='0.1',
    author='Chengxi Zhao, Zhaojia Ma, Dingrui Fan',
    author_email='chengxi_zhao@ustc.edu.cn, zhaojia_ma@foxmail.com',
    description='Integrating machine learning interatomic potentials with batched optimization for crystal structure prediction',
    url='https://github.com/pic-ai-robotic-chemistry/BOMLIP-CSP',
    license='MIT',
    classifiers=_CLASSIFIERS,
    python_requires='>=3.10',
    package_dir={'': 'src'},
    packages=find_packages('src'),
)
\ No newline at end of file
Metadata-Version: 2.4
Name: BOMLIP-CSP
Version: 0.1
Summary: Integrating machine learning interatomic potentials with batched optimization for crystal structure prediction
Home-page: https://github.com/pic-ai-robotic-chemistry/BOMLIP-CSP
Author: Chengxi Zhao, Zhaojia Ma, Dingrui Fan
Author-email: chengxi_zhao@ustc.edu.cn, zhaojia_ma@foxmail.com
License: MIT
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Scientific/Engineering :: Chemistry
Classifier: Topic :: Scientific/Engineering :: Physics
Requires-Python: >=3.10
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: home-page
Dynamic: license
Dynamic: requires-python
Dynamic: summary
setup.py
src/BOMLIP_CSP.egg-info/PKG-INFO
src/BOMLIP_CSP.egg-info/SOURCES.txt
src/BOMLIP_CSP.egg-info/dependency_links.txt
src/BOMLIP_CSP.egg-info/top_level.txt
src/batchopt/__init__.py
src/batchopt/atoms_to_graphs.py
src/batchopt/baseline.py
src/batchopt/pbc_graph.py
src/batchopt/pbc_graph_legacy.py
src/batchopt/relaxengine.py
src/batchopt/utils.py
src/batchopt/extensions/__init__.py
src/batchopt/extensions/cuda_ops/__init__.py
src/batchopt/relaxation/__init__.py
src/batchopt/relaxation/ase_utils.py
src/batchopt/relaxation/optimizable.py
src/batchopt/relaxation/optimizers/__init__.py
src/batchopt/relaxation/optimizers/bfgs_torch.py
src/batchopt/relaxation/optimizers/bfgsfusedls.py
\ No newline at end of file
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
from .relaxengine import Scheduler, Worker
from .baseline import ensure_directory, run_baseline
from .utils import count_atoms_cif
from .pbc_graph import radius_graph_pbc_cuda
try:
from . import extensions
_extensions_available = True
except ImportError as e:
import warnings
warnings.warn(f"Extensions not available: {e}. Falling back to PyTorch implementations.")
extensions = None
_extensions_available = False
__all__ = [
"Scheduler",
"ensure_directory",
"run_baseline",
"count_atoms_cif",
"Worker",
"extensions",
"radius_graph_pbc_cuda",
]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment