Commit fa84b16c authored by zcxzcx1, committed by GitHub (parent 09624897)

Add files via upload
"""
Copyright (c) 2025 Ma Zhaojia
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
from __future__ import annotations
import logging
import torch
from torch_scatter import scatter
from ..optimizable import OptimizableBatch
class BFGS:
def __init__(
self,
optimizable_batch: OptimizableBatch,
maxstep: float = 0.2,
alpha: float = 70.0,
        early_stop: bool = False,
) -> None:
"""
Args:
"""
self.optimizable = optimizable_batch
self.maxstep = maxstep
self.alpha = alpha
self.trajectories = None
        self.device = self.optimizable.device
self.fmax = None
self.steps = None
self.initialize()
self.early_stop = early_stop
def initialize(self):
        # Initial Hessian: one (3N x 3N) block per structure, H0 = alpha * I.
self.H0 = [
torch.eye(3 * size, device=self.optimizable.device, dtype=torch.float64) * self.alpha
for size in self.optimizable.elem_per_group
]
self.H = [None] * self.optimizable.batch_size
self.pos0 = torch.zeros_like(self.optimizable.get_positions().reshape(-1), device=self.device, dtype=torch.float64)
self.forces0 = torch.zeros_like(self.pos0, device=self.device, dtype=torch.float64)
def restart_from_earlystop(self, restart_indices, old_batch_indices):
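        """Carry Hessians and position/force history into a batch that was
        rebuilt after an early stop.

        Args:
            restart_indices: for each structure in the new batch, its index
                in the old batch.
            old_batch_indices: per-atom structure indices of the old batch.
        """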
H_new = []
pos0_new = torch.zeros_like(self.optimizable.get_positions().reshape(-1), device=self.device, dtype=torch.float64)
forces0_new = torch.zeros_like(pos0_new, device=self.device, dtype=torch.float64)
        # Carry over Hessians and history for structures kept from the old batch.
        for i, idx in enumerate(restart_indices):
            mask_old = idx == old_batch_indices.repeat_interleave(3)
            mask = i == self.optimizable.batch_indices.repeat_interleave(3)
H_new.append(self.H[idx])
pos0_new[mask] = self.pos0[mask_old]
forces0_new[mask] = self.forces0[mask_old]
        # Pad with fresh (None) Hessians for structures new to the batch.
for i in range(len(H_new), self.optimizable.batch_size):
H_new.append(None)
self.H = H_new
self.pos0 = pos0_new
self.forces0 = forces0_new
def run(self, fmax, maxstep, is_restart_earlystop=False, restart_indices=None, old_batch_indices=None):
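        """Relax the batch until it converges or `maxstep` iterations elapse.

        Args:
            fmax: convergence threshold (eV/Å) on the per-structure maximum force.
            maxstep: maximum number of BFGS iterations (not to be confused with
                self.maxstep, the trust-radius step length).
            is_restart_earlystop: if True, carry state over from a previous
                early-stopped run via `restart_from_earlystop`.
            restart_indices: mapping from new batch slots to old batch indices.
            old_batch_indices: per-atom structure indices of the old batch.

        Returns:
            The converged-structure indices when early stopping is enabled,
            otherwise the batch-wide convergence result.
        """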
logging.info("Enter bfgs's main program.")
self.fmax = fmax
self.max_iter = maxstep
if is_restart_earlystop:
self.restart_from_earlystop(restart_indices, old_batch_indices)
iteration = 0
max_forces = self.optimizable.get_max_forces(apply_constraint=True)
logging.info("Step Fmax(eV/A)")
while iteration < self.max_iter and not self.optimizable.converged(
forces=None, fmax=self.fmax, max_forces=max_forces, f_upper_limit=1e25,
):
if self.early_stop and iteration > 0:
converge_indices = self.optimizable.converge_indices_list
if len(converge_indices) > 0:
logging.info(f"Early stopping at iteration {iteration}")
break
logging.info(
f"{iteration} " + " ".join(f"{x:18.15g}" for x in max_forces.tolist())
)
self.step()
max_forces = self.optimizable.get_max_forces(apply_constraint=True)
iteration += 1
logging.info(
f"{iteration} " + " ".join(f"{x:18.15g}" for x in max_forces.tolist())
)
# GPU memory usage as per nvidia-smi seems to gradually build up as
# batches are processed. This releases unoccupied cached memory.
torch.cuda.empty_cache()
# set predicted values to batch
for name, value in self.optimizable.results.items():
setattr(self.optimizable.batch, name, value)
self.nsteps = iteration
if self.early_stop:
converge_indices_list = self.optimizable.converge_indices_list
return converge_indices_list
else:
return self.optimizable.converged(
forces=None, fmax=self.fmax, max_forces=max_forces
)
def step(self):
forces = self.optimizable.get_forces(apply_constraint=True).to(
dtype=torch.float64
)
pos = self.optimizable.get_positions().to(dtype=torch.float64)
dpos, steplengths = self.prepare_step(pos, forces)
dpos = self.determine_step(dpos, steplengths)
        self.optimizable.set_positions(pos + dpos)
def prepare_step(self, pos, forces):
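        """Compute a quasi-Newton step for every structure in the batch.

        Each Hessian block is diagonalized as H = V diag(omega) V^T and the
        step is taken as V diag(1/|omega|) V^T f; using |omega| keeps the
        direction downhill even when H has negative eigenvalues.
        """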
forces = forces.reshape(-1)
pos = pos.view(-1)
self.update(pos, forces, self.pos0, self.forces0)
        cur_indices = self.optimizable.batch_indices.repeat_interleave(3)
        # Pre-allocate the per-structure result list.
        dpos_list = [None] * len(self.H)
        # Split off the work: create a CUDA stream only for the Hessian
        # blocks that actually need a step (per update_mask).
        calc_indices = [i for i, need_update in enumerate(self.optimizable.update_mask) if need_update]
        streams = [torch.cuda.Stream() for _ in calc_indices]
        # Launch the per-structure steps concurrently on the side streams.
        for i, stream in zip(calc_indices, streams):
            with torch.cuda.stream(stream):
                omega, V = torch.linalg.eigh(self.H[i])
                dpos_list[i] = V @ (forces[cur_indices == i] @ V / torch.abs(omega))
        # Synchronize the default stream.
        torch.cuda.current_stream().synchronize()
        # Fill masked-out structures with zero steps on the main thread.
        for i in range(len(self.H)):
            if not self.optimizable.update_mask[i]:
                dpos_list[i] = torch.zeros_like(forces[cur_indices == i])
        # Wait for all side streams to finish.
        for stream in streams:
            stream.synchronize()
        # Scatter the per-structure steps back into one flat tensor.
        dpos = torch.zeros_like(forces)
        for i in torch.unique(cur_indices):
            mask = cur_indices == i
            dpos[mask] = dpos_list[i]
dpos = dpos.reshape(-1, 3)
steplengths = (dpos ** 2).sum(dim=-1).sqrt()
self.pos0 = pos
self.forces0 = forces
return dpos, steplengths
def determine_step(self, dpos, steplengths):
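        """Uniformly rescale each structure's step so that its longest atomic
        displacement does not exceed self.maxstep (trust-radius clipping)."""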
longest_steps = scatter(
steplengths, self.optimizable.batch_indices, reduce="max"
)
longest_steps = longest_steps[self.optimizable.batch_indices]
maxstep = longest_steps.new_tensor(self.maxstep)
scale = (longest_steps).reciprocal() * torch.min(longest_steps, maxstep)
dpos *= scale.unsqueeze(1)
return dpos
def update(self, pos, forces, pos0, forces0):
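        """Per-structure BFGS Hessian update (same form as ASE's BFGS):

            H <- H - outer(df, df) / dot(df, ds) - outer(dg, dg) / dot(ds, dg)

        with ds = pos - pos0, df = forces - forces0 and dg = H @ ds.
        Blocks that are still None are (re)initialized to alpha * I.
        """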
        if self.H is None:
            # Defensive guard: initialize() sets self.H to a list of None
            # blocks, which are handled per structure below.
            self.H = self.H0
            return
dpos = pos - pos0
dforces = forces - forces0
batch_indices_flatten = self.optimizable.batch_indices.repeat_interleave(3)
dg = torch.zeros_like(dforces)
all_size = self.optimizable.elem_per_group
for i in range(self.optimizable.batch_size):
if self.H[i] is None:
continue
mask = (i==batch_indices_flatten)
if torch.abs(dpos[mask]).max() < 1e-7:
continue
dg[mask] = self.H[i] @ dpos[mask]
a = self._batched_dot_1d(dforces, dpos)
b = self._batched_dot_1d(dpos, dg)
for i in range(self.optimizable.batch_size):
if self.H[i] is None:
self.H[i] = torch.eye(3*all_size[i], device=self.device, dtype=torch.float64) * self.alpha
continue
mask = (i==batch_indices_flatten)
if not self.optimizable.update_mask[i]:
continue
if torch.abs(dpos[mask]).max() < 1e-7:
continue
outer_force = torch.outer(dforces[mask], dforces[mask])
outer_dg = torch.outer(dg[mask], dg[mask])
self.H[i] -= outer_force / a[i] + outer_dg / b[i]
def update_parallel(self, pos, forces, pos0, forces0):
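        """Stream-parallel variant of `update`: the per-structure H @ ds
        products and rank-two updates each run on their own CUDA stream."""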
if self.H is None:
self.H = self.H0
return
dpos = pos - pos0
if torch.abs(dpos).max() < 1e-7:
return
dforces = forces - forces0
cur_indices = self.optimizable.batch_indices.repeat_interleave(3)
a = self._batched_dot_1d(dforces, dpos)
        # A previous hstack-based construction of dg was buggy for interleaved
        # batch indices; dg is now assembled via masked writes, with the
        # per-structure matmuls launched on separate CUDA streams.
        dg_list = [None] * len(self.H)
        streams = [torch.cuda.Stream() for _ in dg_list]
        # One stream per structure for the H @ dpos products.
        for i, stream in enumerate(streams):
            with torch.cuda.stream(stream):
                dg_list[i] = self.H[i] @ dpos[cur_indices == i]
        torch.cuda.current_stream().synchronize()
        for stream in streams:
            stream.synchronize()
dg = torch.zeros_like(dforces)
for i in torch.unique(cur_indices):
mask = (cur_indices == i)
dg[mask] = dg_list[i]
b = self._batched_dot_1d(dpos, dg)
        # Rank-two BFGS update per structure, again one stream each.
        for i, stream in enumerate(streams):
            if not self.optimizable.update_mask[i]:
                continue
            with torch.cuda.stream(stream):
                outer_force = torch.outer(dforces[cur_indices == i], dforces[cur_indices == i])
                outer_dg = torch.outer(dg[cur_indices == i], dg[cur_indices == i])
                self.H[i] -= outer_force / a[i] + outer_dg / b[i]
torch.cuda.current_stream().synchronize()
for stream in streams:
stream.synchronize()
def _batched_dot_2d(self, x: torch.Tensor, y: torch.Tensor):
return scatter(
(x * y).sum(dim=-1), self.optimizable.batch_indices, reduce="sum"
)
def _batched_dot_1d(self, x: torch.Tensor, y: torch.Tensor):
return scatter(
(x * y), self.optimizable.batch_indices.repeat_interleave(3), reduce="sum"
)
"""
Copyright (c) Meta, Inc. and its affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
import ast
import collections
import copy
import datetime
import errno
import functools
import importlib
import itertools
import json
import logging
import os
import subprocess
import sys
import time
from bisect import bisect
from contextlib import contextmanager
from dataclasses import dataclass
from functools import wraps
from itertools import product
from pathlib import Path
from typing import TYPE_CHECKING, Any
from uuid import uuid4
import numpy as np
import torch
import torch.nn as nn
import torch_geometric
import yaml
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
from torch_geometric.data import Data
from torch_geometric.utils import remove_self_loops
from torch_scatter import scatter, segment_coo, segment_csr
from torch_geometric.data.data import BaseData
from torch_geometric.data import Batch
# Count atoms in a CIF file; used as the key when sorting input files by
# atom count in descending order.
def count_atoms_cif(file):
    """Count the atom rows in the `_atom_site_` loop of a CIF file."""
    in_atom_site = False
    natoms = 0
    with open(file, "r") as f:
        while line := f.readline():
            stripped = line.strip()
            if stripped.lower().startswith("loop_"):
                in_atom_site = False
                continue
            if "_atom_site_" in stripped.lower():
                in_atom_site = True
                continue
            if in_atom_site:
                if stripped.startswith("_"):
                    # A tag from a different loop ends the atom-site block.
                    in_atom_site = False
                elif stripped:
                    # Only non-empty data rows count as atoms (readline keeps
                    # the trailing newline, so test the stripped line).
                    natoms += 1
    return natoms
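# Sketch of the sorting use mentioned above (the glob pattern is illustrative):
def sort_cifs_by_size(pattern="structures/*.cif"):
    import glob
    files = glob.glob(pattern)
    files.sort(key=count_atoms_cif, reverse=True)  # largest structures first
    return files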
# Override the collation method in `torch_geometric.data.InMemoryDataset`.
def collate(data_list):
keys = data_list[0].keys
data = data_list[0].__class__()
for key in keys:
data[key] = []
slices = {key: [0] for key in keys}
for item, key in product(data_list, keys):
data[key].append(item[key])
if torch.is_tensor(item[key]):
s = slices[key][-1] + item[key].size(item.__cat_dim__(key, item[key]))
elif isinstance(item[key], (int, float)):
s = slices[key][-1] + 1
else:
raise ValueError("Unsupported attribute type")
slices[key].append(s)
if hasattr(data_list[0], "__num_nodes__"):
data.__num_nodes__ = []
for item in data_list:
data.__num_nodes__.append(item.num_nodes)
for key in keys:
if torch.is_tensor(data_list[0][key]):
data[key] = torch.cat(
data[key], dim=data.__cat_dim__(key, data_list[0][key])
)
else:
data[key] = torch.tensor(data[key])
slices[key] = torch.tensor(slices[key], dtype=torch.long)
return data, slices
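# Minimal usage sketch (illustrative; assumes a PyG version in which
# `Data.keys` is a property, as `collate` above expects):
def _collate_example():
    data, slices = collate([Data(x=torch.zeros(2, 4)), Data(x=torch.zeros(3, 4))])
    # slices["x"] == tensor([0, 2, 5]): cumulative node boundaries per graph.
    return data, slices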
def data_list_collater(
data_list: list[BaseData], otf_graph: bool = False, to_dict: bool = False
) -> BaseData | dict[str, torch.Tensor]:
batch = Batch.from_data_list(data_list)
if not otf_graph:
try:
n_neighbors = []
for _, data in enumerate(data_list):
n_index = data.edge_index[1, :]
n_neighbors.append(n_index.shape[0])
batch.neighbors = torch.tensor(n_neighbors)
except (NotImplementedError, TypeError):
logging.warning(
"LMDB does not contain edge index information, set otf_graph=True"
)
if to_dict:
batch = dict(batch.items())
return batch
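# Usage sketch (illustrative; `data0`/`data1` stand for Data objects that
# already carry `edge_index`):
#   batch = data_list_collater([data0, data1], otf_graph=False)
#   batch.neighbors  # per-structure edge counts gathered above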
#!/bin/bash
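# Make the CUDA toolkit visible to compilers and the dynamic linker.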
export CUDA_HOME=/usr/local/cuda
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH
export CPATH=$CUDA_HOME/include:$CPATH
#!/bin/bash
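# Stop the CUDA MPS daemon and restore the DEFAULT compute mode on GPUs 0-7.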
echo quit | nvidia-cuda-mps-control
nvidia-smi -i 0 -c DEFAULT
nvidia-smi -i 1 -c DEFAULT
nvidia-smi -i 2 -c DEFAULT
nvidia-smi -i 3 -c DEFAULT
nvidia-smi -i 4 -c DEFAULT
nvidia-smi -i 5 -c DEFAULT
nvidia-smi -i 6 -c DEFAULT
nvidia-smi -i 7 -c DEFAULT
#!/bin/bash
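# Enable CUDA MPS: set GPUs 0-7 to exclusive mode, then start the daemon.
# Changing the compute mode requires administrative privileges.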
nvidia-smi -i 0 -c EXCLUSIVE_PROCESS # Set GPU 0 to exclusive mode.
nvidia-smi -i 1 -c EXCLUSIVE_PROCESS # Set GPU 1 to exclusive mode.
nvidia-smi -i 2 -c EXCLUSIVE_PROCESS # Set GPU 2 to exclusive mode.
nvidia-smi -i 3 -c EXCLUSIVE_PROCESS # Set GPU 3 to exclusive mode.
nvidia-smi -i 4 -c EXCLUSIVE_PROCESS # Set GPU 4 to exclusive mode.
nvidia-smi -i 5 -c EXCLUSIVE_PROCESS # Set GPU 5 to exclusive mode.
nvidia-smi -i 6 -c EXCLUSIVE_PROCESS # Set GPU 6 to exclusive mode.
nvidia-smi -i 7 -c EXCLUSIVE_PROCESS # Set GPU 7 to exclusive mode.
nvidia-cuda-mps-control -d # Start the daemon.