Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
nivren
ICT-CSP
Commits
1be78103
"docs/design/feature/ray_based_execution.md" did not exist on "356077823ea8569ff15218e51228c1b3d50792a9"
Unverified
Commit
1be78103
authored
Aug 24, 2025
by
zcxzcx1
Committed by
GitHub
Aug 24, 2025
Browse files
Add files via upload
parent
f675ef76
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
6212 additions
and
0 deletions
+6212
-0
mace-bench/3rdparty/mace/tests/test_benchmark.py
mace-bench/3rdparty/mace/tests/test_benchmark.py
+121
-0
mace-bench/3rdparty/mace/tests/test_calculator.py
mace-bench/3rdparty/mace/tests/test_calculator.py
+689
-0
mace-bench/3rdparty/mace/tests/test_cg.py
mace-bench/3rdparty/mace/tests/test_cg.py
+12
-0
mace-bench/3rdparty/mace/tests/test_compile.py
mace-bench/3rdparty/mace/tests/test_compile.py
+154
-0
mace-bench/3rdparty/mace/tests/test_cueq.py
mace-bench/3rdparty/mace/tests/test_cueq.py
+181
-0
mace-bench/3rdparty/mace/tests/test_data.py
mace-bench/3rdparty/mace/tests/test_data.py
+213
-0
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
+164
-0
mace-bench/3rdparty/mace/tests/test_foundations.py
mace-bench/3rdparty/mace/tests/test_foundations.py
+512
-0
mace-bench/3rdparty/mace/tests/test_hessian.py
mace-bench/3rdparty/mace/tests/test_hessian.py
+54
-0
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
+134
-0
mace-bench/3rdparty/mace/tests/test_models.py
mace-bench/3rdparty/mace/tests/test_models.py
+374
-0
mace-bench/3rdparty/mace/tests/test_modules.py
mace-bench/3rdparty/mace/tests/test_modules.py
+268
-0
mace-bench/3rdparty/mace/tests/test_multifiles.py
mace-bench/3rdparty/mace/tests/test_multifiles.py
+1029
-0
mace-bench/3rdparty/mace/tests/test_preprocess.py
mace-bench/3rdparty/mace/tests/test_preprocess.py
+206
-0
mace-bench/3rdparty/mace/tests/test_run_train.py
mace-bench/3rdparty/mace/tests/test_run_train.py
+1458
-0
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
+468
-0
mace-bench/3rdparty/mace/tests/test_schedulefree.py
mace-bench/3rdparty/mace/tests/test_schedulefree.py
+127
-0
mace-bench/3rdparty/mace/tests/test_tools.py
mace-bench/3rdparty/mace/tests/test_tools.py
+48
-0
No files found.
mace-bench/3rdparty/mace/tests/test_benchmark.py
0 → 100644
View file @
1be78103
import
json
import
os
from
pathlib
import
Path
from
typing
import
List
,
Optional
import
pandas
as
pd
import
pytest
import
torch
from
ase
import
build
from
mace
import
data
as
mace_data
from
mace.calculators.foundations_models
import
mace_mp
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
,
torch_tools
def is_mace_full_bench():
    """Return True when the full benchmark suite was requested.

    Controlled by the MACE_FULL_BENCH environment variable; anything other
    than the exact string "1" (including unset) counts as disabled.
    """
    flag = os.environ.get("MACE_FULL_BENCH", "0")
    return flag == "1"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
@pytest.mark.benchmark(warmup=True, warmup_iterations=4, min_rounds=8)
@pytest.mark.parametrize("size", (3, 5, 7, 9))
@pytest.mark.parametrize("dtype", ["float32", "float64"])
@pytest.mark.parametrize("compile_mode", [None, "default"])
def test_inference(
    benchmark,
    size: int,
    dtype: str,
    compile_mode: Optional[str],
    device: str = "cuda",
):
    """Benchmark a single forward pass of the medium MACE-MP model.

    The compiled variants are long-running and only execute when
    MACE_FULL_BENCH=1 is set in the environment.
    """
    if compile_mode is not None and not is_mace_full_bench():
        pytest.skip("Skipping long running benchmark, set MACE_FULL_BENCH=1 to execute")

    with torch_tools.default_dtype(dtype):
        model = load_mace_mp_medium(dtype, compile_mode, device)
        batch = create_batch(size, model, device)
        log_bench_info(benchmark, dtype, compile_mode, batch)

        def func():
            # Synchronize so each timed round measures completed GPU work.
            torch.cuda.synchronize()
            model(batch, training=compile_mode is not None, compute_force=True)

        torch.cuda.empty_cache()
        benchmark(func)
def load_mace_mp_medium(dtype, compile_mode, device):
    """Build the "medium" MACE-MP calculator and return its first model on *device*."""
    calc = mace_mp(
        model="medium",
        default_dtype=dtype,
        device=device,
        compile_mode=compile_mode,
        fullgraph=False,
    )
    # The calculator may hold several models; benchmarking uses only the first.
    return calc.models[0].to(device)
def create_batch(size: int, model: torch.nn.Module, device: str) -> dict:
    """Build one input batch: a diamond-carbon supercell repeated size^3 times.

    Cutoff and element table are taken from *model* so the batch matches the
    model's own configuration. Returns the batch as a plain dict on *device*.
    """
    cutoff = model.r_max.item()
    z_table = AtomicNumberTable([int(z) for z in model.atomic_numbers])

    atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms = atoms.repeat((size, size, size))
    config = mace_data.config_from_atoms(atoms)

    dataset = [mace_data.AtomicData.from_config(config, z_table=z_table, cutoff=cutoff)]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )

    batch = next(iter(loader))
    batch.to(device)
    return batch.to_dict()
def log_bench_info(benchmark, dtype, compile_mode, batch):
    """Attach run metadata (system size, dtype, device) to the benchmark record."""
    info = benchmark.extra_info
    info["num_atoms"] = int(batch["positions"].shape[0])
    info["num_edges"] = int(batch["edge_index"].shape[1])
    info["dtype"] = dtype
    info["is_compiled"] = compile_mode is not None
    info["device_name"] = torch.cuda.get_device_name()
def process_benchmark_file(bench_file: Path) -> pd.DataFrame:
    """Load one pytest-benchmark JSON file into a tidy DataFrame.

    Each benchmark entry contributes one row merging its ``extra_info``
    metadata with its timing ``stats``. Two derived throughput columns are
    added: steps per day (from ops/second) and ns/day assuming 1 fs per step
    (86400 s/day * 1e-6 ns-per-fs-step / median seconds = 0.0864 / median).

    Raises:
        KeyError: if the file lacks the expected pytest-benchmark schema.
    """
    with open(bench_file, "r", encoding="utf-8") as f:
        bench_data = json.load(f)

    # One record per benchmark; idiomatic comprehension instead of append loop.
    records = [
        {**bench["extra_info"], **bench["stats"]}
        for bench in bench_data["benchmarks"]
    ]

    result_df = pd.DataFrame(records)
    result_df["ns/day (1 fs/step)"] = 0.086400 / result_df["median"]
    result_df["Steps per day"] = result_df["ops"] * 86400

    columns = [
        "num_atoms",
        "num_edges",
        "dtype",
        "is_compiled",
        "device_name",
        "median",
        "Steps per day",
        "ns/day (1 fs/step)",
    ]
    return result_df[columns]
def read_bench_results(result_files: List[str]) -> pd.DataFrame:
    """Concatenate the per-file benchmark tables for every path in *result_files*."""
    frames = (process_benchmark_file(Path(name)) for name in result_files)
    return pd.concat(frames)
if __name__ == "__main__":
    # Print to stdout a csv of the benchmark metrics collected by
    # pytest-benchmark ("pytest-benchmark list" prints one file path per line).
    import subprocess

    result = subprocess.run(
        ["pytest-benchmark", "list"],
        capture_output=True,
        text=True,
        check=True,
    )
    # splitlines() handles \r\n and yields no entries for empty output,
    # whereas split("\n") on an empty string would yield [""].
    bench_files = result.stdout.strip().splitlines()
    bench_results = read_bench_results(bench_files)
    print(bench_results.to_csv(index=False))
mace-bench/3rdparty/mace/tests/test_calculator.py
0 → 100644
View file @
1be78103
import os
import subprocess
import sys
from pathlib import Path

import ase.io
import numpy as np
import pytest
import torch
from ase import build
from ase.atoms import Atoms
from ase.calculators.test import gradient_test
from ase.constraints import ExpCellFilter

from mace.calculators import mace_mp, mace_off
from mace.calculators.mace import MACECalculator
from mace.modules.models import ScaleShiftMACE

# cuequivariance is optional: tests that need it are skipped when absent.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

# Repository root (two levels above this test file) and the training CLI
# script invoked as a subprocess by the fixtures below.
pytest_mace_dir = Path(__file__).parent.parent
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
@pytest.fixture(scope="module", name="fitting_configs")
def fitting_configs_fixture():
    """Build a small synthetic fitting set: two isolated atoms plus 20
    randomly rattled water molecules with random reference labels."""
    water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )

    # Isolated-atom references for O and H set the per-element E0s.
    fit_configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    fit_configs[0].info["REF_energy"] = 1.0
    fit_configs[0].info["config_type"] = "IsolatedAtom"
    fit_configs[1].info["REF_energy"] = -0.5
    fit_configs[1].info["config_type"] = "IsolatedAtom"

    np.random.seed(5)
    for _ in range(20):
        rattled = water.copy()
        rattled.positions += np.random.normal(0.1, size=rattled.positions.shape)
        rattled.info["REF_energy"] = np.random.normal(0.1)
        rattled.info["REF_dipole"] = np.random.normal(0.1, size=3)
        rattled.new_array("REF_forces", np.random.normal(0.1, size=rattled.positions.shape))
        rattled.new_array("Qs", np.random.normal(0.1, size=rattled.positions.shape[0]))
        rattled.info["REF_stress"] = np.random.normal(0.1, size=6)
        fit_configs.append(rattled)

    return fit_configs
@pytest.fixture(scope="module", name="trained_model")
def trained_model_fixture(tmp_path_factory, fitting_configs):
    """Train a small invariant MACE model via the run_train CLI and return a
    CPU MACECalculator wrapping it."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "128x0e",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    # None-valued params become bare flags; everything else is --key=value.
    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(model_paths=run_dir / "MACE.model", device="cpu")
@pytest.fixture(scope="module", name="trained_equivariant_model")
def trained_model_equivariant_fixture(tmp_path_factory, fitting_configs):
    """Train a small equivariant (16x0e+16x1o) MACE model via the run_train
    CLI and return a CPU MACECalculator wrapping it."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "16x0e+16x1o",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(model_paths=run_dir / "MACE.model", device="cpu")
@pytest.fixture(scope="module", name="trained_equivariant_model_cueq")
def trained_model_equivariant_fixture_cueq(tmp_path_factory, fitting_configs):
    """Same as the equivariant fixture, but the returned calculator loads the
    model with cuequivariance acceleration enabled."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "16x0e+16x1o",
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "swa": None,
        "start_swa": 5,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "stress",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", enable_cueq=True
    )
@pytest.fixture(scope="module", name="trained_dipole_model")
def trained_dipole_fixture(tmp_path_factory, fitting_configs):
    """Train a small AtomicDipolesMACE model on the dipole labels and return a
    CPU MACECalculator of model_type "DipoleMACE"."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "AtomicDipolesMACE",
        "num_channels": 8,
        "max_L": 2,
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "dipole",
        "energy_key": "",
        "forces_key": "",
        "stress_key": "",
        "dipole_key": "REF_dipole",
        "error_table": "DipoleRMSE",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", model_type="DipoleMACE"
    )
@pytest.fixture(scope="module", name="trained_energy_dipole_model")
def trained_energy_dipole_fixture(tmp_path_factory, fitting_configs):
    """Train a small EnergyDipolesMACE model (energy + dipole targets) and
    return a CPU MACECalculator of model_type "EnergyDipoleMACE"."""
    _mace_params = {
        "name": "MACE",
        "valid_fraction": 0.05,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "EnergyDipolesMACE",
        "num_channels": 32,
        "max_L": 1,
        "r_max": 3.5,
        "batch_size": 5,
        "max_num_epochs": 10,
        "ema": None,
        "ema_decay": 0.99,
        "amsgrad": None,
        "restart_latest": None,
        "device": "cpu",
        "seed": 5,
        "loss": "energy_forces_dipole",
        "energy_key": "REF_energy",
        "forces_key": "",
        "stress_key": "",
        "dipole_key": "REF_dipole",
        "error_table": "EnergyDipoleRMSE",
        "eval_interval": 2,
    }

    run_dir = tmp_path_factory.mktemp("run_")
    ase.io.write(run_dir / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(run_dir)
    mace_params["model_dir"] = str(run_dir)
    mace_params["train_file"] = run_dir / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    flags = [
        f"--{key}={value}" if value is not None else f"--{key}"
        for key, value in mace_params.items()
    ]
    cmd = " ".join([sys.executable, str(run_train)] + flags)

    completed = subprocess.run(cmd.split(), env=run_env, check=True)
    assert completed.returncode == 0

    return MACECalculator(
        model_paths=run_dir / "MACE.model", device="cpu", model_type="EnergyDipoleMACE"
    )
@pytest.fixture(scope="module", name="trained_committee")
def trained_committee_fixture(tmp_path_factory, fitting_configs):
    """Train three small MACE models with different seeds and return a single
    committee MACECalculator over all of them."""
    _seeds = [5, 6, 7]
    _model_paths = []
    for seed in _seeds:
        _mace_params = {
            "name": f"MACE{seed}",
            "valid_fraction": 0.05,
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "stress_weight": 1.0,
            "model": "MACE",
            "hidden_irreps": "16x0e",
            "r_max": 3.5,
            "batch_size": 5,
            "max_num_epochs": 10,
            "swa": None,
            "start_swa": 5,
            "ema": None,
            "ema_decay": 0.99,
            "amsgrad": None,
            "restart_latest": None,
            "device": "cpu",
            "seed": seed,
            "loss": "stress",
            "energy_key": "REF_energy",
            "forces_key": "REF_forces",
            "stress_key": "REF_stress",
            "eval_interval": 2,
        }

        run_dir = tmp_path_factory.mktemp(f"run{seed}_")
        ase.io.write(run_dir / "fit.xyz", fitting_configs)

        mace_params = _mace_params.copy()
        mace_params["checkpoints_dir"] = str(run_dir)
        mace_params["model_dir"] = str(run_dir)
        mace_params["train_file"] = run_dir / "fit.xyz"

        # make sure run_train.py is using the mace that is currently being tested
        run_env = os.environ.copy()
        sys.path.insert(0, str(Path(__file__).parent.parent))
        run_env["PYTHONPATH"] = ":".join(sys.path)
        print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

        flags = [
            f"--{key}={value}" if value is not None else f"--{key}"
            for key, value in mace_params.items()
        ]
        cmd = " ".join([sys.executable, str(run_train)] + flags)

        completed = subprocess.run(cmd.split(), env=run_env, check=True)
        assert completed.returncode == 0

        _model_paths.append(run_dir / f"MACE{seed}.model")

    return MACECalculator(model_paths=_model_paths, device="cpu")
def test_calculator_node_energy(fitting_configs, trained_model):
    """Per-atom energies plus per-atom E0s must sum to the total energy."""
    for at in fitting_configs:
        trained_model.calculate(at)
        node_energies = trained_model.results["node_energy"]
        batch = trained_model._atoms_to_batch(at)  # pylint: disable=protected-access
        node_heads = batch["head"][batch["batch"]]
        atom_indices = torch.arange(batch["positions"].shape[0])
        node_e0 = (
            trained_model.models[0].atomic_energies_fn(batch["node_attrs"]).detach()
        )
        node_e0 = node_e0[atom_indices, node_heads].cpu().numpy()
        energy_via_nodes = np.sum(node_energies + node_e0)
        energy = trained_model.results["energy"]
        np.testing.assert_allclose(energy, energy_via_nodes, atol=1e-6)
def test_calculator_forces(fitting_configs, trained_model):
    """Analytic forces must match numerical gradients."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_model
    # test just forces
    gradients = gradient_test(atoms)
    assert np.allclose(gradients[0], gradients[1])
def test_calculator_stress(fitting_configs, trained_model):
    """Forces and stress (via ExpCellFilter) must match numerical gradients."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_model
    # test forces and stress
    wrapped = ExpCellFilter(atoms)
    gradients = gradient_test(wrapped)
    assert np.allclose(gradients[0], gradients[1])
def test_calculator_committee(fitting_configs, trained_committee):
    """Committee calculator: force gradients check out, and the reported mean
    energy / variances are consistent with the member results."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_committee
    # test just forces
    gradients = gradient_test(atoms)
    assert np.allclose(gradients[0], gradients[1])

    total_energy = atoms.get_potential_energy()
    member_energies = atoms.calc.results["energies"]
    reported_energy_var = atoms.calc.results["energy_var"]
    member_forces_var = np.var(atoms.calc.results["forces_comm"], axis=0)

    assert np.allclose(total_energy, np.mean(member_energies))
    assert np.allclose(reported_energy_var, np.var(member_energies))
    assert member_forces_var.shape == atoms.calc.results["forces"].shape
def test_calculator_from_model(fitting_configs, trained_committee):
    """Calculators built directly from model objects behave like the
    path-loaded ones, both as a single model and as a committee."""
    # test single model
    test_calculator_forces(
        fitting_configs,
        trained_model=MACECalculator(models=trained_committee.models[0], device="cpu"),
    )
    # test committee model
    test_calculator_committee(
        fitting_configs,
        trained_committee=MACECalculator(models=trained_committee.models, device="cpu"),
    )
def test_calculator_dipole(fitting_configs, trained_dipole_model):
    """The dipole-only model must report a 3-component dipole moment."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_dipole_model
    dipole = atoms.get_dipole_moment()
    assert len(dipole) == 3
def test_calculator_energy_dipole(fitting_configs, trained_energy_dipole_model):
    """The joint energy/dipole model passes the gradient test and reports a
    3-component dipole moment."""
    atoms = fitting_configs[2].copy()
    atoms.calc = trained_energy_dipole_model
    gradients = gradient_test(atoms)
    dipole = atoms.get_dipole_moment()
    assert np.allclose(gradients[0], gradients[1])
    assert len(dipole) == 3
def test_calculator_descriptor(fitting_configs, trained_equivariant_model):
    """Invariant descriptors are unchanged by rotation; equivariant ones are
    not. Also checks per-layer slicing of the descriptor vector."""
    at = fitting_configs[2].copy()
    at_rotated = fitting_configs[2].copy()
    at_rotated.rotate(90, "x")
    calc = trained_equivariant_model

    desc_invariant = calc.get_descriptors(at, invariants_only=True)
    desc_invariant_rotated = calc.get_descriptors(at_rotated, invariants_only=True)
    desc_invariant_single_layer = calc.get_descriptors(at, invariants_only=True, num_layers=1)
    desc_invariant_single_layer_rotated = calc.get_descriptors(
        at_rotated, invariants_only=True, num_layers=1
    )
    desc = calc.get_descriptors(at, invariants_only=False)
    desc_single_layer = calc.get_descriptors(at, invariants_only=False, num_layers=1)
    desc_rotated = calc.get_descriptors(at_rotated, invariants_only=False)
    desc_rotated_single_layer = calc.get_descriptors(
        at_rotated, invariants_only=False, num_layers=1
    )

    # Shape checks: 3 atoms, widths fixed by the 16x0e+16x1o architecture.
    assert desc_invariant.shape[0] == 3
    assert desc_invariant.shape[1] == 32
    assert desc_invariant_single_layer.shape[0] == 3
    assert desc_invariant_single_layer.shape[1] == 16
    assert desc.shape[0] == 3
    assert desc.shape[1] == 80
    assert desc_single_layer.shape[0] == 3
    assert desc_single_layer.shape[1] == 16 * 4
    assert desc_rotated_single_layer.shape[0] == 3
    assert desc_rotated_single_layer.shape[1] == 16 * 4

    # Invariant parts agree across rotation; equivariant parts must differ.
    np.testing.assert_allclose(desc_invariant, desc_invariant_rotated, atol=1e-6)
    np.testing.assert_allclose(desc_invariant_single_layer, desc_invariant[:, :16], atol=1e-6)
    np.testing.assert_allclose(
        desc_invariant_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
    )
    np.testing.assert_allclose(
        desc_single_layer[:, :16], desc_rotated_single_layer[:, :16], atol=1e-6
    )
    assert not np.allclose(
        desc_single_layer[:, 16:], desc_rotated_single_layer[:, 16:], atol=1e-6
    )
    assert not np.allclose(desc, desc_rotated, atol=1e-6)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_calculator_descriptor_cueq(fitting_configs, trained_equivariant_model_cueq):
    """Same rotation-invariance checks as test_calculator_descriptor, but on
    the cuequivariance-enabled calculator."""
    at = fitting_configs[2].copy()
    at_rotated = fitting_configs[2].copy()
    at_rotated.rotate(90, "x")
    calc = trained_equivariant_model_cueq

    desc_invariant = calc.get_descriptors(at, invariants_only=True)
    desc_invariant_rotated = calc.get_descriptors(at_rotated, invariants_only=True)
    desc_invariant_single_layer = calc.get_descriptors(at, invariants_only=True, num_layers=1)
    desc_invariant_single_layer_rotated = calc.get_descriptors(
        at_rotated, invariants_only=True, num_layers=1
    )
    desc = calc.get_descriptors(at, invariants_only=False)
    desc_single_layer = calc.get_descriptors(at, invariants_only=False, num_layers=1)
    desc_rotated = calc.get_descriptors(at_rotated, invariants_only=False)
    desc_rotated_single_layer = calc.get_descriptors(
        at_rotated, invariants_only=False, num_layers=1
    )

    assert desc_invariant.shape[0] == 3
    assert desc_invariant.shape[1] == 32
    assert desc_invariant_single_layer.shape[0] == 3
    assert desc_invariant_single_layer.shape[1] == 16
    assert desc.shape[0] == 3
    assert desc.shape[1] == 80
    assert desc_single_layer.shape[0] == 3
    assert desc_single_layer.shape[1] == 16 * 4
    assert desc_rotated_single_layer.shape[0] == 3
    assert desc_rotated_single_layer.shape[1] == 16 * 4

    np.testing.assert_allclose(desc_invariant, desc_invariant_rotated, atol=1e-6)
    np.testing.assert_allclose(desc_invariant_single_layer, desc_invariant[:, :16], atol=1e-6)
    np.testing.assert_allclose(
        desc_invariant_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
    )
    np.testing.assert_allclose(
        desc_single_layer[:, :16], desc_rotated_single_layer[:, :16], atol=1e-6
    )
    assert not np.allclose(
        desc_single_layer[:, 16:], desc_rotated_single_layer[:, 16:], atol=1e-6
    )
    assert not np.allclose(desc, desc_rotated, atol=1e-6)
def
test_mace_mp
(
capsys
:
pytest
.
CaptureFixture
):
mp_mace
=
mace_mp
()
assert
isinstance
(
mp_mace
,
MACECalculator
)
assert
mp_mace
.
model_type
==
"MACE"
assert
len
(
mp_mace
.
models
)
==
1
assert
isinstance
(
mp_mace
.
models
[
0
],
ScaleShiftMACE
)
_
,
stderr
=
capsys
.
readouterr
()
assert
stderr
==
""
def test_mace_off():
    """mace_off small model loads on CPU and reproduces the reference H2O energy."""
    mace_off_model = mace_off(model="small", device="cpu")
    assert isinstance(mace_off_model, MACECalculator)
    assert mace_off_model.model_type == "MACE"
    assert len(mace_off_model.models) == 1
    assert isinstance(mace_off_model.models[0], ScaleShiftMACE)

    atoms = build.molecule("H2O")
    atoms.calc = mace_off_model
    energy = atoms.get_potential_energy()
    assert np.allclose(energy, -2081.116128586803, atol=1e-9)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_mace_off_cueq(model="medium", device="cpu"):
    """mace_off with cuequivariance enabled matches the reference H2O energy."""
    mace_off_model = mace_off(model=model, device=device, enable_cueq=True)
    assert isinstance(mace_off_model, MACECalculator)
    assert mace_off_model.model_type == "MACE"
    assert len(mace_off_model.models) == 1
    assert isinstance(mace_off_model.models[0], ScaleShiftMACE)

    atoms = build.molecule("H2O")
    atoms.calc = mace_off_model
    energy = atoms.get_potential_energy()
    assert np.allclose(energy, -2081.116128586803, atol=1e-9)
def test_mace_mp_stresses(model="medium", device="cpu"):
    """Atomic stresses must sum to the total stress for a 32-atom Al supercell.

    Uses mace_mp with compute_atomic_stresses=True; shapes are (6,) for the
    Voigt total stress and (32, 6) per-atom.
    """
    atoms = build.bulk("Al", "fcc", a=4.05, cubic=True)
    atoms = atoms.repeat((2, 2, 2))
    mace_mp_model = mace_mp(model=model, device=device, compute_atomic_stresses=True)
    # atoms.set_calculator() is deprecated in ASE; assign the calc attribute
    # directly, consistent with the other tests in this file.
    atoms.calc = mace_mp_model
    stress = atoms.get_stress()
    stresses = atoms.get_stresses()
    assert stress.shape == (6,)
    assert stresses.shape == (32, 6)
    assert np.allclose(stress, stresses.sum(axis=0), atol=1e-6)
mace-bench/3rdparty/mace/tests/test_cg.py
0 → 100644
View file @
1be78103
from
e3nn
import
o3
from
mace.tools
import
cg
def
test_U_matrix
():
irreps_in
=
o3
.
Irreps
(
"1x0e + 1x1o + 1x2e"
)
irreps_out
=
o3
.
Irreps
(
"1x0e + 1x1o"
)
u_matrix
=
cg
.
U_matrix_real
(
irreps_in
=
irreps_in
,
irreps_out
=
irreps_out
,
correlation
=
3
)[
-
1
]
assert
u_matrix
.
shape
==
(
3
,
9
,
9
,
9
,
21
)
mace-bench/3rdparty/mace/tests/test_compile.py
0 → 100644
View file @
1be78103
import
os
from
functools
import
wraps
from
typing
import
Callable
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
torch.testing
import
assert_close
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
compile
as
mace_compile
from
mace.tools
import
torch_geometric
# Shared fixtures for building a minimal single-element (carbon, Z=6) MACE
# model: the element table, the per-element reference energies, and the
# radial cutoff (Angstrom) used by both create_mace and create_batch below.
table = tools.AtomicNumberTable([6])
atomic_energies = np.array([1.0], dtype=float)
cutoff = 5.0
def create_mace(device: str, seed: int = 1702):
    """Construct a deterministic two-layer ScaleShiftMACE model on *device*.

    Seeding goes through torch_geometric so repeated calls with the same seed
    produce identical weights.
    """
    torch_geometric.seed_everything(seed)

    residual_block = modules.interaction_classes["RealAgnosticResidualInteractionBlock"]
    model_config = {
        "r_max": cutoff,
        "num_bessel": 8,
        "num_polynomial_cutoff": 6,
        "max_ell": 3,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 1,
        "hidden_irreps": o3.Irreps("128x0e + 128x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": F.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 8,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "bessel",
        "atomic_inter_scale": 1.0,
        "atomic_inter_shift": 0.0,
    }
    model = modules.ScaleShiftMACE(**model_config)
    return model.to(device)
def create_batch(device: str):
    """Build one diamond-carbon batch (2x2x2 supercell) as a dict on *device*."""
    from ase import build

    size = 2
    atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms_list = [atoms.repeat((size, size, size))]
    print("Number of atoms", len(atoms_list[0]))

    configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[
            data.AtomicData.from_config(config, z_table=table, cutoff=cutoff)
            for config in configs
        ],
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    return batch.to(device).to_dict()
def time_func(func: Callable):
    """Wrap *func* so each call is bracketed for accurate CUDA timing:
    mark a cudagraph step before, synchronize the device after."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        torch._inductor.cudagraph_mark_step_begin()  # pylint: disable=W0212
        outputs = func(*args, **kwargs)
        torch.cuda.synchronize()
        return outputs

    return wrapper
@pytest.fixture(params=[torch.float32, torch.float64], ids=["fp32", "fp64"])
def default_dtype(request):
    """Run the test under each torch default dtype, restoring it afterwards."""
    with tools.torch_tools.default_dtype(request.param):
        yield torch.get_default_dtype()
# skip if on windows
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_mace(device, default_dtype):  # pylint: disable=W0621
    """Compiled model output must match the eager model bit-for-bit-close."""
    print(f"using default dtype = {default_dtype}")
    if device == "cuda" and not torch.cuda.is_available():
        pytest.skip(reason="cuda is not available")

    model_defaults = create_mace(device)
    prepared = mace_compile.prepare(create_mace)(device)
    model_compiled = torch.compile(prepared, mode="default")

    batch = create_batch(device)
    eager_out = model_defaults(batch, training=True)
    compiled_out = model_compiled(batch, training=True)
    assert_close(eager_out["energy"], compiled_out["energy"])
    assert_close(eager_out["forces"], compiled_out["forces"])
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_eager_benchmark(benchmark, default_dtype):  # pylint: disable=W0621
    """Baseline: benchmark the uncompiled (eager) model on CUDA."""
    print(f"using default dtype = {default_dtype}")
    batch = create_batch("cuda")
    model = time_func(create_mace("cuda"))
    benchmark(model, batch, training=True)
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
@pytest.mark.parametrize("compile_mode", ["default", "reduce-overhead", "max-autotune"])
@pytest.mark.parametrize("enable_amp", [False, True], ids=["fp32", "mixed"])
def test_compile_benchmark(benchmark, compile_mode, enable_amp):
    """Benchmark torch.compile'd models across compile modes (fp32 only for now)."""
    if enable_amp:
        # Known failure under autocast; see the compiler assertion below.
        pytest.skip(reason="autocast compiler assertion aten.slice_scatter.default")

    with tools.torch_tools.default_dtype(torch.float32):
        batch = create_batch("cuda")
        torch.compiler.reset()
        prepared = mace_compile.prepare(create_mace)("cuda")
        compiled = torch.compile(prepared, mode=compile_mode)
        timed = time_func(compiled)

        with torch.autocast("cuda", enabled=enable_amp):
            benchmark(timed, batch, training=True)
@pytest.mark.skipif(os.name == "nt", reason="Not supported on Windows")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_graph_breaks():
    """Assert that dynamo traces the prepared model with zero graph breaks."""
    import torch._dynamo as dynamo

    batch = create_batch("cuda")
    model = mace_compile.prepare(create_mace)("cuda")
    explanation = dynamo.explain(model)(batch, training=False)
    # these clutter the output but might be useful for investigating graph breaks
    explanation.ops_per_graph = None
    explanation.out_guards = None
    print(explanation)
    assert explanation.graph_break_count == 0
mace-bench/3rdparty/mace/tests/test_cueq.py
0 → 100644
View file @
1be78103
# pylint: disable=wrong-import-position
import
os
from
copy
import
deepcopy
from
typing
import
Any
,
Dict
os
.
environ
[
"TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"
]
=
"1"
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
mace
import
data
,
modules
,
tools
from
mace.cli.convert_cueq_e3nn
import
run
as
run_cueq_to_e3nn
from
mace.cli.convert_e3nn_cueq
import
run
as
run_e3nn_to_cueq
from
mace.tools
import
torch_geometric
# Feature flags: detect optional cuequivariance support and CUDA hardware
# at import time so tests can be skipped/parametrized accordingly.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

CUDA_AVAILABLE = torch.cuda.is_available()
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
class TestCueq:
    """Round-trip conversion tests between e3nn and cuequivariance MACE models.

    Verifies that E3nn -> CuEq -> E3nn conversion preserves forward outputs
    (energy, forces, stress) and backward gradients at each stage.
    """

    @pytest.fixture
    def model_config(self, interaction_cls_first, hidden_irreps) -> Dict[str, Any]:
        """Build a small single-element (carbon) ScaleShiftMACE config."""
        table = tools.AtomicNumberTable([6])
        return {
            "r_max": 5.0,
            "num_bessel": 8,
            "num_polynomial_cutoff": 6,
            "max_ell": 3,
            "interaction_cls": modules.interaction_classes[
                "RealAgnosticResidualInteractionBlock"
            ],
            "interaction_cls_first": interaction_cls_first,
            "num_interactions": 2,
            "num_elements": 1,
            "hidden_irreps": hidden_irreps,
            "MLP_irreps": o3.Irreps("16x0e"),
            "gate": F.silu,
            "atomic_energies": torch.tensor([1.0]),
            "avg_num_neighbors": 8,
            "atomic_numbers": table.zs,
            "correlation": 3,
            "radial_type": "bessel",
            "atomic_inter_scale": 1.0,
            "atomic_inter_shift": 0.0,
        }

    @pytest.fixture
    def batch(self, device: str, default_dtype: torch.dtype) -> Dict[str, torch.Tensor]:
        """Create a single randomly-perturbed diamond supercell batch on `device`."""
        from ase import build

        torch.set_default_dtype(default_dtype)
        table = tools.AtomicNumberTable([6])
        atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
        import numpy as np

        # small random displacement breaks symmetry so forces are non-zero
        displacement = np.random.uniform(-0.1, 0.1, size=atoms.positions.shape)
        atoms.positions += displacement
        atoms_list = [atoms.repeat((2, 2, 2))]
        configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[
                data.AtomicData.from_config(config, z_table=table, cutoff=5.0)
                for config in configs
            ],
            batch_size=1,
            shuffle=False,
            drop_last=False,
        )
        batch = next(iter(data_loader))
        return batch.to(device).to_dict()

    @pytest.mark.parametrize(
        "device",
        ["cpu"] + (["cuda"] if CUDA_AVAILABLE else []),
    )
    @pytest.mark.parametrize(
        "interaction_cls_first",
        [
            modules.interaction_classes["RealAgnosticResidualInteractionBlock"],
            modules.interaction_classes["RealAgnosticInteractionBlock"],
            modules.interaction_classes["RealAgnosticDensityInteractionBlock"],
        ],
    )
    @pytest.mark.parametrize(
        "hidden_irreps",
        [
            o3.Irreps("32x0e + 32x1o"),
            o3.Irreps("32x0e + 32x1o + 32x2e"),
            o3.Irreps("32x0e"),
        ],
    )
    @pytest.mark.parametrize("default_dtype", [torch.float32, torch.float64])
    def test_bidirectional_conversion(
        self,
        model_config: Dict[str, Any],
        batch: Dict[str, torch.Tensor],
        device: str,
        default_dtype: torch.dtype,
    ):
        """Check E3nn<->CuEq conversion preserves outputs and gradients."""
        if device == "cuda" and not CUDA_AVAILABLE:
            pytest.skip("CUDA not available")

        torch.manual_seed(42)

        # Create original E3nn model
        model_e3nn = modules.ScaleShiftMACE(**model_config).to(device)

        # Convert E3nn to CuEq
        model_cueq = run_e3nn_to_cueq(model_e3nn).to(device)

        # Convert CuEq back to E3nn
        model_e3nn_back = run_cueq_to_e3nn(model_cueq).to(device)

        # Test forward pass equivalence
        out_e3nn = model_e3nn(deepcopy(batch), training=True, compute_stress=True)
        out_cueq = model_cueq(deepcopy(batch), training=True, compute_stress=True)
        out_e3nn_back = model_e3nn_back(
            deepcopy(batch), training=True, compute_stress=True
        )

        # Check outputs match for both conversions
        torch.testing.assert_close(out_e3nn["energy"], out_cueq["energy"])
        torch.testing.assert_close(out_cueq["energy"], out_e3nn_back["energy"])
        torch.testing.assert_close(out_e3nn["forces"], out_cueq["forces"])
        torch.testing.assert_close(out_cueq["forces"], out_e3nn_back["forces"])
        torch.testing.assert_close(out_e3nn["stress"], out_cueq["stress"])
        torch.testing.assert_close(out_cueq["stress"], out_e3nn_back["stress"])

        # Test backward pass equivalence
        loss_e3nn = out_e3nn["energy"].sum()
        loss_cueq = out_cueq["energy"].sum()
        loss_e3nn_back = out_e3nn_back["energy"].sum()

        loss_e3nn.backward()
        loss_cueq.backward()
        loss_e3nn_back.backward()

        # Compare gradients for all conversions; fp32 needs a looser tolerance
        tol = 1e-4 if default_dtype == torch.float32 else 1e-7

        def print_gradient_diff(name1, p1, name2, p2, conv_type):
            # Only compare parameters whose qualified-name prefixes line up;
            # conversion can reorder/rename deeper submodules.
            if p1.grad is not None and p1.grad.shape == p2.grad.shape:
                if name1.split(".", 2)[:2] == name2.split(".", 2)[:2]:
                    error = torch.abs(p1.grad - p2.grad)
                    print(
                        f"{conv_type} - Parameter {name1}/{name2}, Max error: {error.max()}"
                    )
                    torch.testing.assert_close(p1.grad, p2.grad, atol=tol, rtol=tol)

        # E3nn to CuEq gradients
        for (name_e3nn, p_e3nn), (name_cueq, p_cueq) in zip(
            model_e3nn.named_parameters(), model_cueq.named_parameters()
        ):
            print_gradient_diff(name_e3nn, p_e3nn, name_cueq, p_cueq, "E3nn->CuEq")

        # CuEq to E3nn gradients
        for (name_cueq, p_cueq), (name_e3nn_back, p_e3nn_back) in zip(
            model_cueq.named_parameters(), model_e3nn_back.named_parameters()
        ):
            print_gradient_diff(
                name_cueq, p_cueq, name_e3nn_back, p_e3nn_back, "CuEq->E3nn"
            )

        # Full circle comparison (E3nn -> E3nn)
        for (name_e3nn, p_e3nn), (name_e3nn_back, p_e3nn_back) in zip(
            model_e3nn.named_parameters(), model_e3nn_back.named_parameters()
        ):
            print_gradient_diff(
                name_e3nn, p_e3nn, name_e3nn_back, p_e3nn_back, "Full circle"
            )
mace-bench/3rdparty/mace/tests/test_data.py
0 → 100644
View file @
1be78103
from
copy
import
deepcopy
from
pathlib
import
Path
import
ase.build
import
h5py
import
numpy
as
np
import
torch
from
mace.data
import
(
AtomicData
,
Configuration
,
HDF5Dataset
,
config_from_atoms
,
get_neighborhood
,
save_configurations_as_HDF5
,
)
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
# Repository root (two levels above this test file); used below to build
# the path of the temporary HDF5 test artifact.
mace_path = Path(__file__).parent.parent
class TestAtomicData:
    """Tests for AtomicData construction, batching, and HDF5-backed loading.

    Uses a small 3-atom water-like configuration (O, H, H) with known
    neighbor counts at cutoff 3.0 so batch shapes are deterministic.
    """

    # Base configuration: one O at the bottom, two H above it.
    config = Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.0, -2.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 1.0, 0.0],
            ]
        ),
        properties={
            "forces": np.array(
                [
                    [0.0, -1.3, 0.0],
                    [1.0, 0.2, 0.0],
                    [0.0, 1.1, 0.3],
                ]
            ),
            "energy": -1.5,
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
        },
    )
    # Slightly displaced copy; same topology at cutoff 3.0.
    config_2 = deepcopy(config)
    config_2.positions = config.positions + 0.01
    table = AtomicNumberTable([1, 8])

    def test_atomic_data(self):
        """A single config yields expected edge/force/one-hot shapes."""
        data = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        assert data.edge_index.shape == (2, 4)
        assert data.forces.shape == (3, 3)
        assert data.node_attrs.shape == (3, 2)

    def test_data_loader(self):
        """Two identical configs batch into doubled node/edge counts."""
        data1 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)
        data2 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[data1, data2],
            batch_size=2,
            shuffle=True,
            drop_last=False,
        )

        for batch in data_loader:
            assert batch.batch.shape == (6,)
            assert batch.edge_index.shape == (2, 8)
            assert batch.shifts.shape == (8, 3)
            assert batch.positions.shape == (6, 3)
            assert batch.node_attrs.shape == (6, 2)
            assert batch.energy.shape == (2,)
            assert batch.forces.shape == (6, 3)

    def test_to_atomic_data_dict(self):
        """Batch.to_dict() exposes the same tensors/shapes as attribute access."""
        data1 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)
        data2 = AtomicData.from_config(self.config, z_table=self.table, cutoff=3.0)

        data_loader = torch_geometric.dataloader.DataLoader(
            dataset=[data1, data2],
            batch_size=2,
            shuffle=True,
            drop_last=False,
        )

        for batch in data_loader:
            batch_dict = batch.to_dict()
            assert batch_dict["batch"].shape == (6,)
            assert batch_dict["edge_index"].shape == (2, 8)
            assert batch_dict["shifts"].shape == (8, 3)
            assert batch_dict["positions"].shape == (6, 3)
            assert batch_dict["node_attrs"].shape == (6, 2)
            assert batch_dict["energy"].shape == (2,)
            assert batch_dict["forces"].shape == (6, 3)

    def test_hdf5_dataloader(self):
        """HDF5-backed dataset batches identically to an in-memory dataset."""
        datasets = [self.config, self.config_2] * 5
        # get path of the mace package
        # NOTE(review): `str(mace_path) + "test.h5"` has no path separator, so
        # this writes a sibling file like "<parent>/macetest.h5" rather than a
        # file inside the repo, and it is never cleaned up — presumably
        # unintended; confirm before changing, as both writer and reader use
        # the same (consistent) path.
        with h5py.File(str(mace_path) + "test.h5", "w") as f:
            save_configurations_as_HDF5(datasets, 0, f)
        train_dataset = HDF5Dataset(
            str(mace_path) + "test.h5", z_table=self.table, r_max=3.0
        )
        train_loader = torch_geometric.dataloader.DataLoader(
            dataset=train_dataset,
            batch_size=2,
            shuffle=False,
            drop_last=False,
        )
        batch_count = 0
        for batch in train_loader:
            batch_count += 1
            assert batch.batch.shape == (6,)
            assert batch.edge_index.shape == (2, 8)
            assert batch.shifts.shape == (8, 3)
            assert batch.positions.shape == (6, 3)
            assert batch.node_attrs.shape == (6, 2)
            assert batch.energy.shape == (2,)
            assert batch.forces.shape == (6, 3)
        print(batch_count, len(train_loader), len(train_dataset))
        assert batch_count == len(train_loader) == len(train_dataset) / 2

        # Cross-check: HDF5-loaded batches must match batches built directly
        # from the in-memory configurations.
        train_loader_direct = torch_geometric.dataloader.DataLoader(
            dataset=[
                AtomicData.from_config(config, z_table=self.table, cutoff=3.0)
                for config in datasets
            ],
            batch_size=2,
            shuffle=False,
            drop_last=False,
        )
        for batch_direct, batch in zip(train_loader_direct, train_loader):
            assert torch.all(batch_direct.edge_index == batch.edge_index)
            assert torch.all(batch_direct.shifts == batch.shifts)
            assert torch.all(batch_direct.positions == batch.positions)
            assert torch.all(batch_direct.node_attrs == batch.node_attrs)
            assert torch.all(batch_direct.energy == batch.energy)
            assert torch.all(batch_direct.forces == batch.forces)
class TestNeighborhood:
    """Shape checks for get_neighborhood on open and partially periodic systems."""

    def test_basic(self):
        """Three collinear atoms spaced 1.0 apart -> 4 directed edges at cutoff 1.5."""
        coords = np.array(
            [[-1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]
        )
        edges, cell_shifts, cell_units, _ = get_neighborhood(coords, cutoff=1.5)
        assert edges.shape == (2, 4)
        assert cell_shifts.shape == (4, 3)
        assert cell_units.shape == (4, 3)

    def test_signs(self):
        """PBC along x only: two atoms in a 2x1x1 cell give 10 edges at cutoff 3.5."""
        coords = np.array([[0.5, 0.5, 0.0], [1.0, 1.0, 0.0]])
        lattice = np.diag([2.0, 1.0, 1.0])
        edges, cell_shifts, cell_units, _ = get_neighborhood(
            coords, cutoff=3.5, pbc=(True, False, False), cell=lattice
        )
        expected_edges = 10
        assert edges.shape == (2, expected_edges)
        assert cell_shifts.shape == (expected_edges, 3)
        assert cell_units.shape == (expected_edges, 3)
# Based on mir-group/nequip
def test_periodic_edge():
    """Fully periodic fcc Cu: every atom has 12 nearest neighbors at distance `dist`."""
    atoms = ase.build.bulk("Cu", "fcc")
    dist = np.linalg.norm(atoms.cell[0]).item()
    config = config_from_atoms(atoms)
    edge_index, shifts, _, _ = get_neighborhood(
        config.positions,
        cutoff=1.05 * dist,
        pbc=(True, True, True),
        cell=config.cell,
    )
    src, dst = edge_index
    # Edge vectors include the periodic shift term.  [n_edges, 3]
    vectors = config.positions[dst] - config.positions[src] + shifts
    # 12 neighbors in close-packed bulk
    assert vectors.shape == (12, 3)
    lengths = np.linalg.norm(vectors, axis=-1)
    assert np.allclose(lengths, dist)
def test_half_periodic():
    """A 3x3x1 Al(111) slab periodic in x/y only: 6 in-plane neighbors, no z edges."""
    slab = ase.build.fcc111("Al", size=(3, 3, 1), vacuum=0.0)
    assert all(slab.pbc == (True, True, False))
    config = config_from_atoms(slab)
    # first shell dist is 2.864A
    edge_index, shifts, _, _ = get_neighborhood(
        config.positions, cutoff=2.9, pbc=(True, True, False), cell=config.cell
    )
    src, dst = edge_index
    vectors = config.positions[dst] - config.positions[src] + shifts  # [n_edges, 3]
    # Check number of neighbors:
    _, counts = np.unique(edge_index[0], return_counts=True)
    assert (counts == 6).all()  # 6 neighbors
    # Check not periodic in z
    z_components = vectors[:, 2]
    assert np.allclose(z_components, np.zeros(vectors.shape[0]))
mace-bench/3rdparty/mace/tests/test_finetuning_select.py
0 → 100644
View file @
1be78103
import
ase.io
as
aio
import
numpy
as
np
import
pytest
from
ase
import
Atoms
from
ase.build
import
molecule
from
mace.cli.fine_tuning_select
import
(
FilteringType
,
SelectionSettings
,
SubselectType
,
_filter_pretraining_data
,
_load_descriptors
,
_maybe_save_descriptors
,
filter_atoms
,
select_samples
,
)
@pytest.fixture(name="train_atoms_fixture")
def train_atoms():
    """Provide a small mixed set of molecules/clusters for filtering tests."""
    structures = [
        molecule("H2O"),
        molecule("CH4"),
        Atoms("Fe2O3"),
        Atoms("C"),
        Atoms("FeON"),
        Atoms("Fe"),
    ]
    return structures
@pytest.fixture(name="train_atom_descriptors_fixture")
def train_atom_descriptors(train_atoms_fixture):
    """Fake per-element descriptors: structure i maps each symbol to a constant-i vector."""
    descriptors = []
    for idx, structure in enumerate(train_atoms_fixture):
        per_element = {sym: np.zeros(5) + idx for sym in structure.symbols}
        descriptors.append(per_element)
    return descriptors
@pytest.mark.parametrize(
    "filtering_type, passes_filter, element_sublist",
    [
        (FilteringType.NONE, [True] * 6, []),
        (FilteringType.NONE, [True] * 6, ["C", "U", "Anything really"]),
        (
            FilteringType.COMBINATIONS,
            [False, False, True, False, False, True],
            ["O", "Fe"],
        ),
        (
            FilteringType.INCLUSIVE,
            [False, False, True, False, True, False],
            ["O", "Fe"],
        ),
        (
            FilteringType.EXCLUSIVE,
            [False, False, True, False, False, False],
            ["O", "Fe"],
        ),
    ],
)
def test_filter_data(train_atoms_fixture, filtering_type, passes_filter, element_sublist):
    """Each FilteringType selects the expected subset of the 6 fixture structures."""
    filtered, _, passes = _filter_pretraining_data(
        train_atoms_fixture, filtering_type, element_sublist
    )
    assert passes == passes_filter
    assert len(filtered) == sum(passes_filter)
@pytest.mark.parametrize(
    "passes_filter", [[True] * 6, [False, True, False, True, False, True]]
)
def test_load_descriptors(
    train_atoms_fixture, train_atom_descriptors_fixture, passes_filter, tmp_path
):
    """Saving descriptors strips them from atoms.info; reloading restores the
    correct per-structure descriptors for the filtered subset."""
    for i, atoms in enumerate(train_atoms_fixture):
        atoms.info["mace_descriptors"] = train_atom_descriptors_fixture[i]
    save_path = tmp_path / "test.xyz"
    _maybe_save_descriptors(train_atoms_fixture, save_path.as_posix())
    # Descriptors are moved into a side file, not kept on the atoms objects.
    assert all(
        not "mace_descriptors" in atoms.info for atoms in train_atoms_fixture
    )
    filtered_atoms = [
        x for x, passes in zip(train_atoms_fixture, passes_filter) if passes
    ]
    descriptors_path = save_path.as_posix().replace(".xyz", "_descriptors.npy")
    _load_descriptors(
        filtered_atoms,
        passes_filter,
        descriptors_path=descriptors_path,
        calc=None,
        full_data_length=len(train_atoms_fixture),
    )
    expected_descriptors = [
        train_atom_descriptors_fixture[i]
        for i, passes in enumerate(passes_filter)
        if passes
    ]
    for i, atoms in enumerate(filtered_atoms):
        assert "mace_descriptors" in atoms.info
        for key, value in expected_descriptors[i].items():
            assert np.allclose(atoms.info["mace_descriptors"][key], value)
def test_select_samples_random(train_atoms_fixture, tmp_path):
    """Random subselection with no filtering writes the requested number of
    samples plus an identical 'combined' file (no fine-tuning data given)."""
    input_file_path = tmp_path / "input.xyz"
    aio.write(input_file_path, train_atoms_fixture, format="extxyz")
    output_file_path = tmp_path / "output.xyz"
    settings = SelectionSettings(
        configs_pt=input_file_path.as_posix(),
        output=output_file_path.as_posix(),
        num_samples=2,
        subselect=SubselectType.RANDOM,
        filtering_type=FilteringType.NONE,
    )
    select_samples(settings)

    # Check if output file is created
    assert output_file_path.exists()
    combined_output_file_path = tmp_path / "output_combined.xyz"
    assert combined_output_file_path.exists()

    output_atoms = aio.read(output_file_path, index=":")
    assert isinstance(output_atoms, list)
    assert len(output_atoms) == 2

    combined_output_atoms = aio.read(combined_output_file_path, index=":")
    assert isinstance(combined_output_atoms, list)
    assert (
        len(combined_output_atoms) == 2
    )  # combined same as output since no FT data provided
def test_select_samples_ft_provided(train_atoms_fixture, tmp_path):
    """With fine-tuning data supplied, the default filtering restricts the
    selection to the FT element set and the combined file appends the FT data."""
    input_file_path = tmp_path / "input.xyz"
    aio.write(input_file_path, train_atoms_fixture, format="extxyz")
    output_file_path = tmp_path / "output.xyz"
    ft_file_path = tmp_path / "ft_data.xyz"
    ft_data = [Atoms("FeO")]
    aio.write(ft_file_path.as_posix(), ft_data, format="extxyz")
    settings = SelectionSettings(
        configs_pt=input_file_path.as_posix(),
        output=output_file_path.as_posix(),
        num_samples=2,
        subselect=SubselectType.RANDOM,
        configs_ft=ft_file_path.as_posix(),
    )
    select_samples(settings)

    # Check if output file is created
    assert output_file_path.exists()
    combined_output_file_path = tmp_path / "output_combined.xyz"
    assert combined_output_file_path.exists()

    output_atoms = aio.read(output_file_path, index=":")
    assert isinstance(output_atoms, list)
    assert len(output_atoms) == 2
    # Every selected structure is built only from the FT elements (Fe, O).
    assert all(filter_atoms(x, ["Fe", "O"]) for x in output_atoms)

    combined_atoms = aio.read(combined_output_file_path, index=":")
    assert isinstance(combined_atoms, list)
    assert len(combined_atoms) == len(output_atoms) + len(ft_data)
mace-bench/3rdparty/mace/tests/test_foundations.py
0 → 100644
View file @
1be78103
from
pathlib
import
Path
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
from
ase.build
import
molecule
from
e3nn
import
o3
from
e3nn.util
import
jit
from
scipy.spatial.transform
import
Rotation
as
R
from
mace
import
data
,
modules
,
tools
from
mace.calculators
import
mace_mp
,
mace_off
from
mace.tools
import
torch_geometric
from
mace.tools.finetuning_utils
import
load_foundations_elements
from
mace.tools.scripts_utils
import
extract_config_mace_model
,
remove_pt_head
from
mace.tools.utils
import
AtomicNumberTable
# Bundled foundation-model checkpoint shipped with the package, used by the
# parametrized tests below as a file-path model source.
MODEL_PATH = (
    Path(__file__).parent.parent
    / "mace"
    / "calculators"
    / "foundations_models"
    / "2023-12-03-mace-mp.model"
)

# Foundation models are float64; set the global default to match.
torch.set_default_dtype(torch.float64)
# NOTE(review): using `pytest.skip(..., allow_module_level=True)` as a
# *decorator* calls pytest.skip at import time, which skips the ENTIRE module
# (every test below), not just this function. If only this test should be
# skipped, `@pytest.mark.skip(reason=...)` is the usual form — confirm intent.
@pytest.skip("Problem with the float type", allow_module_level=True)
def test_foundations():
    """Load foundation-model weights into a fresh model and check the forces
    it predicts match the foundation calculator's own model."""
    # Create MACE model
    config = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    # Created the rotated environment
    rot = R.from_euler("z", 60, degrees=True).as_matrix()
    positions_rotated = np.array(rot @ config.positions.T).T
    config_rotated = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=positions_rotated,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    table = tools.AtomicNumberTable([1, 6, 8])
    atomic_energies = np.array([0.0, 0.0, 0.0], dtype=float)

    model_config = dict(
        r_max=6,
        num_bessel=10,
        num_polynomial_cutoff=5,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=3,
        hidden_irreps=o3.Irreps("128x0e + 128x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies,
        avg_num_neighbors=3,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
        atomic_inter_scale=0.1,
        atomic_inter_shift=0.0,
    )
    model = modules.ScaleShiftMACE(**model_config)
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    model_loaded = load_foundations_elements(
        model,
        calc_foundation.models[0],
        table=table,
        load_readout=True,
        use_shift=False,
        max_L=1,
    )
    atomic_data = data.AtomicData.from_config(config, z_table=table, cutoff=6.0)
    atomic_data2 = data.AtomicData.from_config(
        config_rotated, z_table=table, cutoff=6.0
    )

    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data2],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    forces_loaded = model_loaded(batch.to_dict())["forces"]
    forces = model(batch.to_dict())["forces"]
    assert torch.allclose(forces, forces_loaded)
def test_multi_reference():
    """Multihead model with foundation weights loaded reproduces the
    foundation calculator's forces on the head-labelled molecule."""
    config_multi = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
        head="MP2",
    )
    table_multi = tools.AtomicNumberTable([1, 6, 8])
    # One atomic-energy row per head.
    atomic_energies_multi = np.array(
        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], dtype=float
    )
    table = tools.AtomicNumberTable([1, 6, 8])

    # Create MACE model
    model_config = dict(
        r_max=6,
        num_bessel=10,
        num_polynomial_cutoff=5,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=3,
        hidden_irreps=o3.Irreps("128x0e + 128x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies_multi,
        avg_num_neighbors=61,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
        atomic_inter_scale=[1.0, 1.0],
        atomic_inter_shift=[0.0, 0.0],
        heads=["MP2", "DFT"],
    )
    model = modules.ScaleShiftMACE(**model_config)
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    model_loaded = load_foundations_elements(
        model,
        calc_foundation.models[0],
        table=table,
        load_readout=True,
        use_shift=False,
        max_L=1,
    )
    atomic_data = data.AtomicData.from_config(
        config_multi, z_table=table_multi, cutoff=6.0, heads=["MP2", "DFT"]
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    forces_loaded = model_loaded(batch.to_dict())["forces"]
    calc_foundation = mace_mp(model="medium", device="cpu", default_dtype="float64")
    atoms = molecule("H2COH")
    atoms.info["head"] = "MP2"
    atoms.calc = calc_foundation
    forces = atoms.get_forces()
    # Batch holds two copies of the 5-atom molecule; compare the first copy.
    assert np.allclose(
        forces, forces_loaded.detach().numpy()[:5, :], atol=1e-5, rtol=1e-5
    )
# NOTE: the parametrize list instantiates calculators (and may download model
# files) at collection time.
@pytest.mark.parametrize(
    "calc",
    [
        mace_mp(device="cpu", default_dtype="float64"),
        mace_mp(model="small", device="cpu", default_dtype="float64"),
        mace_mp(model="medium", device="cpu", default_dtype="float64"),
        mace_mp(model="large", device="cpu", default_dtype="float64"),
        mace_mp(model=MODEL_PATH, device="cpu", default_dtype="float64"),
        mace_off(model="small", device="cpu", default_dtype="float64"),
        mace_off(model="medium", device="cpu", default_dtype="float64"),
        mace_off(model="large", device="cpu", default_dtype="float64"),
        mace_off(model=MODEL_PATH, device="cpu", default_dtype="float64"),
    ],
)
def test_compile_foundation(calc):
    """A TorchScript-compiled foundation model matches the eager model's
    tensor outputs on a perturbed CH4 molecule."""
    model = calc.models[0]
    atoms = molecule("CH4")
    # random perturbation so outputs are non-degenerate
    atoms.positions += np.random.randn(*atoms.positions.shape) * 0.1
    batch = calc._atoms_to_batch(atoms)  # pylint: disable=protected-access
    output_1 = model(batch.to_dict())
    model_compiled = jit.compile(model)
    output = model_compiled(batch.to_dict())
    for key in output_1.keys():
        if isinstance(output_1[key], torch.Tensor):
            assert torch.allclose(output_1[key], output[key], atol=1e-5)
@pytest.mark.parametrize(
    "model",
    [
        mace_mp(model="small", device="cpu", default_dtype="float64").models[0],
        mace_mp(model="medium", device="cpu", default_dtype="float64").models[0],
        mace_mp(model="large", device="cpu", default_dtype="float64").models[0],
        mace_mp(model=MODEL_PATH, device="cpu", default_dtype="float64").models[0],
        mace_off(model="small", device="cpu", default_dtype="float64").models[0],
        mace_off(model="medium", device="cpu", default_dtype="float64").models[0],
        mace_off(model="large", device="cpu", default_dtype="float64").models[0],
        mace_off(model=MODEL_PATH, device="cpu", default_dtype="float64").models[0],
    ],
)
def test_extract_config(model):
    """A model rebuilt from extract_config_mace_model + state_dict reproduces
    the original model's outputs on a test batch."""
    assert isinstance(model, modules.ScaleShiftMACE)
    config = data.Configuration(
        atomic_numbers=molecule("H2COH").numbers,
        positions=molecule("H2COH").positions,
        properties={
            "forces": molecule("H2COH").positions,
            "energy": -1.5,
            "charges": molecule("H2COH").numbers,
            "dipole": np.array([-1.5, 1.5, 2.0]),
        },
        property_weights={
            "forces": 1.0,
            "energy": 1.0,
            "charges": 1.0,
            "dipole": 1.0,
        },
    )
    model_copy = modules.ScaleShiftMACE(**extract_config_mace_model(model))
    model_copy.load_state_dict(model.state_dict())
    z_table = AtomicNumberTable([int(z) for z in model.atomic_numbers])
    atomic_data = data.AtomicData.from_config(config, z_table=z_table, cutoff=6.0)
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    output = model(batch.to_dict())
    output_copy = model_copy(batch.to_dict())
    # assert all items of the output dicts are equal
    for key in output.keys():
        if isinstance(output[key], torch.Tensor):
            assert torch.allclose(output[key], output_copy[key], atol=1e-5)
def test_remove_pt_head():
    """Stripping the pretraining head from a 2-head model keeps the DFT head's
    structure and reproduces its energy/forces exactly."""
    # Set up test data
    torch.manual_seed(42)
    atomic_energies_pt_head = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=float)
    z_table = AtomicNumberTable([1, 8])  # H and O

    # Create multihead model
    model_config = {
        "r_max": 5.0,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "interaction_cls_first": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "num_interactions": 2,
        "num_elements": len(z_table),
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies_pt_head,
        "avg_num_neighbors": 8,
        "atomic_numbers": z_table.zs,
        "correlation": 3,
        "heads": ["pt_head", "DFT"],
        "atomic_inter_scale": [1.0, 1.0],
        "atomic_inter_shift": [0.0, 0.1],
    }
    model = modules.ScaleShiftMACE(**model_config)

    # Create test molecule
    mol = molecule("H2O")
    config_pt_head = data.Configuration(
        atomic_numbers=mol.numbers,
        positions=mol.positions,
        properties={"energy": 1.0, "forces": np.random.randn(len(mol), 3)},
        property_weights={"forces": 1.0, "energy": 1.0},
        head="DFT",
    )
    atomic_data = data.AtomicData.from_config(
        config_pt_head, z_table=z_table, cutoff=5.0, heads=["pt_head", "DFT"]
    )
    dataloader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data], batch_size=1, shuffle=False
    )
    batch = next(iter(dataloader))

    # Test original mode
    output_orig = model(batch.to_dict())

    # Convert to single head model
    new_model = remove_pt_head(model, head_to_keep="DFT")

    # Basic structure tests
    assert len(new_model.heads) == 1
    assert new_model.heads[0] == "DFT"
    assert new_model.atomic_energies_fn.atomic_energies.shape[0] == 1
    assert len(torch.atleast_1d(new_model.scale_shift.scale)) == 1
    assert len(torch.atleast_1d(new_model.scale_shift.shift)) == 1

    # Test output consistency: rebuild the batch with only the kept head so
    # one-hot head indices match the single-head model.
    atomic_data = data.AtomicData.from_config(
        config_pt_head, z_table=z_table, cutoff=5.0, heads=["DFT"]
    )
    dataloader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data], batch_size=1, shuffle=False
    )
    batch = next(iter(dataloader))
    output_new = new_model(batch.to_dict())
    torch.testing.assert_close(
        output_orig["energy"], output_new["energy"], rtol=1e-5, atol=1e-5
    )
    torch.testing.assert_close(
        output_orig["forces"], output_new["forces"], rtol=1e-5, atol=1e-5
    )
def test_remove_pt_head_multihead():
    """For a 4-head model, removing all heads but one must preserve that head's
    scale/shift and outputs, raise on unknown heads, and default to the first
    non-pretraining head."""
    # Set up test data
    torch.manual_seed(42)
    atomic_energies_pt_head = np.array(
        [
            [1.0, 2.0],  # H energies for each head
            [3.0, 4.0],  # O energies for each head
        ]
        * 2
    )
    z_table = AtomicNumberTable([1, 8])  # H and O

    # Create multihead model
    model_config = {
        "r_max": 5.0,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "interaction_cls_first": modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        "num_interactions": 2,
        "num_elements": len(z_table),
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies_pt_head,
        "avg_num_neighbors": 8,
        "atomic_numbers": z_table.zs,
        "correlation": 3,
        "heads": ["pt_head", "DFT", "MP2", "CCSD"],
        "atomic_inter_scale": [1.0, 1.0, 1.0, 1.0],
        "atomic_inter_shift": [0.0, 0.1, 0.2, 0.3],
    }
    model = modules.ScaleShiftMACE(**model_config)

    # Create test configurations for each head
    mol = molecule("H2O")
    configs = {}
    atomic_datas = {}
    dataloaders = {}
    original_outputs = {}

    # First get outputs from original model for each head
    for head in model.heads:
        config_pt_head = data.Configuration(
            atomic_numbers=mol.numbers,
            positions=mol.positions,
            properties={"energy": 1.0, "forces": np.random.randn(len(mol), 3)},
            property_weights={"forces": 1.0, "energy": 1.0},
            head=head,
        )
        configs[head] = config_pt_head
        atomic_data = data.AtomicData.from_config(
            config_pt_head, z_table=z_table, cutoff=5.0, heads=model.heads
        )
        atomic_datas[head] = atomic_data
        dataloader = torch_geometric.dataloader.DataLoader(
            dataset=[atomic_data], batch_size=1, shuffle=False
        )
        dataloaders[head] = dataloader
        batch = next(iter(dataloader))
        output = model(batch.to_dict())
        original_outputs[head] = output

    # Now test each head separately
    for i, head in enumerate(model.heads):
        # Convert to single head model
        new_model = remove_pt_head(model, head_to_keep=head)

        # Basic structure tests
        assert len(new_model.heads) == 1, f"Failed for head {head}"
        assert new_model.heads[0] == head, f"Failed for head {head}"
        assert (
            new_model.atomic_energies_fn.atomic_energies.shape[0] == 1
        ), f"Failed for head {head}"
        assert (
            len(torch.atleast_1d(new_model.scale_shift.scale)) == 1
        ), f"Failed for head {head}"
        assert (
            len(torch.atleast_1d(new_model.scale_shift.shift)) == 1
        ), f"Failed for head {head}"

        # Verify scale and shift values
        assert torch.allclose(
            new_model.scale_shift.scale, model.scale_shift.scale[i : i + 1]
        ), f"Failed for head {head}"
        assert torch.allclose(
            new_model.scale_shift.shift, model.scale_shift.shift[i : i + 1]
        ), f"Failed for head {head}"

        # Test output consistency
        single_head_data = data.AtomicData.from_config(
            configs[head], z_table=z_table, cutoff=5.0, heads=[head]
        )
        single_head_loader = torch_geometric.dataloader.DataLoader(
            dataset=[single_head_data], batch_size=1, shuffle=False
        )
        batch = next(iter(single_head_loader))
        new_output = new_model(batch.to_dict())
        # Compare outputs
        print(
            original_outputs[head]["energy"],
            new_output["energy"],
        )
        torch.testing.assert_close(
            original_outputs[head]["energy"],
            new_output["energy"],
            rtol=1e-5,
            atol=1e-5,
            msg=f"Energy mismatch for head {head}",
        )
        torch.testing.assert_close(
            original_outputs[head]["forces"],
            new_output["forces"],
            rtol=1e-5,
            atol=1e-5,
            msg=f"Forces mismatch for head {head}",
        )

    # Test error cases
    with pytest.raises(ValueError, match="Head non_existent not found in model"):
        remove_pt_head(model, head_to_keep="non_existent")

    # Test default behavior (first non-PT head)
    default_model = remove_pt_head(model)
    assert default_model.heads[0] == "DFT"

    # Additional test: check if each model's computation graph is independent
    models = {head: remove_pt_head(model, head_to_keep=head) for head in model.heads}
    results = {}
    for head, head_model in models.items():
        single_head_data = data.AtomicData.from_config(
            configs[head], z_table=z_table, cutoff=5.0, heads=[head]
        )
        single_head_loader = torch_geometric.dataloader.DataLoader(
            dataset=[single_head_data], batch_size=1, shuffle=False
        )
        batch = next(iter(single_head_loader))
        results[head] = head_model(batch.to_dict())

    # Verify each model produces different outputs
    energies = torch.stack([results[head]["energy"] for head in model.heads])
    assert not torch.allclose(
        energies[0], energies[1], rtol=1e-3
    ), "Different heads should produce different outputs"
mace-bench/3rdparty/mace/tests/test_hessian.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
pytest
from
ase.build
import
fcc111
from
mace.calculators
import
mace_mp
@pytest.fixture(name="setup_calculator_")
def setup_calculator():
    """Provide a CPU, float64 MACE-MP "medium" calculator without dispersion."""
    return mace_mp(
        model="medium",
        dispersion=False,
        default_dtype="float64",
        device="cpu",
    )
@pytest.fixture(name="setup_structure_")
def setup_structure(setup_calculator_):
    """Build a 4x4x1 orthogonal Pt(111) slab and attach the MACE calculator."""
    slab = fcc111("Pt", size=(4, 4, 1), vacuum=10.0, orthogonal=True)
    slab.calc = setup_calculator_
    return slab
def test_potential_energy_and_hessian(setup_structure_):
    """The autograd Hessian must come back with shape (3N, N, 3) for N atoms."""
    atoms = setup_structure_
    n_atoms = len(atoms)
    hessian = atoms.calc.get_hessian(atoms=atoms)
    assert hessian.shape == (3 * n_atoms, n_atoms, 3)
def test_finite_difference_hessian(setup_structure_):
    """Central-difference Hessian of the forces must match the autograd Hessian.

    Each Cartesian degree of freedom is displaced by +/- delta; the force
    difference builds one Hessian column (H = -dF/dx).  The assembled matrix
    is reshaped to the (3N, N, 3) layout of ``calc.get_hessian`` and compared
    to 1e-6 absolute tolerance.
    """
    initial = setup_structure_
    calc = initial.calc  # reuse one calculator for every displaced copy
    indices = list(range(len(initial)))  # fixed: was misspelled "indicies"
    delta, ndim = 1e-4, 3
    hessian = np.zeros((len(indices) * ndim, len(indices) * ndim))
    atoms_h = initial.copy()
    for i, index in enumerate(indices):
        for j in range(ndim):
            # Forward displacement of atom `index` along axis `j`.
            atoms_plus = atoms_h.copy()
            atoms_plus.positions[index, j] += delta
            atoms_plus.calc = calc
            forces_plus = atoms_plus.get_forces()
            # Backward displacement.
            atoms_minus = atoms_h.copy()
            atoms_minus.positions[index, j] -= delta
            atoms_minus.calc = calc
            forces_minus = atoms_minus.get_forces()
            # Column of the Hessian via central differences: H = -dF/dx.
            hessian[:, i * ndim + j] = (
                -(forces_plus - forces_minus)[indices].flatten() / (2 * delta)
            )
    hessian = hessian.reshape((-1, len(initial), 3))
    h_autograd = calc.get_hessian(atoms=initial)
    assert np.allclose(h_autograd, hessian, atol=1e-6)
mace-bench/3rdparty/mace/tests/test_lmdb_database.py
0 → 100644
View file @
1be78103
import
os
import
tempfile
import
numpy
as
np
import
torch
from
ase.build
import
molecule
from
ase.calculators.singlepoint
import
SinglePointCalculator
from
mace.data.lmdb_dataset
import
LMDBDataset
from
mace.tools
import
AtomicNumberTable
,
torch_geometric
from
mace.tools.fairchem_dataset.lmdb_dataset_tools
import
LMDBDatabase
def test_lmdb_dataset():
    """Test the LMDBDataset by creating a fake database and verifying batch creation."""
    # Set default dtype to match typical MACE usage
    torch.set_default_dtype(torch.float64)
    # Set random seed for reproducibility (draw order below matters)
    np.random.seed(42)
    # Create temporary directories for the databases
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create 3 folders for databases; LMDBDataset accepts a colon-joined
        # list of folders and discovers the *.aselmdb files inside each.
        db_paths = []
        for i in range(3):
            folder_path = os.path.join(tmpdir, f"folder_{i}")
            os.makedirs(folder_path, exist_ok=True)
            # Create LMDB database files in each folder (2 per folder)
            for j in range(2):
                db_path = os.path.join(folder_path, f"data_{j}.aselmdb")
                db = LMDBDatabase(db_path, readonly=False)
                # Add 2 configurations to each database
                for _ in range(2):
                    # Create a water molecule using ASE's build functionality
                    atoms = molecule("H2O")
                    # Apply small random displacements to the positions
                    displacement = np.random.rand(*atoms.positions.shape) * 0.1
                    atoms.positions += displacement
                    # Set cell and PBC
                    atoms.set_cell(np.eye(3) * 5.0)
                    atoms.set_pbc(True)
                    # Add random energy, forces, and stress
                    energy = np.random.uniform(-15.0, -5.0)  # Random energy between -15 and -5 eV
                    forces = (
                        np.random.randn(*atoms.positions.shape) * 0.5
                    )  # Random forces
                    stress = np.random.randn(6) * 0.2  # Random stress in Voigt notation
                    # Add calculator to atoms with results
                    calc = SinglePointCalculator(
                        atoms, energy=energy, forces=forces, stress=stress
                    )
                    atoms.calc = calc
                    # Store in database
                    db.write(atoms)
                db.close()
            # Add folder path to our list
            db_paths.append(folder_path)
        # Create the dataset using paths joined with colons
        paths_str = ":".join(db_paths)
        z_table = AtomicNumberTable([1, 8])  # H and O
        dataset = LMDBDataset(file_path=paths_str, r_max=5.0, z_table=z_table)
        # Check dataset size (3 folders * 2 files * 2 configs = 12 entries)
        assert len(dataset) == 12
        # Test retrieving a single item
        item = dataset[0]
        print(item)
        assert item.positions.shape == (3, 3)  # 3 atoms, 3 coordinates
        assert hasattr(item, "energy")
        assert hasattr(item, "forces")
        assert hasattr(item, "stress")
        # Create a dataloader
        dataloader = torch_geometric.dataloader.DataLoader(
            dataset=dataset,
            batch_size=4,
            shuffle=False,
            drop_last=False,
        )
        # Get a batch and validate it
        batch = next(iter(dataloader))
        # Verify batch properties - should have 12 atoms (4 configs * 3 atoms per water)
        assert batch.positions.shape == (12, 3)  # 12 atoms, 3 coordinates
        assert batch.energy.shape[0] == 4  # 4 energies (one per config)
        assert batch.forces.shape == (12, 3)  # Forces for each atom
        print(batch.stress.shape)
        assert batch.stress.shape == (4, 3, 3)  # Stress for each config
        # Check batch has required attributes for MACE model processing
        assert hasattr(batch, "batch")  # Batch indices
        assert batch.batch.shape[0] == 12  # One index per atom
        assert hasattr(batch, "ptr")  # Pointer for batch processing
        assert batch.ptr.shape[0] == 5  # One pointer per config + 1
        # Check that batch indices are correctly assigned
        # First 3 atoms should be from config 0, next 3 from config 1, etc.
        expected_batch = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3])
        assert torch.all(batch.batch == expected_batch)
        # Check ptr correctly points to start of each configuration
        assert batch.ptr.tolist() == [0, 3, 6, 9, 12]
        # Create a batch dictionary that can be passed to a MACE model
        batch_dict = batch.to_dict()
        assert "positions" in batch_dict
        assert "energy" in batch_dict
        assert "forces" in batch_dict
        assert "stress" in batch_dict
        assert "batch" in batch_dict
        assert "ptr" in batch_dict
        # Verify additional properties required by MACE
        assert hasattr(batch, "edge_index")  # Connectivity information
        assert hasattr(batch, "shifts")  # For periodic boundary conditions
        assert hasattr(batch, "cell")  # Unit cell information
        # Test that a full batch can be processed (without errors)
        all_batches = list(dataloader)
        assert (
            len(all_batches) == 3
        )  # Should have 3 batches (12 configs with batch size 4)
mace-bench/3rdparty/mace/tests/test_models.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
torch
import
torch.nn.functional
from
ase
import
build
from
e3nn
import
o3
from
e3nn.util
import
jit
from
scipy.spatial.transform
import
Rotation
as
R
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
torch_geometric
torch.set_default_dtype(torch.float64)
# Reference water-like configuration (O at origin-ish, two H) with labelled
# forces, energy, charges and dipole; shared by all tests in this module.
config = data.Configuration(
    atomic_numbers=np.array([8, 1, 1]),
    positions=np.array(
        [
            [0.0, -2.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]
    ),
    properties={
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "charges": np.array([-2.0, 1.0, 1.0]),
        "dipole": np.array([-1.5, 1.5, 2.0]),
    },
    property_weights={
        "forces": 1.0,
        "energy": 1.0,
        "charges": 1.0,
        "dipole": 1.0,
    },
)
# Created the rotated environment: same structure rotated 60 degrees about z,
# used to probe invariance (energy) and equivariance (dipole) of the models.
rot = R.from_euler("z", 60, degrees=True).as_matrix()
positions_rotated = np.array(rot @ config.positions.T).T
# NOTE: the labels (forces/dipole) are deliberately NOT rotated here; the
# tests only compare model outputs, not these reference properties.
config_rotated = data.Configuration(
    atomic_numbers=np.array([8, 1, 1]),
    positions=positions_rotated,
    properties={
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "charges": np.array([-2.0, 1.0, 1.0]),
        "dipole": np.array([-1.5, 1.5, 2.0]),
    },
    property_weights={
        "forces": 1.0,
        "energy": 1.0,
        "charges": 1.0,
        "dipole": 1.0,
    },
)
# Element table (H, O) and per-element reference energies used by the models.
table = tools.AtomicNumberTable([1, 8])
atomic_energies = np.array([1.0, 3.0], dtype=float)
def test_mace():
    """MACE and its TorchScript-compiled twin agree; energy is rotation invariant."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 6,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 5,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("32x0e + 32x1o"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 8,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "bessel",
    }
    mace_model = modules.MACE(**settings)
    scripted_model = jit.compile(mace_model)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    eager_out = mace_model(batch.to_dict(), training=True)
    scripted_out = scripted_model(batch.to_dict(), training=True)
    # Eager and TorchScript-compiled models must agree on the same batch.
    assert torch.allclose(eager_out["energy"][0], scripted_out["energy"][0])
    # The rotated copy of the same molecule must have the same energy.
    assert torch.allclose(scripted_out["energy"][0], scripted_out["energy"][1])
def test_dipole_mace():
    """AtomicDipolesMACE predicts dipoles of the right shape that co-rotate with the input."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("16x0e + 16x1o + 16x2e"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": None,
        "avg_num_neighbors": 3,
        "atomic_numbers": table.zs,
        "correlation": 3,
        "radial_type": "gaussian",
    }
    dipole_model = modules.AtomicDipolesMACE(**settings)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    prediction = dipole_model(
        batch,
        training=True,
    )
    # sanity check of dipoles being the right shape
    assert prediction["dipole"][0].unsqueeze(0).shape == graphs[0].dipole.shape
    # test equivariance of output dipoles: rotating the structure must rotate
    # the predicted dipole by the same matrix.
    rotated_reference = np.array(rot @ prediction["dipole"][0].detach().numpy())
    assert np.allclose(
        rotated_reference,
        prediction["dipole"][1].detach().numpy(),
    )
def test_energy_dipole_mace():
    """EnergyDipolesMACE: invariant energy and equivariant dipoles on a rotated copy."""
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    settings = {
        "r_max": 5,
        "num_bessel": 8,
        "num_polynomial_cutoff": 5,
        "max_ell": 2,
        "interaction_cls": residual_block,
        "interaction_cls_first": residual_block,
        "num_interactions": 2,
        "num_elements": 2,
        "hidden_irreps": o3.Irreps("16x0e + 16x1o + 16x2e"),
        "MLP_irreps": o3.Irreps("16x0e"),
        "gate": torch.nn.functional.silu,
        "atomic_energies": atomic_energies,
        "avg_num_neighbors": 3,
        "atomic_numbers": table.zs,
        "correlation": 3,
    }
    combined_model = modules.EnergyDipolesMACE(**settings)
    graphs = [
        data.AtomicData.from_config(config, z_table=table, cutoff=3.0),
        data.AtomicData.from_config(config_rotated, z_table=table, cutoff=3.0),
    ]
    loader = torch_geometric.dataloader.DataLoader(
        dataset=graphs,
        batch_size=2,
        shuffle=False,
        drop_last=False,
    )
    batch = next(iter(loader))
    prediction = combined_model(
        batch,
        training=True,
    )
    # sanity check of dipoles being the right shape
    assert prediction["dipole"][0].unsqueeze(0).shape == graphs[0].dipole.shape
    # test energy is invariant under the rotation
    assert torch.allclose(prediction["energy"][0], prediction["energy"][1])
    # test equivariance of output dipoles
    rotated_reference = np.array(rot @ prediction["dipole"][0].detach().numpy())
    assert np.allclose(
        rotated_reference,
        prediction["dipole"][1].detach().numpy(),
    )
def test_mace_multi_reference():
    """Two-head ScaleShiftMACE: compiled model matches eager, one energy per graph."""
    # One row of E0s per head: head 0 ("Default") uses [1, 3], head 1 ("dft") zeros.
    atomic_energies_multi = np.array([[1.0, 3.0], [0.0, 0.0]], dtype=float)
    model_config = dict(
        r_max=5,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=3,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=2,
        num_elements=2,
        hidden_irreps=o3.Irreps("96x0e + 96x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=atomic_energies_multi,
        avg_num_neighbors=8,
        atomic_numbers=table.zs,
        distance_transform=True,
        pair_repulsion=True,
        correlation=3,
        heads=["Default", "dft"],
        # radial_type="chebyshev",
        # Per-head scale/shift pairs, matching the two heads above.
        atomic_inter_scale=[1.0, 1.0],
        atomic_inter_shift=[0.0, 0.1],
    )
    model = modules.ScaleShiftMACE(**model_config)
    model_compiled = jit.compile(model)
    # NOTE(review): this mutates the module-level `config`/`config_rotated`
    # objects in place; tests running after this one see head labels set.
    config.head = "Default"
    config_rotated.head = "dft"
    atomic_data = data.AtomicData.from_config(
        config, z_table=table, cutoff=3.0, heads=["Default", "dft"]
    )
    atomic_data2 = data.AtomicData.from_config(
        config_rotated, z_table=table, cutoff=3.0, heads=["Default", "dft"]
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data, atomic_data2],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    output1 = model(batch.to_dict(), training=True)
    output2 = model_compiled(batch.to_dict(), training=True)
    # Eager and TorchScript-compiled models must agree.
    assert torch.allclose(output1["energy"][0], output2["energy"][0])
    # One energy per configuration in the batch.
    assert output2["energy"].shape[0] == 2
def test_atomic_virials_stresses():
    """
    Test that atomic virials and stresses sum to the total virials and stress.
    """
    # Set default dtype for reproducibility
    torch.set_default_dtype(torch.float64)
    # Create a periodic cell with ASE
    atoms = build.bulk("Si", "diamond", a=5.43)
    # Apply strain to ensure non-zero stress
    strain_tensor = np.eye(3) * 1.02  # 2% strain
    atoms.set_cell(np.dot(atoms.get_cell(), strain_tensor), scale_atoms=True)
    # Add forces and energy for completeness
    # NOTE(review): np.random is used unseeded here; values are placeholders
    # and never asserted against, so this does not affect determinism of the
    # actual checks below.
    atoms.arrays["REF_forces"] = np.random.normal(0, 0.1, size=atoms.positions.shape)
    atoms.info["REF_energy"] = np.random.normal(0, 1)
    atoms.info["REF_stress"] = np.random.normal(0, 0.1, size=6)
    # Setup MACE model configuration
    stress_z_table = tools.AtomicNumberTable([14])  # Silicon
    stress_atomic_energies = np.array([0.0])
    model_config = dict(
        r_max=5.0,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=2,
        interaction_cls=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        interaction_cls_first=modules.interaction_classes[
            "RealAgnosticResidualInteractionBlock"
        ],
        num_interactions=3,
        num_elements=1,
        hidden_irreps=o3.Irreps("32x0e + 32x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=torch.nn.functional.silu,
        atomic_energies=stress_atomic_energies,
        avg_num_neighbors=4.0,
        # NOTE(review): this passes the module-level H/O table's zs while the
        # data uses stress_z_table ([14]) and num_elements=1 — looks
        # inconsistent; confirm whether atomic_numbers should be
        # stress_z_table.zs here.
        atomic_numbers=table.zs,
        correlation=3,
        atomic_inter_scale=1.0,
        atomic_inter_shift=0.0,
    )
    # Create the model
    model = modules.ScaleShiftMACE(**model_config)
    # Create atomic data
    atomic_data = data.AtomicData.from_config(
        data.config_from_atoms(
            atoms, key_specification=data.KeySpecification.from_defaults()
        ),
        z_table=stress_z_table,
        cutoff=5.0,
    )
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=[atomic_data],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(data_loader))
    batch_dict = batch.to_dict()
    # Run the model with compute_atomic_stresses=True
    output = model(
        batch_dict,
        compute_force=True,
        compute_virials=True,
        compute_stress=True,
        compute_atomic_stresses=True,
    )
    # Get total virials/stress and atomic virials/stresses
    total_virials = output["virials"]
    atomic_virials = output["atomic_virials"]
    total_stress = output["stress"]
    atomic_stresses = output["atomic_stresses"]
    # Test that atomic values are not None
    assert atomic_virials is not None, "Atomic virials were not computed"
    assert atomic_stresses is not None, "Atomic stresses were not computed"
    # Test shape of atomic values
    assert atomic_virials.shape[0] == len(atoms), "Wrong shape for atomic virials"
    assert atomic_virials.shape[1:] == (3, 3), "Atomic virials should be 3x3 matrices"
    assert atomic_stresses.shape[0] == len(atoms), "Wrong shape for atomic stresses"
    assert atomic_stresses.shape[1:] == (3, 3), "Atomic stresses should be 3x3 matrices"
    # Compute sum of atomic values
    summed_atomic_virials = torch.sum(atomic_virials, dim=0)
    summed_atomic_stresses = torch.sum(atomic_stresses, dim=0)
    # Test that sums match total values
    assert torch.allclose(
        summed_atomic_virials, total_virials.squeeze(0), atol=1e-6
    ), f"Sum of atomic virials {summed_atomic_virials} does not match total virials {total_virials.squeeze(0)}"
    assert torch.allclose(
        summed_atomic_stresses, total_stress.squeeze(0), atol=1e-6
    ), f"Sum of atomic stresses (normalized by volume) {summed_atomic_stresses} does not match total stress {total_stress.squeeze(0)}"
mace-bench/3rdparty/mace/tests/test_modules.py
0 → 100644
View file @
1be78103
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
from
e3nn
import
o3
from
mace.data
import
AtomicData
,
Configuration
from
mace.modules
import
(
AtomicEnergiesBlock
,
BesselBasis
,
PolynomialCutoff
,
SymmetricContraction
,
WeightedEnergyForcesLoss
,
WeightedHuberEnergyForcesStressLoss
,
compute_mean_rms_energy_forces
,
compute_statistics
,
)
from
mace.tools
import
AtomicNumberTable
,
scatter
,
to_numpy
,
torch_geometric
from
mace.tools.scripts_utils
import
dict_to_array
@pytest.fixture(name="config")
def _config():
    """Water-like configuration with forces, energy and (Voigt) stress labels."""
    numbers = np.array([8, 1, 1])
    coords = np.array(
        [
            [0.0, -2.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]
    )
    labels = {
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
        "stress": np.array([1.0, 0.0, 0.5, 0.0, -1.0, 0.0]),
    }
    weights = {"forces": 1.0, "energy": 1.0, "stress": 1.0}
    return Configuration(
        atomic_numbers=numbers,
        positions=coords,
        properties=labels,
        property_weights=weights,
    )
@pytest.fixture(name="table")
def _table():
    """Atomic-number table covering hydrogen and oxygen."""
    return AtomicNumberTable([1, 8])
@pytest.fixture(name="config1")
def _config1():
    """Water-like configuration labelled with the "DFT" head."""
    labels = {
        "forces": np.array(
            [
                [0.0, -1.3, 0.0],
                [1.0, 0.2, 0.0],
                [0.0, 1.1, 0.3],
            ]
        ),
        "energy": -1.5,
    }
    return Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.0, -2.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 1.0, 0.0],
            ]
        ),
        properties=labels,
        property_weights={"forces": 1.0, "energy": 1.0},
        head="DFT",
    )
@pytest.fixture(name="config2")
def _config2():
    """Slightly displaced water-like configuration labelled with the "MP2" head."""
    labels = {
        "forces": np.array(
            [
                [0.1, -1.2, 0.1],
                [1.1, 0.3, 0.1],
                [0.1, 1.2, 0.4],
            ]
        ),
        "energy": -1.4,
    }
    return Configuration(
        atomic_numbers=np.array([8, 1, 1]),
        positions=np.array(
            [
                [0.1, -1.9, 0.1],
                [1.1, 0.1, 0.1],
                [0.1, 1.1, 0.1],
            ]
        ),
        properties=labels,
        property_weights={"forces": 1.0, "energy": 1.0},
        head="MP2",
    )
@pytest.fixture(name="atomic_data")
def _atomic_data(config1, config2, table):
    """Both head-labelled configurations converted to AtomicData graphs."""
    return [
        AtomicData.from_config(cfg, z_table=table, cutoff=3.0, heads=["DFT", "MP2"])
        for cfg in (config1, config2)
    ]
@pytest.fixture(name="data_loader")
def _data_loader(atomic_data):
    """Single-batch loader over the two AtomicData graphs (no shuffling)."""
    loader_kwargs = {
        "dataset": atomic_data,
        "batch_size": 2,
        "shuffle": False,
        "drop_last": False,
    }
    return torch_geometric.dataloader.DataLoader(**loader_kwargs)
@pytest.fixture(name="atomic_energies")
def _atomic_energies():
    """Per-head E0s stacked in head order ("DFT" first, then "MP2")."""
    per_head = {
        "DFT": np.array([0.0, 0.0]),
        "MP2": np.array([0.1, 0.1]),
    }
    return dict_to_array(per_head, ["DFT", "MP2"])
@pytest.fixture(autouse=True)
def _set_torch_default_dtype():
    """Force float64 as the torch default for every test in this module."""
    torch.set_default_dtype(torch.float64)
def test_weighted_loss(config, table):
    """Both weighted losses must vanish when predictions equal the labels."""
    graph = AtomicData.from_config(config, z_table=table, cutoff=3.0)
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    # Use the reference labels themselves as the "prediction".
    perfect_prediction = {
        "energy": batch.energy,
        "forces": batch.forces,
        "stress": batch.stress,
    }
    for loss_fn in (
        WeightedEnergyForcesLoss(energy_weight=1, forces_weight=10),
        WeightedHuberEnergyForcesStressLoss(energy_weight=1, forces_weight=10),
    ):
        assert loss_fn(batch, perfect_prediction) == 0.0
def test_symmetric_contraction():
    """SymmetricContraction output and internal weight shapes are as expected."""
    contraction = SymmetricContraction(
        irreps_in=o3.Irreps("16x0e + 16x1o + 16x2e"),
        irreps_out=o3.Irreps("16x0e + 16x1o"),
        correlation=3,
        num_elements=2,
    )
    torch.manual_seed(123)
    node_feats = torch.randn(30, 16, 9)
    # Alternate the two element types across the 30 nodes.
    element_one_hot = torch.nn.functional.one_hot(torch.arange(0, 30) % 2).to(
        torch.get_default_dtype()
    )
    result = contraction(node_feats, element_one_hot)
    assert result.shape == (30, 64)
    assert contraction.contractions[0].weights_max.shape == (2, 11, 16)
def test_bessel_basis():
    """BesselBasis maps 10 distances to 10 rows of 5 basis values."""
    distances = torch.linspace(start=0.5, end=5.5, steps=10)
    basis = BesselBasis(r_max=6.0, num_basis=5)
    values = basis(distances.unsqueeze(-1))
    assert values.shape == (10, 5)
def test_polynomial_cutoff():
    """PolynomialCutoff preserves the shape of its 1-D distance input."""
    distances = torch.linspace(start=0.5, end=5.5, steps=10)
    envelope = PolynomialCutoff(r_max=5.0)
    values = envelope(distances)
    assert values.shape == (10,)
def test_atomic_energies(config, table):
    """Summed per-atom E0s give 1 + 1 + 3 = 5 for each water-like graph."""
    e0_block = AtomicEnergiesBlock(atomic_energies=np.array([1.0, 3.0]))
    graph = AtomicData.from_config(config, z_table=table, cutoff=3.0)
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    node_e0 = e0_block(batch.node_attrs).squeeze(-1)
    per_graph = scatter.scatter_sum(
        src=node_e0, index=batch.batch, dim=-1, reduce="sum"
    )
    assert np.allclose(to_numpy(per_graph), np.array([5.0, 5.0]))
def test_atomic_energies_multireference(config, table):
    """With two heads, the MP2 row (2, 4) must be selected: 2 + 2 + 4 = 8."""
    e0_block = AtomicEnergiesBlock(
        atomic_energies=np.array([[1.0, 3.0], [2.0, 4.0]])
    )
    # Relabel the fixture configuration so the second (MP2) head row is used.
    config.head = "MP2"
    graph = AtomicData.from_config(
        config, z_table=table, cutoff=3.0, heads=["DFT", "MP2"]
    )
    loader = torch_geometric.dataloader.DataLoader(
        dataset=[graph, graph],
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )
    batch = next(iter(loader))
    atom_indices = torch.arange(batch["positions"].shape[0])
    # Broadcast each graph's head index onto its atoms (default head 0).
    if "head" in batch:
        node_heads = batch["head"][batch["batch"]]
    else:
        node_heads = torch.zeros_like(batch["batch"])
    node_e0 = e0_block(batch.node_attrs).squeeze(-1)
    node_e0 = node_e0[atom_indices, node_heads]
    per_graph = scatter.scatter_sum(
        src=node_e0, index=batch.batch, dim=-1, reduce="sum"
    )
    assert np.allclose(to_numpy(per_graph), np.array([8.0, 8.0]))
def test_compute_mean_rms_energy_forces_multi_head(data_loader, atomic_energies):
    """Per-head mean/RMS arrays have one entry per head and differ between heads."""
    mean, rms = compute_mean_rms_energy_forces(data_loader, atomic_energies)
    for stat in (mean, rms):
        assert isinstance(stat, np.ndarray)
        assert stat.shape == (2,)
    assert np.all(rms >= 0)
    # The two heads carry different data, so their RMS must differ.
    assert rms[0] != rms[1]
def test_compute_statistics(data_loader, atomic_energies):
    """compute_statistics returns a neighbor count plus per-head mean/std arrays."""
    avg_num_neighbors, mean, std = compute_statistics(data_loader, atomic_energies)
    assert isinstance(avg_num_neighbors, float)
    assert avg_num_neighbors > 0
    for stat in (mean, std):
        assert isinstance(stat, np.ndarray)
        assert stat.shape == (2,)
    assert np.all(mean != 0)
    assert np.all(std > 0)
    # Heads hold different data, so their statistics must differ.
    assert mean[0] != mean[1]
    assert std[0] != std[1]
mace-bench/3rdparty/mace/tests/test_multifiles.py
0 → 100644
View file @
1be78103
import
json
import
os
import
shutil
import
subprocess
import
sys
import
tempfile
import
zlib
from
pathlib
import
Path
import
lmdb
import
numpy
as
np
import
orjson
import
pytest
import
torch
import
yaml
from
ase.atoms
import
Atoms
from
ase.calculators.singlepoint
import
SinglePointCalculator
from
mace.calculators
import
MACECalculator
def create_test_atoms(num_atoms=5, seed=42):
    """Build a random periodic structure with energy/forces/stress attached.

    The RNG draw order (positions, numbers, energy, forces, stress) is fixed
    so a given seed always yields the same structure.
    """
    rng = np.random.RandomState(seed)
    coords = rng.rand(num_atoms, 3) * 5.0
    species = rng.choice([1, 6, 7, 8], size=num_atoms)  # H, C, N, O
    structure = Atoms(
        numbers=species,
        positions=coords,
        cell=np.eye(3) * 10.0,  # 10 Å periodic box
        pbc=True,
    )
    # Synthetic reference labels.
    ref_energy = float(rng.uniform(-15.0, -5.0))
    ref_forces = rng.rand(num_atoms, 3) * 0.5 - 0.25  # in [-0.25, 0.25) eV/Å
    ref_stress = rng.rand(6) * 0.2 - 0.1  # Voigt notation
    structure.calc = SinglePointCalculator(
        structure, energy=ref_energy, forces=ref_forces, stress=ref_stress
    )
    # A single atom is tagged so MACE treats it as an isolated-atom reference.
    if num_atoms == 1:
        structure.info["config_type"] = "IsolatedAtom"
    return structure
def create_xyz_file(atoms_list, filename):
    """Write the given atoms to *filename* in extended-xyz format; return the path."""
    from ase.io import write

    write(filename, atoms_list, format="extxyz")
    return filename
def create_e0s_file(e0s_dict, filename):
    """Write isolated-atom energies to *filename* as JSON; return the path.

    Keys are normalised to integers before dumping because MACE expects
    atomic numbers (json serialises them back to string keys).
    """
    normalised = {int(z): energy for z, energy in e0s_dict.items()}
    with open(filename, "w", encoding="utf-8") as handle:
        json.dump(normalised, handle)
    return filename
def create_h5_dataset(xyz_file, output_dir, e0s_file=None, r_max=5.0, seed=42):
    """
    Run MACE's preprocess_data.py script to convert an xyz file to h5 format.
    Args:
        xyz_file: Path to the input xyz file
        output_dir: Directory to store the preprocessed h5 files
        e0s_file: Path to the E0s file with isolated atom energies
        r_max: Cutoff radius
        seed: Random seed
    Returns:
        The output directory containing the h5 files
    """
    # Make sure output directory exists
    os.makedirs(output_dir, exist_ok=True)
    # Find the path to the preprocess_data.py script (relative to this test file)
    preprocess_script = (
        Path(__file__).parent.parent / "mace" / "cli" / "preprocess_data.py"
    )
    # Set up command to run preprocess_data.py
    cmd = [
        sys.executable,
        str(preprocess_script),
        f"--train_file={xyz_file}",
        f"--r_max={r_max}",
        f"--h5_prefix={output_dir}/",
        f"--seed={seed}",
        "--compute_statistics",  # Generate statistics file
        "--num_process=2",  # Create 2 files for testing sharded loading
    ]
    # Add E0s file if provided
    if e0s_file:
        cmd.append(f"--E0s={e0s_file}")
    # Set up environment so the subprocess imports the in-repo mace package.
    # NOTE(review): ":" as the path separator is POSIX-only; use os.pathsep
    # if this ever needs to run on Windows.
    env = os.environ.copy()
    env["PYTHONPATH"] = (
        str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
    )
    # Run the script
    print(f"Running preprocess command: {' '.join(cmd)}")
    try:
        process = subprocess.run(
            cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
        )
        # Print output for debugging
        print("Preprocess stdout:", process.stdout.decode())
        print("Preprocess stderr:", process.stderr.decode())
    except subprocess.CalledProcessError as e:
        # Surface the subprocess output before re-raising so failures are debuggable.
        print("Preprocess failed with error:", e)
        print("Stdout:", e.stdout.decode() if e.stdout else "")
        print("Stderr:", e.stderr.decode() if e.stderr else "")
        raise
    return output_dir
def create_lmdb_dataset(atoms_list, folder_path, head_name="Default"):
    """Create an LMDB dataset from a list of atoms objects that MACE can read.

    Writes a single ``data.aselmdb`` file inside *folder_path*, mimicking the
    on-disk layout of ase's LMDB-backed database: zlib-compressed orjson
    payloads keyed by "metadata", "nextid", "deleted_ids" and 1-based row ids.
    Returns *folder_path*.
    """
    # Create the folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)
    # Create the LMDB database file
    db_path = os.path.join(folder_path, "data.aselmdb")
    # Initialize LMDB environment (single-file database, not a subdirectory)
    env = lmdb.open(
        db_path,
        map_size=1099511627776,  # 1TB
        subdir=False,
        meminit=False,
        map_async=True,
    )
    # Open a transaction
    with env.begin(write=True) as txn:
        # Store metadata
        metadata = {"format_version": 1}
        txn.put(
            "metadata".encode("ascii"),
            zlib.compress(orjson.dumps(metadata, option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store nextid (ids are 1-based, so next free id is len + 1)
        nextid = len(atoms_list) + 1
        txn.put(
            "nextid".encode("ascii"),
            zlib.compress(orjson.dumps(nextid, option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store deleted_ids (empty)
        txn.put(
            "deleted_ids".encode("ascii"),
            zlib.compress(orjson.dumps([], option=orjson.OPT_SERIALIZE_NUMPY)),
        )
        # Store each atom
        for i, atoms in enumerate(atoms_list):
            id_num = i + 1  # Start from 1
            # Convert atoms to dictionary
            positions = atoms.get_positions()
            cell = atoms.get_cell()
            # Create a dictionary with all necessary fields
            dct = {
                "numbers": atoms.get_atomic_numbers().tolist(),
                "positions": positions.tolist(),
                "cell": cell.tolist(),
                "pbc": atoms.get_pbc().tolist(),
                "ctime": 0.0,  # Creation time
                "mtime": 0.0,  # Modification time
                "user": "test",
                # Labels are read from the attached SinglePointCalculator.
                "energy": atoms.calc.results["energy"],
                "forces": atoms.calc.results["forces"].tolist(),
                "stress": atoms.calc.results["stress"].tolist(),
                "key_value_pairs": {
                    "config_type": atoms.info.get("config_type", "Default"),
                    "head": head_name,
                },
            }
            # Store the atom in LMDB
            txn.put(
                f"{id_num}".encode("ascii"),
                zlib.compress(orjson.dumps(dct, option=orjson.OPT_SERIALIZE_NUMPY)),
            )
    # Close the environment
    env.close()
    return folder_path
@pytest.mark.slow
def test_multifile_training():
    """Test training with multiple file formats per head.

    Builds one head fed by an LMDB folder plus an XYZ file and a second head
    fed by an HDF5 folder plus an XYZ file, runs ``run_train.py`` in a
    subprocess on a generated YAML config, then checks that the trained model
    loads and produces finite energies/forces through ``MACECalculator``.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths
        xyz_file1 = os.path.join(temp_dir, "data1.xyz")
        xyz_file2 = os.path.join(temp_dir, "data2.xyz")
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        h5_folder = os.path.join(temp_dir, "h5_data")
        lmdb_folder1 = os.path.join(
            temp_dir, "lmdb_data1_lmdb"
        )  # Add _lmdb suffix for LMDB recognition
        lmdb_folder2 = os.path.join(
            temp_dir, "lmdb_data2_lmdb"
        )  # Add _lmdb suffix for LMDB recognition
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=5)
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            atom.info["REF_energy"] = energy  # Make sure energy is in the right place
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create 10 atoms for each dataset
        xyz_atoms1 = [
            create_test_atoms(num_atoms=5, seed=seeds[0] + i) for i in range(10)
        ]
        xyz_atoms2 = [
            create_test_atoms(num_atoms=5, seed=seeds[1] + i) for i in range(10)
        ]
        # Create h5 data directly - first convert the xyz file to a format with REF_ keys
        for atom in xyz_atoms1:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
        for atom in xyz_atoms2:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
        # Save isolated atoms to xyz files first, then create the h5 datasets
        create_xyz_file(xyz_atoms1, xyz_file1)
        create_xyz_file(xyz_atoms2, xyz_file2)
        # Create h5 data from xyz file, using both isolated atoms and real data
        all_atoms_for_h5 = isolated_atoms + xyz_atoms2
        all_atoms_xyz = os.path.join(temp_dir, "all_atoms_for_h5.xyz")
        create_xyz_file(all_atoms_for_h5, all_atoms_xyz)
        create_h5_dataset(all_atoms_xyz, h5_folder)
        # Create LMDB datasets
        lmdb_atoms1 = [
            create_test_atoms(num_atoms=5, seed=seeds[3] + i) for i in range(10)
        ]
        lmdb_atoms2 = [
            create_test_atoms(num_atoms=5, seed=seeds[4] + i) for i in range(10)
        ]
        create_lmdb_dataset(lmdb_atoms1, lmdb_folder1, head_name="head1")
        create_lmdb_dataset(lmdb_atoms2, lmdb_folder2, head_name="head2")
        # Create config.yaml for training with proper format specification.
        # Each head mixes two file formats in its train_file list.
        config = {
            "name": "multifile_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "head1": {
                    "train_file": [lmdb_folder1, xyz_file1],
                    "valid_file": xyz_file1,
                    "energy_key": "REF_energy",
                    "forces_key": "REF_forces",
                    "stress_key": "REF_stress",
                },
                "head2": {
                    "train_file": [h5_folder + "/train", xyz_file2],
                    "valid_file": xyz_file2,
                    "energy_key": "REF_energy",
                    "forces_key": "REF_forces",
                    "stress_key": "REF_stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,  # Don't raise exception on non-zero exit, we'll check manually
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multifile_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(model_paths=model_path, device="cpu", head="head1")
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_multiple_xyz_per_head():
    """Test training with multiple XYZ files per head for train, valid and test sets.

    Generates three train / two valid / two test XYZ files, points a single
    head at the lists of files via the YAML config, runs ``run_train.py`` in a
    subprocess, and verifies the resulting model loads and predicts finite
    energies/forces.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths - create multiple xyz files for each dataset
        train_xyz_files = [
            os.path.join(temp_dir, f"train_data{i}.xyz") for i in range(1, 4)
        ]  # 3 train files
        valid_xyz_files = [
            os.path.join(temp_dir, f"valid_data{i}.xyz") for i in range(1, 3)
        ]  # 2 valid files
        test_xyz_files = [
            os.path.join(temp_dir, f"test_data{i}.xyz") for i in range(1, 3)
        ]  # 2 test files
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=10)  # More seeds for multiple files
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create atoms for each train dataset - use different seeds for variety
        train_datasets = []
        for i, file in enumerate(train_xyz_files):
            # Create atoms with different seeds
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i] + j) for j in range(5)
            ]
            create_xyz_file(atoms, file)
            train_datasets.append(atoms)
        # Create atoms for validation datasets
        valid_datasets = []
        for i, file in enumerate(valid_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 3] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            valid_datasets.append(atoms)
        # Create atoms for test datasets
        test_datasets = []
        for i, file in enumerate(test_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 5] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            test_datasets.append(atoms)
        # Create config.yaml for training with multiple xyz files per dataset
        config = {
            "name": "multi_xyz_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "multi_xyz_head": {
                    # Using lists of multiple xyz files for each dataset
                    "train_file": train_xyz_files,
                    "valid_file": valid_xyz_files,
                    "test_file": test_xyz_files,
                    "energy_key": "energy",
                    "forces_key": "forces",
                    "stress_key": "stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multi_xyz_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(
            model_paths=model_path, device="cpu", head="multi_xyz_head"
        )
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_single_xyz_per_head():
    """Test training with a single XYZ file per head for train, valid and test sets.

    Same flow as ``test_multiple_xyz_per_head`` but every ``range(1, 2)``
    yields exactly one file per dataset, exercising the single-file code path
    through the same list-valued config keys.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths - one xyz file per dataset (lists of length 1)
        train_xyz_files = [
            os.path.join(temp_dir, f"train_data{i}.xyz") for i in range(1, 2)
        ]  # 1 train file
        valid_xyz_files = [
            os.path.join(temp_dir, f"valid_data{i}.xyz") for i in range(1, 2)
        ]  # 1 valid file
        test_xyz_files = [
            os.path.join(temp_dir, f"test_data{i}.xyz") for i in range(1, 2)
        ]  # 1 test file
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data for each format; fixed seed keeps the test deterministic.
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=10)  # More seeds for multiple files
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            # Create isolated atom
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))  # Random reference energy
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy  # Store energy for E0s file
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create atoms for each train dataset - use different seeds for variety
        train_datasets = []
        for i, file in enumerate(train_xyz_files):
            # Create atoms with different seeds
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i] + j) for j in range(5)
            ]
            create_xyz_file(atoms, file)
            train_datasets.append(atoms)
        # Create atoms for validation datasets
        valid_datasets = []
        for i, file in enumerate(valid_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 3] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            valid_datasets.append(atoms)
        # Create atoms for test datasets
        test_datasets = []
        for i, file in enumerate(test_xyz_files):
            atoms = [
                create_test_atoms(num_atoms=5, seed=seeds[i + 5] + j) for j in range(3)
            ]
            create_xyz_file(atoms, file)
            test_datasets.append(atoms)
        # Create config.yaml for training; dataset keys still carry lists
        config = {
            "name": "multi_xyz_test",
            "seed": 42,
            "model": "MACE",
            "hidden_irreps": "32x0e",
            "r_max": 5.0,
            "batch_size": 5,
            "max_num_epochs": 2,
            "patience": 5,
            "device": "cpu",
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "loss": "weighted",
            "optimizer": "adam",
            "default_dtype": "float64",
            "lr": 0.01,
            "swa": False,
            "work_dir": temp_dir,
            "results_dir": results_dir,
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "E0s": e0s_file,
            "atomic_numbers": str(z_table_elements),
            "heads": {
                "multi_xyz_head": {
                    # Using (single-element) lists of xyz files for each dataset
                    "train_file": train_xyz_files,
                    "valid_file": valid_xyz_files,
                    "test_file": test_xyz_files,
                    "energy_key": "energy",
                    "forces_key": "forces",
                    "stress_key": "stress",
                },
            },
        }
        # Write config file
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)
        # Import the modified run_train from our local module
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        # Run training with subprocess
        cmd = [sys.executable, str(run_train_script), f"--config={config_path}"]
        # Set environment to add the current path to PYTHONPATH
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Training failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multi_xyz_test.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Try to load and run the model
        model = torch.load(model_path, map_location="cpu")
        assert model is not None, "Failed to load model"
        # Create a calculator
        calc = MACECalculator(
            model_paths=model_path, device="cpu", head="multi_xyz_head"
        )
        # Run prediction on a test atom
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc
        energy = test_atom.get_potential_energy()
        forces = test_atom.get_forces()
        # Assert we got sensible outputs
        assert np.isfinite(energy), "Model produced non-finite energy"
        assert np.all(np.isfinite(forces)), "Model produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
@pytest.mark.slow
def test_multihead_finetuning_different_formats():
    """Test multihead finetuning with different file formats for each head.

    One head trains from an XYZ file, the other from an HDF5 folder; both are
    finetuned on top of the "small" foundation model via ``run_train.py`` in a
    subprocess. Afterwards the test checks that the model exposes the two
    custom heads plus the pretraining ``pt_head`` and that both heads predict
    finite energies/forces.
    """
    # Create temporary directory; removed in the finally block below.
    temp_dir = tempfile.mkdtemp()
    try:
        # Set up file paths
        xyz_file = os.path.join(temp_dir, "finetuning_xyz.xyz")
        h5_folder = os.path.join(temp_dir, "h5_data")
        iso_atoms_file = os.path.join(temp_dir, "isolated_atoms.xyz")
        config_path = os.path.join(temp_dir, "config.yaml")
        results_dir = os.path.join(temp_dir, "results")
        checkpoints_dir = os.path.join(temp_dir, "checkpoints")
        model_dir = os.path.join(temp_dir, "models")
        e0s_file = os.path.join(temp_dir, "e0s.json")
        # Create directories
        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
        os.makedirs(model_dir, exist_ok=True)
        # Set atomic numbers for z_table
        z_table_elements = [1, 6, 7, 8]  # H, C, N, O
        # Create test data with different seeds
        rng = np.random.RandomState(42)
        seeds = rng.randint(0, 10000, size=3)
        # Create isolated atoms for E0s (one of each element)
        isolated_atoms = []
        e0s_dict = {}
        for z in z_table_elements:
            atom = Atoms(
                numbers=[z], positions=[[0, 0, 0]], cell=np.eye(3) * 10.0, pbc=True
            )
            energy = float(rng.uniform(-5.0, -1.0))
            forces = np.zeros((1, 3))
            stress = np.zeros(6)
            calc = SinglePointCalculator(
                atom, energy=energy, forces=forces, stress=stress
            )
            atom.calc = calc
            atom.info["config_type"] = "IsolatedAtom"
            atom.info["REF_energy"] = energy  # Make sure energy is in the right place
            atom.arrays["REF_forces"] = forces
            atom.info["REF_stress"] = stress
            isolated_atoms.append(atom)
            e0s_dict[str(z)] = energy
        # Create E0s file
        create_e0s_file(e0s_dict, e0s_file)
        # Create isolated atoms xyz file
        create_xyz_file(isolated_atoms, iso_atoms_file)
        # Create XYZ data for xyz_head
        xyz_atoms = [
            create_test_atoms(num_atoms=5, seed=seeds[0] + i) for i in range(30)
        ]
        # Add REF_ properties
        for atom in xyz_atoms:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
            atom.info["head"] = "xyz_head"  # Assign head
        create_xyz_file(xyz_atoms, xyz_file)
        # Create H5 data for h5_head
        h5_atoms = [
            create_test_atoms(num_atoms=5, seed=seeds[1] + i) for i in range(30)
        ]
        # Add REF_ properties
        for atom in h5_atoms:
            atom.info["REF_energy"] = atom.calc.results["energy"]
            atom.arrays["REF_forces"] = atom.calc.results["forces"]
            atom.info["REF_stress"] = atom.calc.results["stress"]
            atom.info["head"] = "h5_head"  # Assign head
        h5_atoms_xyz = os.path.join(temp_dir, "h5_atoms.xyz")
        create_xyz_file(h5_atoms, h5_atoms_xyz)
        # Include isolated atoms for E0s in the h5 dataset
        all_atoms_for_h5 = h5_atoms + isolated_atoms
        all_atoms_h5_xyz = os.path.join(temp_dir, "all_atoms_for_h5.xyz")
        create_xyz_file(all_atoms_for_h5, all_atoms_h5_xyz)
        create_h5_dataset(all_atoms_h5_xyz, h5_folder)
        # Create config.yaml for multihead finetuning
        heads = {
            "xyz_head": {
                "train_file": xyz_file,
                "valid_fraction": 0.2,
                "energy_key": "REF_energy",
                "forces_key": "REF_forces",
                "stress_key": "REF_stress",
                "E0s": e0s_file,
            },
            "h5_head": {
                "train_file": os.path.join(h5_folder, "train"),
                "valid_file": os.path.join(h5_folder, "val"),
                "energy_key": "REF_energy",
                "forces_key": "REF_forces",
                "stress_key": "REF_stress",
                "E0s": e0s_file,
            },
        }
        # Hand-rolled YAML serialization of the heads mapping (two-level indent).
        yaml_str = "heads:\n"
        for key, value in heads.items():
            yaml_str += f"  {key}:\n"
            for sub_key, sub_value in value.items():
                yaml_str += f"    {sub_key}: {sub_value}\n"
        with open(config_path, "w", encoding="utf-8") as f:
            f.write(yaml_str)
        # Now perform multihead finetuning
        finetuning_params = {
            "name": "multihead_finetuned",
            "config": config_path,
            "foundation_model": "small",  # Use the small foundation model
            "energy_weight": 1.0,
            "forces_weight": 10.0,
            "model": "MACE",
            "hidden_irreps": "128x0e",  # Match foundation model
            "r_max": 5.0,
            "batch_size": 2,
            "max_num_epochs": 2,  # Just do a quick finetuning for test
            "device": "cpu",
            "seed": 42,
            "loss": "weighted",
            "default_dtype": "float64",
            "checkpoints_dir": checkpoints_dir,
            "model_dir": model_dir,
            "results_dir": results_dir,
            "atomic_numbers": "[" + ",".join(map(str, z_table_elements)) + "]",
            "multiheads_finetuning": True,
            "filter_type_pt": "combinations",
            "subselect_pt": "random",
            "num_samples_pt": 10,  # Small number for testing
            "force_mh_ft_lr": True,  # Force using specified learning rate
        }
        # Run finetuning
        run_train_script = (
            Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
        )
        env = os.environ.copy()
        env["PYTHONPATH"] = (
            str(Path(__file__).parent.parent) + ":" + env.get("PYTHONPATH", "")
        )
        cmd = [sys.executable, str(run_train_script)]
        # None-valued params become bare flags; everything else is --key=value.
        for k, v in finetuning_params.items():
            if v is None:
                cmd.append(f"--{k}")
            else:
                cmd.append(f"--{k}={v}")
        # Run the process
        process = subprocess.run(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Print output for debugging
        print("\n" + "=" * 40 + " STDOUT " + "=" * 40)
        print(process.stdout.decode())
        print("\n" + "=" * 40 + " STDERR " + "=" * 40)
        print(process.stderr.decode())
        # Check that process completed successfully
        assert (
            process.returncode == 0
        ), f"Finetuning failed with error: {process.stderr.decode()}"
        # Check that model was created
        model_path = os.path.join(model_dir, "multihead_finetuned.model")
        assert os.path.exists(model_path), f"Model was not created at {model_path}"
        # Load model and verify it has the expected heads
        model = torch.load(model_path, map_location="cpu")
        assert hasattr(model, "heads"), "Model does not have heads attribute"
        assert set(["xyz_head", "h5_head", "pt_head"]).issubset(
            set(model.heads)
        ), "Expected heads not found in model"
        # Try to run the model with both heads
        # For xyz_head
        calc_xyz = MACECalculator(
            model_paths=model_path,
            device="cpu",
            head="xyz_head",
            default_dtype="float64",
        )
        test_atom = create_test_atoms(num_atoms=5, seed=99999)
        test_atom.calc = calc_xyz
        energy_xyz = test_atom.get_potential_energy()
        forces_xyz = test_atom.get_forces()
        # For h5_head
        calc_h5 = MACECalculator(
            model_paths=model_path,
            device="cpu",
            head="h5_head",
            default_dtype="float64",
        )
        test_atom.calc = calc_h5
        energy_h5 = test_atom.get_potential_energy()
        forces_h5 = test_atom.get_forces()
        # Verify results
        assert np.isfinite(energy_xyz), "xyz_head produced non-finite energy"
        assert np.all(np.isfinite(forces_xyz)), "xyz_head produced non-finite forces"
        assert np.isfinite(energy_h5), "h5_head produced non-finite energy"
        assert np.all(np.isfinite(forces_h5)), "h5_head produced non-finite forces"
    finally:
        # Clean up
        shutil.rmtree(temp_dir)
mace-bench/3rdparty/mace/tests/test_preprocess.py
0 → 100644
View file @
1be78103
import
os
import
subprocess
import
sys
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
import
yaml
from
ase.atoms
import
Atoms
# Repository root: one level above this tests/ directory.
pytest_mace_dir = Path(__file__).parent.parent
# CLI entry point exercised by the tests in this module.
preprocess_data = pytest_mace_dir / "mace" / "cli" / "preprocess_data.py"
@pytest.fixture(name="sample_configs")
def fixture_sample_configs():
    """Provide two isolated-atom configs plus ten randomly rattled waters.

    The first two entries are O and H isolated atoms tagged as
    ``IsolatedAtom`` with zero reference energy; the remaining ten are copies
    of a periodic water molecule with seeded random perturbations and random
    REF_energy / REF_forces / REF_stress labels.
    """
    base_water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    for iso in configs:
        iso.info["REF_energy"] = 0.0
        iso.info["config_type"] = "IsolatedAtom"
    np.random.seed(5)  # fixed seed -> deterministic fixture data
    for _ in range(10):
        rattled = base_water.copy()
        rattled.positions += np.random.normal(0.1, size=rattled.positions.shape)
        rattled.info["REF_energy"] = np.random.normal(0.1)
        rattled.new_array(
            "REF_forces", np.random.normal(0.1, size=rattled.positions.shape)
        )
        rattled.info["REF_stress"] = np.random.normal(0.1, size=6)
        configs.append(rattled)
    return configs
def test_preprocess_data(tmp_path, sample_configs):
    """Run preprocess_data.py on an XYZ sample via CLI flags and validate its HDF5/statistics output."""
    ase.io.write(tmp_path / "sample.xyz", sample_configs)
    # CLI parameters; a value of None becomes a bare --flag below.
    preprocess_params = {
        "train_file": tmp_path / "sample.xyz",
        "r_max": 5.0,
        "config_type_weights": "{'Default':1.0}",
        "num_process": 2,
        "valid_fraction": 0.1,
        "h5_prefix": tmp_path / "preprocessed_",
        "compute_statistics": None,
        "seed": 42,
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
    }
    # Propagate the repo root to the subprocess so it imports the local mace.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(preprocess_data)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in preprocess_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Check if the output files are created
    assert (tmp_path / "preprocessed_train").is_dir()
    assert (tmp_path / "preprocessed_val").is_dir()
    assert (tmp_path / "preprocessed_statistics.json").is_file()
    # Check if the correct number of files are created (one shard per process)
    train_files = list((tmp_path / "preprocessed_train").glob("*.h5"))
    val_files = list((tmp_path / "preprocessed_val").glob("*.h5"))
    assert len(train_files) == preprocess_params["num_process"]
    assert len(val_files) == preprocess_params["num_process"]
    # Example of checking statistics file content:
    import json

    with open(
        tmp_path / "preprocessed_statistics.json", "r", encoding="utf-8"
    ) as f:
        statistics = json.load(f)
    assert "atomic_energies" in statistics
    assert "avg_num_neighbors" in statistics
    assert "mean" in statistics
    assert "std" in statistics
    assert "atomic_numbers" in statistics
    assert "r_max" in statistics
    # Example of checking H5 file content:
    import h5py

    with h5py.File(train_files[0], "r") as f:
        assert "config_batch_0" in f
        config = f["config_batch_0"]["config_0"]
        assert "atomic_numbers" in config
        assert "positions" in config
        assert "energy" in config["properties"]
        assert "forces" in config["properties"]
    # Reference labels from the input (skip the two IsolatedAtom entries).
    original_energies = [
        config.info["REF_energy"]
        for config in sample_configs[2:]
        if "REF_energy" in config.info
    ]
    original_forces = [
        config.arrays["REF_forces"]
        for config in sample_configs[2:]
        if "REF_forces" in config.arrays
    ]
    # Collect everything written to train and val shards.
    h5_energies = []
    h5_forces = []
    for train_file in train_files:
        with h5py.File(train_file, "r") as f:
            for _, batch in f.items():
                for config_key in batch.keys():
                    config = batch[config_key]
                    assert "atomic_numbers" in config
                    assert "positions" in config
                    assert "energy" in config["properties"]
                    assert "forces" in config["properties"]
                    h5_energies.append(config["properties"]["energy"][()])
                    h5_forces.append(config["properties"]["forces"][()])
    for val_file in val_files:
        with h5py.File(val_file, "r") as f:
            for _, batch in f.items():
                for config_key in batch.keys():
                    config = batch[config_key]
                    h5_energies.append(config["properties"]["energy"][()])
                    h5_forces.append(config["properties"]["forces"][()])
    print("Original energies", original_energies)
    print("H5 energies", h5_energies)
    print("Original forces", original_forces)
    print("H5 forces", h5_forces)
    # Sort both sides so the comparison ignores shard/split ordering.
    original_energies.sort()
    h5_energies.sort()
    original_forces = np.concatenate(original_forces).flatten()
    h5_forces = np.concatenate(h5_forces).flatten()
    original_forces.sort()
    h5_forces.sort()
    # Compare energies and forces
    np.testing.assert_allclose(original_energies, h5_energies, rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(original_forces, h5_forces, rtol=1e-5, atol=1e-8)
    print("All checks passed successfully!")
def test_preprocess_config(tmp_path, sample_configs):
    """Run preprocess_data.py driven by a YAML config file instead of CLI flags.

    Writes the sample configurations to XYZ, dumps the preprocessing
    parameters to ``config.yaml``, and checks the CLI subprocess exits
    cleanly when pointed at that config.
    """
    ase.io.write(tmp_path / "sample.xyz", sample_configs)
    preprocess_params = {
        "train_file": str(tmp_path / "sample.xyz"),
        "r_max": 5.0,
        "config_type_weights": "{'Default':1.0}",
        "num_process": 2,
        "valid_fraction": 0.1,
        "h5_prefix": str(tmp_path / "preprocessed_"),
        "compute_statistics": None,
        "seed": 42,
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
    }
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        yaml.dump(preprocess_params, file)
    # Propagate the repo root so the subprocess imports the local mace package.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    pieces = [sys.executable, str(preprocess_data), "--config", str(filename)]
    cmd = " ".join(pieces)
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
mace-bench/3rdparty/mace/tests/test_run_train.py
0 → 100644
View file @
1be78103
import
json
import
os
import
subprocess
import
sys
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
import
torch
from
ase.atoms
import
Atoms
from
mace.calculators
import
MACECalculator
,
mace_mp
# Optional dependency probe: some tests need cuequivariance; flag its presence
# so they can be skipped/branched when it is not installed.
try:
    import cuequivariance as cue  # pylint: disable=unused-import

    CUET_AVAILABLE = True
except ImportError:
    CUET_AVAILABLE = False

# Path to the training CLI script, resolved relative to this test file.
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"
@pytest.fixture(name="fitting_configs")
def fixture_fitting_configs():
    """Build a small deterministic fitting set: 2 isolated atoms + 20 perturbed waters.

    NOTE: the exact sequence of np.random calls under seed(5) determines the
    configurations, and the hard-coded ``ref_Es`` values in the tests below
    were produced from exactly this data — do not reorder the RNG calls.
    """
    water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    # Isolated O and H atoms supply the E0 reference energies.
    fit_configs = [
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3),
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3),
    ]
    fit_configs[0].info["REF_energy"] = 0.0
    fit_configs[0].info["config_type"] = "IsolatedAtom"
    fit_configs[1].info["REF_energy"] = 0.0
    fit_configs[1].info["config_type"] = "IsolatedAtom"
    np.random.seed(5)
    for _ in range(20):
        c = water.copy()
        # Randomly displace atoms and attach synthetic reference labels.
        c.positions += np.random.normal(0.1, size=c.positions.shape)
        c.info["REF_energy"] = np.random.normal(0.1)
        print(c.info["REF_energy"])
        c.new_array("REF_forces", np.random.normal(0.1, size=c.positions.shape))
        c.info["REF_stress"] = np.random.normal(0.1, size=6)
        fit_configs.append(c)
    return fit_configs
@pytest.fixture(name="pretraining_configs")
def fixture_pretraining_configs():
    """Build a pretraining set: 10 random waters plus isolated O and H atoms.

    NOTE(review): unlike fixture_fitting_configs, no np.random.seed is set
    here, so this fixture is nondeterministic across runs — presumably
    acceptable because the replay-finetuning test only makes loose
    assertions, but confirm before relying on reproducibility.
    """
    configs = []
    for _ in range(10):
        atoms = Atoms(
            numbers=[8, 1, 1],
            positions=np.random.rand(3, 3) * 3,
            cell=[5, 5, 5],
            pbc=[True] * 3,
        )
        # Synthetic reference labels for energy, forces and stress.
        atoms.info["REF_energy"] = np.random.normal(0, 1)
        atoms.arrays["REF_forces"] = np.random.normal(0, 1, size=(3, 3))
        atoms.info["REF_stress"] = np.random.normal(0, 1, size=6)
        configs.append(atoms)
    # Isolated atoms provide the E0s for the pretraining head.
    configs.append(
        Atoms(numbers=[8], positions=[[0, 0, 0]], cell=[6] * 3, pbc=[True] * 3),
    )
    configs.append(
        Atoms(numbers=[1], positions=[[0, 0, 0]], cell=[6] * 3, pbc=[True] * 3)
    )
    configs[-2].info["REF_energy"] = -2.0
    configs[-2].info["config_type"] = "IsolatedAtom"
    configs[-1].info["REF_energy"] = -4.0
    configs[-1].info["config_type"] = "IsolatedAtom"
    return configs
# Baseline CLI arguments shared by the run_train tests.  Each test copies
# and tweaks this dict, then renders it to "--key=value" arguments; a value
# of None is rendered as a bare "--key" flag.
_mace_params = {
    "name": "MACE",
    "valid_fraction": 0.05,
    "energy_weight": 1.0,
    "forces_weight": 10.0,
    "stress_weight": 1.0,
    "model": "MACE",
    "hidden_irreps": "128x0e",
    "r_max": 3.5,
    "batch_size": 5,
    "max_num_epochs": 10,
    "swa": None,
    "start_swa": 5,
    "ema": None,
    "ema_decay": 0.99,
    "amsgrad": None,
    "restart_latest": None,
    "device": "cpu",
    "seed": 5,
    "loss": "stress",
    "energy_key": "REF_energy",
    "forces_key": "REF_forces",
    "stress_key": "REF_stress",
    "eval_interval": 2,
}
def test_run_train(tmp_path, fitting_configs):
    """Train a small MACE model via the CLI and check predicted energies
    against hard-coded reference values from a known-good run."""
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    # Build argv as a list instead of a string + .split(): splitting on
    # whitespace would break if tmp_path or the interpreter path contained
    # spaces.  None values become bare flags.
    cmd = [sys.executable, str(run_train)] + [
        (f"--{k}={v}" if v is not None else f"--{k}")
        for k, v in mace_params.items()
    ]
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        0.0,
        0.0,
        -0.039181344585828524,
        -0.0915223395136733,
        -0.14953484236456582,
        -0.06662480820063998,
        -0.09983737353050133,
        0.12477442296789745,
        -0.06486086271762856,
        -0.1460607988519944,
        0.12886334908465508,
        -0.14000990081920373,
        -0.05319886578958313,
        0.07780520158391,
        -0.08895480281886901,
        -0.15474719614734422,
        0.007756765146527644,
        -0.044879267197498685,
        -0.036065736712447574,
        -0.24413743841886623,
        -0.0838104612106429,
        -0.14751978636626545,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_missing_data(tmp_path, fitting_configs):
    """Same as test_run_train, but with some labels deliberately removed to
    check that training tolerates configurations missing energy, forces or
    stress."""
    # Remove one label of each kind from different configurations.
    del fitting_configs[5].info["REF_energy"]
    del fitting_configs[6].arrays["REF_forces"]
    del fitting_configs[7].info["REF_stress"]
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the (partially unlabelled) fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        0.0,
        0.0,
        -0.05464025113696155,
        -0.11272131295940478,
        0.039200919331076826,
        -0.07517990972827505,
        -0.13504202474582666,
        0.0292022872055344,
        -0.06541099574579018,
        -0.1497824717832886,
        0.19397709360828813,
        -0.13587609467143014,
        -0.05242956276828463,
        -0.0504862057364953,
        -0.07095795959430119,
        -0.2463753796753703,
        -0.002031543147676121,
        -0.03864918790300681,
        -0.13680153117705554,
        -0.23418951968636786,
        -0.11790833839379238,
        -0.14930562311066484,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_no_stress(tmp_path, fitting_configs):
    """Like test_run_train_missing_data but with the 'weighted' loss, which
    does not use stress labels."""
    # Remove one label of each kind from different configurations.
    del fitting_configs[5].info["REF_energy"]
    del fitting_configs[6].arrays["REF_forces"]
    del fitting_configs[7].info["REF_stress"]
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    # Override the baseline "stress" loss with a stress-free loss.
    mace_params["loss"] = "weighted"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate the trained model on the fitting set.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 28/03/2023 on main 88d49f9ed6925dec07d1777043a36e1fe4872ff3
    ref_Es = [
        0.0,
        0.0,
        -0.05450093218377135,
        -0.11235475232750518,
        0.03914558031854152,
        -0.07500839914816063,
        -0.13469160624431492,
        0.029384214243251838,
        -0.06521819204166135,
        -0.14944896282001804,
        0.19413948083049481,
        -0.13543541860473626,
        -0.05235495076237124,
        -0.049556206595684105,
        -0.07080758913030646,
        -0.24571898386301153,
        -0.002070636306950905,
        -0.03863113401320783,
        -0.13620291339913712,
        -0.23383074855679695,
        -0.11776449630199368,
        -0.1489441490225184,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_multihead(tmp_path, fitting_configs):
    """Train a three-head (DFT/MP2/CCD) model from a YAML heads config and
    check energies predicted by the CCD head against reference values."""
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    fitting_configs_ccd = []
    # Duplicate every configuration into each head's training set, tagging
    # the copies with the head name via info["head"].
    for _, c in enumerate(fitting_configs):
        c_dft = c.copy()
        c_dft.info["head"] = "DFT"
        fitting_configs_dft.append(c_dft)
        c_mp2 = c.copy()
        c_mp2.info["head"] = "MP2"
        fitting_configs_mp2.append(c_mp2)
        c_ccd = c.copy()
        c_ccd.info["head"] = "CCD"
        fitting_configs_ccd.append(c_ccd)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)
    ase.io.write(tmp_path / "fit_multihead_ccd.xyz", fitting_configs_ccd)

    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
        "CCD": {"train_file": f"{str(tmp_path)}/fit_multihead_ccd.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)

    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["loss"] = "weighted"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["batch_size"] = 2
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    # Evaluate using the CCD head only.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cpu",
        default_dtype="float64",
        head="CCD",
    )

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 02/09/2024 on develop branch
    ref_Es = [
        0.0,
        0.0,
        0.10637113905361611,
        -0.012499594026624754,
        0.08983077108171753,
        0.21071322543112597,
        -0.028921849222784398,
        -0.02423359575741567,
        0.022923252188079057,
        -0.02048334610058991,
        0.4349711162741364,
        -0.04455577015569887,
        -0.09765806785570091,
        0.16013134616829822,
        0.0758442928017698,
        -0.05931856557011721,
        0.33964473532953265,
        0.134338442158641,
        0.18024119757783053,
        -0.18914740992058765,
        -0.06503477155294624,
        0.03436649147415213,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_foundation(tmp_path, fitting_configs):
    """Fine-tune the 'small' foundation model (single head) on the fitting
    set and check predicted energies against reference values."""
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    mace_params["loss"] = "weighted"
    # "small" selects a published foundation checkpoint by alias.
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    # Disable multihead replay: plain single-head fine-tuning.
    mace_params["multiheads_finetuning"] = False
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )

    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0

    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
    )

    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 28/03/2023 on repulsion a63434aaab70c84ee016e13e4aca8d57297a0f26
    ref_Es = [
        1.6780993938446045,
        0.8916864395141602,
        0.7290308475494385,
        0.6194742918014526,
        0.6697757840156555,
        0.7025266289710999,
        0.5818213224411011,
        0.7897703647613525,
        0.6558921337127686,
        0.5071806907653809,
        3.581131935119629,
        0.691562294960022,
        0.6257331967353821,
        0.9560437202453613,
        0.7716934680938721,
        0.6730310916900635,
        0.8297463655471802,
        0.8053972721099854,
        0.8337507247924805,
        0.4107491970062256,
        0.6019601821899414,
        0.7301387786865234,
    ]
    assert np.allclose(Es, ref_Es)
def test_run_train_foundation_multihead(tmp_path, fitting_configs):
    """Fine-tune the 'small' foundation model with two heads (DFT/MP2) and
    check per-head energy predictions against loose reference values.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    # Isolated atoms (indices 0, 1) go to both heads; the rest alternate.
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            fitting_configs_dft.append(c)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        elif i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # CLI expects a bracketed, comma-separated list, e.g. "[1,8]".
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # Evaluate each configuration with the calculator bound to its head.
    Es = []
    for at in fitting_configs:
        config_head = at.info.get("head", "MP2")
        calc = MACECalculator(
            model_paths=tmp_path / "MACE.model",
            device="cpu",
            default_dtype="float64",
            head=config_head,
        )
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_foundation_multihead_json(tmp_path, fitting_configs):
    """Variant of the multihead foundation test where per-head E0s come from
    JSON files instead of isolated-atom configurations.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            continue  # skip isolated atoms, as energies specified by json files below
        if i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    # write E0s to json files
    E0s = {1: 0.0, 8: 0.0}
    with open(tmp_path / "fit_multihead_dft.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    with open(tmp_path / "fit_multihead_mp2.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)

    heads = {
        "DFT": {
            "train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_dft.json",
        },
        "MP2": {
            "train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_mp2.json",
        },
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    # CLI expects a bracketed, comma-separated list, e.g. "[1,8]".
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # Evaluate each configuration with the calculator bound to its head.
    Es = []
    for at in fitting_configs:
        config_head = at.info.get("head", "MP2")
        calc = MACECalculator(
            model_paths=tmp_path / "MACE.model",
            device="cpu",
            default_dtype="float64",
            head=config_head,
        )
        at.calc = calc
        Es.append(at.get_potential_energy())

    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_multihead_replay_custum_finetuning(
    tmp_path, fitting_configs, pretraining_configs
):
    """Two-stage flow: pretrain a small 'foundation' model from scratch, then
    fine-tune it with multihead replay (DFT/MP2 heads plus a replayed
    pt_head), and sanity-check the fine-tuned model's predictions.

    NOTE(review): "custum" in the test name looks like a typo for "custom";
    left unchanged since pytest selects tests by name.
    """
    # Step 1: Pretrain a small foundation model from scratch.
    ase.io.write(tmp_path / "pretrain.xyz", pretraining_configs)
    foundation_params = {
        "name": "foundation",
        "train_file": os.path.join(tmp_path, "pretrain.xyz"),
        "valid_fraction": 0.2,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "32x0e",
        "r_max": 5.0,
        "batch_size": 2,
        "max_num_epochs": 5,
        "swa": None,
        "start_swa": 3,
        "device": "cpu",
        "seed": 42,
        "loss": "weighted",
        "energy_key": "REF_energy",
        "forces_key": "REF_forces",
        "stress_key": "REF_stress",
        "default_dtype": "float64",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
    }
    # Step 2: Run pretraining with the repo-under-test on PYTHONPATH.
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    cmd = [sys.executable, str(run_train)]
    for k, v in foundation_params.items():
        if v is None:
            cmd.append(f"--{k}")
        else:
            cmd.append(f"--{k}={v}")
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0
    # Step 3: Create finetuning set
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            # Isolated atoms go to both heads (DFT copy + untagged + MP2 copy).
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            fitting_configs_dft.append(c)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        elif i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)

    # Step 4: Finetune the pretrained model with multihead replay
    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"},
    }
    # Render the heads mapping as a nested YAML document by hand.
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    finetuning_params = {
        "name": "finetuned",
        "valid_fraction": 0.1,
        "energy_weight": 1.0,
        "forces_weight": 10.0,
        "stress_weight": 1.0,
        "model": "MACE",
        "hidden_irreps": "32x0e",
        "r_max": 5.0,
        "batch_size": 2,
        "max_num_epochs": 5,
        "device": "cpu",
        "seed": 42,
        "loss": "weighted",
        "default_dtype": "float64",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "foundation_model": os.path.join(tmp_path, "foundation.model"),
        "config": os.path.join(tmp_path, "config.yaml"),
        "pt_train_file": os.path.join(tmp_path, "pretrain.xyz"),
        "num_samples_pt": 3,
        "subselect_pt": "random",
        "force_mh_ft_lr": True,
    }
    cmd = [sys.executable, str(run_train)]
    for k, v in finetuning_params.items():
        if v is None:
            cmd.append(f"--{k}")
        else:
            cmd.append(f"--{k}={v}")
    p = subprocess.run(cmd, env=run_env, check=True)
    assert p.returncode == 0
    # Load and test the finetuned model
    calc = MACECalculator(
        model_paths=tmp_path / "finetuned.model",
        device="cpu",
        default_dtype="float64",
        head="pt_head",
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Energies:", Es)
    # Add some basic checks
    assert len(Es) == len(fitting_configs)
    assert all(isinstance(E, float) for E in Es)
    assert len(set(Es)) > 1  # Ens
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_run_train_cueq(tmp_path, fitting_configs):
    """Train with cuequivariance enabled and check that CUDA evaluation,
    CPU+cueq evaluation, and the hard-coded reference energies agree.

    Fix: re-raise inside the except clause with bare ``raise`` instead of
    ``raise e`` so the original traceback is preserved.
    """
    torch.set_default_dtype(torch.float64)
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)

    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    mace_params["enable_cueq"] = True
    mace_params["default_dtype"] = "float64"

    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])

    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code: surface its output, then
        # re-raise with the original traceback (bare raise, not `raise e`).
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise

    assert completed_process.returncode == 0

    # NOTE(review): this evaluation requests device="cuda" even though
    # training ran on CPU — presumably the test is meant for GPU runners;
    # confirm before running on CPU-only CI.
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cuda")
    Es = []
    # Skip the two isolated-atom configurations (indices 0 and 1).
    for at in fitting_configs[2:]:
        at.calc = calc
        Es.append(at.get_potential_energy())

    # Re-evaluate on CPU with the cueq-accelerated kernels enabled.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", enable_cueq=True
    )
    Es_cueq = []
    for at in fitting_configs[2:]:
        at.calc = calc
        Es_cueq.append(at.get_potential_energy())

    # from a run on 04/06/2024 on stress_bugfix 967f0bfb6490086599da247874b24595d149caa7
    ref_Es = [
        -0.039181344585828524,
        -0.0915223395136733,
        -0.14953484236456582,
        -0.06662480820063998,
        -0.09983737353050133,
        0.12477442296789745,
        -0.06486086271762856,
        -0.1460607988519944,
        0.12886334908465508,
        -0.14000990081920373,
        -0.05319886578958313,
        0.07780520158391,
        -0.08895480281886901,
        -0.15474719614734422,
        0.007756765146527644,
        -0.044879267197498685,
        -0.036065736712447574,
        -0.24413743841886623,
        -0.0838104612106429,
        -0.14751978636626545,
    ]
    assert np.allclose(Es, ref_Es)
    assert np.allclose(ref_Es, Es_cueq)
@pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
def test_run_train_foundation_multihead_json_cueq(tmp_path, fitting_configs):
    """End-to-end multihead fine-tuning (DFT + MP2 heads) with cuequivariance
    enabled, run through the CLI as a subprocess and checked against stored
    reference energies.

    The head definitions and per-head E0s are supplied via a YAML config and
    JSON files rather than on the command line. Runs on CUDA.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    # All distinct elements in the fitting set, passed to --atomic_numbers.
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    # Alternate the remaining configs between the two heads.
    for i, c in enumerate(fitting_configs):
        if i in (0, 1):
            continue  # skip isolated atoms, as energies specified by json files below
        if i % 2 == 0:
            c.info["head"] = "DFT"
            fitting_configs_dft.append(c)
        else:
            c.info["head"] = "MP2"
            fitting_configs_mp2.append(c)
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2)
    # write E0s to json files
    E0s = {1: 0.0, 8: 0.0}
    with open(tmp_path / "fit_multihead_dft.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    with open(tmp_path / "fit_multihead_mp2.json", "w", encoding="utf-8") as f:
        json.dump(E0s, f)
    # Per-head train files and E0s, serialised to a minimal YAML config below.
    heads = {
        "DFT": {
            "train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_dft.json",
        },
        "MP2": {
            "train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz",
            "E0s": f"{str(tmp_path)}/fit_multihead_mp2.json",
        },
    }
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["config"] = tmp_path / "config.yaml"
    mace_params["loss"] = "weighted"
    mace_params["foundation_model"] = "small"
    mace_params["hidden_irreps"] = "128x0e"
    mace_params["r_max"] = 6.0
    mace_params["default_dtype"] = "float64"
    mace_params["num_radial_basis"] = 10
    mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock"
    mace_params["batch_size"] = 2
    mace_params["valid_batch_size"] = 1
    mace_params["num_samples_pt"] = 50
    mace_params["subselect_pt"] = "random"
    mace_params["enable_cueq"] = True
    mace_params["atomic_numbers"] = "[" + ",".join(map(str, atomic_numbers)) + "]"
    mace_params["filter_type_pt"] = "combinations"
    mace_params["device"] = "cuda"
    mace_params["force_mh_ft_lr"] = True
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    # Serialise params to "--key=value" flags; None-valued keys become bare flags.
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise e
    assert completed_process.returncode == 0
    # Evaluate the fine-tuned model's DFT head on every fitting config.
    calc = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cuda",
        default_dtype="float64",
        head="DFT",
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Es", Es)
    # from a run on 20/08/2024 on commit
    ref_Es = [
        1.654685616493225,
        0.44693732261657715,
        0.8741313815116882,
        0.569085955619812,
        0.7161882519721985,
        0.8654778599739075,
        0.8722733855247498,
        0.49582308530807495,
        0.814422607421875,
        0.7027317881584167,
        0.7196993827819824,
        0.517953097820282,
        0.8631765246391296,
        0.4679797887802124,
        0.8163984417915344,
        0.4252359867095947,
        1.0861445665359497,
        0.6829671263694763,
        0.7136879563331604,
        0.5160345435142517,
        0.7002358436584473,
        0.5574042201042175,
    ]
    # Loose tolerance: training is stochastic across environments.
    assert np.allclose(Es, ref_Es, atol=1e-1)
def test_run_train_lbfgs(tmp_path, fitting_configs):
    """Short training run with the --lbfgs flag enabled, checked against
    reference energies recorded from a previous run.
    """
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)
    mace_params = _mace_params.copy()
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = tmp_path / "fit.xyz"
    # None value -> emitted as the bare "--lbfgs" flag below.
    mace_params["lbfgs"] = None
    mace_params["max_num_epochs"] = 2
    # make sure run_train.py is using the mace that is currently being tested
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print("Es", Es)
    # from a run on 14/03/2025
    ref_Es = [
        0.0,
        0.0,
        -0.1874197850340979,
        -0.25991775038059006,
        0.18263492399322268,
        -0.15026829765490662,
        -0.2403061362015996,
        0.1689257170630718,
        -0.2095568077455055,
        -0.2957758160829075,
        -0.0035370913684985364,
        -0.2195416610745775,
        -0.25405549447739517,
        -0.06201390990366806,
        -0.13332219494388334,
        -0.19633181702040337,
        0.013014932630445699,
        -0.08808335967147174,
        -0.06664444189210728,
        -0.4230467426992034,
        -0.2348250569553676,
        -0.17593904833220647,
    ]
    assert np.allclose(Es, ref_Es, atol=1e-2)
def test_run_train_foundation_elements(tmp_path, fitting_configs):
    """Fine-tuning element filtering: by default the fine-tuned model keeps
    only the elements present in the training data ({H, O}), while passing
    --foundation_model_elements preserves the foundation model's full element
    set. Both resulting models must still produce finite energies.
    """
    ase.io.write(tmp_path / "fit.xyz", fitting_configs)
    base_params = {
        "name": "MACE",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "train_file": tmp_path / "fit.xyz",
        "loss": "weighted",
        "foundation_model": "small",
        "hidden_irreps": "128x0e",
        "r_max": 6.0,
        "default_dtype": "float64",
        "max_num_epochs": 5,
        "num_radial_basis": 10,
        "interaction_first": "RealAgnosticResidualInteractionBlock",
        "multiheads_finetuning": False,
    }
    # Run environment setup
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    # First run: without foundation_model_elements (default behavior)
    mace_params = base_params.copy()
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_filtered = torch.load(tmp_path / "MACE.model", map_location="cpu")
    filtered_elements = set(int(z) for z in model_filtered.atomic_numbers)
    assert filtered_elements == {1, 8}  # Only H and O should be present
    # Second run: with foundation_model_elements
    mace_params = base_params.copy()
    mace_params["name"] = "MACE_all_elements"
    mace_params["foundation_model_elements"] = True  # Flag-only argument
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_all = torch.load(tmp_path / "MACE_all_elements.model", map_location="cpu")
    all_elements = set(int(z) for z in model_all.atomic_numbers)
    # Get elements from foundation model for comparison
    calc = mace_mp(model="small", device="cpu")
    foundation_elements = set(int(z) for z in calc.models[0].atomic_numbers)
    # Check that all foundation model elements are preserved
    assert all_elements == foundation_elements
    assert len(all_elements) > len(filtered_elements)
    # Check that both models can make predictions
    at = fitting_configs[2].copy()
    # Test filtered model
    calc_filtered = MACECalculator(
        model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
    )
    at.calc = calc_filtered
    e1 = at.get_potential_energy()
    # Test all-elements model
    calc_all = MACECalculator(
        model_paths=tmp_path / "MACE_all_elements.model",
        device="cpu",
        default_dtype="float64",
    )
    at.calc = calc_all
    e2 = at.get_potential_energy()
    # Energies should be different since the models are trained differently,
    # but both should give reasonable results
    assert np.isfinite(e1)
    assert np.isfinite(e2)
def test_run_train_foundation_elements_multihead(tmp_path, fitting_configs):
    """Same element-filtering check as test_run_train_foundation_elements, but
    for multihead fine-tuning with DFT and MP2 heads defined in a YAML config.

    Both runs must end with three heads (pt_head + DFT + MP2); the second run
    (--foundation_model_elements) must keep the foundation model's element set.
    """
    fitting_configs_dft = []
    fitting_configs_mp2 = []
    atomic_numbers = np.unique(
        np.concatenate([at.numbers for at in fitting_configs])
    ).tolist()
    for i, c in enumerate(fitting_configs):
        # Isolated atoms (first two configs) are added to BOTH heads so each
        # head has E0 reference structures.
        if i in (0, 1):
            c_dft = c.copy()
            c_dft.info["head"] = "DFT"
            fitting_configs_dft.append(c_dft)
            c_mp2 = c.copy()
            c_mp2.info["head"] = "MP2"
            fitting_configs_mp2.append(c_mp2)
        # Every config is additionally assigned to one head by parity.
        if i % 2 == 0:
            c_copy = c.copy()
            c_copy.info["head"] = "DFT"
            fitting_configs_dft.append(c_copy)
        else:
            c_copy = c.copy()
            c_copy.info["head"] = "MP2"
            fitting_configs_mp2.append(c_copy)
    ase.io.write(tmp_path / "fit_dft.xyz", fitting_configs_dft)
    ase.io.write(tmp_path / "fit_mp2.xyz", fitting_configs_mp2)
    # Create multihead configuration
    heads = {
        "DFT": {"train_file": f"{str(tmp_path)}/fit_dft.xyz"},
        "MP2": {"train_file": f"{str(tmp_path)}/fit_mp2.xyz"},
    }
    yaml_str = "heads:\n"
    for key, value in heads.items():
        yaml_str += f"  {key}:\n"
        for sub_key, sub_value in value.items():
            yaml_str += f"    {sub_key}: {sub_value}\n"
    config_file = tmp_path / "config.yaml"
    with open(config_file, "w", encoding="utf-8") as file:
        file.write(yaml_str)
    base_params = {
        "name": "MACE",
        "checkpoints_dir": str(tmp_path),
        "model_dir": str(tmp_path),
        "config": str(config_file),
        "loss": "weighted",
        "foundation_model": "small",
        "hidden_irreps": "128x0e",
        "r_max": 6.0,
        "default_dtype": "float64",
        "max_num_epochs": 5,
        "num_radial_basis": 10,
        "interaction_first": "RealAgnosticResidualInteractionBlock",
        "force_mh_ft_lr": True,
        "batch_size": 1,
        "num_samples_pt": 50,
        "subselect_pt": "random",
        "atomic_numbers": "[" + ",".join(map(str, atomic_numbers)) + "]",
        "filter_type_pt": "combinations",
        "valid_fraction": 0.1,
        "valid_batch_size": 1,
    }
    # Run environment setup
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    # First run: without foundation_model_elements (default behavior)
    mace_params = base_params.copy()
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    try:
        completed_process = subprocess.run(
            cmd.split(), env=run_env, capture_output=True, text=True, check=True
        )
        # Process executed successfully
        print(completed_process.stdout)
    except subprocess.CalledProcessError as e:
        # Process failed with non-zero exit code
        print(f"Command failed with exit code {e.returncode}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        raise e
    assert completed_process.returncode == 0
    # Load model and check elements
    model_filtered = torch.load(tmp_path / "MACE.model", map_location="cpu")
    filtered_elements = set(int(z) for z in model_filtered.atomic_numbers)
    assert filtered_elements == {1, 8}  # Only H and O should be present
    assert len(model_filtered.heads) == 3  # pt_head + DFT + MP2
    # Second run: with foundation_model_elements
    mace_params = base_params.copy()
    mace_params["name"] = "MACE_all_elements"
    mace_params["foundation_model_elements"] = True
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    p = subprocess.run(cmd.split(), env=run_env, check=True)
    assert p.returncode == 0
    # Load model and check elements
    model_all = torch.load(tmp_path / "MACE_all_elements.model", map_location="cpu")
    all_elements = set(int(z) for z in model_all.atomic_numbers)
    # Get elements from foundation model for comparison
    calc = mace_mp(model="small", device="cpu")
    foundation_elements = set(int(z) for z in calc.models[0].atomic_numbers)
    # Check that all foundation model elements are preserved
    assert all_elements == foundation_elements
    assert len(all_elements) > len(filtered_elements)
    assert len(model_all.heads) == 3  # pt_head + DFT + MP2
    # Check that both models can make predictions
    at = fitting_configs_dft[2].copy()
    # Test filtered model
    calc_filtered = MACECalculator(
        model_paths=tmp_path / "MACE.model",
        device="cpu",
        default_dtype="float64",
        head="DFT",
    )
    at.calc = calc_filtered
    e1 = at.get_potential_energy()
    # Test all-elements model
    calc_all = MACECalculator(
        model_paths=tmp_path / "MACE_all_elements.model",
        device="cpu",
        default_dtype="float64",
        head="DFT",
    )
    at.calc = calc_all
    e2 = at.get_potential_energy()
    assert np.isfinite(e1)
    assert np.isfinite(e2)
mace-bench/3rdparty/mace/tests/test_run_train_allkeys.py
0 → 100644
View file @
1be78103
import
os
import
subprocess
import
sys
from
copy
import
deepcopy
from
pathlib
import
Path
import
ase.io
import
numpy
as
np
import
pytest
from
ase.atoms
import
Atoms
from
mace.calculators.mace
import
MACECalculator
from
mace.cli.run_train
import
run
as
run_mace_train
from
mace.data.utils
import
KeySpecification
from
mace.tools
import
build_default_arg_parser
# Path to the CLI training entry point; the tests below invoke it as a subprocess.
run_train = Path(__file__).parent.parent / "mace" / "cli" / "run_train.py"

# Baseline CLI arguments shared by the tests in this module. Tests copy this
# dict, override/delete entries, then serialise it to "--key=value" flags
# (None-valued entries become bare "--key" flags).
_mace_params = {
    "name": "MACE",
    "valid_fraction": 0.05,
    "energy_weight": 1.0,
    "forces_weight": 10.0,
    "stress_weight": 1.0,
    "model": "MACE",
    "hidden_irreps": "128x0e",
    "max_num_epochs": 10,
    "swa": None,
    "start_swa": 5,
    "ema": None,
    "ema_decay": 0.99,
    "amsgrad": None,
    "device": "cpu",
    "seed": 5,
    "loss": "weighted",
    "energy_key": "REF_energy",
    "forces_key": "REF_forces",
    "stress_key": "REF_stress",
    "interaction_first": "RealAgnosticResidualInteractionBlock",
    "batch_size": 1,
    "valid_batch_size": 1,
    "num_samples_pt": 50,
    "subselect_pt": "random",
    "eval_interval": 2,
    "num_radial_basis": 10,
    "r_max": 6.0,
    "default_dtype": "float64",
}
def configs_numbered_keys():
    """Build 15 randomly perturbed water cells whose energy/force data are
    stored under a mix of property keys.

    The first config uses "REF_energy"/"REF_forces"; the next two use "2_*",
    then three "3_*", four "4_*" and five "5_*" keys, so different key
    specifications select different subsets of the reference data.
    """
    np.random.seed(0)
    base_water = Atoms(
        numbers=[8, 1, 1],
        positions=[[0, -2.0, 0], [1, 0, 0], [0, 1, 0]],
        cell=[4] * 3,
        pbc=[True] * 3,
    )
    # Draw all reference data up front so the np.random call sequence (and
    # therefore every generated value) stays identical.
    energies = list(np.random.normal(0.1, size=15))
    forces = list(np.random.normal(0.1, size=(15, 3, 3)))
    # some keys present, some not: 1x REF, then n copies of "<n>_*" for n=2..5
    energy_keys = ["REF_energy"] + [
        f"{n}_energy" for n in range(2, 6) for _ in range(n)
    ]
    force_keys = ["REF_forces"] + [
        f"{n}_forces" for n in range(2, 6) for _ in range(n)
    ]
    configurations = []
    for idx in range(15):
        cfg = deepcopy(base_water)
        cfg.info[energy_keys[idx]] = energies[idx]
        cfg.arrays[force_keys[idx]] = forces[idx]
        # Perturb positions after the data draws, matching the original RNG order.
        cfg.positions += np.random.normal(0.1, size=(3, 3))
        configurations.append(cfg)
    return configurations
def trial_yamls_and_and_expected():
    """Enumerate key-specification test cases for test_key_specification_methods.

    Returns a list of (yaml_contents, (arg_set, case_name), expected_energies)
    triples: each YAML dict describes head/key configuration, optionally merged
    with command-line key overrides, and the expected energies are regression
    values for a 1-epoch training run (see make_output below).
    """
    yamls = {}
    # Key overrides passed on the command line in the "with_command_line" cases.
    command_line_kwargs = {"energy_key": "2_energy", "forces_key": "2_forces"}
    yamls["no_heads"] = {}
    yamls["one_head_no_dicts"] = {
        "heads": {
            "Default": {
                "energy_key": "3_energy",
            }
        }
    }
    yamls["one_head_with_dicts"] = {
        "heads": {
            "Default": {
                "info_keys": {
                    "energy": "3_energy",
                },
                "arrays_keys": {
                    "forces": "3_forces",
                },
            }
        }
    }
    yamls["two_heads_no_dicts"] = {
        "heads": {
            "dft": {
                "train_file": "fit_multihead_dft.xyz",
                "energy_key": "3_energy",
            },
            "mp2": {
                "train_file": "fit_multihead_mp2.xyz",
                "energy_key": "4_energy",
            },
        }
    }
    yamls["two_heads_mixed"] = {
        "heads": {
            "dft": {
                "train_file": "fit_multihead_dft.xyz",
                "info_keys": {
                    "energy": "3_energy",
                },
                "arrays_keys": {
                    "forces": "3_forces",
                },
                "forces_key": "4_forces",
            },
            "mp2": {
                "train_file": "fit_multihead_mp2.xyz",
                "energy_key": "4_energy",
            },
        }
    }
    # Each YAML case is tried both with and without the command-line overrides.
    all_arg_sets = {
        "with_command_line": {
            key: {**command_line_kwargs, **value} for key, value in yamls.items()
        },
        "without_command_line": yamls,
    }
    # Regression energies, 15 per case (one per config from configs_numbered_keys).
    all_expected_outputs = {
        "with_command_line": {
            "no_heads": [
                1.0037831178668188,
                1.0183291323603265,
                1.0120784084221528,
                0.9935695881012243,
                1.0021641561865526,
                0.9999135609205868,
                0.9809440616323108,
                1.0025784765050076,
                1.0017901145495376,
                1.0136913185404515,
                1.006798563238269,
                1.0187758397828384,
                1.0180201540775071,
                1.0132368725061702,
                0.9998734173248169,
            ],
            "one_head_no_dicts": [
                1.0028437510688613,
                1.0514693378041775,
                1.059933403321331,
                1.034719940573569,
                1.0438040675561824,
                1.019719477728329,
                0.9841759692947915,
                1.0435266573857496,
                1.0339501989779065,
                1.0501795448530264,
                1.0402594216704781,
                1.0604998765679152,
                1.0633411200246015,
                1.0539071190201297,
                1.0393496428177804,
            ],
            "one_head_with_dicts": [
                0.8638341551096959,
                1.0078341354784144,
                1.0149701178418595,
                0.9945723048460148,
                1.0184158011731292,
                0.9992135295205004,
                0.8943420783639198,
                1.0327920054084088,
                0.9905731198078909,
                0.9838325204450648,
                1.0018725575620482,
                1.007263052421034,
                1.0335213929231966,
                1.0033503312511205,
                1.0174433894759563,
            ],
            "two_heads_no_dicts": [
                0.9836377578288774,
                1.0196844186291318,
                1.0151628222871238,
                0.957307281711648,
                0.985574141310865,
                0.9629670134047853,
                0.9242583185138095,
                0.9807770070311039,
                0.9973679440479541,
                1.0221127246963275,
                1.0031807967874216,
                1.0358701219543687,
                1.0434208761164758,
                1.0235606028124515,
                0.9797494630655053,
            ],
            "two_heads_mixed": [
                0.8664108574741868,
                0.9907166576278023,
                1.0051969372365164,
                0.978702477000018,
                1.025500166764692,
                0.9940095566375018,
                0.9034029726954119,
                1.0391739502744488,
                0.9717327061183668,
                0.972292103670355,
                1.0012510461663253,
                0.9978051155885286,
                1.0378611651753475,
                1.0003207628186224,
                1.0209509292189651,
            ],
        },
        "without_command_line": {
            "no_heads": [
                0.9352605307451007,
                0.991084559389268,
                0.9940350095024881,
                0.9953849198103668,
                0.9954705498032904,
                0.9964815693808411,
                0.9663142667436776,
                0.9947223808739147,
                0.9897776682803257,
                0.989027769690667,
                0.9910280920241263,
                0.992067980667518,
                0.9917276132506404,
                0.9902848752169671,
                0.9928585982942544,
            ],
            "one_head_no_dicts": [
                0.9425342207393083,
                1.0149788456087416,
                1.0249228965652788,
                1.0247924743285792,
                1.02732103964481,
                1.0168852937950326,
                0.9771283495170653,
                1.0261776335561517,
                1.0130461033368028,
                1.0162619153561783,
                1.019995179866916,
                1.0209512298344965,
                1.0219971755636952,
                1.0195791901659124,
                1.0234662527729408,
            ],
            "one_head_with_dicts": [
                0.8638341551096959,
                1.0078341354784144,
                1.0149701178418595,
                0.9945723048460148,
                1.0184158011731292,
                0.9992135295205004,
                0.8943420783639198,
                1.0327920054084088,
                0.9905731198078909,
                0.9838325204450648,
                1.0018725575620482,
                1.007263052421034,
                1.0335213929231966,
                1.0033503312511205,
                1.0174433894759563,
            ],
            "two_heads_no_dicts": [
                0.9933763730233168,
                0.9986480398559268,
                1.0042486164355315,
                1.0025568793877726,
                1.0032598081704625,
                0.9926714183717912,
                0.9920385249670881,
                1.0020278841030676,
                1.0012474150830537,
                1.0039289677261019,
                1.0022718878661814,
                1.003586385624809,
                1.003436450009097,
                1.003805673887942,
                1.001450261102316,
            ],
            "two_heads_mixed": [
                0.8781767864616707,
                0.9843563603794138,
                1.0145197579049248,
                0.9835060778675391,
                1.0419060462994596,
                0.9917393978520056,
                0.9091521032773944,
                1.0605463095070453,
                0.9685381713826684,
                0.9866493058823766,
                1.00305061187164,
                1.0051273128414386,
                1.037964258398104,
                1.0106663924241408,
                1.0274351814133602,
            ],
        },
    }
    # Flatten into (yaml, (arg_set, case), expected) triples for parametrize.
    list_of_all = []
    for key, value in all_arg_sets.items():
        for key2, value2 in value.items():
            list_of_all.append(
                (value2, (key, key2), np.asarray(all_expected_outputs[key][key2]))
            )
    return list_of_all
def dict_to_yaml_str(data, indent=0):
    """Serialise a nested dict as minimal YAML text.

    Nested dicts become indented mappings (two extra spaces per level);
    scalar values are rendered with ``str``.
    """
    pieces = []
    for key, value in data.items():
        prefix = " " * indent + str(key) + ":"
        if isinstance(value, dict):
            # Mapping: key on its own line, children rendered one level deeper.
            pieces.append(prefix + "\n" + dict_to_yaml_str(value, indent + 2))
        else:
            pieces.append(prefix + " " + str(value) + "\n")
    return "".join(pieces)
# Materialised once at import time so the case list can be handed to
# pytest.mark.parametrize below.
_trial_yamls_and_and_expected = trial_yamls_and_and_expected()
@pytest.mark.parametrize(
    "yaml_contents, name, expected_value", _trial_yamls_and_and_expected
)
def test_key_specification_methods(tmp_path, yaml_contents, name, expected_value):
    """Train for one epoch with a given head/key configuration (YAML and/or
    command-line key overrides) and compare the resulting model's energies
    against the regression values from trial_yamls_and_and_expected.
    """
    fitting_configs = configs_numbered_keys()
    # The same data serves as train file for both heads and as valid file.
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs)
    ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs)
    ase.io.write(tmp_path / "duplicated_fit_multihead_dft.xyz", fitting_configs)
    mace_params = _mace_params.copy()
    mace_params["valid_fraction"] = 0.1
    mace_params["checkpoints_dir"] = str(tmp_path)
    mace_params["model_dir"] = str(tmp_path)
    mace_params["train_file"] = "fit_multihead_dft.xyz"
    mace_params["E0s"] = "{1:0.0,8:1.0}"
    mace_params["valid_file"] = "duplicated_fit_multihead_dft.xyz"
    # An explicit valid_file replaces the fraction-based split.
    del mace_params["valid_fraction"]
    mace_params["max_num_epochs"] = 1  # many tests to do
    # Key selection comes entirely from the parametrised case, not the defaults.
    del mace_params["energy_key"]
    del mace_params["forces_key"]
    del mace_params["stress_key"]
    mace_params["name"] = "MACE_"
    filename = tmp_path / "config.yaml"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(dict_to_yaml_str(yaml_contents))
    # Only pass --config when the case actually defines YAML contents.
    if len(yaml_contents) > 0:
        mace_params["config"] = str(tmp_path / "config.yaml")
    run_env = os.environ.copy()
    sys.path.insert(0, str(Path(__file__).parent.parent))
    run_env["PYTHONPATH"] = ":".join(sys.path)
    print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"])
    cmd = (
        sys.executable
        + " "
        + str(run_train)
        + " "
        + " ".join(
            [
                (f"--{k}={v}" if v is not None else f"--{k}")
                for k, v in mace_params.items()
            ]
        )
    )
    # cwd=tmp_path because the train/valid files are given as relative paths.
    p = subprocess.run(cmd.split(), env=run_env, cwd=tmp_path, check=True)
    assert p.returncode == 0
    # Evaluate with the first declared head (or "Default" when none declared).
    if "heads" in yaml_contents:
        headname = list(yaml_contents["heads"].keys())[0]
    else:
        headname = "Default"
    calc = MACECalculator(
        tmp_path / "MACE_.model", device="cpu", default_dtype="float64", head=headname
    )
    Es = []
    for at in fitting_configs:
        at.calc = calc
        Es.append(at.get_potential_energy())
    print(name)
    print("Es", Es)
    assert np.allclose(
        np.asarray(Es), expected_value, rtol=1e-8, atol=1e-8
    ), f"Expected {expected_value} but got {Es} with error {np.max(np.abs(Es - expected_value))}"
def test_multihead_finetuning_does_not_modify_default_keyspec(tmp_path):
    """A fine-tuning dry run must leave args.key_specification equal to the
    defaults plus only the explicitly requested energy_key override."""
    fitting_configs = configs_numbered_keys()
    ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs)
    cli_argv = [
        "--name",
        "_MACE_",
        "--train_file",
        str(tmp_path / "fit_multihead_dft.xyz"),
        "--foundation_model",
        "small",
        "--device",
        "cpu",
        "--E0s",
        "{1:0.0,8:1.0}",
        "--energy_key",
        "2_energy",
        "--dry_run",
    ]
    args = build_default_arg_parser().parse_args(cli_argv)
    # Expected spec: defaults with just the energy key swapped out.
    expected_spec = KeySpecification.from_defaults()
    expected_spec.info_keys["energy"] = "2_energy"
    run_mace_train(args)
    assert args.key_specification == expected_spec
# for creating values
def make_output():
    """Regenerate the expected-energy tables consumed by
    trial_yamls_and_and_expected by running every parametrised case.

    Developer utility (not collected by pytest): prints a dict keyed by
    arg-set name, then case name.
    """
    outputs = {}
    for yaml_contents, name, expected_value in _trial_yamls_and_and_expected:
        # name is an (arg_set, case_name) pair; group results by arg_set.
        outputs.setdefault(name[0], {})
        # Bug fix: test_key_specification_methods has no ``debug_test``
        # parameter, so the previous call raised TypeError before any case ran.
        expected = test_key_specification_methods(
            Path("."), yaml_contents, name, expected_value
        )
        outputs[name[0]][name[1]] = expected
    print(outputs)
mace-bench/3rdparty/mace/tests/test_schedulefree.py
0 → 100644
View file @
1be78103
import
tempfile
from
unittest.mock
import
MagicMock
import
numpy
as
np
import
pytest
import
torch
import
torch.nn.functional
as
F
from
e3nn
import
o3
from
mace
import
data
,
modules
,
tools
from
mace.tools
import
scripts_utils
,
torch_geometric
try:
    import schedulefree
except ImportError:
    # This module only exercises the optional schedule-free optimizer; skip
    # the whole file when the dependency is missing.
    pytest.skip("Skipping schedulefree tests due to ImportError", allow_module_level=True)

torch.set_default_dtype(torch.float64)

# Shared fixtures: a single-element (carbon) table, its reference atomic
# energy, and the cutoff used by both the model and the neighbour lists.
table = tools.AtomicNumberTable([6])
atomic_energies = np.array([1.0], dtype=float)
cutoff = 5.0
def create_mace(device: str, seed: int = 1702):
    """Build a small, deterministically seeded two-interaction MACE model and
    move it to ``device``."""
    torch_geometric.seed_everything(seed)
    residual_block = modules.interaction_classes[
        "RealAgnosticResidualInteractionBlock"
    ]
    config = dict(
        r_max=cutoff,
        num_bessel=8,
        num_polynomial_cutoff=6,
        max_ell=3,
        interaction_cls=residual_block,
        interaction_cls_first=residual_block,
        num_interactions=2,
        num_elements=1,
        hidden_irreps=o3.Irreps("8x0e + 8x1o"),
        MLP_irreps=o3.Irreps("16x0e"),
        gate=F.silu,
        atomic_energies=atomic_energies,
        avg_num_neighbors=8,
        atomic_numbers=table.zs,
        correlation=3,
        radial_type="bessel",
    )
    return modules.MACE(**config).to(device)
def create_batch(device: str):
    """Return a single-graph batch, as a plain dict on ``device``, built from
    a repeated cubic diamond carbon cell."""
    from ase import build

    size = 2
    primitive = build.bulk("C", "diamond", a=3.567, cubic=True)
    atoms_list = [primitive.repeat((size, size, size))]
    print("Number of atoms", len(atoms_list[0]))
    configs = [data.config_from_atoms(atoms) for atoms in atoms_list]
    dataset = [
        data.AtomicData.from_config(config, z_table=table, cutoff=cutoff)
        for config in configs
    ]
    data_loader = torch_geometric.dataloader.DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )
    first_batch = next(iter(data_loader))
    return first_batch.to(device).to_dict()
def do_optimization_step(
    model,
    optimizer,
    device,
):
    """Run one forward/backward/step cycle on a fresh batch, then put both
    model and optimizer back into eval mode.

    The optimizer exposes train()/eval() of its own and is kept in sync with
    the model's mode throughout.
    """
    batch = create_batch(device)
    model.train()
    optimizer.train()
    optimizer.zero_grad()
    energies = model(batch, training=True, compute_force=False)["energy"]
    loss = energies.mean()
    loss.backward()
    optimizer.step()
    model.eval()
    optimizer.eval()
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_can_load_checkpoint(device):
    """Saving and reloading a checkpoint with a schedule-free optimizer must
    leave the model's predictions unchanged."""
    model = create_mace(device)
    optimizer = schedulefree.adamw_schedulefree.AdamWScheduleFree(model.parameters())
    # Minimal stand-in for the CLI args object consumed by LRScheduler.
    args = MagicMock()
    args.optimizer = "schedulefree"
    args.scheduler = "ExponentialLR"
    args.lr_scheduler_gamma = 0.9
    lr_scheduler = scripts_utils.LRScheduler(optimizer, args)
    with tempfile.TemporaryDirectory() as d:
        checkpoint_handler = tools.CheckpointHandler(
            directory=d, keep=False, tag="schedulefree"
        )
        # Train a little so the optimizer holds non-trivial state.
        for _ in range(10):
            do_optimization_step(model, optimizer, device)
        batch = create_batch(device)
        output = model(batch)
        energy = output["energy"].detach().cpu().numpy()
        state = tools.CheckpointState(
            model=model, optimizer=optimizer, lr_scheduler=lr_scheduler
        )
        checkpoint_handler.save(state, epochs=0, keep_last=False)
        checkpoint_handler.load_latest(
            state=tools.CheckpointState(model, optimizer, lr_scheduler),
            swa=False,
        )
        # Same batch through the reloaded model must give the same energies.
        batch = create_batch(device)
        output = model(batch)
        new_energy = output["energy"].detach().cpu().numpy()
        assert np.allclose(energy, new_energy, atol=1e-9)
mace-bench/3rdparty/mace/tests/test_tools.py
0 → 100644
View file @
1be78103
import
tempfile
import
numpy
as
np
import
torch
import
torch.nn.functional
from
torch
import
nn
,
optim
from
mace.tools
import
(
AtomicNumberTable
,
CheckpointHandler
,
CheckpointState
,
atomic_numbers_to_indices
,
)
def test_atomic_number_table():
    """Atomic numbers are mapped to their positions within the table's z-list."""
    z_table = AtomicNumberTable(zs=[1, 8])
    numbers = np.array([8, 8, 1])
    result = atomic_numbers_to_indices(numbers, z_table=z_table)
    # O (index 1 in the table) twice, then H (index 0).
    assert np.allclose(np.array([1, 1, 0], dtype=int), result)
class MyModel(nn.Module):
    """Minimal linear + ReLU module used as a checkpointing fixture."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 4)

    def forward(self, x):
        return nn.functional.relu(self.linear(x))
def test_save_load():
    """Loading the latest checkpoint restores the optimizer's learning rate."""
    net = MyModel()
    lr0 = 0.001
    opt = optim.SGD(net.parameters(), lr=lr0, momentum=0.9)
    sched = optim.lr_scheduler.ExponentialLR(optimizer=opt, gamma=0.99)
    with tempfile.TemporaryDirectory() as ckpt_dir:
        ckpt = CheckpointHandler(directory=ckpt_dir, tag="test", keep=True)
        ckpt.save(state=CheckpointState(net, opt, sched), epochs=50)
        # Step once so the exponential schedule moves the LR away from lr0 ...
        opt.step()
        sched.step()
        assert not np.isclose(opt.param_groups[0]["lr"], lr0)
        # ... then restoring the saved state must bring the LR back.
        ckpt.load_latest(state=CheckpointState(net, opt, sched))
        assert np.isclose(opt.param_groups[0]["lr"], lr0)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment