Spaces:

atomind
/

mlip-arena

Running

App Files Files Community

cyrusyc committed on Mar 30

Commit

49d0cfc

•

1 Parent(s): ce2bf3c

improve class definitions and add new task

Browse files

- Improve MLIP, MLIPCalculator class definitions
- Add diatomics.py task
- Introduce covalent, improve pyproject.toml

Files changed (8) hide show

mlip_arena/models/__init__.py +37 -13
mlip_arena/models/{mace-mp.py → mace.py} +0 -0
mlip_arena/models/registry.yaml +3 -1
mlip_arena/models/utils.py +15 -0
mlip_arena/tasks/__init__.py +23 -14
mlip_arena/tasks/diatomics.py +123 -0
pyproject.toml +4 -2
tests/oxygen_diatomics.ipynb +100 -0

mlip_arena/models/__init__.py CHANGED Viewed

@@ -1,27 +1,49 @@
 import os
 import torch
 import yaml
 from ase import Atoms
 from ase.calculators.calculator import Calculator, all_changes
 from torch import nn
 from torch_geometric.data import Data
 with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
     REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
-class MLIP(Calculator):
-    def __init__(self,
-                 model_path: str = None,
-                 device: torch.device = None):
         super().__init__()
         self.name: str = self.__class__.__name__
-        self.version: str = None
-        self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model: nn.Module = torch.load(model_path, map_location=self.device)
-        self.implemented_properties = ["energy", "forces"]
-    def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):
         """Calculate energies and forces for the given Atoms object"""
         super().calculate(atoms, properties, system_changes)
@@ -29,13 +51,15 @@ class MLIP(Calculator):
         self.results = {}
         if "energy" in properties:
-            self.results["energy"] = output["energy"].item()
         if "forces" in properties:
-            self.results["forces"] = output["forces"].cpu().detach().numpy()
-    def forward(self, x: Data | Atoms) -> dict[str, torch.Tensor]:
         """Implement data conversion, graph creation, and model forward pass
         Example implementation:
         1. Use `ase.neighborlist.NeighborList` to get neighbor list
         2. Create `torch_geometric.data.Data` object and copy the data

 import os
+from pathlib import Path
 import torch
 import yaml
 from ase import Atoms
 from ase.calculators.calculator import Calculator, all_changes
+from huggingface_hub import PyTorchModelHubMixin
 from torch import nn
 from torch_geometric.data import Data
 with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
     REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
+# class MLIPEnum(enum.Enum):
+#     for model, metadata in REGISTRY.items():
+#         model_class = getattr(importlib.import_module(model["module"]), model)
+#         self.setattr(model, model_class)
+class MLIP(
+    nn.Module,
+    PyTorchModelHubMixin,
+    tags=["atomistic-simulation", "MLIP"],
+):
+    """Base model class: a torch ``nn.Module`` combined with ``PyTorchModelHubMixin``.
+
+    ``MLIPCalculator`` in this module obtains its model through
+    ``MLIP.from_pretrained``. The ``tags`` class keyword is handed to the base
+    classes at class-creation time (presumably Hub model-card tags consumed by
+    the mixin; confirm against the huggingface_hub documentation).
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        # Pass all positional and keyword arguments straight through to the
+        # parent initialisers; this class adds no state of its own.
+        super().__init__(*args, **kwargs)
+class MLIPCalculator(Calculator):
+    def __init__(
+        self,
+        model_path: str | Path,
+        device: torch.device | None = None,
+    ):
         super().__init__()
         self.name: str = self.__class__.__name__
+        self.device = device or torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu"
+        )
+        self.model: MLIP = MLIP.from_pretrained(model_path, map_location=self.device)
+        self.implemented_properties = ["energy", "forces", "stress"]
+    def calculate(
+        self, atoms: Atoms, properties: list[str], system_changes: list = all_changes
+    ):
         """Calculate energies and forces for the given Atoms object"""
         super().calculate(atoms, properties, system_changes)
         self.results = {}
         if "energy" in properties:
+            self.results["energy"] = output["energy"].squeeze().item()
         if "forces" in properties:
+            self.results["forces"] = output["forces"].squeeze().cpu().detach().numpy()
+        if "stress" in properties:
+            self.results["stress"] = output["stress"].squeeze().cpu().detach().numpy()
+    def forward(self, x: Atoms) -> dict[str, torch.Tensor]:
         """Implement data conversion, graph creation, and model forward pass
         Example implementation:
         1. Use `ase.neighborlist.NeighborList` to get neighbor list
         2. Create `torch_geometric.data.Data` object and copy the data

mlip_arena/models/{mace-mp.py → mace.py} RENAMED Viewed

File without changes

mlip_arena/models/registry.yaml CHANGED Viewed

@@ -1,5 +1,7 @@
 MACE_MP_Medium:
   username: cyrusyc # HF username
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets: # list of training datasets
@@ -8,4 +10,4 @@ MACE_MP_Medium:
     - alexandria
     - qmof
   gpu-tasks:

 MACE_MP_Medium:
+  module: mace
   username: cyrusyc # HF username
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets: # list of training datasets
     - alexandria
     - qmof
   gpu-tasks:

mlip_arena/models/utils.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import importlib
+import os
+from enum import Enum
+from mlip_arena.models import REGISTRY
+MLIPEnum = Enum(
+    "MLIPEnum",
+    {
+        model: getattr(
+            importlib.import_module(f"{__package__}.{metadata['module']}"), model
+        )
+        for model, metadata in REGISTRY.items()
+    },
+)

mlip_arena/tasks/__init__.py CHANGED Viewed

@@ -12,33 +12,42 @@ with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
 class Task:
     def __init__(self):
-        self.name: str = self.__class__.__name__ # display name on the leaderboard
-    def run(self, model: MLIP):
         """Run the task using the given model and return the results"""
         raise NotImplementedError
         # Calculate evaluation metrics and postprocessed data
         api = HfApi()
         api.upload_file(
             path_or_fileobj="results.json",
-            path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json", # Upload to a specific folder
             repo_id="atomind/mlip-arena",
-            repo_type="dataset"
         )
     def get_results(self):
         """Get the results from the task"""
         # fs = HfFileSystem()
         # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")
         for model, metadata in MODEL_REGISTRY.items():
             results = hf_hub_download(
-                repo_id="atomind/mlip-arena", filename="results.json",
-                subfolder=f"{self.__class__.__name__}/{model}",
-                repo_type="dataset",
-                revision=None
-                )
-        return results

 class Task:
+    """Base class for a benchmark task.
+
+    The ``run_local``, ``run_hf`` and ``run_nersc`` methods are stubs that
+    raise ``NotImplementedError``. ``get_results`` downloads ``results.json``
+    files from the ``atomind/mlip-arena`` dataset repository.
+    """
     def __init__(self):
+        self.name: str = self.__class__.__name__  # display name on the leaderboard
+    def run_local(self, model: MLIP):
         """Run the task using the given model and return the results"""
         raise NotImplementedError
+    def run_hf(self, model: MLIP):
+        """Run the task using the given model and return the results"""
+        raise NotImplementedError
+        # NOTE(review): everything below is unreachable because of the raise
+        # above; it reads as a sketch of the intended upload step.
         # Calculate evaluation metrics and postprocessed data
         api = HfApi()
         api.upload_file(
             path_or_fileobj="results.json",
+            path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json",  # Upload to a specific folder
             repo_id="atomind/mlip-arena",
+            repo_type="dataset",
         )
+    def run_nersc(self, model: MLIP):
+        """Run the task using the given model and return the results"""
+        raise NotImplementedError
     def get_results(self):
         """Get the results from the task"""
         # fs = HfFileSystem()
         # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")
+        # NOTE(review): `results` is reassigned on every iteration, so only the
+        # value returned by hf_hub_download for the last MODEL_REGISTRY entry is
+        # returned; `metadata` is unused, and an empty registry would leave
+        # `results` unbound at the return statement.
         for model, metadata in MODEL_REGISTRY.items():
             results = hf_hub_download(
+                repo_id="atomind/mlip-arena",
+                filename="results.json",
+                subfolder=f"{self.__class__.__name__}/{model}",
+                repo_type="dataset",
+                revision=None,
+            )
+        return results

mlip_arena/tasks/diatomics.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import covalent as ct
+import numpy as np
+import pandas as pd
+import torch
+from ase import Atoms
+from ase.calculators.calculator import Calculator
+from ase.data import chemical_symbols
+from matplotlib import pyplot as plt
+from mlip_arena.models import MLIPCalculator
+# NOTE(review): hard-coded to CUDA and not referenced by the code shown in this
+# module; MLIPCalculator chooses its own device with a CPU fallback. Confirm
+# whether this global is still needed.
+device = torch.device("cuda")
+@ct.electron
+def calculate_single_diatomic(
+    calculator: MLIPCalculator | Calculator,
+    atom1: str,
+    atom2: str,
+    rmin: float = 0.1,
+    rmax: float = 6.5,
+    npts: int = int(1e3),
+):
+    a = 2 * rmax
+    rs = np.linspace(rmin, rmax, npts)
+    e = np.zeros_like(rs)
+    f = np.zeros_like(rs)
+    da = atom1 + atom2
+    for i, r in enumerate(rs):
+        positions = [
+            [0, 0, 0],
+            [r, 0, 0],
+        ]
+        # Create the unit cell with two atoms
+        atoms = Atoms(da, positions=positions, cell=[a, a, a])
+        atoms.calc = calculator
+        e[i] = atoms.get_potential_energy()
+        f[i] = np.inner(np.array([1, 0, 0]), atoms.get_forces()[1])
+    return rs, e, f, da
+@ct.lattice
+def calculate_homonuclear_diatomics(calculator: MLIPCalculator | Calculator):
+    chemical_symbols.remove("X")
+    results = {}
+    for atom in chemical_symbols:
+        rs, e, f, da = calculate_single_diatomic(calculator, atom, atom)
+        results[da] = {"r": rs, "E": e, "F": f}
+    return results
+# with plt.style.context("default"):
+#     SMALL_SIZE = 6
+#     MEDIUM_SIZE = 8
+#     LARGE_SIZE = 10
+#     LINE_WIDTH = 1
+#     plt.rcParams.update(
+#         {
+#             "pgf.texsystem": "pdflatex",
+#             "font.family": "sans-serif",
+#             "text.usetex": True,
+#             "pgf.rcfonts": True,
+#             "figure.constrained_layout.use": True,
+#             "axes.labelsize": MEDIUM_SIZE,
+#             "axes.titlesize": MEDIUM_SIZE,
+#             "legend.frameon": False,
+#             "legend.fontsize": MEDIUM_SIZE,
+#             "legend.loc": "best",
+#             "lines.linewidth": LINE_WIDTH,
+#             "xtick.labelsize": SMALL_SIZE,
+#             "ytick.labelsize": SMALL_SIZE,
+#         }
+#     )
+#     fig, ax = plt.subplots(layout="constrained", figsize=(3, 2), dpi=300)
+#     color = "tab:red"
+#     ax.plot(rs, e, color=color, zorder=1)
+#     ax.axhline(ls="--", color=color, alpha=0.5, lw=0.5 * LINE_WIDTH)
+#     ylo, yhi = ax.get_ylim()
+#     ax.set(xlabel=r"r [$\AA]$", ylim=(max(-7, ylo), min(5, yhi)))
+#     ax.set_ylabel(ylabel="E [eV]", color=color)
+#     ax.tick_params(axis="y", labelcolor=color)
+#     ax.text(0.8, 0.85, da, fontsize=LARGE_SIZE, transform=ax.transAxes)
+#     color = "tab:blue"
+#     at = ax.twinx()
+#     at.plot(rs, f, color=color, zorder=0, lw=0.5 * LINE_WIDTH)
+#     at.axhline(ls="--", color=color, alpha=0.5, lw=0.5 * LINE_WIDTH)
+#     ylo, yhi = at.get_ylim()
+#     at.set(
+#         xlabel=r"r [$\AA]$",
+#         ylim=(max(-20, ylo), min(20, yhi)),
+#     )
+#     at.set_ylabel(ylabel="F [eV/$\AA$]", color=color)
+#     at.tick_params(axis="y", labelcolor=color)
+#     plt.show()
+if __name__ == "__main__":
+    # Script entry point: only constructs a local covalent executor; nothing
+    # is dispatched with it in the lines shown here.
+    local = ct.executor.LocalExecutor()

pyproject.toml CHANGED Viewed

@@ -26,9 +26,11 @@ classifiers=[
     "Programming Language :: Python :: 3 :: Only",
 ]
 dependencies=[
-    "torch",
     "ase",
-    "torch_dftd",
 ]
 [project.urls]

     "Programming Language :: Python :: 3 :: Only",
 ]
 dependencies=[
+    "torch>=2.0.0",
     "ase",
+    "torch_dftd>=0.4.0",
+    "huggingface_hub",
+    "torch-geometric>=2.5.2",
 ]
 [project.urls]

tests/oxygen_diatomics.ipynb ADDED Viewed

	@@ -0,0 +1,100 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mace.calculators import mace_mp\n",
+    "from mlip_arena.tasks.diatomics import calculate_single_diatomic, calculate_homonuclear_diatomics\n",
+    "\n",
+    "calc = mace_mp()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from enum import Enum\n",
+    "\n",
+    "e = Enum(\"TESTEnum\", {\"A\": 1, \"B\": 2, \"C\": 3})\n",
+    "\n",
+    "print(e.A)\n",
+    "print(e.__members__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MACE_MP_Medium <class 'mlip_arena.models.mace.MACE_MP_Medium'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "from mlip_arena.models.utils import MLIPEnum\n",
+    "\n",
+    "for i in MLIPEnum:\n",
+    "    print(i.name, i.value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import covalent as ct\n",
+    "local = ct.executor.LocalExecutor()\n",
+    "\n",
+    "dispatch_id = ct.dispatch(calculate_homonuclear_diatomics)(mace_mp)\n",
+    "\n",
+    "result = ct.get_result(dispatch_id)\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}