cyrusyc committed on
Commit
49d0cfc
1 Parent(s): ce2bf3c

improve class definition and new task

Browse files

- Improve MLIP, MLIPCalculator class definitions
- Add diatomics.py task
- Introduce covalent, improve pyproject.toml

mlip_arena/models/__init__.py CHANGED
@@ -1,27 +1,49 @@
1
  import os
 
2
 
3
  import torch
4
  import yaml
5
  from ase import Atoms
6
  from ase.calculators.calculator import Calculator, all_changes
 
7
  from torch import nn
8
  from torch_geometric.data import Data
9
 
10
  with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
11
  REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
12
 
13
- class MLIP(Calculator):
14
- def __init__(self,
15
- model_path: str = None,
16
- device: torch.device = None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  super().__init__()
18
  self.name: str = self.__class__.__name__
19
- self.version: str = None
20
- self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
- self.model: nn.Module = torch.load(model_path, map_location=self.device)
22
- self.implemented_properties = ["energy", "forces"]
 
23
 
24
- def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):
 
 
25
  """Calculate energies and forces for the given Atoms object"""
26
  super().calculate(atoms, properties, system_changes)
27
 
@@ -29,13 +51,15 @@ class MLIP(Calculator):
29
 
30
  self.results = {}
31
  if "energy" in properties:
32
- self.results["energy"] = output["energy"].item()
33
  if "forces" in properties:
34
- self.results["forces"] = output["forces"].cpu().detach().numpy()
 
 
35
 
36
- def forward(self, x: Data | Atoms) -> dict[str, torch.Tensor]:
37
  """Implement data conversion, graph creation, and model forward pass
38
-
39
  Example implementation:
40
  1. Use `ase.neighborlist.NeighborList` to get neighbor list
41
  2. Create `torch_geometric.data.Data` object and copy the data
 
1
  import os
2
+ from pathlib import Path
3
 
4
  import torch
5
  import yaml
6
  from ase import Atoms
7
  from ase.calculators.calculator import Calculator, all_changes
8
+ from huggingface_hub import PyTorchModelHubMixin
9
  from torch import nn
10
  from torch_geometric.data import Data
11
 
12
  with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
13
  REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
14
 
15
+ # class MLIPEnum(enum.Enum):
16
+ # for model, metadata in REGISTRY.items():
17
+ # model_class = getattr(importlib.import_module(model["module"]), model)
18
+ # self.setattr(model, model_class)
19
+
20
+
21
+ class MLIP(
22
+ nn.Module,
23
+ PyTorchModelHubMixin,
24
+ tags=["atomistic-simulation", "MLIP"],
25
+ ):
26
+ def __init__(self, *args, **kwargs) -> None:
27
+ super().__init__(*args, **kwargs)
28
+
29
+
30
+ class MLIPCalculator(Calculator):
31
+ def __init__(
32
+ self,
33
+ model_path: str | Path,
34
+ device: torch.device | None = None,
35
+ ):
36
  super().__init__()
37
  self.name: str = self.__class__.__name__
38
+ self.device = device or torch.device(
39
+ "cuda" if torch.cuda.is_available() else "cpu"
40
+ )
41
+ self.model: MLIP = MLIP.from_pretrained(model_path, map_location=self.device)
42
+ self.implemented_properties = ["energy", "forces", "stress"]
43
 
44
+ def calculate(
45
+ self, atoms: Atoms, properties: list[str], system_changes: list = all_changes
46
+ ):
47
  """Calculate energies and forces for the given Atoms object"""
48
  super().calculate(atoms, properties, system_changes)
49
 
 
51
 
52
  self.results = {}
53
  if "energy" in properties:
54
+ self.results["energy"] = output["energy"].squeeze().item()
55
  if "forces" in properties:
56
+ self.results["forces"] = output["forces"].squeeze().cpu().detach().numpy()
57
+ if "stress" in properties:
58
+ self.results["stress"] = output["stress"].squeeze().cpu().detach().numpy()
59
 
60
+ def forward(self, x: Atoms) -> dict[str, torch.Tensor]:
61
  """Implement data conversion, graph creation, and model forward pass
62
+
63
  Example implementation:
64
  1. Use `ase.neighborlist.NeighborList` to get neighbor list
65
  2. Create `torch_geometric.data.Data` object and copy the data
mlip_arena/models/{mace-mp.py → mace.py} RENAMED
File without changes
mlip_arena/models/registry.yaml CHANGED
@@ -1,5 +1,7 @@
1
 
 
2
  MACE_MP_Medium:
 
3
  username: cyrusyc # HF username
4
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
5
  datasets: # list of training datasets
@@ -8,4 +10,4 @@ MACE_MP_Medium:
8
  - alexandria
9
  - qmof
10
  gpu-tasks:
11
-
 
1
 
2
+
3
  MACE_MP_Medium:
4
+ module: mace
5
  username: cyrusyc # HF username
6
  datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
7
  datasets: # list of training datasets
 
10
  - alexandria
11
  - qmof
12
  gpu-tasks:
13
+
mlip_arena/models/utils.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import os
3
+ from enum import Enum
4
+
5
+ from mlip_arena.models import REGISTRY
6
+
7
+ MLIPEnum = Enum(
8
+ "MLIPEnum",
9
+ {
10
+ model: getattr(
11
+ importlib.import_module(f"{__package__}.{metadata['module']}"), model
12
+ )
13
+ for model, metadata in REGISTRY.items()
14
+ },
15
+ )
mlip_arena/tasks/__init__.py CHANGED
@@ -12,33 +12,42 @@ with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
12
 
13
  class Task:
14
  def __init__(self):
15
- self.name: str = self.__class__.__name__ # display name on the leaderboard
16
 
17
- def run(self, model: MLIP):
18
  """Run the task using the given model and return the results"""
19
  raise NotImplementedError
20
-
 
 
 
 
21
  # Calculate evaluation metrics and postprocessed data
22
  api = HfApi()
23
  api.upload_file(
24
  path_or_fileobj="results.json",
25
- path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json", # Upload to a specific folder
26
  repo_id="atomind/mlip-arena",
27
- repo_type="dataset"
28
  )
29
-
 
 
 
 
30
  def get_results(self):
31
  """Get the results from the task"""
32
 
33
  # fs = HfFileSystem()
34
  # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")
35
-
36
  for model, metadata in MODEL_REGISTRY.items():
37
  results = hf_hub_download(
38
- repo_id="atomind/mlip-arena", filename="results.json",
39
- subfolder=f"{self.__class__.__name__}/{model}",
40
- repo_type="dataset",
41
- revision=None
42
- )
43
-
44
- return results
 
 
12
 
13
  class Task:
14
  def __init__(self):
15
+ self.name: str = self.__class__.__name__ # display name on the leaderboard
16
 
17
+ def run_local(self, model: MLIP):
18
  """Run the task using the given model and return the results"""
19
  raise NotImplementedError
20
+
21
+ def run_hf(self, model: MLIP):
22
+ """Run the task using the given model and return the results"""
23
+ raise NotImplementedError
24
+
25
  # Calculate evaluation metrics and postprocessed data
26
  api = HfApi()
27
  api.upload_file(
28
  path_or_fileobj="results.json",
29
+ path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json", # Upload to a specific folder
30
  repo_id="atomind/mlip-arena",
31
+ repo_type="dataset",
32
  )
33
+
34
+ def run_nersc(self, model: MLIP):
35
+ """Run the task using the given model and return the results"""
36
+ raise NotImplementedError
37
+
38
  def get_results(self):
39
  """Get the results from the task"""
40
 
41
  # fs = HfFileSystem()
42
  # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")
43
+
44
  for model, metadata in MODEL_REGISTRY.items():
45
  results = hf_hub_download(
46
+ repo_id="atomind/mlip-arena",
47
+ filename="results.json",
48
+ subfolder=f"{self.__class__.__name__}/{model}",
49
+ repo_type="dataset",
50
+ revision=None,
51
+ )
52
+
53
+ return results
mlip_arena/tasks/diatomics.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import covalent as ct
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ from ase import Atoms
6
+ from ase.calculators.calculator import Calculator
7
+ from ase.data import chemical_symbols
8
+ from matplotlib import pyplot as plt
9
+
10
+ from mlip_arena.models import MLIPCalculator
11
+
12
+ device = torch.device("cuda")
13
+
14
+
15
+ @ct.electron
16
+ def calculate_single_diatomic(
17
+ calculator: MLIPCalculator | Calculator,
18
+ atom1: str,
19
+ atom2: str,
20
+ rmin: float = 0.1,
21
+ rmax: float = 6.5,
22
+ npts: int = int(1e3),
23
+ ):
24
+ a = 2 * rmax
25
+
26
+ rs = np.linspace(rmin, rmax, npts)
27
+ e = np.zeros_like(rs)
28
+ f = np.zeros_like(rs)
29
+
30
+ da = atom1 + atom2
31
+
32
+ for i, r in enumerate(rs):
33
+
34
+ positions = [
35
+ [0, 0, 0],
36
+ [r, 0, 0],
37
+ ]
38
+
39
+ # Create the unit cell with two atoms
40
+ atoms = Atoms(da, positions=positions, cell=[a, a, a])
41
+
42
+ atoms.calc = calculator
43
+
44
+ e[i] = atoms.get_potential_energy()
45
+ f[i] = np.inner(np.array([1, 0, 0]), atoms.get_forces()[1])
46
+
47
+ return rs, e, f, da
48
+
49
+
50
+ @ct.lattice
51
+ def calculate_homonuclear_diatomics(calculator: MLIPCalculator | Calculator):
52
+
53
+ chemical_symbols.remove("X")
54
+
55
+ results = {}
56
+
57
+ for atom in chemical_symbols:
58
+ rs, e, f, da = calculate_single_diatomic(calculator, atom, atom)
59
+ results[da] = {"r": rs, "E": e, "F": f}
60
+
61
+ return results
62
+
63
+
64
+ # with plt.style.context("default"):
65
+
66
+ # SMALL_SIZE = 6
67
+ # MEDIUM_SIZE = 8
68
+ # LARGE_SIZE = 10
69
+
70
+ # LINE_WIDTH = 1
71
+
72
+ # plt.rcParams.update(
73
+ # {
74
+ # "pgf.texsystem": "pdflatex",
75
+ # "font.family": "sans-serif",
76
+ # "text.usetex": True,
77
+ # "pgf.rcfonts": True,
78
+ # "figure.constrained_layout.use": True,
79
+ # "axes.labelsize": MEDIUM_SIZE,
80
+ # "axes.titlesize": MEDIUM_SIZE,
81
+ # "legend.frameon": False,
82
+ # "legend.fontsize": MEDIUM_SIZE,
83
+ # "legend.loc": "best",
84
+ # "lines.linewidth": LINE_WIDTH,
85
+ # "xtick.labelsize": SMALL_SIZE,
86
+ # "ytick.labelsize": SMALL_SIZE,
87
+ # }
88
+ # )
89
+
90
+ # fig, ax = plt.subplots(layout="constrained", figsize=(3, 2), dpi=300)
91
+
92
+ # color = "tab:red"
93
+ # ax.plot(rs, e, color=color, zorder=1)
94
+
95
+ # ax.axhline(ls="--", color=color, alpha=0.5, lw=0.5 * LINE_WIDTH)
96
+
97
+ # ylo, yhi = ax.get_ylim()
98
+ # ax.set(xlabel=r"r [$\AA]$", ylim=(max(-7, ylo), min(5, yhi)))
99
+ # ax.set_ylabel(ylabel="E [eV]", color=color)
100
+ # ax.tick_params(axis="y", labelcolor=color)
101
+ # ax.text(0.8, 0.85, da, fontsize=LARGE_SIZE, transform=ax.transAxes)
102
+
103
+ # color = "tab:blue"
104
+
105
+ # at = ax.twinx()
106
+ # at.plot(rs, f, color=color, zorder=0, lw=0.5 * LINE_WIDTH)
107
+
108
+ # at.axhline(ls="--", color=color, alpha=0.5, lw=0.5 * LINE_WIDTH)
109
+
110
+ # ylo, yhi = at.get_ylim()
111
+ # at.set(
112
+ # xlabel=r"r [$\AA]$",
113
+ # ylim=(max(-20, ylo), min(20, yhi)),
114
+ # )
115
+ # at.set_ylabel(ylabel="F [eV/$\AA$]", color=color)
116
+ # at.tick_params(axis="y", labelcolor=color)
117
+
118
+ # plt.show()
119
+
120
+
121
+ if __name__ == "__main__":
122
+
123
+ local = ct.executor.LocalExecutor()
pyproject.toml CHANGED
@@ -26,9 +26,11 @@ classifiers=[
26
  "Programming Language :: Python :: 3 :: Only",
27
  ]
28
  dependencies=[
29
- "torch",
30
  "ase",
31
- "torch_dftd",
 
 
32
  ]
33
 
34
  [project.urls]
 
26
  "Programming Language :: Python :: 3 :: Only",
27
  ]
28
  dependencies=[
29
+ "torch>=2.0.0",
30
  "ase",
31
+ "torch_dftd>=0.4.0",
32
+ "huggingface_hub",
33
+ "torch-geometric>=2.5.2",
34
  ]
35
 
36
  [project.urls]
tests/oxygen_diatomics.ipynb ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from mace.calculators import mace_mp\n",
10
+ "from mlip_arena.tasks.diatomics import calculate_single_diatomic, calculate_homonuclear_diatomics\n",
11
+ "\n",
12
+ "calc = mace_mp()"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": null,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "from enum import Enum\n",
22
+ "\n",
23
+ "e = Enum(\"TESTEnum\", {\"A\": 1, \"B\": 2, \"C\": 3})\n",
24
+ "\n",
25
+ "print(e.A)\n",
26
+ "print(e.__members__)"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": []
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 11,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "name": "stdout",
43
+ "output_type": "stream",
44
+ "text": [
45
+ "MACE_MP_Medium <class 'mlip_arena.models.mace.MACE_MP_Medium'>\n"
46
+ ]
47
+ }
48
+ ],
49
+ "source": [
50
+ "from mlip_arena.models.utils import MLIPEnum\n",
51
+ "\n",
52
+ "for i in MLIPEnum:\n",
53
+ " print(i.name, i.value)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "import covalent as ct\n",
63
+ "local = ct.executor.LocalExecutor()\n",
64
+ "\n",
65
+ "dispatch_id = ct.dispatch(calculate_homonuclear_diatomics)(mace_mp)\n",
66
+ "\n",
67
+ "result = ct.get_result(dispatch_id)\n",
68
+ "print(result)"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": []
77
+ }
78
+ ],
79
+ "metadata": {
80
+ "kernelspec": {
81
+ "display_name": "Python 3",
82
+ "language": "python",
83
+ "name": "python3"
84
+ },
85
+ "language_info": {
86
+ "codemirror_mode": {
87
+ "name": "ipython",
88
+ "version": 3
89
+ },
90
+ "file_extension": ".py",
91
+ "mimetype": "text/x-python",
92
+ "name": "python",
93
+ "nbconvert_exporter": "python",
94
+ "pygments_lexer": "ipython3",
95
+ "version": "3.11.8"
96
+ }
97
+ },
98
+ "nbformat": 4,
99
+ "nbformat_minor": 2
100
+ }