Spaces:

atomind
/

mlip-arena

Running

App Files Files Community

cyrusyc commited on Mar 26

Commit

b3722a8

•

1 Parent(s): 66b89f4

add jobs tasks

Browse files

Files changed (12) hide show

README.md +5 -1
mlip_arena/jobs/__init__.py +38 -0
mlip_arena/jobs/run.py +13 -0
mlip_arena/models/README.md +1 -1
mlip_arena/models/__init__.py +11 -6
mlip_arena/models/mace-mp.py +9 -7
mlip_arena/models/registry.yaml +4 -2
mlip_arena/tasks/README.md +9 -0
mlip_arena/tasks/__init__.py +42 -1
mlip_arena/tasks/alexandria.py +4 -0
mlip_arena/tasks/nacl.py +7 -3
mlip_arena/tasks/qmof.py +4 -0

README.md CHANGED Viewed

@@ -23,7 +23,11 @@ If you have pretrained MLIP models that you would like to contribute to the MLIP
 2. Follow the template to code the I/O interface for your model, and upload the script along with metadata to the MLIP Arena [here]().
 3. CPU benchmarking will be performed automatically. Due to the limited amount of GPU compute, if you would like to be considered for GPU benchmarking, please create a pull request to demonstrate the offline performance of your model (published paper or preprint). We will review and select the models to be benchmarked on GPU.
-### Add new benchmarks
 #### Molecular dynamics calculations

 2. Follow the template to code the I/O interface for your model, and upload the script along with metadata to the MLIP Arena [here]().
 3. CPU benchmarking will be performed automatically. Due to the limited amount of GPU compute, if you would like to be considered for GPU benchmarking, please create a pull request to demonstrate the offline performance of your model (published paper or preprint). We will review and select the models to be benchmarked on GPU.
+### Add new benchmark tasks
+1. Create a new [Hugging Face Dataset](https://huggingface.co/new-dataset) repository and upload the reference data (e.g. DFT, AIMD, experimental measurements such as RDF).
+2. Follow the task template to implement the task class and upload the script along with metadata to the MLIP Arena [here]().
+3. Code a benchmark script to evaluate the performance of your model on the task. The script should be able to load the model and the dataset, and output the evaluation metrics.
 #### Molecular dynamics calculations

mlip_arena/jobs/__init__.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import enum
+from mlip_arena.models import MLIP
+from mlip_arena.tasks import Task
+class Machine(enum.Enum):
+    """Compute targets a Job can be assigned to; each value is the machine's display label."""
+    # Hugging Face hosted CPU tier.
+    HFCPU = "Hugging Face CPU Basic"
+    # NERSC Perlmutter CPU node.
+    PERLCPU = "NERSC Perlmutter CPU"
+    # NERSC Perlmutter GPU node, A100 with 40GB.
+    PERLA100 = "NERSC Perlmutter A100 40GB"
+    # NERSC Perlmutter GPU node, A100 with 80GB.
+    PERLA100L = "NERSC Perlmutter A100 80GB"
+class Job:
+    def __init__(self, model: MLIP, task: Task, machine: Machine, **kwargs):
+        self.calculator = model
+        self.task = task
+        self.machine = machine
+        self.kwargs = kwargs
+    def __str__(self):
+        return f"Job: {self.task.name} on {self.machine.value}"
+    def run(self):
+        if self.machine == Machine.HFCPU:
+            print(f"Running {self.name} on {self.machine.value}")
+            "run the task on Hugging Face CPU Basic"
+            raise NotImplementedError
+        elif self.machine == Machine.PERLCPU:
+            print(f"Running {self.name} on {self.machine.value}")
+            "send the task to NERSC Perlmutter CPU node and listen for the results"
+            raise NotImplementedError
+        elif self.machine == Machine.PERLA100:
+            print(f"Running {self.name} on {self.machine.value}")
+            "send the task to NERSC Perlmutter GPU node and listen for the results"
+            raise NotImplementedError

mlip_arena/jobs/run.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import importlib
+from mlip_arena.models import REGISTRY as MODEL_REGISTRY
+from mlip_arena.tasks import REGISTRY as TASK_REGISTRY
+print(MODEL_REGISTRY)
+print(TASK_REGISTRY)
+for task, metadata in TASK_REGISTRY.items():
+    print(f"mlip_arena.tasks.{task}")
+    module = importlib.import_module(f"mlip_arena.tasks.{task}")
+    module.whoami()

mlip_arena/models/README.md CHANGED Viewed

@@ -6,4 +6,4 @@
 2. Add the classes and their supported tasks to the model registry file `registry.yaml`.
 3. Run tests on HF Space to ensure the model is working as expected.
 4. [Push files to the Hub](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
-5. Use [HF webhook](https://huggingface.co/docs/hub/en/webhooks) to run tasks and visualize the results on leaderboard. [[guide]](https://huggingface.co/docs/hub/en/webhooks-guide-metadata-review)

 2. Add the classes and their supported tasks to the model registry file `registry.yaml`.
 3. Run tests on HF Space to ensure the model is working as expected.
 4. [Push files to the Hub](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
+5. Use [HF webhook](https://huggingface.co/docs/hub/en/webhooks) to check the status of benchmark tasks (pass, fail, null), run unfinished tasks and visualize the results on leaderboard. [[guide]](https://huggingface.co/docs/hub/en/webhooks-guide-metadata-review)

mlip_arena/models/__init__.py CHANGED Viewed

@@ -1,19 +1,24 @@
 import torch
 from ase import Atoms
 from ase.calculators.calculator import Calculator, all_changes
 from torch import nn
 from torch_geometric.data import Data
-REGISTRY_FILE = 'registry.yaml'
 class MLIP(Calculator):
-    def __init__(self):
         super().__init__()
-        self.name: str = "MLIP"
         self.version: str = None
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model: nn.Module = None
         self.implemented_properties = ["energy", "forces"]
     def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):

+import os
 import torch
+import yaml
 from ase import Atoms
 from ase.calculators.calculator import Calculator, all_changes
 from torch import nn
 from torch_geometric.data import Data
+# Load the registry.yaml that sits next to this module into REGISTRY at import time.
+# NOTE(review): yaml.FullLoader is more permissive than yaml.safe_load; acceptable for a bundled file, revisit if the registry can come from an untrusted source.
+with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
+    REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
 class MLIP(Calculator):
+    def __init__(self,
+                 model_path: str = None,
+                 device: torch.device = None):
         super().__init__()
+        self.name: str = self.__class__.__name__
         self.version: str = None
+        self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model: nn.Module = torch.load(model_path, map_location=self.device)
         self.implemented_properties = ["energy", "forces"]
     def calculate(self, atoms: Atoms, properties: list[str], system_changes: dict = all_changes):

mlip_arena/models/mace-mp.py CHANGED Viewed

@@ -8,15 +8,17 @@ from mlip_arena.models import MLIP
 class MACE_MP_Medium(MLIP):
-    def __init__(self):
-        super().__init__()
         self.name = "MACE-MP-0 (medium)"
         self.version = "1.0.0"
-        fpath = hf_hub_download(repo_id="cyrusyc/mace-universal", subfolder="pretrained", filename="2023-12-12-mace-128-L1_epoch-199.model")
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model = torch.load(fpath, map_location="cpu")
-        self.model.to(self.device)
         self.implemented_properties = [
             "energy",
             "forces",

 class MACE_MP_Medium(MLIP):
+    def __init__(self, device: torch.device = None):
+        fpath = hf_hub_download(
+            repo_id="cyrusyc/mace-universal",
+            subfolder="pretrained",
+            filename="2023-12-12-mace-128-L1_epoch-199.model",
+            revision=None # TODO: Add revision
+        )
+        super().__init__(model_path=fpath, device=device)
         self.name = "MACE-MP-0 (medium)"
         self.version = "1.0.0"
         self.implemented_properties = [
             "energy",
             "forces",

mlip_arena/models/registry.yaml CHANGED Viewed

@@ -4,6 +4,8 @@ MACE_MP_Medium:
   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets: # list of training datasets
     - atomind/mptrj # TODO: fake HF dataset repo
-  tasks:
     - alexandria
-    - qmof

   datetime: 2024-03-25T14:30:00 # TODO: Fake datetime
   datasets: # list of training datasets
     - atomind/mptrj # TODO: fake HF dataset repo
+  cpu-tasks:
     - alexandria
+    - qmof
+  gpu-tasks:

mlip_arena/tasks/README.md CHANGED Viewed

	@@ -0,0 +1,9 @@

+## Note on task registration
+1. Use `ast` to parse task classes from the uploaded script.
+2. Add the classes and their supported tasks to the task registry file `registry.yaml`.
+3. Run tests on HF Space to ensure the task is working as expected.
+4. [Push task script to the Space](https://huggingface.co/docs/huggingface_hub/guides/upload) and sync with github repository.
+5. Create task folder in [mlip-arena](https://huggingface.co/datasets/atomind/mlip-arena) HF Dataset.
+6.

mlip_arena/tasks/__init__.py CHANGED Viewed

	@@ -1,3 +1,44 @@

1


2
3	- ~~REGISTRY_FILE~~ = ~~'registry.yaml'~~

+import os
+import yaml
+from huggingface_hub import HfApi, HfFileSystem, hf_hub_download
+from mlip_arena.models import MLIP
+from mlip_arena.models import REGISTRY as MODEL_REGISTRY
+# Load the registry.yaml that sits next to this module into REGISTRY at import time.
+# NOTE(review): yaml.FullLoader is more permissive than yaml.safe_load; acceptable for a bundled file, revisit if the registry can come from an untrusted source.
+with open(os.path.join(os.path.dirname(__file__), "registry.yaml")) as f:
+    REGISTRY = yaml.load(f, Loader=yaml.FullLoader)
+class Task:
+    def __init__(self):
+        self.name: str = self.__class__.__name__ # display name on the leaderboard
+    def run(self, model: MLIP):
+        """Run the task using the given model and return the results"""
+        raise NotImplementedError
+        # Calcualte evaluation metrics and postprocessed data
+        api = HfApi()
+        api.upload_file(
+            path_or_fileobj="results.json",
+            path_in_repo=f"{self.__class__.__name__}/{model.__class__.__name__}/results.json", # Upload to a specific folder
+            repo_id="atomind/mlip-arena",
+            repo_type="dataset"
+        )
+    def get_results(self):
+        """Get the results from the task"""
+        # fs = HfFileSystem()
+        # files = fs.glob(f"datasets/atomind/mlip-arena/{self.__class__.__name__}/*/*.json")
+        for model, metadata in MODEL_REGISTRY.items():
+            results = hf_hub_download(
+                repo_id="atomind/mlip-arena", filename="results.json",
+                subfolder=f"{self.__class__.__name__}/{model}",
+                repo_type="dataset",
+                revision=None
+                )
+        return results

mlip_arena/tasks/alexandria.py CHANGED Viewed

	@@ -1,3 +1,7 @@
1
2
3	URL = "https://alexandria.icams.rub.de/"

 URL = "https://alexandria.icams.rub.de/"
+def whoami():
+    print(f'TEST: {__file__}')

mlip_arena/tasks/nacl.py CHANGED Viewed

@@ -1,7 +1,11 @@
-import yaml
 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
-from mlip_arena.models import REGISTRY_FILE
-yaml.load(open(REGISTRY_FILE), Loader=yaml.FullLoader)

 from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
+from mlip_arena.models import MLIP
+def whoami():
+    print(f'TEST: {__file__}')
+if __name__ == "__main__":

mlip_arena/tasks/qmof.py CHANGED Viewed

	@@ -0,0 +1,4 @@


1	+
2	+
3	+ def whoami():  # smoke-test hook: identifies this task module
4	+ print(f'TEST: {__file__}')  # prints "TEST: " followed by this module's file path