Spaces:

atomind
/

mlip-arena

Running

Yuan (Cyrus) Chiang committed on 8 days ago

Commit

c19aed5

•

1 Parent(s): fdb2bc5

add thermal conductivity rank and page (#16)

* add thermal conductivity page

* add thermal conductivity rank

* add streamlit config

* add wte csv file

* add wte csv file

* add metric explanation; update page

Files changed (13) hide show

.streamlit/config.toml +2 -0
mlip_arena/__init__.py +3 -0
mlip_arena/models/__init__.py +1 -1
mlip_arena/models/registry.yaml +37 -9
mlip_arena/tasks/registry.yaml +5 -6
mlip_arena/tasks/thermal-conductivity/wte.csv +7 -0
serve/app.py +11 -0
serve/leaderboard.py +3 -2
serve/ranks/combustion.py +7 -1
serve/ranks/homonuclear-diatomics.py +18 -7
serve/ranks/thermal-conductivity.py +60 -0
serve/tasks/stability.py +7 -7
serve/tasks/thermal-conductivity.py +46 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [server]
2	+ fileWatcherType = "poll"

mlip_arena/__init__.py CHANGED Viewed

	@@ -0,0 +1,3 @@


1	+ from pathlib import Path
2	+
3	+ PKG_DIR = Path(__file__).parent

mlip_arena/models/__init__.py CHANGED Viewed

@@ -22,7 +22,7 @@ for model, metadata in REGISTRY.items():
     try:
         module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
         MLIPMap[model] = getattr(module, metadata["class"])
-    except ModuleNotFoundError as e:
         print(e)
         continue

     try:
         module = importlib.import_module(f"{__package__}.{metadata['module']}.{metadata['family']}")
         MLIPMap[model] = getattr(module, metadata["class"])
+    except (ModuleNotFoundError, AttributeError) as e:
         print(e)
         continue

mlip_arena/models/registry.yaml CHANGED Viewed

@@ -10,8 +10,6 @@ MACE-MP(M):
   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
-    - alexandria
-    - qmof
   gpu-tasks:
     - homonuclear-diatomics
     - stability
@@ -79,8 +77,6 @@ ORB:
     - MPTrj # TODO: fake HF dataset repo
     - Alexandria
   cpu-tasks:
-    - alexandria
-    - qmof
   gpu-tasks:
     - homonuclear-diatomics
     - combustion
@@ -104,8 +100,6 @@ SevenNet:
   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
-    - alexandria
-    - qmof
   gpu-tasks:
     - homonuclear-diatomics
     - stability
@@ -215,8 +209,6 @@ MACE-OFF(M):
   datasets:
     - SPICE # TODO: fake HF dataset repo
   cpu-tasks:
-    - alexandria
-    - qmof
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/ACEsuit/mace
@@ -263,4 +255,40 @@ DeepMD:
   date: 2024-10-09
   prediction: EFS
   nvt: true
-  npt: true

   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - stability
     - MPTrj # TODO: fake HF dataset repo
     - Alexandria
   cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - combustion
   datasets:
     - MPTrj # TODO: fake HF dataset repo
   cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
     - stability
   datasets:
     - SPICE # TODO: fake HF dataset repo
   cpu-tasks:
   gpu-tasks:
     - homonuclear-diatomics
   github: https://github.com/ACEsuit/mace
   date: 2024-10-09
   prediction: EFS
   nvt: true
+  npt: true
+ORBv2(MPTrj):
+  module: externals
+  class: ORBv2
+  family: orb
+  package:
+  checkpoint: orb-mptraj-only-v2-20241014.ckpt
+  username:
+  last-update: 2024-10-29T00:00:00
+  datetime: 2024-10-29T00:00:00 # TODO: Fake datetime
+  datasets:
+    - MPTrj
+  github: https://github.com/orbital-materials/orb-models
+  doi:
+  date: 2024-10-15
+  prediction: EFS
+  nvt: true
+  npt: true
+eqV2(MPTrj-S):
+  module: externals
+  class: eqV2
+  family: fairchem
+  package: fairchem-core==1.2.0
+  checkpoint: eqV2_31M_mp.pt
+  username: fairchem # HF handle
+  last-update: 2024-10-29T00:00:00
+  datetime: 2024-10-29T00:00:00
+  datasets:
+    - MPTrj
+  prediction: EFS
+  nvt: true
+  npt: true
+  date: 2024-10-18
+  github: https://github.com/FAIR-Chem/fairchem
+  doi: https://arxiv.org/abs/2410.12771

mlip_arena/tasks/registry.yaml CHANGED Viewed

@@ -4,6 +4,11 @@ Homonuclear diatomics:
   task-layout: wide
   rank-page: homonuclear-diatomics
   last-update: 2024-09-19
 High pressure stability:
   category: Molecular dynamics
   task-page: stability
@@ -14,9 +19,3 @@ Combustion:
   task-page: combustion
   task-layout: centered
   rank-page: combustion
-# nacl:
-#   last-update: 2024-03-25T14:30:00 # TODO: Fake datetime
-# alexandria:
-#   last-update: 2024-03-25T14:30:00 # TODO: Fake datetime
-# qmof:
-#   last-update: 2024-03-25T14:30:00 # TODO: Fake datetime

   task-layout: wide
   rank-page: homonuclear-diatomics
   last-update: 2024-09-19
+Thermal conductivity:
+  category: Material Properties
+  task-page: thermal-conductivity
+  task-layout: centered
+  rank-page: thermal-conductivity
 High pressure stability:
   category: Molecular dynamics
   task-page: stability
   task-page: combustion
   task-layout: centered
   rank-page: combustion

mlip_arena/tasks/thermal-conductivity/wte.csv ADDED Viewed

	@@ -0,0 +1,7 @@

+method,srme
+M3GNet,1.412
+CHGNet,1.717
+MACE-MP(M),0.647
+SevenNet,0.767
+ORBv2(MPTrj),1.732
+eqV2(OMat-S),1.772

serve/app.py CHANGED Viewed

@@ -44,6 +44,8 @@ for task in TASKS:
     else:
         centered_pages.append(page)
 pg = st.navigation(nav)
@@ -75,4 +77,13 @@ st.toast(
     icon="🍞",
 )
 pg.run()

     else:
         centered_pages.append(page)
+# mbd = st.page_link(page="https://matbench-discovery.materialsproject.org/", label="Matbench Discovery", icon=":material/extension:")
+# nav["Other benchmarks"].append(mbd)
 pg = st.navigation(nav)
     icon="🍞",
 )
+# st.sidebar.markdown(
+# """
+# Other benchmarks
+# """
+# )
+# st.sidebar.page_link(
+#     "https://matbench-discovery.materialsproject.org/", label="Matbench Discovery", icon=":material/extension:"
+# )
 pg.run()

serve/leaderboard.py CHANGED Viewed

@@ -4,11 +4,12 @@ from pathlib import Path
 import pandas as pd
 import streamlit as st
 from mlip_arena.models import REGISTRY as MODELS
 from mlip_arena.tasks import REGISTRY as TASKS
 # Read the data
-DATA_DIR = Path("mlip_arena/tasks/diatomics")
 dfs = []
 for model in MODELS:
@@ -126,6 +127,6 @@ for task in TASKS:
     st.page_link(
         f"tasks/{TASKS[task]['task-page']}.py",
-        label="Task page",
         icon=":material/link:",
     )

 import pandas as pd
 import streamlit as st
+from mlip_arena import PKG_DIR
 from mlip_arena.models import REGISTRY as MODELS
 from mlip_arena.tasks import REGISTRY as TASKS
 # Read the data
+DATA_DIR = PKG_DIR / "tasks" /"diatomics"
 dfs = []
 for model in MODELS:
     st.page_link(
         f"tasks/{TASKS[task]['task-page']}.py",
+        label="Go to the associated task page",
         icon=":material/link:",
     )

serve/ranks/combustion.py CHANGED Viewed

@@ -46,4 +46,10 @@ def get_com_drifts(df):
 df_exploded = get_com_drifts(df)
-table = pd.DataFrame()

 df_exploded = get_com_drifts(df)
+table = pd.DataFrame()
+# def render():
+#     st.dataframe(
+#         table,
+#         use_container_width=True,
+#     )

serve/ranks/homonuclear-diatomics.py CHANGED Viewed

@@ -74,14 +74,10 @@ table["Rank"] += np.argsort(table["Force flips"].to_numpy())
 table["Rank"] += 1
-table.sort_values("Rank", ascending=True, inplace=True)
 table["Rank aggr."] = table["Rank"]
-table["Rank"] = np.argsort(table["Rank"].to_numpy()) + 1
-# table.drop(columns=["rank"], inplace=True)
-# table = table.rename(columns={"Rank": "Rank Aggr."})
 table = table.reindex(
     columns=[
@@ -128,4 +124,19 @@ def render():
         s,
         use_container_width=True,
     )
-    # return table

 table["Rank"] += 1
+table.sort_values(["Rank", "Conservation deviation [eV/Å]"], ascending=True, inplace=True)
 table["Rank aggr."] = table["Rank"]
+table["Rank"] = table["Rank aggr."].rank(method='min').astype(int)
 table = table.reindex(
     columns=[
         s,
         use_container_width=True,
     )
+    with st.expander(":material/info: Explanation"):
+        st.caption(
+            """
+            - **Conservation deviation**: The average deviation of force from negative energy gradient along the diatomic curves.
+            $$
+            \\text{Conservation deviation} = \\left\\langle\\left| \\mathbf{F}(\\mathbf{r})\\cdot\\frac{\\mathbf{r}}{\\|\\mathbf{r}\\|} +  \\nabla_rE\\right|\\right\\rangle_{r = \\|\\mathbf{r}\\|}
+            $$
+            - **Spearman's coeff. (Energy - repulsion)**: Spearman's correlation coefficient of energy prediction within equilibrium distance $r \\in (r_{min}, r_o = \\argmin_{r} E(r))$.
+            - **Spearman's coeff. (Force - descending)**: Spearman's correlation coefficient of force prediction within equilibrium distance $r \\in (r_{min}, r_o = \\argmin_{r} E(r))$.
+            - **Tortuosity**: The ratio between total variation in energy and sum of absolute energy differences between $r_{min}$, $r_o$, and $r_{max}$.
+            - **Energy jump**: The sum of energy discontinuity.
+            - **Force flips**: The number of sign changes.
+            """
+        )

serve/ranks/thermal-conductivity.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import pandas as pd
+import streamlit as st
+from mlip_arena import PKG_DIR
+DATA_DIR = PKG_DIR / "tasks" / "thermal-conductivity"
+table = pd.read_csv(DATA_DIR / "wte.csv")
+table.rename(
+    columns={
+        "method": "Model",
+        "srme": "SRME [W/mK]",
+    },
+    inplace=True,
+)
+table.set_index("Model", inplace=True)
+table.sort_values(["SRME [W/mK]"], ascending=True, inplace=True)
+table["Rank"] = table["SRME [W/mK]"].rank(method='min').astype(int)
+table = table.reindex(
+    columns=[
+        "Rank",
+        "SRME [W/mK]",
+    ]
+)
+s = (
+    table.style.background_gradient(
+        cmap="Reds", subset=["SRME [W/mK]"]
+    )
+    .background_gradient(
+        cmap="Blues",
+        subset=["Rank"],
+    )
+    .format("{:.3f}", subset=["SRME [W/mK]"])
+)
+def render():
+    st.dataframe(
+        s,
+        use_container_width=True
+    )
+    with st.expander(":material/info: Explanation"):
+        st.caption(
+            """
+            - **SRME**: symmetric relative mean error of single-phonon conductivity:
+            $$
+            \\text{SRME}[\\left\lbrace\\mathcal{K}({\\mathbf{q},s)}\\right\\rbrace] = \\frac{2}{N_qV}\\frac{\\sum_{\\mathbf{q}s}|\\mathcal{K}_{\\text{MLIP}}(\\mathbf{q},s) - \\mathcal{K}_{\\text{DFT}}(\\mathbf{q},s)|}{\\kappa_{\\text{MLIP}} + \\kappa_{\\text{DFT}}}
+            $$
+            """
+        )

serve/tasks/stability.py CHANGED Viewed

@@ -154,16 +154,16 @@ def plot_md_steps(counts_per_method, count_or_percetange):
 plot_md_steps(counts_per_method, count_or_percetange)
-st.markdown(
-    """
-> The histogram shows the distribution of the total number of MD steps before the system crashes or completes the trajectory. :red[The color of the bins indicates the number of steps in the bin]. :blue[The height of the bars indicates the number or percentage of each bin among all the runs].
 """
 )
 ###
 st.markdown(
-    """
 ## Inference speed
 The inference speed of the MLIPs is crucial for the high-throughput virtual screening. Under high pressure conditions, the atoms often move faster and closer to each other, which increases the size of neighbor list and local graph construction and hence slows down the inference speed.
@@ -221,8 +221,8 @@ def plot_speed(df, method_color_mapping):
 plot_speed(df, method_color_mapping)
-st.markdown(
-    """
-> The plot shows the inference speed (steps per second) as a function of the number of atoms in the system. :red[The size of the points is proportional to the total number of steps in the MD trajectory before crash or completion (~49990)]. :blue[The lines show the fit of the data to the power law function $a N^{-n}$], where $N$ is the number of atoms and $a$ and $n$ are the fit parameters.
 """
 )

 plot_md_steps(counts_per_method, count_or_percetange)
+st.caption(
+"""
+The histogram shows the distribution of the total number of MD steps before the system crashes or completes the trajectory. :red[The color of the bins indicates the number of steps in the bin]. :blue[The height of the bars indicates the number or percentage of each bin among all the runs].
 """
 )
 ###
 st.markdown(
+"""
 ## Inference speed
 The inference speed of the MLIPs is crucial for the high-throughput virtual screening. Under high pressure conditions, the atoms often move faster and closer to each other, which increases the size of neighbor list and local graph construction and hence slows down the inference speed.
 plot_speed(df, method_color_mapping)
+st.caption(
+"""
+The plot shows the inference speed (steps per second) as a function of the number of atoms in the system. :red[The size of the points is proportional to the total number of steps in the MD trajectory before crash or completion (~49990)]. :blue[The lines show the fit of the data to the power law function $a N^{-n}$], where $N$ is the number of atoms and $a$ and $n$ are the fit parameters.
 """
 )

serve/tasks/thermal-conductivity.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import pandas as pd
+import streamlit as st
+from mlip_arena import PKG_DIR
+st.markdown(
+"""
+# Thermal Conductivity
+Compared to Póta, Ahlawat, Csányi, and Simoncelli, [arXiv:2408.00755v4](https://arxiv.org/abs/2408.00755), the relaxation protocol has been updated and unified for all the models. The relaxation is a combination of sequential vc-relax (changes cell and atom positions) and relax (changes atom positions only). Each relaxation stage has a maximum number of 300 steps, and consist of a single FrechetCellFilter relaxation with force threshold =1e-4 eV/Ang. To preserve crystal symmetry, unit-cell angles are not allowed to change. This unified protocol gives the same SRME reported in [arXiv:2408.00755v4](https://arxiv.org/abs/2408.00755) for all the models but M3GNet. In M3GNet this updated relaxation protocol gives SRME = 1.412, slightly smaller than the value 1.469 that was obtained with the non-unified relaxation protocol in [arXiv:2408.00755v4](https://arxiv.org/abs/2408.00755).
+**SRME** is the Symmetric Relative Mean Error, defined as the mean of the absolute values of the relative errors of the predictions. Here, it is used to quantify the error on microscopic single-phonon conductivity:
+$$
+\\text{SRME}[\\left\lbrace\\mathcal{K}({\\mathbf{q},s)}\\right\\rbrace] = \\frac{2}{N_qV}\\frac{\\sum_{\\mathbf{q}s}|\\mathcal{K}_{\\text{MLIP}}(\\mathbf{q},s) - \\mathcal{K}_{\\text{DFT}}(\\mathbf{q},s)|}{\\kappa_{\\text{MLIP}} + \\kappa_{\\text{DFT}}}
+$$
+"""
+)
+DATA_DIR = PKG_DIR / "tasks" / "thermal-conductivity"
+table = pd.read_csv(DATA_DIR / "wte.csv")
+table.rename(
+    columns={
+        "method": "Model",
+        "srme": "SRME [W/mK]",
+    },
+    inplace=True,
+)
+table.set_index("Model", inplace=True)
+table.sort_values(["SRME [W/mK]"], ascending=True, inplace=True)
+s = table.style.background_gradient(
+    cmap="Reds", subset=["SRME [W/mK]"]
+)
+st.dataframe(
+    s,
+    use_container_width=True,
+    column_config={"SRME [W/mK]": {"format": "{:.3f}"}}
+)