Spaces:

librarian-bots
/

new_hub_datasets

Sleeping

davanstrien HF staff committed on Oct 17, 2023

Commit

f5faacd

•

1 Parent(s): e62ac39

update'

Files changed (3) hide show

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from httpx import Client
 from huggingface_hub import DatasetCard, hf_hub_url, list_datasets
 from tqdm.auto import tqdm
 from tqdm.contrib.concurrent import thread_map
 load_dotenv()
@@ -30,11 +30,12 @@ client = Client(
     headers=headers,
     timeout=60,
 )
-LOCAL = False
-if platform == "darwin":
-    LOCAL = True
-cache_dir = "cache" if LOCAL else "/data/diskcache"
-cache = Cache(cache_dir)
 def add_created_data(dataset):
@@ -90,7 +91,7 @@ def render_model_hub_link(hub_id):
     )
-@cache.memoize(expire=CACHE_TIME)
 def get_datasets():
     return list(
         tqdm(
@@ -101,7 +102,7 @@ def get_datasets():
     )
-@cache.memoize(expire=CACHE_TIME)
 def load_data():
     datasets = get_datasets()
     datasets = [add_created_data(dataset) for dataset in tqdm(datasets)]
@@ -162,7 +163,7 @@ def filter_by_readme_len(df, min_len=None):
 def filter_df(max_age_days=None, min_len=None, needs_server_preview: bool = False):
     df = prep_dataframe()
     if needs_server_preview:
-        df = df[df["server_preview"] is True]
     if max_age_days is not None:
         df = filter_df_by_max_age(df, max_age_days=max_age_days)
     if min_len is not None:
@@ -194,7 +195,7 @@ with gr.Blocks() as demo:
             interactive=True,
         )
         needs_server_preview = gr.Checkbox(
-            label="Needs Server Preview", default=False, interactive=True
         )
     output = gr.DataFrame(filter_df, datatype="markdown", min_width=160 * 2.5)

 from huggingface_hub import DatasetCard, hf_hub_url, list_datasets
 from tqdm.auto import tqdm
 from tqdm.contrib.concurrent import thread_map
+from cachetools import TTLCache, cached
 load_dotenv()
     headers=headers,
     timeout=60,
 )
+# LOCAL = False
+# if platform == "darwin":
+#     LOCAL = True
+# cache_dir = "cache" if LOCAL else "/data/diskcache"
+# cache = Cache(cache_dir)
+cache = TTLCache(maxsize=10, ttl=CACHE_TIME)
 def add_created_data(dataset):
     )
+@cached(cache)
 def get_datasets():
     return list(
         tqdm(
     )
+@cached(cache)
 def load_data():
     datasets = get_datasets()
     datasets = [add_created_data(dataset) for dataset in tqdm(datasets)]
 def filter_df(max_age_days=None, min_len=None, needs_server_preview: bool = False):
     df = prep_dataframe()
     if needs_server_preview:
+        df = df[df["server_preview"] == True]
     if max_age_days is not None:
         df = filter_df_by_max_age(df, max_age_days=max_age_days)
     if min_len is not None:
             interactive=True,
         )
         needs_server_preview = gr.Checkbox(
+            label="Needs Server Preview", value=False, interactive=True
         )
     output = gr.DataFrame(filter_df, datatype="markdown", min_width=160 * 2.5)

requirements.in CHANGED Viewed

@@ -1,3 +1,4 @@
 datasets
 datasets
 diskcache

+cachetools
 datasets
 datasets
 diskcache

requirements.txt CHANGED Viewed

@@ -16,8 +16,9 @@ altair==5.1.2
     # via gradio
 annotated-types==0.6.0
     # via pydantic
-anyio==4.0.0
     # via
     #   httpcore
     #   starlette
 async-timeout==4.0.3
@@ -27,6 +28,8 @@ attrs==23.1.0
     #   aiohttp
     #   jsonschema
     #   referencing
 certifi==2023.7.22
     # via
     #   httpcore
@@ -42,7 +45,7 @@ contourpy==1.1.1
     # via matplotlib
 cycler==0.12.1
     # via matplotlib
-datasets==2.14.4
     # via -r requirements.in
 dill==0.3.7
     # via
@@ -50,7 +53,7 @@ dill==0.3.7
     #   multiprocess
 diskcache==5.6.3
     # via -r requirements.in
-fastapi==0.103.0
     # via gradio
 ffmpy==0.3.1
     # via gradio
@@ -62,14 +65,14 @@ frozenlist==1.4.0
     # via
     #   aiohttp
     #   aiosignal
-fsspec[http]==2023.9.2
     # via
     #   datasets
     #   gradio-client
     #   huggingface-hub
-gradio==3.47.1
     # via -r requirements.in
-gradio-client==0.6.0
     # via gradio
 h11==0.14.0
     # via
@@ -118,7 +121,7 @@ multidict==6.0.4
     #   yarl
 multiprocess==0.70.15
     # via datasets
-numpy==1.26.0
     # via
     #   altair
     #   contourpy

     # via gradio
 annotated-types==0.6.0
     # via pydantic
+anyio==3.7.1
     # via
+    #   fastapi
     #   httpcore
     #   starlette
 async-timeout==4.0.3
     #   aiohttp
     #   jsonschema
     #   referencing
+cachetools==5.3.1
+    # via -r requirements.in
 certifi==2023.7.22
     # via
     #   httpcore
     # via matplotlib
 cycler==0.12.1
     # via matplotlib
+datasets==2.14.5
     # via -r requirements.in
 dill==0.3.7
     # via
     #   multiprocess
 diskcache==5.6.3
     # via -r requirements.in
+fastapi==0.103.2
     # via gradio
 ffmpy==0.3.1
     # via gradio
     # via
     #   aiohttp
     #   aiosignal
+fsspec[http]==2023.6.0
     # via
     #   datasets
     #   gradio-client
     #   huggingface-hub
+gradio==3.48.0
     # via -r requirements.in
+gradio-client==0.6.1
     # via gradio
 h11==0.14.0
     # via
     #   yarl
 multiprocess==0.70.15
     # via datasets
+numpy==1.26.1
     # via
     #   altair
     #   contourpy