Spaces:
Sleeping
Sleeping
Commit
•
3215dde
1
Parent(s):
67b9f48
add filter option for readme length
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ from httpx import Client
|
|
10 |
from huggingface_hub import hf_hub_url, list_datasets
|
11 |
from tqdm.auto import tqdm
|
12 |
from tqdm.contrib.concurrent import thread_map
|
|
|
13 |
|
14 |
load_dotenv()
|
15 |
|
@@ -50,7 +51,8 @@ def get_readme_len(dataset):
|
|
50 |
url = hf_hub_url(dataset["id"], "README.md", repo_type="dataset")
|
51 |
resp = client.get(url)
|
52 |
if resp.status_code == 200:
|
53 |
-
|
|
|
54 |
return dataset
|
55 |
except Exception as e:
|
56 |
print(e)
|
@@ -115,8 +117,7 @@ def prep_dataframe(remove_orgs_and_users=remove_orgs, columns_to_drop=columns_to
|
|
115 |
return df
|
116 |
|
117 |
|
118 |
-
def filter_df_by_max_age(max_age_days=None):
|
119 |
-
df = prep_dataframe()
|
120 |
df = df.dropna(subset=["created"])
|
121 |
now = datetime.now()
|
122 |
if max_age_days is not None:
|
@@ -126,15 +127,38 @@ def filter_df_by_max_age(max_age_days=None):
|
|
126 |
return df
|
127 |
|
128 |
|
129 |
-
def filter_by_readme_len(df, min_len=None
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
|
133 |
with gr.Blocks() as demo:
|
134 |
max_age_days = gr.Slider(
|
135 |
label="Max Age (days)", value=7, minimum=0, maximum=90, step=1, interactive=True
|
136 |
)
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
demo.launch()
|
|
|
10 |
from huggingface_hub import hf_hub_url, list_datasets
|
11 |
from tqdm.auto import tqdm
|
12 |
from tqdm.contrib.concurrent import thread_map
|
13 |
+
from huggingface_hub import DatasetCard
|
14 |
|
15 |
load_dotenv()
|
16 |
|
|
|
51 |
url = hf_hub_url(dataset["id"], "README.md", repo_type="dataset")
|
52 |
resp = client.get(url)
|
53 |
if resp.status_code == 200:
|
54 |
+
card = DatasetCard(resp.text)
|
55 |
+
dataset["len"] = len(card.text)
|
56 |
return dataset
|
57 |
except Exception as e:
|
58 |
print(e)
|
|
|
117 |
return df
|
118 |
|
119 |
|
120 |
+
def filter_df_by_max_age(df, max_age_days=None):
|
|
|
121 |
df = df.dropna(subset=["created"])
|
122 |
now = datetime.now()
|
123 |
if max_age_days is not None:
|
|
|
127 |
return df
|
128 |
|
129 |
|
130 |
+
def filter_by_readme_len(df, min_len=None):
    """Keep only the rows whose ``len`` value is at least ``min_len``.

    Args:
        df: DataFrame with a ``len`` column (presumably the README length
            stored per dataset when its card is fetched -- confirm against
            the code that builds the frame).
        min_len: Inclusive lower bound on ``len``. When ``None`` the frame
            is returned as-is, without any filtering.

    Returns:
        ``df`` itself when ``min_len`` is ``None``; otherwise the subset of
        rows satisfying ``len >= min_len``.
    """
    if min_len is None:
        return df
    long_enough = df["len"] >= min_len
    return df[long_enough]
|
134 |
+
|
135 |
+
|
136 |
+
def filter_df(max_age_days=None, min_len=None):
    """Build the dataset table and apply whichever filters were requested.

    Args:
        max_age_days: Passed to ``filter_df_by_max_age`` when not ``None``;
            the age filter is skipped entirely when ``None``.
        min_len: Passed to ``filter_by_readme_len`` when not ``None``; the
            README-length filter is skipped entirely when ``None``.

    Returns:
        The frame produced by ``prep_dataframe()`` after the requested
        filters have been applied, age filter first.
    """
    frame = prep_dataframe()
    wants_age_filter = max_age_days is not None
    wants_len_filter = min_len is not None
    if wants_age_filter:
        frame = filter_df_by_max_age(frame, max_age_days=max_age_days)
    if wants_len_filter:
        frame = filter_by_readme_len(frame, min_len=min_len)
    return frame
|
143 |
|
144 |
|
145 |
# UI: two sliders drive the filtered dataset table.
with gr.Blocks() as demo:
    max_age_days = gr.Slider(
        label="Max Age (days)",
        value=7,
        minimum=0,
        maximum=90,
        step=1,
        interactive=True,
    )
    min_len = gr.Slider(
        label="Minimum README Length",
        value=300,
        minimum=0,
        maximum=1000,
        step=50,
        interactive=True,
    )

    # The table's initial value comes from calling filter_df.
    output = gr.DataFrame(filter_df, datatype="markdown", min_width=160 * 2.5)

    # Either slider changing re-runs the filter with both current values.
    filter_inputs = [max_age_days, min_len]
    for slider in filter_inputs:
        slider.input(filter_df, inputs=filter_inputs, outputs=[output])

demo.launch()
|