davanstrien HF staff committed on
Commit
3215dde
1 Parent(s): 67b9f48

add filter option for readme length

Browse files
Files changed (1) hide show
  1. app.py +31 -7
app.py CHANGED
@@ -10,6 +10,7 @@ from httpx import Client
10
  from huggingface_hub import hf_hub_url, list_datasets
11
  from tqdm.auto import tqdm
12
  from tqdm.contrib.concurrent import thread_map
 
13
 
14
  load_dotenv()
15
 
@@ -50,7 +51,8 @@ def get_readme_len(dataset):
50
  url = hf_hub_url(dataset["id"], "README.md", repo_type="dataset")
51
  resp = client.get(url)
52
  if resp.status_code == 200:
53
- dataset["len"] = len(resp.text)
 
54
  return dataset
55
  except Exception as e:
56
  print(e)
@@ -115,8 +117,7 @@ def prep_dataframe(remove_orgs_and_users=remove_orgs, columns_to_drop=columns_to
115
  return df
116
 
117
 
118
- def filter_df_by_max_age(max_age_days=None):
119
- df = prep_dataframe()
120
  df = df.dropna(subset=["created"])
121
  now = datetime.now()
122
  if max_age_days is not None:
@@ -126,15 +127,38 @@ def filter_df_by_max_age(max_age_days=None):
126
  return df
127
 
128
 
129
- def filter_by_readme_len(df, min_len=None, max_len=None):
130
- pass
 
 
 
 
 
 
 
 
 
 
 
131
 
132
 
133
  with gr.Blocks() as demo:
134
  max_age_days = gr.Slider(
135
  label="Max Age (days)", value=7, minimum=0, maximum=90, step=1, interactive=True
136
  )
137
- output = gr.DataFrame(prep_dataframe(), datatype="markdown", min_width=160 * 2.5)
138
- max_age_days.input(filter_df_by_max_age, inputs=[max_age_days], outputs=[output])
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  demo.launch()
 
10
  from huggingface_hub import hf_hub_url, list_datasets
11
  from tqdm.auto import tqdm
12
  from tqdm.contrib.concurrent import thread_map
13
+ from huggingface_hub import DatasetCard
14
 
15
  load_dotenv()
16
 
 
51
  url = hf_hub_url(dataset["id"], "README.md", repo_type="dataset")
52
  resp = client.get(url)
53
  if resp.status_code == 200:
54
+ card = DatasetCard(resp.text)
55
+ dataset["len"] = len(card.text)
56
  return dataset
57
  except Exception as e:
58
  print(e)
 
117
  return df
118
 
119
 
120
+ def filter_df_by_max_age(df, max_age_days=None):
 
121
  df = df.dropna(subset=["created"])
122
  now = datetime.now()
123
  if max_age_days is not None:
 
127
  return df
128
 
129
 
130
+ def filter_by_readme_len(df, min_len=None):
131
+ if min_len is not None:
132
+ df = df[df["len"] >= min_len]
133
+ return df
134
+
135
+
136
+ def filter_df(max_age_days=None, min_len=None):
137
+ df = prep_dataframe()
138
+ if max_age_days is not None:
139
+ df = filter_df_by_max_age(df, max_age_days=max_age_days)
140
+ if min_len is not None:
141
+ df = filter_by_readme_len(df, min_len=min_len)
142
+ return df
143
 
144
 
145
  with gr.Blocks() as demo:
146
  max_age_days = gr.Slider(
147
  label="Max Age (days)", value=7, minimum=0, maximum=90, step=1, interactive=True
148
  )
149
+
150
+ min_len = gr.Slider(
151
+ label="Minimum README Length",
152
+ value=300,
153
+ minimum=0,
154
+ maximum=1000,
155
+ step=50,
156
+ interactive=True,
157
+ )
158
+
159
+ output = gr.DataFrame(filter_df, datatype="markdown", min_width=160 * 2.5)
160
+ max_age_days.input(filter_df, inputs=[max_age_days, min_len], outputs=[output])
161
+ min_len.input(filter_df, inputs=[max_age_days, min_len], outputs=[output])
162
+
163
 
164
  demo.launch()