aliabd HF staff committed on
Commit
92714bd
1 Parent(s): 01856eb

Upload with huggingface_hub

Browse files
Files changed (4) hide show
  1. DESCRIPTION.md +1 -0
  2. README.md +6 -7
  3. requirements.txt +3 -0
  4. run.py +281 -0
DESCRIPTION.md ADDED
@@ -0,0 +1 @@
 
 
1
+ This demo built with Blocks generates 9 plots based on the input.
README.md CHANGED
@@ -1,12 +1,11 @@
 
1
  ---
2
- title: Clustering Main
3
- emoji: 😻
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.6
8
- app_file: app.py
9
  pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  ---
3
+ title: clustering_main
4
+ emoji: 🔥
5
+ colorFrom: indigo
6
+ colorTo: indigo
7
  sdk: gradio
8
  sdk_version: 3.6
9
+ app_file: run.py
10
  pinned: false
11
  ---
 
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ matplotlib>=3.5.2
2
+ scikit-learn>=1.0.1
3
+ https://gradio-main-build.s3.amazonaws.com/c3bec6153737855510542e8154391f328ac72606/gradio-3.6-py3-none-any.whl
run.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import math
3
+ from functools import partial
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ from sklearn.cluster import (
7
+ AgglomerativeClustering, Birch, DBSCAN, KMeans, MeanShift, OPTICS, SpectralClustering, estimate_bandwidth
8
+ )
9
+ from sklearn.datasets import make_blobs, make_circles, make_moons
10
+ from sklearn.mixture import GaussianMixture
11
+ from sklearn.neighbors import kneighbors_graph
12
+ from sklearn.preprocessing import StandardScaler
13
+
14
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in matplotlib 3.6 and
# the old name was dropped in later releases; use whichever name is installed.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
SEED = 0  # random seed shared by the data generators and the seeded estimators
MAX_CLUSTERS = 10  # upper bound for the number of clusters
N_SAMPLES = 1000  # number of points generated per dataset
N_COLS = 3  # number of columns in the plot grid
FIGSIZE = 7, 7  # does not affect size in webpage
# colors used to draw the individual clusters
COLORS = [
    'blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan'
]
assert len(COLORS) >= MAX_CLUSTERS, "Not enough different colors for all clusters"
np.random.seed(SEED)
25
+
26
+
27
def normalize(X):
    """Return X transformed by a freshly fitted ``StandardScaler``."""
    scaler = StandardScaler()
    return scaler.fit_transform(X)
29
+
30
def get_regular(n_clusters):
    """Generate blobs around the first ``n_clusters`` centers of a fixed layout.

    Returns a ``(X, labels)`` pair: the normalized sample coordinates and the
    blob index of every sample as produced by ``make_blobs``.
    """
    # candidate centers, laid out in a spiral pattern
    spiral = [
        [0, 0], [1, 0], [1, 1], [0, 1], [-1, 1],
        [-1, 0], [-1, -1], [0, -1], [1, -1], [2, -1],
    ]
    chosen = spiral[:n_clusters]
    # fails when more clusters are requested than centers are available
    assert len(chosen) == n_clusters
    points, blob_ids = make_blobs(
        n_samples=N_SAMPLES,
        centers=chosen,
        cluster_std=0.25,
        random_state=SEED,
    )
    return normalize(points), blob_ids
47
+
48
+
49
def get_circles(n_clusters):
    """Generate the ``make_circles`` dataset; ``n_clusters`` is accepted but unused.

    Returns a ``(X, labels)`` pair with normalized coordinates.
    """
    points, ring_ids = make_circles(
        n_samples=N_SAMPLES,
        factor=0.5,
        noise=0.05,
        random_state=SEED,
    )
    scaled = normalize(points)
    return scaled, ring_ids
52
+
53
+
54
def get_moons(n_clusters):
    """Generate the ``make_moons`` dataset; ``n_clusters`` is accepted but unused.

    Returns a ``(X, labels)`` pair with normalized coordinates.
    """
    points, moon_ids = make_moons(
        n_samples=N_SAMPLES,
        noise=0.05,
        random_state=SEED,
    )
    scaled = normalize(points)
    return scaled, moon_ids
57
+
58
+
59
def get_noise(n_clusters):
    """Generate uniform random 2-D points with random labels in ``[0, n_clusters)``.

    Returns a ``(X, labels)`` pair with normalized coordinates.
    """
    # reseed the global NumPy generator so every call draws the same values
    np.random.seed(SEED)
    # the coordinates are drawn before the labels; the order matters for the
    # values obtained from the shared generator
    points = np.random.rand(N_SAMPLES, 2)
    random_ids = np.random.randint(0, n_clusters, size=(N_SAMPLES,))
    return normalize(points), random_ids
63
+
64
+
65
def get_anisotropic(n_clusters):
    """Generate ``n_clusters`` blobs and distort them with a fixed linear map.

    Returns a ``(X, labels)`` pair: the normalized, transformed coordinates and
    the blob index of every sample as produced by ``make_blobs``.
    """
    X, labels = make_blobs(n_samples=N_SAMPLES, centers=n_clusters, random_state=170)
    # fixed 2x2 matrix applied to every point, making the blobs anisotropic
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X = np.dot(X, transformation)
    # Standardize like the other dataset generators do, so that estimators with
    # scale-dependent settings (e.g. DBSCAN's eps=0.3) see comparable data.
    return normalize(X), labels
70
+
71
+
72
def get_varied(n_clusters):
    """Generate ``n_clusters`` blobs with differing standard deviations.

    Returns a ``(X, labels)`` pair with normalized coordinates.
    """
    # per-blob standard deviations; only the first n_clusters entries are used
    spreads = [1.0, 2.5, 0.5, 1.0, 2.5, 0.5, 1.0, 2.5, 0.5, 1.0]
    spreads = spreads[:n_clusters]
    # fails when more clusters are requested than deviations are listed
    assert len(spreads) == n_clusters
    points, blob_ids = make_blobs(
        n_samples=N_SAMPLES,
        centers=n_clusters,
        cluster_std=spreads,
        random_state=SEED,
    )
    return normalize(points), blob_ids
79
+
80
+
81
def get_spiral(n_clusters):
    """Generate a noisy 2-D spiral in which every point carries label 0.

    ``n_clusters`` is accepted but unused. Returns a ``(X, labels)`` pair with
    normalized coordinates.
    """
    # from https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_clustering.html
    np.random.seed(SEED)
    angle = 1.5 * np.pi * (1 + 3 * np.random.rand(1, N_SAMPLES))
    coords = np.concatenate((angle * np.cos(angle), angle * np.sin(angle)))
    coords += 0.7 * np.random.randn(2, N_SAMPLES)
    # transpose to one row per sample
    points = np.ascontiguousarray(coords.T)

    single_label = np.zeros(N_SAMPLES, dtype=int)
    return normalize(points), single_label
93
+
94
+
95
# dataset name -> generator function taking n_clusters and returning (X, labels)
DATA_MAPPING = dict(
    regular=get_regular,
    circles=get_circles,
    moons=get_moons,
    spiral=get_spiral,
    noise=get_noise,
    anisotropic=get_anisotropic,
    varied=get_varied,
)
104
+
105
+
106
def get_groundtruth_model(X, labels, n_clusters, **kwargs):
    """Return a stand-in "model" whose ``labels_`` attribute holds the true labels.

    ``X``, ``n_clusters`` and ``kwargs`` are accepted so the signature matches
    the other model factories, but they are not used.
    """
    # dummy model to show true label distribution
    class Dummy:
        def __init__(self, y):
            # Store the constructor argument itself rather than reaching for the
            # enclosing ``labels``; as an array so that ``.astype`` is available.
            self.labels_ = np.asarray(y)

    return Dummy(labels)
113
+
114
+
115
def get_kmeans(X, labels, n_clusters, **kwargs):
    """Fit a seeded k-means++ ``KMeans`` on X and return the fitted estimator.

    ``labels`` is unused; ``kwargs`` are applied on top of the defaults via
    ``set_params``.
    """
    estimator = KMeans(
        n_clusters=n_clusters,
        init="k-means++",
        n_init=10,
        random_state=SEED,
    )
    return estimator.set_params(**kwargs).fit(X)
119
+
120
+
121
def get_dbscan(X, labels, n_clusters, **kwargs):
    """Fit ``DBSCAN`` (eps=0.3) on X and return the fitted estimator.

    ``labels`` and ``n_clusters`` are unused; ``kwargs`` are applied on top of
    the defaults via ``set_params``.
    """
    estimator = DBSCAN(eps=0.3)
    return estimator.set_params(**kwargs).fit(X)
125
+
126
+
127
def get_agglomerative(X, labels, n_clusters, **kwargs):
    """Fit ward-linkage ``AgglomerativeClustering`` constrained by a k-NN graph.

    The connectivity graph uses ``n_clusters`` neighbors per point. ``labels``
    is unused; ``kwargs`` are applied on top of the defaults via ``set_params``.
    Returns the fitted estimator.
    """
    knn_graph = kneighbors_graph(X, n_neighbors=n_clusters, include_self=False)
    # make connectivity symmetric
    symmetric_graph = 0.5 * (knn_graph + knn_graph.T)
    estimator = AgglomerativeClustering(
        n_clusters=n_clusters,
        linkage="ward",
        connectivity=symmetric_graph,
    )
    return estimator.set_params(**kwargs).fit(X)
138
+
139
+
140
def get_meanshift(X, labels, n_clusters, **kwargs):
    """Fit ``MeanShift`` on X using a bandwidth estimated from X itself.

    ``labels`` and ``n_clusters`` are unused; ``kwargs`` are applied on top of
    the defaults via ``set_params``. Returns the fitted estimator.
    """
    estimator = MeanShift(
        bandwidth=estimate_bandwidth(X, quantile=0.25),
        bin_seeding=True,
    )
    return estimator.set_params(**kwargs).fit(X)
145
+
146
+
147
def get_spectral(X, labels, n_clusters, **kwargs):
    """Fit nearest-neighbors ``SpectralClustering`` on X and return the estimator.

    ``labels`` is unused; ``kwargs`` are applied on top of the defaults via
    ``set_params`` and may override any of them, including ``random_state``.
    """
    model = SpectralClustering(
        n_clusters=n_clusters,
        eigen_solver="arpack",
        affinity="nearest_neighbors",
        # seed the estimator like the KMeans and GaussianMixture factories do,
        # so the same inputs give the same plot on every callback
        random_state=SEED,
    )
    model.set_params(**kwargs)
    return model.fit(X)
155
+
156
+
157
def get_optics(X, labels, n_clusters, **kwargs):
    """Fit ``OPTICS`` on X with fixed defaults and return the fitted estimator.

    ``labels`` and ``n_clusters`` are unused; ``kwargs`` are applied on top of
    the defaults via ``set_params``.
    """
    estimator = OPTICS(min_samples=7, xi=0.05, min_cluster_size=0.1)
    return estimator.set_params(**kwargs).fit(X)
165
+
166
+
167
def get_birch(X, labels, n_clusters, **kwargs):
    """Fit ``Birch`` with ``n_clusters`` clusters on X and return the estimator.

    ``labels`` is unused; ``kwargs`` are applied on top of the defaults via
    ``set_params``.
    """
    estimator = Birch(n_clusters=n_clusters)
    return estimator.set_params(**kwargs).fit(X)
171
+
172
+
173
def get_gaussianmixture(X, labels, n_clusters, **kwargs):
    """Fit a seeded full-covariance ``GaussianMixture`` on X and return it.

    ``n_clusters`` is used as the number of mixture components. ``labels`` is
    unused; ``kwargs`` are applied on top of the defaults via ``set_params``.
    """
    estimator = GaussianMixture(
        n_components=n_clusters,
        covariance_type="full",
        random_state=SEED,
    )
    return estimator.set_params(**kwargs).fit(X)
179
+
180
+
181
# algorithm name -> factory called as factory(X, labels, n_clusters=...) that
# returns a fitted model; insertion order is the order of the dict's keys
MODEL_MAPPING = {
    "True labels": get_groundtruth_model,
    "KMeans": get_kmeans,
    "DBSCAN": get_dbscan,
    "MeanShift": get_meanshift,
    "SpectralClustering": get_spectral,
    "OPTICS": get_optics,
    "Birch": get_birch,
    "GaussianMixture": get_gaussianmixture,
    "AgglomerativeClustering": get_agglomerative,
}
192
+
193
+
194
def plot_clusters(ax, X, labels):
    """Scatter-plot the rows of X on ``ax``, colored by cluster label.

    Points labelled -1 are treated as outliers and drawn as black crosses;
    every other label gets a color from ``COLORS``. Grid lines and axis ticks
    are removed.

    Args:
        ax: axes-like object providing ``scatter``, ``grid``, ``set_xticks``
            and ``set_yticks``.
        X: array indexed as ``X[mask, 0]`` / ``X[mask, 1]`` for the x and y
            coordinates.
        labels: one label per row of X.

    Returns:
        The same ``ax`` that was passed in.
    """
    labels = np.asarray(labels)
    is_outlier = labels == -1  # -1 signifies outliers, which we plot separately

    # np.unique yields the distinct labels in sorted order
    for position, cluster_id in enumerate(np.unique(labels[~is_outlier])):
        # wrap around the palette so that clusters beyond len(COLORS) are still
        # drawn instead of being silently dropped
        color = COLORS[position % len(COLORS)]
        members = labels == cluster_id
        ax.scatter(X[members, 0], X[members, 1], color=color)

    # show outliers (if any)
    if is_outlier.any():
        ax.scatter(X[is_outlier, 0], X[is_outlier, 1], c='k', marker='x')

    # grid(None) toggles the current visibility; False switches the grid off
    # no matter what the active style enabled
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    return ax
212
+
213
+
214
def cluster(dataset: str, n_clusters: int, clustering_algorithm: str):
    """Generate a dataset, cluster it and return a figure of the result.

    Args:
        dataset: key into ``DATA_MAPPING`` selecting the data generator.
        n_clusters: requested number of clusters; either a number or a dict
            carrying the number under the ``'value'`` key.
        clustering_algorithm: key into ``MODEL_MAPPING`` selecting the model
            factory; also used as the plot title.

    Returns:
        The matplotlib figure containing the scatter plot.

    Raises:
        KeyError: if ``dataset`` or ``clustering_algorithm`` is not a known key.
    """
    if isinstance(n_clusters, dict):
        n_clusters = n_clusters['value']
    # convert in both cases so the generators always receive a plain int
    n_clusters = int(n_clusters)

    X, labels = DATA_MAPPING[dataset](n_clusters)
    model = MODEL_MAPPING[clustering_algorithm](X, labels, n_clusters=n_clusters)
    # models without a labels_ attribute are asked to predict on the same data
    if hasattr(model, "labels_"):
        y_pred = model.labels_.astype(int)
    else:
        y_pred = model.predict(X)

    fig, ax = plt.subplots(figsize=FIGSIZE)

    plot_clusters(ax, X, y_pred)
    ax.set_title(clustering_algorithm, fontsize=16)

    # Deregister the figure from pyplot so that repeated callbacks do not pile
    # up open figures; the Figure object itself is still handed to the caller.
    plt.close(fig)
    return fig
233
+
234
+
235
# page title and introductory text for the demo
title = "Clustering with Scikit-learn"
description = (
    "This example shows how different clustering algorithms work. Simply pick "
    "the dataset and the number of clusters to see how the clustering algorithms work. "
    "Colored circles are (predicted) labels and black x are outliers."
)
241
+
242
+
243
def iter_grid(n_rows, n_cols):
    """Yield once per grid cell while that cell's gradio contexts are open.

    Each of the ``n_rows`` rows is a ``gr.Row`` and each of its ``n_cols``
    cells a ``gr.Column``; the generator yields ``None`` from inside the
    column's ``with`` block.
    """
    # create a grid using gradio Block
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield None
250
+
251
# Build the page: a heading, the description, the two inputs and a grid with
# one plot per clustering algorithm.
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<b>{title}</b>")
    gr.Markdown(description)

    input_models = list(MODEL_MAPPING)
    input_data = gr.Radio(
        list(DATA_MAPPING),
        value="regular",
        label="dataset"
    )
    input_n_clusters = gr.Slider(
        minimum=1,
        maximum=MAX_CLUSTERS,
        value=4,
        step=1,
        label='Number of clusters'
    )
    n_rows = int(math.ceil(len(input_models) / N_COLS))
    remaining_models = iter(input_models)
    # Walk the whole grid instead of breaking out early: the generator is
    # suspended inside open Row/Column ``with`` blocks while the loop body runs,
    # and only resuming it to the end lets those blocks exit in order.
    for _ in iter_grid(n_rows, N_COLS):
        input_model = next(remaining_models, None)
        if input_model is None:
            continue  # surplus cell: leave it empty

        plot = gr.Plot(label=input_model)
        # bind the algorithm name now so each plot keeps its own callback
        fn = partial(cluster, clustering_algorithm=input_model)
        input_data.change(fn=fn, inputs=[input_data, input_n_clusters], outputs=plot)
        input_n_clusters.change(fn=fn, inputs=[input_data, input_n_clusters], outputs=plot)

# start the server only when run as a script, not when the module is imported
if __name__ == "__main__":
    demo.launch()