spdin
committed on
Commit
•
1a82bcb
1
Parent(s):
3df246f
update
Browse files- README.md +4 -4
- active_learning.py +31 -0
- app.py +101 -0
- index.html +0 -19
- requirements.txt +3 -0
- static/css/styles.css +69 -0
- style.css +0 -28
- templates/annotate.html +89 -0
- templates/upload.html +54 -0
README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: red
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
7 |
pinned: false
|
8 |
---
|
9 |
|
|
|
1 |
---
|
2 |
+
title: Test Create Docker
|
3 |
+
emoji: 🐢
|
4 |
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: docker
|
7 |
pinned: false
|
8 |
---
|
9 |
|
active_learning.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def get_initial_sample(unlabeled_data, num_query):
    """Pick the indices of the first batch of samples to annotate.

    The batch is drawn uniformly at random, without replacement, from the
    index range of ``unlabeled_data``.

    Args:
        unlabeled_data: Sized collection of unlabeled samples; only its
            length is used.
        num_query: Number of samples requested. When it exceeds the pool
            size, every index in the pool is returned instead of raising.

    Returns:
        A 1-D NumPy integer array of distinct indices into
        ``unlabeled_data``.

    Raises:
        ValueError: If ``num_query`` is negative (raised by NumPy).
    """
    pool_size = len(unlabeled_data)
    # np.random.choice(..., replace=False) raises when asked for more items
    # than exist, so cap the request at the pool size.
    batch_size = min(num_query, pool_size)
    if batch_size == 0:
        return np.empty(0, dtype=int)
    return np.random.choice(pool_size, size=batch_size, replace=False)
|
11 |
+
|
12 |
+
def get_uncertain_sample(labeled_data, unlabeled_data, num_query):
    """Pick the indices of the next batch of samples to annotate.

    The current implementation ignores ``labeled_data`` and draws the batch
    uniformly at random, without replacement, from the index range of
    ``unlabeled_data``; no uncertainty score is computed. The chosen indices
    are printed to stdout before being returned.

    Args:
        labeled_data: Samples annotated so far (currently unused).
        unlabeled_data: Sized collection of remaining unlabeled samples;
            only its length is used.
        num_query: Number of samples requested. When it exceeds the pool
            size, every index in the pool is returned instead of raising.

    Returns:
        A 1-D NumPy integer array of distinct indices into
        ``unlabeled_data``.

    Raises:
        ValueError: If ``num_query`` is negative (raised by NumPy).
    """
    pool_size = len(unlabeled_data)
    # The pool shrinks every round, so the final batch is usually smaller
    # than num_query; cap the request so np.random.choice does not raise.
    batch_size = min(num_query, pool_size)
    if batch_size == 0:
        uncertain_samples = np.empty(0, dtype=int)
    else:
        uncertain_samples = np.random.choice(
            pool_size, size=batch_size, replace=False
        )
    print(uncertain_samples)

    return uncertain_samples
|
25 |
+
|
26 |
+
def get_stopping_conditioon(labeled_data, eval_metrics):
    """Report whether the active-learning loop should stop.

    Placeholder implementation: it prints ``eval_metrics`` and always
    returns ``True``; neither argument influences the result.

    Args:
        labeled_data: Samples annotated so far (currently unused).
        eval_metrics: Evaluation metrics of the current model; only printed.

    Returns:
        Always ``True``.
    """
    print(eval_metrics)

    return True


# Correctly spelled alias. The misspelled name above is kept as the
# definition so existing callers and imports keep working.
get_stopping_condition = get_stopping_conditioon
|
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request, redirect, url_for, send_file, make_response
|
2 |
+
import pandas as pd
|
3 |
+
import io
|
4 |
+
import csv
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
from active_learning import get_initial_sample, get_uncertain_sample
|
8 |
+
from datasets import Dataset, concatenate_datasets
|
9 |
+
|
10 |
+
app = Flask(__name__)
|
11 |
+
|
12 |
+
# Global variables to store data and labels. The request handlers rebind
# these through ``global`` statements, so every name they declare global is
# given an explicit module-level binding here.
uploaded_data = None  # batch currently presented for annotation
labels = []  # label choices offered in the annotation form
al_step = 0  # number of annotation rounds submitted so far
data_amount = 100
dataset_labeled_prev = None  # labeled samples accumulated in earlier rounds
dataset_unlabeled = None  # pool of samples not yet presented
num_query = 5  # samples per round; overwritten from the upload form
|
19 |
+
|
20 |
+
@app.route("/", methods=["GET", "POST"])
def upload():
    """Serve the upload form and start an annotation session on submit.

    GET renders ``upload.html``. POST reads the ``query_number`` form field
    and the ``data_file`` / ``labels_file`` CSV uploads, draws the first
    batch with ``get_initial_sample``, stores the batch and the remaining
    pool in the module-level globals, and redirects to ``annotate``.

    All input is validated before any global is modified, so a rejected
    submission leaves the previous session state untouched.

    Returns:
        The rendered upload page, a redirect to the annotation page, or a
        ``(message, 400)`` tuple when the submitted form is unusable.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, num_query
    global dataset_labeled_prev

    if request.method != "POST":
        return render_template("upload.html")

    # A missing or non-numeric batch size is a client error; report it as a
    # 400 instead of letting int() raise and surface as a 500.
    try:
        requested = int(request.form.get("query_number", ""))
    except ValueError:
        return "query_number must be an integer", 400
    if requested < 1:
        return "query_number must be at least 1", 400

    # Upload the data file
    data_file = request.files.get("data_file")
    if not data_file:
        return "data_file is required", 400
    try:
        df = pd.read_csv(data_file)
    except pd.errors.EmptyDataError:
        return "data_file is empty", 400
    if df.empty:
        return "data_file contains no rows", 400
    if requested > len(df):
        # Sampling without replacement cannot return more rows than exist.
        return "query_number exceeds the number of rows in data_file", 400

    # Upload the labels file (the previous labels are kept when none is sent)
    new_labels = None
    labels_file = request.files.get("labels_file")
    if labels_file:
        df_labels = pd.read_csv(labels_file)
        if "label" not in df_labels.columns:
            return 'labels_file must contain a "label" column', 400
        new_labels = df_labels["label"].tolist()

    unlabeled_data = Dataset.from_pandas(df)

    # get_initial_sample()
    uncertain_samples = get_initial_sample(unlabeled_data, requested)
    unlabeled_list = np.delete(
        np.arange(0, len(unlabeled_data)), uncertain_samples
    )

    # Everything validated: commit the new session state.
    num_query = requested
    uploaded_data = unlabeled_data[uncertain_samples]
    dataset_unlabeled = Dataset.from_dict(unlabeled_data[unlabeled_list])
    if new_labels is not None:
        labels = new_labels

    # A new upload starts a new session, so drop progress left over from a
    # previous one; otherwise stale labeled rows would be concatenated in.
    al_step = 0
    dataset_labeled_prev = None

    return redirect(url_for("annotate"))
|
54 |
+
|
55 |
+
@app.route("/annotate", methods=["GET", "POST"])
def annotate():
    """Show the current batch and, on submit, record labels and draw the next.

    GET renders the batch stored in ``uploaded_data``. POST reads the
    ``annotations`` form values, attaches them as a ``label`` column to the
    displayed batch, appends the result to the labeled data accumulated so
    far, asks ``get_uncertain_sample`` for the next batch, and removes that
    batch from the unlabeled pool.

    Returns:
        The rendered annotation page, a redirect to the upload page when no
        data has been uploaded yet, or a ``(message, 400)`` tuple when the
        number of submitted annotations does not match the displayed batch.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, dataset_labeled_prev, num_query

    # Opened directly, before any upload: there is nothing to annotate.
    if uploaded_data is None:
        return redirect(url_for("upload"))

    al_process = "Run get_initial_sample()..."

    if request.method == "POST":
        al_process = "Run get_uncertain_sample()..."

        annotated_data = request.form.getlist("annotations")
        df = pd.DataFrame(uploaded_data)
        if len(annotated_data) != len(df):
            # e.g. a stale form re-submitted after the batch changed.
            return "annotation count does not match the displayed samples", 400

        # An empty batch means the pool was already exhausted; there is
        # nothing to record and nothing left to sample.
        if not df.empty:
            al_step += 1
            df["label"] = annotated_data
            dataset_labeled = Dataset.from_pandas(df)

            print(f"AL STEP #{al_step}")
            print(dataset_labeled)

            if dataset_labeled_prev is not None:
                dataset_labeled = concatenate_datasets(
                    [dataset_labeled_prev, dataset_labeled]
                )
            dataset_labeled_prev = dataset_labeled

            # Size the index range from the actual pool rather than from a
            # fixed dataset size, so uploads of any length work.
            pool_size = len(dataset_unlabeled)
            if pool_size == 0:
                uploaded_data = {}
            else:
                # get_uncertain_sample()
                uncertain_samples = get_uncertain_sample(
                    dataset_labeled, dataset_unlabeled, min(num_query, pool_size)
                )
                unlabeled_list = np.delete(
                    np.arange(0, pool_size), uncertain_samples
                )

                uploaded_data = dataset_unlabeled[uncertain_samples]
                dataset_unlabeled = Dataset.from_dict(
                    dataset_unlabeled[unlabeled_list]
                )

    return render_template(
        "annotate.html",
        data=uploaded_data,
        labels=labels,
        al_step=al_step,
        n_unlabeled=len(dataset_unlabeled),
        al_process=al_process
    )
|
99 |
+
|
100 |
+
if __name__ == "__main__":
|
101 |
+
app.run(host="0.0.0.0", port=7860)
|
index.html
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
<!DOCTYPE html>
|
2 |
-
<html>
|
3 |
-
<head>
|
4 |
-
<meta charset="utf-8" />
|
5 |
-
<meta name="viewport" content="width=device-width" />
|
6 |
-
<title>My static Space</title>
|
7 |
-
<link rel="stylesheet" href="style.css" />
|
8 |
-
</head>
|
9 |
-
<body>
|
10 |
-
<div class="card">
|
11 |
-
<h1>Welcome to your static Space!</h1>
|
12 |
-
<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
|
13 |
-
<p>
|
14 |
-
Also don't forget to check the
|
15 |
-
<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
|
16 |
-
</p>
|
17 |
-
</div>
|
18 |
-
</body>
|
19 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
flask==3.0.0
|
2 |
+
requests==2.31.*
|
3 |
+
datasets
|
static/css/styles.css
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Style the header */
|
2 |
+
h1 {
|
3 |
+
text-align: center;
|
4 |
+
font-size: 24px;
|
5 |
+
margin-bottom: 20px;
|
6 |
+
}
|
7 |
+
|
8 |
+
/* Style the form container */
|
9 |
+
form {
|
10 |
+
width: 60%;
|
11 |
+
margin: 0 auto;
|
12 |
+
padding: 20px;
|
13 |
+
border: 1px solid #ccc;
|
14 |
+
border-radius: 5px;
|
15 |
+
background-color: #f9f9f9;
|
16 |
+
}
|
17 |
+
|
18 |
+
/* Style the file input fields */
|
19 |
+
/* input[type="file"] {
|
20 |
+
display: block;
|
21 |
+
margin: 10px 0;
|
22 |
+
} */
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
/* Style the table */
|
27 |
+
table {
|
28 |
+
width: 100%;
|
29 |
+
border-collapse: collapse;
|
30 |
+
margin-top: 20px;
|
31 |
+
}
|
32 |
+
|
33 |
+
/* Style table headers */
|
34 |
+
th {
|
35 |
+
background-color: #333;
|
36 |
+
color: white;
|
37 |
+
text-align: left;
|
38 |
+
}
|
39 |
+
|
40 |
+
/* Style table data rows */
|
41 |
+
tr:nth-child(even) {
|
42 |
+
background-color: #f2f2f2;
|
43 |
+
}
|
44 |
+
|
45 |
+
/* Style table data cells */
|
46 |
+
td {
|
47 |
+
padding: 8px;
|
48 |
+
border: 1px solid #ddd;
|
49 |
+
}
|
50 |
+
|
51 |
+
/* Style the submit button */
|
52 |
+
button[type="submit"] {
|
53 |
+
display: block;
|
54 |
+
margin: 20px 0;
|
55 |
+
padding: 10px 20px;
|
56 |
+
background-color: #333;
|
57 |
+
color: white;
|
58 |
+
border: none;
|
59 |
+
border-radius: 5px;
|
60 |
+
cursor: pointer;
|
61 |
+
}
|
62 |
+
|
63 |
+
/* Style the dropdown select */
|
64 |
+
select {
|
65 |
+
width: 100%;
|
66 |
+
padding: 8px;
|
67 |
+
border: 1px solid #ccc;
|
68 |
+
border-radius: 5px;
|
69 |
+
}
|
style.css
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
body {
|
2 |
-
padding: 2rem;
|
3 |
-
font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
|
4 |
-
}
|
5 |
-
|
6 |
-
h1 {
|
7 |
-
font-size: 16px;
|
8 |
-
margin-top: 0;
|
9 |
-
}
|
10 |
-
|
11 |
-
p {
|
12 |
-
color: rgb(107, 114, 128);
|
13 |
-
font-size: 15px;
|
14 |
-
margin-bottom: 10px;
|
15 |
-
margin-top: 5px;
|
16 |
-
}
|
17 |
-
|
18 |
-
.card {
|
19 |
-
max-width: 620px;
|
20 |
-
margin: 0 auto;
|
21 |
-
padding: 16px;
|
22 |
-
border: 1px solid lightgray;
|
23 |
-
border-radius: 16px;
|
24 |
-
}
|
25 |
-
|
26 |
-
.card p:last-child {
|
27 |
-
margin-bottom: 0;
|
28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
templates/annotate.html
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<title>Annotation Page</title>
|
6 |
+
|
7 |
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">
|
8 |
+
|
9 |
+
<style>
|
10 |
+
/* Style for selected rows */
|
11 |
+
.selected {
|
12 |
+
background-color: #a0e57e !important;
|
13 |
+
/* Change this color as desired */
|
14 |
+
}
|
15 |
+
|
16 |
+
#splash-screen {
|
17 |
+
position: fixed;
|
18 |
+
top: 0;
|
19 |
+
left: 0;
|
20 |
+
width: 100%;
|
21 |
+
height: 100%;
|
22 |
+
background-color: #ffffff;
|
23 |
+
display: flex;
|
24 |
+
justify-content: center;
|
25 |
+
align-items: center;
|
26 |
+
}
|
27 |
+
|
28 |
+
/* Style for the loading text */
|
29 |
+
#loading-text {
|
30 |
+
font-size: 24px;
|
31 |
+
font-weight: bold;
|
32 |
+
}
|
33 |
+
</style>
|
34 |
+
<script>
|
35 |
+
// JavaScript function to update row color
|
36 |
+
function updateRowColor(selectElement, row) {
|
37 |
+
if (selectElement.value === "") {
|
38 |
+
row.classList.remove("selected");
|
39 |
+
} else {
|
40 |
+
row.classList.add("selected");
|
41 |
+
}
|
42 |
+
}
|
43 |
+
|
44 |
+
// JavaScript to hide the splash screen after 2 seconds and display the main content
|
45 |
+
setTimeout(function () {
|
46 |
+
document.getElementById("splash-screen").style.display = "none";
|
47 |
+
document.getElementById("main-content").style.display = "block";
|
48 |
+
}, 2000); // 3000
|
49 |
+
</script>
|
50 |
+
</head>
|
51 |
+
|
52 |
+
<body>
|
53 |
+
|
54 |
+
<h1>Active Learning POC</h1>
|
55 |
+
<form method="POST">
|
56 |
+
<h3>Active Learning Step #{{ al_step }} - Remain data: {{n_unlabeled}} / 100</h3>
|
57 |
+
<div id="splash-screen">
|
58 |
+
<div id="loading-text">Active Learning Step #{{ al_step }}</br></br>{{al_process}}</div>
|
59 |
+
</div>
|
60 |
+
<div id="main-content" style="display: none;">
|
61 |
+
<table>
|
62 |
+
<colgroup>
|
63 |
+
<col style="width: 80%;">
|
64 |
+
<col style="width: 20%;">
|
65 |
+
</colgroup>
|
66 |
+
<tr>
|
67 |
+
<th>Text</th>
|
68 |
+
<th>Label</th>
|
69 |
+
</tr>
|
70 |
+
{% for item in data.text %}
|
71 |
+
<tr>
|
72 |
+
<td>{{ item }}</td>
|
73 |
+
<td>
|
74 |
+
<select name="annotations" onchange="updateRowColor(this, this.parentNode.parentNode)">
|
75 |
+
<option value="">None</option>
|
76 |
+
{% for label in labels %}
|
77 |
+
<option value="{{ label }}">{{ label }}</option>
|
78 |
+
{% endfor %}
|
79 |
+
</select>
|
80 |
+
</td>
|
81 |
+
</tr>
|
82 |
+
{% endfor %}
|
83 |
+
</table>
|
84 |
+
</div>
|
85 |
+
<button type="submit">Next Samples</button>
|
86 |
+
</form>
|
87 |
+
</body>
|
88 |
+
|
89 |
+
</html>
|
templates/upload.html
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<title>Active Learning POC</title>
|
6 |
+
|
7 |
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">
|
8 |
+
|
9 |
+
<style>
|
10 |
+
input[type=file] {
|
11 |
+
width: 350px;
|
12 |
+
max-width: 100%;
|
13 |
+
color: #444;
|
14 |
+
padding: 5px;
|
15 |
+
background: #fff;
|
16 |
+
border-radius: 10px;
|
17 |
+
border: 1px solid #555;
|
18 |
+
}
|
19 |
+
|
20 |
+
input[type=number] {
|
21 |
+
width: 100px;
|
22 |
+
max-width: 100%;
|
23 |
+
color: #444;
|
24 |
+
padding: 5px;
|
25 |
+
background: #fff;
|
26 |
+
border-radius: 10px;
|
27 |
+
border: 1px solid #555;
|
28 |
+
}
|
29 |
+
</style>
|
30 |
+
</head>
|
31 |
+
|
32 |
+
<body>
|
33 |
+
<h1>Active Learning POC</h1>
|
34 |
+
<form method="POST" enctype="multipart/form-data">
|
35 |
+
<h3>Upload Data File (CSV)</h3>
|
36 |
+
<input type="file" name="data_file" accept=".csv" required>
|
37 |
+
|
38 |
+
<h3>Upload Labels File (CSV)</h3>
|
39 |
+
<input type="file" name="labels_file" accept=".csv" required>
|
40 |
+
<table style="border-style: hidden">
|
41 |
+
<tr>
|
42 |
+
<td style="width: 200px;">
|
43 |
+
Active Learning Sample
|
44 |
+
</td>
|
45 |
+
<td><input type="number" name="query_number" required></td>
|
46 |
+
</tr>
|
47 |
+
</table>
|
48 |
+
|
49 |
+
|
50 |
+
<button type="submit">Start Labeling</button>
|
51 |
+
</form>
|
52 |
+
</body>
|
53 |
+
|
54 |
+
</html>
|