|
from flask import Flask, render_template, request, redirect, url_for, send_file, make_response |
|
import pandas as pd |
|
import io |
|
import csv |
|
import numpy as np |
|
|
|
from active_learning import get_initial_sample, get_uncertain_sample |
|
from datasets import Dataset, concatenate_datasets |
|
|
|
app = Flask(__name__)

# ---------------------------------------------------------------------------
# Module-level session state, shared by the views through ``global``.
# NOTE: being plain module globals, these are shared by every request served
# by this process; there is no per-user separation.
# ---------------------------------------------------------------------------

# Batch of rows currently presented for annotation (None until an upload).
uploaded_data = None

# Values read from the "label" column of the uploaded labels CSV.
labels = []

# Number of annotation rounds submitted so far.
al_step = 0

# Assumed total number of rows in the uploaded dataset.
data_amount = 100

# Everything labeled in earlier rounds (None until the first round is done).
dataset_labeled_prev = None

# Pool of rows not yet selected for annotation. Declared here so the name
# always exists, even before the first upload assigns it.
dataset_unlabeled = None

# Number of rows requested per annotation round (the "query_number" form
# field). Declared here for the same reason as ``dataset_unlabeled``.
num_query = None
|
|
|
|
|
@app.route("/", methods=["GET", "POST"])
def upload():
    """Render the upload form and start a new annotation session from a CSV.

    GET renders ``upload.html``.

    POST reads the ``query_number`` form field and the ``data_file`` CSV,
    asks ``get_initial_sample`` for the row indices of the first batch, and
    splits the data into the batch to annotate (``uploaded_data``) and the
    remaining pool (``dataset_unlabeled``). If a ``labels_file`` CSV is
    supplied, ``labels`` is replaced by its ``label`` column. The round
    counter and the previously labeled data are reset so a new upload never
    mixes with an earlier one. The client is then redirected to ``annotate``.

    Invalid submissions (missing, non-integer or non-positive
    ``query_number``; missing or unparsable ``data_file``; ``labels_file``
    without a ``label`` column) re-render the form with HTTP status 400 and
    leave the current session state untouched.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, num_query
    global dataset_labeled_prev

    if request.method != "POST":
        return render_template("upload.html")

    # ``get(..., type=int)`` yields None when the field is missing or is not
    # an integer, instead of raising from a bare ``int()`` call.
    requested = request.form.get("query_number", type=int)
    data_file = request.files.get("data_file")
    if requested is None or requested < 1 or not data_file:
        return render_template("upload.html"), 400

    # Parse both uploads before touching any global, so a bad file cannot
    # leave the session half-initialised.
    labels_file = request.files.get("labels_file")
    new_labels = None
    try:
        df = pd.read_csv(data_file)
        if labels_file:
            new_labels = pd.read_csv(labels_file)["label"].tolist()
    except (pd.errors.EmptyDataError, pd.errors.ParserError, KeyError):
        return render_template("upload.html"), 400

    unlabeled_data = Dataset.from_pandas(df)

    # Indices of the rows chosen for the first annotation batch.
    uncertain_samples = get_initial_sample(unlabeled_data, requested)

    # Every row index that was not selected stays in the unlabeled pool.
    all_list = np.arange(0, len(unlabeled_data))
    unlabeled_list = np.delete(all_list, uncertain_samples)

    # Commit the new session state.
    num_query = requested
    uploaded_data = unlabeled_data[uncertain_samples]
    # The indexed selection is wrapped back into a Dataset for later rounds.
    dataset_unlabeled = Dataset.from_dict(unlabeled_data[unlabeled_list])

    # A fresh upload starts from round zero with nothing labeled yet.
    al_step = 0
    dataset_labeled_prev = None

    # Keep the existing labels when no labels file accompanies the upload.
    if new_labels is not None:
        labels = new_labels

    return redirect(url_for("annotate"))
|
|
|
@app.route("/annotate", methods=["GET", "POST"])
def annotate():
    """Show the current batch for annotation and advance one round on POST.

    GET renders ``annotate.html`` with the current batch.

    POST reads the ``annotations`` form values, attaches them as the
    ``label`` column of the current batch, merges the batch with the data
    labeled in earlier rounds, and asks ``get_uncertain_sample`` for the row
    indices of the next batch. Those rows are removed from
    ``dataset_unlabeled`` and become the new ``uploaded_data``.

    If nothing has been uploaded yet, the client is redirected to the upload
    form. If the number of submitted annotations differs from the batch
    size, the current batch is re-rendered with HTTP status 400 and no state
    is changed.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, dataset_labeled_prev, num_query

    # Without an upload there is no batch and no pool to work with.
    if uploaded_data is None:
        return redirect(url_for("upload"))

    status = 200
    al_process = "Run get_initial_sample()..."

    if request.method == "POST":
        annotated_data = request.form.getlist("annotations")
        df = pd.DataFrame(uploaded_data)

        if len(annotated_data) != len(df):
            # Assigning a column of the wrong length would raise; reject the
            # submission before any counter or dataset is modified.
            status = 400
        else:
            step = al_step + 1
            al_process = "Run get_uncertain_sample()..."

            df["label"] = annotated_data
            dataset_labeled = Dataset.from_pandas(df)

            print(f"AL STEP #{step}")
            print(dataset_labeled)

            # From the second round on, accumulate with earlier labels.
            if step > 1:
                dataset_labeled = concatenate_datasets([dataset_labeled_prev, dataset_labeled])

            # NOTE(review): behaviour of get_uncertain_sample when the pool
            # holds fewer than num_query rows is not visible here - confirm.
            uncertain_samples = get_uncertain_sample(dataset_labeled, dataset_unlabeled, num_query)

            # Index the pool by its actual size rather than an assumed total
            # row count, so datasets of any length are split correctly.
            pool_indices = np.arange(0, len(dataset_unlabeled))
            unlabeled_list = np.delete(pool_indices, uncertain_samples)

            dataset_labeled_next = dataset_unlabeled[uncertain_samples]
            remaining_pool = Dataset.from_dict(dataset_unlabeled[unlabeled_list])

            # Commit only after every step above succeeded, so a failure
            # part-way through leaves the previous round intact.
            al_step = step
            dataset_labeled_prev = dataset_labeled
            uploaded_data = dataset_labeled_next
            dataset_unlabeled = remaining_pool

    page = render_template(
        "annotate.html",
        data=uploaded_data,
        labels=labels,
        al_step=al_step,
        n_unlabeled=len(dataset_unlabeled),
        al_process=al_process,
    )
    return page, status
|
|
|
if __name__ == "__main__":
    # Executed as a script: serve the app on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
|
|