# File size: 3,266 Bytes
# 1a82bcb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from flask import Flask, render_template, request, redirect, url_for, send_file, make_response
import pandas as pd
import io
import csv
import numpy as np

from active_learning import get_initial_sample, get_uncertain_sample
from datasets import Dataset, concatenate_datasets

# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Module-level session state shared by the views through ``global``.
# ``dataset_unlabeled`` and ``num_query`` are not initialised here: upload()
# assigns both while handling a POST.
al_step = 0                  # number of completed annotation rounds
data_amount = 100            # nominal number of rows in the uploaded data
dataset_labeled_prev = None  # labelled data accumulated over earlier rounds
labels = []                  # label choices, read from the labels CSV
uploaded_data = None         # batch currently presented for annotation

@app.route("/", methods=["GET", "POST"])
def upload():
    """Serve the upload form and start a new active-learning session.

    GET renders ``upload.html``. POST reads the ``query_number`` form field,
    the ``data_file`` CSV and the optional ``labels_file`` CSV, picks the first
    batch to annotate with ``get_initial_sample`` and redirects to the
    ``annotate`` view.

    Returns:
        The rendered upload page, a redirect to ``annotate``, or a
        ``(message, 400)`` tuple when ``query_number`` is missing, not an
        integer or below 1, or when no data file is supplied and no dataset
        has been loaded yet.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, num_query
    global dataset_labeled_prev

    if request.method != "POST":
        return render_template("upload.html")

    # ``type=int`` yields None instead of raising when the field is absent or
    # not numeric, so bad input becomes a 400 rather than a 500.
    requested = request.form.get("query_number", type=int)
    if requested is None or requested < 1:
        return "query_number must be a positive integer", 400

    data_file = request.files.get("data_file")
    if not data_file and uploaded_data is None:
        # Without any dataset the annotate view has nothing to work with.
        return "data_file is required", 400

    num_query = requested

    if data_file:
        df = pd.read_csv(data_file)
        unlabeled_data = Dataset.from_pandas(df)

        # Row indices of the first batch to annotate.
        uncertain_samples = get_initial_sample(unlabeled_data, num_query)

        # Every remaining row index forms the unlabeled pool.
        all_list = np.arange(0, len(unlabeled_data))
        unlabeled_list = np.delete(all_list, uncertain_samples)

        # Indexing a Dataset with an index list returns a plain dict of
        # columns; the pool is wrapped back into a Dataset, the batch is not.
        dataset_unlabeled = Dataset.from_dict(unlabeled_data[unlabeled_list])
        uploaded_data = unlabeled_data[uncertain_samples]

        # A new dataset starts a new session: drop the round counter and the
        # labelled data accumulated from any previous upload.
        al_step = 0
        dataset_labeled_prev = None

    # The labels file is optional; the previous labels are kept when absent.
    labels_file = request.files.get("labels_file")
    if labels_file:
        df_labels = pd.read_csv(labels_file)
        labels = df_labels["label"].tolist()

    return redirect(url_for("annotate"))

@app.route("/annotate", methods=["GET", "POST"])
def annotate():
    """Show the current batch and, on POST, advance one active-learning round.

    GET renders ``annotate.html`` for the batch stored in ``uploaded_data``.
    POST reads the ``annotations`` form list, attaches it as the ``label``
    column of the current batch, merges the result with the labelled data of
    earlier rounds, asks ``get_uncertain_sample`` for the next batch and moves
    those rows out of the unlabeled pool.

    Returns:
        The rendered annotate page, a redirect to ``upload`` when no dataset
        has been uploaded yet, or a ``(message, 400)`` tuple when the number
        of submitted annotations differs from the batch size.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, dataset_labeled_prev, num_query

    # ``dataset_unlabeled`` and ``num_query`` only exist once upload() has
    # stored a batch, so send the user there first.
    if uploaded_data is None:
        return redirect(url_for("upload"))

    al_process = "Run get_initial_sample()..."

    if request.method == "POST":
        al_process = "Run get_uncertain_sample()..."

        annotated_data = request.form.getlist("annotations")
        df = pd.DataFrame(uploaded_data)
        if len(annotated_data) != len(df):
            return (
                f"expected {len(df)} annotations, got {len(annotated_data)}",
                400,
            )
        df["label"] = annotated_data
        dataset_labeled = Dataset.from_pandas(df)

        # The counter is committed only after the round succeeds, so a failed
        # request does not skew it.
        step = al_step + 1
        print(f"AL STEP #{step}")
        print(dataset_labeled)

        if dataset_labeled_prev is not None:
            dataset_labeled = concatenate_datasets([dataset_labeled_prev, dataset_labeled])

        # Indices, into the current unlabeled pool, of the next batch.
        uncertain_samples = get_uncertain_sample(dataset_labeled, dataset_unlabeled, num_query)

        # Size the index range from the pool itself rather than from a fixed
        # row count, so datasets of any length work.
        pool_indices = np.arange(len(dataset_unlabeled))
        remaining = np.delete(pool_indices, uncertain_samples)

        # Indexing a Dataset with an index list returns a dict of columns.
        next_batch = dataset_unlabeled[uncertain_samples]
        next_pool = Dataset.from_dict(dataset_unlabeled[remaining])

        al_step = step
        dataset_labeled_prev = dataset_labeled
        uploaded_data = next_batch
        dataset_unlabeled = next_pool

    return render_template(
        "annotate.html",
        data=uploaded_data,
        labels=labels,
        al_step=al_step,
        n_unlabeled=len(dataset_unlabeled),
        al_process=al_process,
    )

# Start Flask's built-in server only when this file is executed directly;
# it listens on every network interface, port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")