inoid committed on
Commit
c6ee43b
1 Parent(s): fe72bea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -61
app.py CHANGED
@@ -57,9 +57,7 @@ classificationResult = pipe("El objetivo de esta tesis es elaborar un estudio de
57
 
58
 
59
  def thesis_prediction(input):
60
- tokenizer = AutoTokenizer.from_pretrained('hiiamsid/BETO_es_binary_classification', use_fast=False)
61
- X_val_inputs, X_val_masks = preprocessingtext(_text,tokenizer)
62
- t0 = time.time()
63
 
64
  # Deserialization of the file
65
  #file = open(path + os.path.sep + 'classIndexAssociation.pkl', 'rb')
@@ -68,66 +66,17 @@ def thesis_prediction(input):
68
  #sizeOfClass = len(new_model)
69
 
70
  model = AutoModelForSequenceClassification.from_pretrained(
71
- 'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False, output_hidden_states=False)
72
- #Bibliografy from:
73
- #
74
- # https://huggingface.co/docs/transformers/main_classes/output
75
- #
76
- inputs = tokenizer(_text, return_tensors="pt")
77
- labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
78
- outputs = model(**inputs, labels=labels)
79
-
80
- loss, logits = outputs[:2]
81
-
82
- #Transform in array
83
- logits = logits.detach().cpu().numpy()
84
-
85
- #Get max element and position
86
- result = logits.argmax()
87
- return result
88
-
89
- #Example from
90
- #
91
- #
92
- #
93
- # pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
94
- # # Put the model in evaluation mode
95
- # classificationResult = pipe(_text)
96
- # if classificationResult[0] != None and len (classificationResult[0]) > 0:
97
- # #Order the result with more close to 1
98
- # classificationResult[0].sort(reverse=True, key=lambda x:x['score'])
99
  # # Return the text clasification
100
  # keyClass = classificationResult[0][0]['label']
101
- # keyClass = keyClass.replace("LABEL_","").strip()
102
- # if keyClass.isnumeric():
103
- # return new_model[ int (keyClass)]
104
- # else:
105
- # raise Exception("Not exist class info")
106
- # model.eval()
107
- # outputs = model(X_val_inputs,
108
- # token_type_ids=None,
109
- # attention_mask=X_val_masks)
110
- #
111
- # # The "logits" are the output values
112
- # # prior to applying an activation function
113
- # logits = outputs[0]
114
- #
115
- # # Move logits and labels to CPU
116
- # logits = logits.detach().cpu().numpy()
117
- #
118
- # sorted_tuples = sorted(logits.items(), key=lambda item: item[1])
119
- # #Return the text clasification
120
- # keyClass = sorted_tuples.keys()[0]
121
- # return new_model[keyClass]
122
-
123
- #else:
124
- # raise Exception("Not exist model info")
125
- #else:
126
- # raise Exception("Not exist model info")
127
- #return "Text"
128
-
129
- #pass
130
-
131
 
132
  examples = [["Introducción al análisis de riesgos competitivos bajo el enfoque de la función de incidencia acumulada (FIA) y su aplicación con R"], ["Los promedios de calificaciones y clasificar por grupo o asignatura se realizaron a través de tablas dinámicas en Excel"]]
133
 
 
57
 
58
 
59
  def thesis_prediction(input):
60
+ tokenizer = AutoTokenizer.from_pretrained('"hiiamsid/BETO_es_binary_classification"', use_fast=False)
 
 
61
 
62
  # Deserialization of the file
63
  #file = open(path + os.path.sep + 'classIndexAssociation.pkl', 'rb')
 
66
  #sizeOfClass = len(new_model)
67
 
68
  model = AutoModelForSequenceClassification.from_pretrained(
69
+ 'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False,
70
+ output_hidden_states=False)
71
+
72
+ pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
73
+ classificationResult = pipe(_text)
74
+ classificationResult[0].sort(reverse=True, key=lambda x:x['score'])
75
+ keyClass = classificationResult[0][0]['label']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # # Return the text clasification
77
  # keyClass = classificationResult[0][0]['label']
78
+ return keyClass
79
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  examples = [["Introducción al análisis de riesgos competitivos bajo el enfoque de la función de incidencia acumulada (FIA) y su aplicación con R"], ["Los promedios de calificaciones y clasificar por grupo o asignatura se realizaron a través de tablas dinámicas en Excel"]]
82