Spaces:

somosnlp-hackathon-2022
/

clasificador-de-tesis

Runtime error

App Files Files Community

inoid committed on Apr 4, 2022

Commit

c6ee43b

•

1 Parent(s): fe72bea

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -61

app.py CHANGED Viewed

@@ -57,9 +57,7 @@ classificationResult = pipe("El objetivo de esta tesis es elaborar un estudio de
 def thesis_prediction(input):
-              tokenizer = AutoTokenizer.from_pretrained('hiiamsid/BETO_es_binary_classification', use_fast=False)
-              X_val_inputs, X_val_masks = preprocessingtext(_text,tokenizer)
-              t0 = time.time()
               # Deserialization of the file
               #file = open(path + os.path.sep + 'classIndexAssociation.pkl', 'rb')
@@ -68,66 +66,17 @@ def thesis_prediction(input):
               #sizeOfClass = len(new_model)
               model = AutoModelForSequenceClassification.from_pretrained(
-                    'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False, output_hidden_states=False)
-              #Bibliography from:
-              #
-              #  https://huggingface.co/docs/transformers/main_classes/output
-              #
-              inputs = tokenizer(_text, return_tensors="pt")
-              labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-              outputs = model(**inputs, labels=labels)
-              loss, logits = outputs[:2]
-              #Transform in array
-              logits = logits.detach().cpu().numpy()
-              #Get max element and position
-              result = logits.argmax()
-              return result
-              #Example from
-              #
-              #
-              #
-              # pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
-              # # Put the model in evaluation mode
-              # classificationResult = pipe(_text)
-              # if  classificationResult[0]  != None and len (classificationResult[0]) > 0:
-              #     #Order the result with more close to 1
-              #     classificationResult[0].sort(reverse=True, key=lambda x:x['score'])
               #     # Return the text classification
               #     keyClass = classificationResult[0][0]['label']
-              #     keyClass = keyClass.replace("LABEL_","").strip()
-              #     if  keyClass.isnumeric():
-              #       return new_model[ int (keyClass)]
-              #     else:
-              #         raise Exception("Not exist class info")
-                  # model.eval()
-                  # outputs = model(X_val_inputs,
-                  #                 token_type_ids=None,
-                  #                 attention_mask=X_val_masks)
-                  #
-                  # # The "logits" are the output values
-                  # # prior to applying an activation function
-                  # logits = outputs[0]
-                  #
-                  # # Move logits and labels to CPU
-                  # logits = logits.detach().cpu().numpy()
-                  #
-                  # sorted_tuples = sorted(logits.items(), key=lambda item: item[1])
-                  # #Return the text clasification
-                  # keyClass = sorted_tuples.keys()[0]
-                  # return new_model[keyClass]
-         #else:
-            # raise Exception("Not exist model info")
-     #else:
-     #   raise Exception("Not exist model info")
-     #return "Text"
-    #pass
 examples = [["Introducción al análisis de riesgos competitivos bajo el enfoque de la función de incidencia acumulada (FIA) y su aplicación con R"], ["Los promedios de calificaciones y clasificar por grupo o asignatura se realizaron a través de tablas dinámicas en Excel"]]

 def thesis_prediction(input):
+               tokenizer = AutoTokenizer.from_pretrained('"hiiamsid/BETO_es_binary_classification"', use_fast=False)
               # Deserialization of the file
               #file = open(path + os.path.sep + 'classIndexAssociation.pkl', 'rb')
               #sizeOfClass = len(new_model)
               model = AutoModelForSequenceClassification.from_pretrained(
+                   'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False,
+                  output_hidden_states=False)
+              pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
+              classificationResult = pipe(_text)
+              classificationResult[0].sort(reverse=True, key=lambda x:x['score'])
+              keyClass = classificationResult[0][0]['label']
               #     # Return the text classification
               #     keyClass = classificationResult[0][0]['label']
+              return keyClass
 examples = [["Introducción al análisis de riesgos competitivos bajo el enfoque de la función de incidencia acumulada (FIA) y su aplicación con R"], ["Los promedios de calificaciones y clasificar por grupo o asignatura se realizaron a través de tablas dinámicas en Excel"]]