#!/usr/bin/env python3
"""
Data Scientist: Dr. Eddy Giusepe Chirinos Isidro

Purpose: this script uses a pre-trained model to extract named entities from
text and uses Python's ``logging`` package to record the results in a log file.
"""
import logging
import os
from typing import Dict, Iterable, List, Tuple

# Model card: https://huggingface.co/Babelscape/wikineural-multilingual-ner
DEFAULT_MODEL_NAME = "Babelscape/wikineural-multilingual-ner"

# File that receives the INFO-level records written by EntityRecognizer.
LOG_FILE_NAME = "reconhecimento_de_entidade.log"


class EntityRecognizer:
    """Run a Hugging Face NER pipeline over text and log what it finds.

    Attributes:
        model: The callable returned by ``load_model``.
        logger: The logger returned by ``setup_logger``.
    """

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the NER model and configure the file logger.

        Args:
            model_name: Model identifier passed to ``load_model``.
        """
        self.model = self.load_model(model_name)
        self.logger = self.setup_logger()

    def load_model(self, model_name=DEFAULT_MODEL_NAME):
        """Build the pre-trained NER pipeline for ``model_name``.

        Args:
            model_name: Identifier used for both the model and its tokenizer.

        Returns:
            The object returned by ``transformers.pipeline("ner", ...)``.
        """
        # Imported here rather than at module level so the pure-Python
        # helpers of this class stay usable (and importable) without paying
        # for, or requiring, the transformers import.
        from transformers import pipeline

        return pipeline("ner", model=model_name, tokenizer=model_name)

    def setup_logger(self):
        """Return this module's logger, writing INFO records to the log file.

        The logger is shared by every instance (it is looked up by module
        name), so the file handler is attached only if an equivalent one is
        not already present. Without that check each new ``EntityRecognizer``
        would add another handler and every message would be written once
        per instance created so far.

        Returns:
            The configured ``logging.Logger``.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # FileHandler stores the absolute path in ``baseFilename``.
        log_path = os.path.abspath(LOG_FILE_NAME)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == log_path
            for handler in logger.handlers
        )
        if not already_attached:
            # Entity text is frequently non-ASCII, so do not depend on the
            # locale's default encoding for the log file.
            file_handler = logging.FileHandler(LOG_FILE_NAME, encoding="utf-8")
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            )
            logger.addHandler(file_handler)

        return logger

    def recognize_entities(self, text: str) -> List[Tuple[str, str]]:
        """Run the NER model on ``text`` and log the recognized entities.

        Args:
            text: The text to analyse.

        Returns:
            One ``(word, entity_tag)`` tuple per prediction returned by the
            model, in the order the model returned them.
        """
        recognized_entities = [
            (prediction['word'], prediction['entity'])
            for prediction in self.model(text)
        ]
        # Lazy %-formatting: the message is only built if the record is
        # actually emitted. The resulting text is unchanged.
        self.logger.info("Entidades reconhecidas: %s", recognized_entities)
        return recognized_entities

    @staticmethod
    def _store_entity(
        result: Dict[str, str], tokens: List[str], entity_type: str
    ) -> None:
        """Record the space-joined ``tokens`` under ``entity_type`` if non-empty."""
        entity_text = " ".join(tokens)
        if entity_text:
            result[entity_text] = entity_type

    def process_classification_result(
        self, tokens_and_tags: Iterable[Tuple[str, str]]
    ) -> Dict[str, str]:
        """Group BIO-tagged tokens into a mapping of entity text to type.

        A ``B-<TYPE>`` tag opens a new entity of that type. An ``I-`` tag
        appends its token, separated by a space, to the entity currently
        open. Any other tag (for example ``O``) closes the open entity, so a
        later ``I-`` token is not glued onto an unrelated earlier entity. An
        ``I-`` token with no open entity is ignored.

        Args:
            tokens_and_tags: Iterable of ``(token, tag)`` pairs.

        Returns:
            Dict mapping each entity's text to its type (the tag without the
            ``B-`` prefix). If the same text occurs more than once, the last
            occurrence wins.
        """
        result: Dict[str, str] = {}
        current_type = None
        current_tokens: List[str] = []

        for token, tag in tokens_and_tags:
            if tag.startswith("I-") and current_type is not None:
                current_tokens.append(token)
                continue

            # Every other case ends whatever entity is currently open.
            if current_type is not None:
                self._store_entity(result, current_tokens, current_type)

            if tag.startswith("B-"):
                current_type = tag[2:]
                current_tokens = [token]
            else:
                current_type = None
                current_tokens = []

        # Flush the entity still open when the input ends.
        if current_type is not None:
            self._store_entity(result, current_tokens, current_type)

        return result


def main():
    """Example usage: recognize entities in a sample sentence and print them."""
    # Alternative example sentence:
    # text = "O Eddwin e a Karina foram para Estados Unidos a estudar em Harvard."
    text = "Eddy e Karina compraram uns tênis na loja Nike."

    # A different checkpoint can be selected with EntityRecognizer(model_name),
    # e.g. model_name = "Babelscape/wikineural-multilingual-ner".
    entity_recognizer = EntityRecognizer()

    recognized = entity_recognizer.recognize_entities(text)
    print(recognized)
    print("🤗🤗🤗")

    result = entity_recognizer.process_classification_result(recognized)
    # Strip the " ##" sequences left where continuation pieces were joined,
    # so a word that was split into pieces reads as one word again.
    result = {k.replace(" ##", ""): v for k, v in result.items()}
    print(result)


if __name__ == "__main__":
    main()