using_PYTHON_logging / 4_Entity_and_logging.py
EddyGiusepe's picture
NER and logging
7dbdab5
raw
history blame contribute delete
No virus
2.98 kB
#!/usr/bin/env python3
"""
Data Scientist: Dr. Eddy Giusepe Chirinos Isidro
Objetivo: Neste script utilizamos um modelo pré-treinado para extrair
entidades e usamos o pacote logging do Python para registrar
nossos logs.
"""
import logging
from transformers import pipeline
class EntityRecognizer:
    """Named-entity recognizer built on a Hugging Face ``ner`` pipeline.

    On construction the instance loads the pipeline and obtains a logger
    that writes INFO records to ``reconhecimento_de_entidade.log``.
    """

    def __init__(self, model_name="Babelscape/wikineural-multilingual-ner"):
        """Load the NER pipeline and configure logging.

        Args:
            model_name: Hugging Face model identifier, used for both the
                model and its tokenizer. Default model card:
                https://huggingface.co/Babelscape/wikineural-multilingual-ner
        """
        self.model = self.load_model(model_name)
        self.logger = self.setup_logger()

    def load_model(self, model_name="Babelscape/wikineural-multilingual-ner"):
        """Return a pre-trained ``ner`` pipeline for ``model_name``.

        Args:
            model_name: Hugging Face model identifier, passed as both the
                ``model`` and the ``tokenizer`` argument.

        Returns:
            The object created by ``pipeline("ner", ...)``.
        """
        return pipeline("ner", model=model_name, tokenizer=model_name)

    def setup_logger(self):
        """Return this module's logger, configured to log to a file.

        The logger is looked up by ``__name__``, so every instance of this
        class shares the same logger object. The file handler is attached
        only once; adding it on every call would write each record several
        times and leak open file handles.

        Returns:
            The configured ``logging.Logger``.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        has_file_handler = any(
            isinstance(handler, logging.FileHandler)
            for handler in logger.handlers
        )
        if not has_file_handler:
            formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s'
            )
            # Explicit UTF-8: the logged tokens come from a multilingual
            # model and may not be encodable in the locale's default codec.
            file_handler = logging.FileHandler(
                'reconhecimento_de_entidade.log', encoding='utf-8'
            )
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
        return logger

    def recognize_entities(self, text):
        """Run the NER model on ``text`` and log what it found.

        Args:
            text: Input passed directly to ``self.model``.

        Returns:
            A list of ``(word, entity_tag)`` tuples, one per item returned
            by the model, built from each item's ``'word'`` and ``'entity'``
            keys.
        """
        recognized_entities = [
            (entity['word'], entity['entity']) for entity in self.model(text)
        ]
        # Lazy %-formatting: the message is only rendered if INFO is enabled.
        self.logger.info("Entidades reconhecidas: %s", recognized_entities)
        return recognized_entities

    def process_classification_result(self, tokens_and_tags):
        """Group tagged tokens into whole entities.

        A tag starting with ``"B-"`` opens a new entity whose type is the
        rest of the tag; a tag starting with ``"I-"`` appends its token to
        the open entity, separated by a single space. Tokens with any other
        tag are ignored. Tokens are joined verbatim, so sub-word markers
        such as ``"##"`` are kept and must be cleaned up by the caller.

        Args:
            tokens_and_tags: Iterable of ``(token, tag)`` pairs.

        Returns:
            A dict mapping entity text to entity type. If the same entity
            text occurs more than once, the last occurrence wins.
        """
        result = {}
        current_type = None
        current_entity = ""

        for token, tag in tokens_and_tags:
            if tag.startswith("B-"):
                # Flush the entity that was being built before starting anew.
                if current_type is not None and current_entity:
                    result[current_entity] = current_type
                current_type = tag[2:]
                current_entity = token
            elif tag.startswith("I-") and current_type is not None:
                # An "I-" tag with no open entity has nothing to extend, so
                # it is skipped rather than accumulated.
                current_entity += " " + token

        # Flush the trailing entity, if any.
        if current_type is not None and current_entity:
            result[current_entity] = current_type
        return result
if __name__ == "__main__":
    # Usage example. Alternatives kept for reference:
    #   model_name = "Babelscape/wikineural-multilingual-ner"
    #   recognizer = EntityRecognizer(model_name)
    #   sample_text = "O Eddwin e a Karina foram para Estados Unidos a estudar em Harvard."
    sample_text = "Eddy e Karina compraram uns tênis na loja Nike."
    recognizer = EntityRecognizer()

    # Raw (token, tag) pairs as produced by the model.
    token_tags = recognizer.recognize_entities(sample_text)
    print(token_tags)
    print("🤗🤗🤗")

    # Group tokens into entities, then drop the ' ##' sub-word markers
    # from the entity names.
    grouped = recognizer.process_classification_result(token_tags)
    cleaned = {}
    for entity_name, entity_label in grouped.items():
        cleaned[entity_name.replace(" ##", "")] = entity_label
    print(cleaned)