from metrics import calc_metrics
import gradio as gr
from openai import OpenAI
import os
from transformers import pipeline
# from dotenv import load_dotenv, find_dotenv
import huggingface_hub
import json
from evaluate_data import store_sample_data, get_metrics_trf
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Authenticate with the Hugging Face Hub so the fine-tuned model can be loaded.
hf_token = os.environ['HF_TOKEN']
huggingface_hub.login(hf_token)

# Token-classification (NER) pipeline fine-tuned on the FiNER-ORD dataset.
pipe = pipeline(
    "token-classification",
    model="elshehawy/finer-ord-transformers",
    aggregation_strategy="first",
)

# llm_model = 'gpt-3.5-turbo-0125'
# llm_model = 'gpt-4-0125-preview'
llm_model = 'gpt-3.5-turbo-0301'

# openai.api_key = os.environ['OPENAI_API_KEY']
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)


def get_completion(prompt, model=llm_model):
    """Send a single-turn prompt to the chat model and return the JSON string it produces."""
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        messages=messages,
        model=model,
        temperature=0,
        response_format={"type": "json_object"},
    )
    # print(response.choices[0].message.content)
    return response.choices[0].message.content


def find_orgs_gpt(sentence):
    """Ask the LLM to extract organization names from a sentence and return them as a list."""
    prompt = f"""
    In the context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.

    text:
    ```
    {sentence}
    ```

    Your output should be a JSON object that contains the extracted organizations.

    Output example 1:
    {{"Organizations": ["Organization 1", "Organization 2", "Organization 3"]}}

    Output example 2:
    {{"Organizations": []}}
    """

    sent_orgs_str = get_completion(prompt)
    sent_orgs = json.loads(sent_orgs_str)

    return sent_orgs['Organizations']


example = """
My latest exclusive for The Hill : Conservative frustration over Republican efforts to force a House vote on reauthorizing the Export - Import Bank boiled over Wednesday during a contentious GOP meeting.
"""


def find_orgs(uploaded_file):
    """Evaluate GPT-based and transformer-based organization extraction on an uploaded JSON dataset."""
    print(type(uploaded_file))
    uploaded_data = json.loads(uploaded_file)

    all_metrics = {}

    sample_data = store_sample_data(uploaded_data)

    # Collect the LLM's predictions alongside the gold organization annotations.
    gpt_orgs, true_orgs = [], []
    for sent in tqdm(sample_data):
        gpt_orgs.append(find_orgs_gpt(sent['text']))
        true_orgs.append(sent['orgs'])

    # Sentence embeddings are used for similarity-based matching of predicted vs. gold organizations.
    sim_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    all_metrics['gpt'] = calc_metrics(true_orgs, gpt_orgs, sim_model, threshold=0.85)
    print(all_metrics)

    all_metrics['trf'] = get_metrics_trf(uploaded_data)
    print(all_metrics)

    return all_metrics


upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')

iface = gr.Interface(fn=find_orgs, inputs=upload_btn, outputs="text")
iface.launch(share=True)