|
|
|
import streamlit as st |
|
from PIL import Image |
|
import spacy |
|
import streamlit as st |
|
from streamlit_pdf_viewer import pdf_viewer |
|
|
|
|
|
|
|
# Page chrome: configure the browser tab title and wide layout, then render
# the title, the header and the embedded PDF, in that order.
# set_page_config is the first Streamlit command issued by this script.
st.set_page_config(
    layout="wide",
    page_title="FACTOID: FACtual enTailment fOr hallucInation Detection",
)

st.title("Welcome to :blue[FACTOID] ")
st.header("FACTOID: FACtual enTailment fOr hallucInation Detection :blue[Web Demo]")

# Embed the PDF found at the path "fac.pdf" with the viewer width set to 700.
pdf_viewer(input="fac.pdf", width=700)
|
|
|
|
|
# Candidate sentences offered by each dropdown. Each list currently holds a
# single example, so every select box has exactly one choice.
# (Plain string literals: the originals used an f-prefix with no placeholders.)
sentence1 = [
    "U.S. President Barack Obama declared that the U.S. will refrain from deploying troops in Ukraine.",
]
sentence2 = [
    "Joe Biden said we’d not send U.S. troops to fight Russian troops in Ukraine, but we would provide robust military assistance and try to unify the Western world against Russia’s aggression.",
]

selected_sentence1 = st.selectbox("Select first sentence:", sentence1)
# Label fix: this box previously repeated "Select first sentence:" even though
# it chooses the second sentence.
selected_sentence2 = st.selectbox("Select second sentence:", sentence2)
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import torch

# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model_name = "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7"


@st.cache_resource
def _load_nli_model(name):
    """Load the tokenizer and sequence-classification model for *name*.

    Wrapped in ``st.cache_resource`` so the objects are created once and then
    reused, instead of being rebuilt every time this script is executed.

    Parameters:
        name (str): Hugging Face model identifier.

    Returns:
        tuple: ``(tokenizer, model)`` for the requested checkpoint.
    """
    nli_tokenizer = AutoTokenizer.from_pretrained(name, use_fast=False)
    nli_model = AutoModelForSequenceClassification.from_pretrained(name)
    return nli_tokenizer, nli_model


tokenizer, model = _load_nli_model(model_name)

# Bug fix: the inputs were moved to `device` but the model never was, so the
# forward pass failed with a device mismatch whenever CUDA was available.
model.to(device)

premise = selected_sentence1
hypothesis = selected_sentence2

# Named `encoded` rather than `input` so the builtin input() is not shadowed.
encoded = tokenizer(premise, hypothesis, truncation=True, return_tensors="pt")

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    output = model(
        input_ids=encoded["input_ids"].to(device),
        attention_mask=encoded["attention_mask"].to(device),
    )

# Softmax over the logits of the single (premise, hypothesis) pair.
scores = torch.softmax(output["logits"][0], -1).tolist()

# Display names paired positionally with the model's output classes.
# NOTE(review): assumes the checkpoint's class order is
# entailment / neutral / contradiction — confirm against the model config.
label_names = ["support", "neutral", "refute"]

prediction = {name: float(score) for score, name in zip(scores, label_names)}

# Label with the highest probability.
highest_label = max(prediction, key=prediction.get)
|
|
|
|
|
from transformers import pipeline


@st.cache_resource
def _load_entailment_pipeline(checkpoint):
    """Build the text-classification pipeline for *checkpoint*.

    Wrapped in ``st.cache_resource`` so the pipeline (and the model weights it
    holds) is created once and then reused, instead of being rebuilt every
    time this script is executed.

    Parameters:
        checkpoint (str): Hugging Face model identifier.

    Returns:
        The ``transformers`` pipeline object for that checkpoint.
    """
    return pipeline("text-classification", model=checkpoint)


pipe = _load_entailment_pipeline("sileod/deberta-v3-base-tasksource-nli")

# Classify the selected pair; the result is indexed as labels[0]["label"]
# further down the script.
labels = pipe([{"text": selected_sentence1, "text_pair": selected_sentence2}])
|
|
|
|
|
import en_core_web_sm |
|
|
|
|
|
@st.cache_resource
def _load_spacy_model(name):
    """Load the spaCy pipeline *name* once and reuse it on later calls."""
    return spacy.load(name)


def extract_person_names(sentence):
    """
    Extract the first person name from a sentence using spaCy's named entity
    recognition.

    Parameters:
        sentence (str): Input sentence.

    Returns:
        str | None: Text of the first entity labelled ``PERSON``, or ``None``
        when the sentence contains no such entity. (The previous
        implementation raised ``IndexError`` in that case, and its docstring
        advertised a list although a single name was returned.)
    """
    # The model is fetched through a cached loader instead of calling
    # spacy.load() on every invocation.
    doc = _load_spacy_model("en_core_web_sm")(sentence)

    # Stop at the first PERSON entity; fall back to None if there is none.
    return next(
        (entity.text for entity in doc.ents if entity.label_ == "PERSON"),
        None,
    )
|
|
|
# Run person-name extraction on each of the two selected sentences.
person_name1 = extract_person_names(selected_sentence1)
person_name2 = extract_person_names(selected_sentence2)

# Two side-by-side columns: the left one shows the first entailment model's
# top label, the right one shows the pipeline's label plus the extracted names.
col1, col2 = st.columns(2)

col1.write("Without Factual Entailment.")
col1.write("Textual Entailment Model:\n", highest_label)

col2.write("With Factual Entailment:")
col2.write("Textual Entailment Model:\n", labels[0]["label"])
col2.write("Span Detection Model:\n")
col2.write("{}::{}".format(person_name1, person_name2))
|
|
|
|