import re

import numpy as np
import pandas as pd
import streamlit as st
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Hugging Face Hub identifier of the extractive question-answering checkpoint
# (by its name, a RoBERTa-base model fine-tuned on SQuAD 2.0).
model_name = "deepset/roberta-base-squad2"

# Load the weights and the tokenizer exactly once and hand the instantiated
# objects to the pipeline. Passing the checkpoint *name* to `pipeline` while
# also calling `from_pretrained` separately built a second, unused copy of the
# model in memory.
# NOTE(review): Streamlit re-executes the script on every interaction; if the
# installed Streamlit version provides a resource cache, wrapping this loading
# step in it would avoid reloading the model each time -- confirm the version.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Callable QA pipeline: takes {'question': ..., 'context': ...} and returns a
# mapping whose 'answer' entry holds the extracted text span.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)

# Lower-case vocabulary used to decide whether an extracted "event" answer
# describes a crime. Every entry appears exactly once (the earlier revision
# repeated "fraud", "identity", "scheme" and "scam"); first-occurrence order is
# preserved.
# NOTE: "credit card" is the only multi-word entry. A consumer that compares
# single whitespace-separated tokens against this list can never match it;
# phrase-aware matching is required for it to take effect.
suspicious_words = [
    "robbery", "crime", "exchange", "extortion", "threat", "suspicious", "fraud", "laundering",
    "illegal", "contraband", "smuggling", "burglary", "assault", "hijacking", "kidnapping", "ransom",
    "hostage", "terrorism", "homicide", "murder", "manslaughter", "weapon", "gun", "explosive", "bomb", "knives",
    "threaten", "blackmail", "intimidate", "menace", "harassment", "stalking", "kidnap", "abduction", "guns", "bombs",
    "abuse", "trafficking", "prostitution", "pimping", "drug", "narcotic", "cocaine", "heroin", "methamphetamine",
    "amphetamine", "opiate", "meth", "gang", "gangster", "mafia", "racket", "extort", "embezzle", "corruption",
    "bribe", "scam", "forgery", "counterfeit", "fraudulent", "cybercrime", "hacker", "phishing", "identity", "theft",
    "credit card", "ponzi", "scheme", "pyramid", "money", "swindle", "deception",
    "conspiracy", "plot", "coercion", "corrupt", "criminal", "felony", "misdemeanor", "felon", "fugitive",
    "wanted", "arson", "arsonist", "arsony", "stolen", "steal", "loot", "heist", "launder", "hitman", "racketeer",
    "hijack", "smuggle", "terrorist", "kidnapper", "perpetrator", "ringleader", "prowler", "vigilante", "sabotage",
    "saboteur", "suicide", "discreet", "hide", "action", "profile", "alert", "vigilant", "clandestine", "riot", "arms", "deal",
]

# Questions put to the QA model for every sentence, in the fixed order
# event / location / time. The analysis loop reads the answers by position
# (index 0 = event, 1 = location, 2 = time), so the order must not change.
# All three are phrased as complete questions ending in "?"; the location
# question previously lacked the question mark.
q = [
    "What event is going to take place?",
    "Where is it going to happen?",
    "What time is it going to happen?",
]

# Per-question scratch buffers, overwritten for each sentence processed.
# They are sized from `q` so the three lists cannot drift apart.
a = [""] * len(q)            # extracted answer text, one slot per question
QA_input = [{} for _ in q]   # {'question', 'context'} payloads sent to the model
res = [{} for _ in q]        # raw pipeline results

def _find_suspicious_terms(text, vocabulary):
    """Return the entries of *vocabulary* that occur as whole words in *text*.

    The text is lower-cased and reduced to alphanumeric words before matching,
    so attached punctuation ("robbery.", "bomb,") no longer hides a hit, and
    multi-word entries such as "credit card" match as phrases.

    Args:
        text: Free text to scan (here: the model's "event" answer).
        vocabulary: Iterable of lower-case words or space-separated phrases.

    Returns:
        The set of vocabulary entries found in *text*; empty if none.
    """
    words = re.findall(r"[a-z0-9]+", text.lower())
    # Pad with spaces so that only whole words / whole phrases can match.
    padded = f" {' '.join(words)} "
    return {term for term in vocabulary if f" {term} " in padded}


# Read the sentences to analyse from the 'sentences' column of the workbook.
df = pd.read_excel('senti.xlsx')

# Empty spreadsheet cells arrive as NaN and blank strings carry no context for
# the QA model, so drop both before analysis.
_sentences = df['sentences'].dropna().astype(str).str.strip()
parsed_column = _sentences[_sentences != ""].to_list()

print(parsed_column)

# Build the lookup set once instead of once per sentence.
sus = set(suspicious_words)

for sentence in parsed_column:
    # Ask every question about the current sentence; results are stored by
    # position in the module-level buffers QA_input / res / a.
    for i, question in enumerate(q):
        QA_input[i] = {
            'question': question,
            'context': sentence
        }
        res[i] = nlp(QA_input[i])
        # Strip so that a whitespace-only answer counts as "nothing found".
        a[i] = res[i]['answer'].strip()

    # A sentence is flagged only when the extracted event mentions a
    # suspicious term.
    cw = _find_suspicious_terms(a[0], sus)
    if not cw:
        st.write("No crime detected")
        continue

    st.write("The crime detected is: ", a[0])

    if a[1]:
        st.write("The location of crime detected is: ", a[1])
    else:
        st.write("No location detected")

    if a[2]:
        st.write("The time of crime detected is: ", a[2])
    else:
        st.write("No time detected")