# Source: Hugging Face Spaces app (page status when captured: Sleeping)
import gradio as gr | |
import spacy | |
import pandas as pd | |
import networkx as nx | |
import matplotlib.pyplot as plt | |
# Load the spaCy pipeline once, at import time
nlp = spacy.load("en_core_web_sm")

# Read the lexicon spreadsheet and keep its 'causal_markers' column
cues = pd.read_excel("link_cues.xlsx")
list_causalmarkers = cues["causal_markers"]
def contains_words_or_phrases(words_list, sentence):
    """
    Report whether the sentence contains any word or phrase from words_list.

    Matching is a case-insensitive substring search, so a cue also matches
    when it occurs inside a longer word (e.g. "so" is found in "social").

    :param words_list: Iterable of words or phrases to check. Entries that are
        not strings, or that are empty / whitespace-only, are ignored.
    :param sentence: The input sentence where to look for words or phrases
    :return: True if at least one usable word or phrase occurs in the
        sentence, otherwise False
    """
    # Nothing to search for, or nothing to search in.
    if words_list is None or not isinstance(sentence, str):
        return False

    # Normalize the sentence to lower case to make the search case insensitive
    normalized_sentence = sentence.lower()

    # An empty string is a substring of every sentence, and non-string cells
    # (e.g. NaN from a spreadsheet column) have no .lower(); skip both.
    return any(
        word_or_phrase.lower() in normalized_sentence
        for word_or_phrase in words_list
        if isinstance(word_or_phrase, str) and word_or_phrase.strip()
    )
class NounExtractor:
    """
    Extract noun phrases from a sentence and tag each with one dependency label.

    Adjacent noun chunks are merged when a linking word directly follows the
    first chunk, and the label of every resulting phrase is derived from the
    dependency labels of the tokens it spans.
    """

    # A noun chunk immediately followed by one of these tokens is merged with
    # the noun chunk(s) that come after it.
    _LINKING_WORDS = frozenset({'of', 'in', 'among', 'on', 'towards', 'to', 'for'})

    # Labels tried, in order, when a phrase has no subject token. 'dobj' comes
    # first because extract() gives that label special treatment.
    _PREFERRED_LABELS = ('dobj', 'pobj', 'attr')

    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract noun phrases from the spaCy doc, merging chunks joined by a linking word.

        :param doc: The processed document; its ``noun_chunks`` are the starting phrases
        :return: List of ``(phrase_text, dep_label)`` tuples in document order
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Chunks already absorbed into an earlier phrase

        for i, current in enumerate(noun_phrases):
            if i in skip_indexes:
                continue

            span = current
            # The token right after the chunk is only inspected when another
            # chunk follows, which guarantees doc[current.end] exists.
            if i + 1 < len(noun_phrases) and doc[current.end].text in self._LINKING_WORDS:
                next_phrase = noun_phrases[i + 1]
                if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
                    # A 'pcomp' token follows the next chunk: absorb two chunks.
                    span = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                else:
                    span = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)

            deps_in_phrase = {tok.dep_ for tok in span}
            merged_phrases.append((span.text, self.determine_dep_label(deps_in_phrase)))

        return merged_phrases

    def determine_dep_label(self, deps_in_phrase):
        """
        Determine one representative dependency label for a phrase.

        The choice is deterministic and the argument is not modified.

        :param deps_in_phrase: Collection of dependency labels found in the phrase
        :return: 'unknown' for an empty collection; 'ROOT' if it contains
            'nsubj' or 'nsubjpass'; otherwise the first label present from
            ('dobj', 'pobj', 'attr'), falling back to the alphabetically
            smallest label
        """
        if not deps_in_phrase:
            return 'unknown'
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        for label in self._PREFERRED_LABELS:
            if label in deps_in_phrase:
                return label
        # No preferred label present: pick a stable representative instead of
        # an arbitrary set element.
        return min(deps_in_phrase)

    def extract(self, sentence, action_verb):
        """
        Extract noun phrases with their dependency labels from the sentence.

        :param sentence: The input sentence
        :param action_verb: Iterable of verbs; if any of them occurs in the
            sentence (case-insensitive substring match), the labels 'ROOT'
            and 'dobj' are swapped on every phrase
        :return: Dict mapping phrase text to its dependency label
        """
        doc = self.process_text(sentence)
        result_dict = dict(self.get_noun_phrases(doc))

        lowered_sentence = sentence.lower()
        if any(verb.lower() in lowered_sentence for verb in action_verb):
            swapped = {'ROOT': 'dobj', 'dobj': 'ROOT'}
            result_dict = {
                phrase: swapped.get(dep, dep) for phrase, dep in result_dict.items()
            }
        return result_dict
def format_results(results):
    """
    Turn a phrase -> dependency-label mapping into "root <- phrase" strings.

    Every phrase labelled 'ROOT' or 'nsubjpass' is treated as a root. Each
    remaining phrase is paired with every root.

    :param results: Dict mapping phrase text to its dependency label
    :return: List of unique "root <- phrase" strings, ordered by the position
        of the phrase (then of the root) in ``results``; empty if there is no
        root or no non-root phrase
    """
    root_keys = [key for key, value in results.items() if value in ('ROOT', 'nsubjpass')]
    roots = set(root_keys)

    formatted = []
    for key in results:
        if key in roots:
            continue  # Roots are only ever the left-hand side of a relation
        for root_key in root_keys:
            formatted.append(f"{root_key} <- {key}")

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # output does not vary between runs.
    return list(dict.fromkeys(formatted))
def wrap_label(label, words_per_line=3):
    """
    Helper function to wrap a label onto a new line after every few words.

    :param label: The label text; it is split on whitespace
    :param words_per_line: Maximum number of words per line (default 3)
    :return: The words re-joined with single spaces, with a newline after
        every ``words_per_line`` words
    :raises ValueError: If ``words_per_line`` is smaller than 1
    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be at least 1")

    words = label.split()
    lines = [
        ' '.join(words[start:start + words_per_line])
        for start in range(0, len(words), words_per_line)
    ]
    return '\n'.join(lines)
def visualize_cognitive_map(formatted_results):
    """
    Draw the relations as a directed graph on a new matplotlib figure.

    :param formatted_results: Iterable of strings of the form
        "target <- source" or "source -> target"; entries that contain
        neither separator (with surrounding spaces) are ignored
    :return: The ``matplotlib.pyplot`` module, whose current figure is the
        graph that was just drawn
    """
    G = nx.DiGraph()  # Directed graph to show direction of relationships

    # Add edges based on formatted results
    for result in formatted_results:
        if ' <- ' in result:
            # "target <- source": the edge runs from the right-hand node
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif ' -> ' in result:
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # pyplot keeps every figure alive until it is closed. Close the figure
    # left over from the previous call so repeated calls do not accumulate
    # open figures.
    previous_figure = getattr(visualize_cognitive_map, "_last_figure", None)
    if previous_figure is not None:
        plt.close(previous_figure)

    # Larger figure size for better visibility; it becomes the current figure
    visualize_cognitive_map._last_figure = plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)

    # No plt.show() here: this runs as a GUI callback, and the caller renders
    # the returned plot itself.
    return plt
# Extractor instance built on the module-level spaCy pipeline
extractor = NounExtractor(nlp=nlp)

# Cue words and phrases that CogMapAnalysis looks for before analysing a text
words_list = ["so", "because", "increase", "contribute", "due to"]

# Verbs passed to extractor.extract() as its action_verb argument
action_verb = ["affect", "influence", "increase", "against"]
# Define the callback function for the GUI
def CogMapAnalysis(text):
    """
    Analyse the text and build its cognitive map.

    :param text: The input text; may be empty or None
    :return: A ``(formatted_result, plot)`` pair. When the text contains one
        of the cues in ``words_list``, ``formatted_result`` is the list
        produced by ``format_results`` and ``plot`` is the value returned by
        ``visualize_cognitive_map``. Otherwise ``formatted_result`` is a
        message string and ``plot`` is None.
    """
    # Empty or missing input cannot contain a causal cue; answer with the same
    # message instead of failing on it.
    if not text or not contains_words_or_phrases(words_list, text):
        # Use None instead of empty string for non-existent objects
        return "❌ No causal expression was identified.", None

    result = extractor.extract(text, action_verb)
    formatted_result = format_results(result)
    plot = visualize_cognitive_map(formatted_result)
    return formatted_result, plot
# Build the GUI with the 'gr' (Gradio) library.
# NOTE(review): the original indentation was lost, so the nesting of the
# `with` blocks below is a reconstruction — confirm against the deployed app.
with gr.Blocks() as demo:
    # Page title
    with gr.Column():
        gr.Markdown(
            '<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div> <div style="text-align: center;"><h3></h3></div>'
        )

    # Text input
    with gr.Row():
        inputs = gr.Textbox(
            label="Input",
            lines=2,
            placeholder="Enter your text here...",
        )

    # Sample sentences offered by the interface
    examples = [
        "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
        "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
        "CogMap is a tool that lets you create cognitive maps from text.",
    ]

    # Outputs: the textual relations and the graph
    output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
    cogmap_plot = gr.Plot(label="Visualization")

    # Wire the callback to the input and output components
    interface = gr.Interface(
        fn=CogMapAnalysis,
        examples=examples,
        inputs=inputs,
        outputs=[output, cogmap_plot],
    )

    # Footer notes
    with gr.Row():
        gr.Markdown("⚠️ Feel free to flag me if you find any errors. :)")
    with gr.Column():
        gr.Markdown(
            '<p style="text-align: center; ">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>'
        )

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch(show_api=False, share=True)