import re

import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Initialize the spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Import the lexicon; the Excel file is expected to provide a 'causal_markers' column.
# (list_causalmarkers is loaded here, but the demo below is driven by words_list.)
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers']


def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    :param words_list: List of words or phrases to check
    :param sentence: The input sentence in which to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Normalize the sentence to lower case to make the search case-insensitive
    normalized_sentence = sentence.lower()

    # Match each cue on word boundaries so that short cues such as "so"
    # do not fire inside unrelated words like "solution"
    for word_or_phrase in words_list:
        if re.search(r'\b' + re.escape(word_or_phrase.lower()) + r'\b', normalized_sentence):
            return True  # Return True immediately if any cue is found
    return False  # Return False if none of the words or phrases are found
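
# A hypothetical quick check of the cue matcher (illustrative input only):
#   contains_words_or_phrases(["because", "due to"], "Prices rose because of inflation.")
#   returns True, since "because" occurs as a whole word in the sentence.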


class NounExtractor:
    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using
        dependency labels accurately.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they were merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # Dependency label of each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue
            current = noun_phrases[i]
            # Collect the dependency labels occurring in the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge adjacent noun phrases joined by a preposition ('of', 'in', ...)
            if i + 1 < len(noun_phrases) and doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for']:
                next_phrase = noun_phrases[i + 1]
                if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
                    # The next phrase is itself followed by a prepositional
                    # complement: merge three chunks into one extended phrase
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))
        return merged_phrases
    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on
        the dependencies of its tokens.
        """
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        else:
            # Fall back to an arbitrary representative dependency when no clear subject is present
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'
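
    # For illustration (hypothetical inputs): {'det', 'nsubj'} yields 'ROOT',
    # while {'det', 'pobj'} falls back to an arbitrary member such as 'det' or 'pobj'.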
    def extract(self, sentence, action_verb):
        """
        Extract and return noun phrases with their dependency tags from the sentence.
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of any actionable verb in the sentence
        found_verbs = [v for v in action_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Adjust dependency labels based on the presence of an actionable
            # verb: subject and object phrases swap roles
            for phrase, dep in list(result_dict.items()):  # Iterate over a copy to safely modify the dict
                if dep == 'ROOT':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'
        return result_dict
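

# A sketch of extract()'s output shape (hypothetical sentence; the exact labels
# depend on the en_core_web_sm parse):
#   extractor.extract("Education increases awareness.", action_verb)
#   might return {"Education": "dobj", "awareness": "ROOT"}, the roles having
#   been swapped because "increase" matches an actionable verb.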


def format_results(results):
    formatted = []
    # Find all roots or central subjects to structure the phrases around them
    root_keys = [key for key, value in results.items() if value in ('ROOT', 'nsubjpass')]
    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            if value == 'nsubjpass':  # The dependency indicates a passive subject
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")
    # Remove duplicates and return the formatted results
    return list(set(formatted))
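
# Illustrative only: given {"awareness": "ROOT", "Education": "dobj"}, the result
# contains "awareness <- Education" (the root phrase sits on the left of each relation).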


def wrap_label(label):
    """Helper function to wrap node labels after every three words."""
    words = label.split()
    return '\n'.join(' '.join(words[i:i + 3]) for i in range(0, len(words), 3))
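
# For example, wrap_label("public support for anti-discrimination laws")
# returns "public support for\nanti-discrimination laws".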


def visualize_cognitive_map(formatted_results):
    G = nx.DiGraph()  # Directed graph to show the direction of relationships

    # Add edges based on the formatted results
    for result in formatted_results:
        if '<-' in result:
            # Extract the nodes and add the edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # Extract the nodes and add the edge in the stated direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # Set up the plot with a larger figure size for better visibility
    plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with the custom labels; no plt.show() here, since the
    # figure is handed back to Gradio for rendering
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)
    return plt
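
# Note on the edge notation above: "A <- B" adds the directed edge B -> A,
# while "A -> B" adds A -> B, so both spellings end up as arrows in one graph.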


extractor = NounExtractor(nlp=nlp)

# Cue words/phrases and actionable verbs used by the demo
words_list = ["so", "because", "increase", "contribute", "due to"]
action_verb = ['affect', 'influence', 'increase', 'against']


# Define the callback function for the GUI
def CogMapAnalysis(text):
    if contains_words_or_phrases(words_list, text):
        result = extractor.extract(text, action_verb)
        formatted_result = format_results(result)
        plot = visualize_cognitive_map(formatted_result)
        return formatted_result, plot
    else:
        formatted_result = "❌ No causal expression was identified."
        plot = None  # Use None instead of an empty string for a non-existent plot
        return formatted_result, plot
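
# A hypothetical end-to-end call (the exact relations depend on the spaCy parse):
#   CogMapAnalysis("Stress increased because of the heavy workload.")
#   returns (formatted_relations, matplotlib_plot); sentences without any cue
#   word return the "No causal expression" message and None.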


# Create the GUI using Gradio
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown('<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div>')
    with gr.Row():
        inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
    examples = [
        "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
        "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
        "CogMap is a tool that lets you create cognitive maps from text."
    ]
    output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
    cogmap_plot = gr.Plot(label="Visualization")
    interface = gr.Interface(fn=CogMapAnalysis, examples=examples, inputs=inputs, outputs=[output, cogmap_plot])
    with gr.Row():
        gr.Markdown("⚠️ Feel free to flag me if you find any errors. :)")
    with gr.Column():
        gr.Markdown('<p style="text-align: center;">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>')

if __name__ == "__main__":
demo.launch(show_api=False, share=True)