ningrumdaud committed
Commit • 145dc69
Parent(s): b08e850
Create app.py
app.py
ADDED
@@ -0,0 +1,214 @@
import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Initialize the spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Import the lexicon of causal cue markers
# (reading .xlsx requires openpyxl; the file must ship with the Space)
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers']  # note: loaded but unused below; the hardcoded words_list is used instead

def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    :param words_list: List of words or phrases to check
    :param sentence: The input sentence in which to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Normalize the sentence to lower case to make the search case-insensitive
    normalized_sentence = sentence.lower()

    # Check each word or phrase in the list
    for word_or_phrase in words_list:
        # Check if the word or phrase is in the normalized sentence
        if word_or_phrase.lower() in normalized_sentence:
            return True  # Return True immediately if any word or phrase is found

    return False  # Return False if none of the words or phrases are found

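# Illustrative sanity check (not part of the original commit; follows directly
# from the substring matching above):
#   contains_words_or_phrases(["due to", "because"], "Support grew due to education.")  # -> True
#   contains_words_or_phrases(["due to"], "Support grew overnight.")                    # -> False
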
class NounExtractor:
    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using dependency labels accurately.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they have been merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # Dependency label for each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue

            current = noun_phrases[i]
            # Collect dependency labels for the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge logic for prepositional constructions ('of', 'in', ...);
            # the current.end < len(doc) guard prevents an IndexError when the
            # chunk ends the document
            if (i + 1 < len(noun_phrases) and current.end < len(doc)
                    and doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for']):
                next_phrase = noun_phrases[i + 1]
                if (i + 2 < len(noun_phrases) and next_phrase.end < len(doc)
                        and doc[next_phrase.end].dep_ == 'pcomp'):
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))

        return merged_phrases

    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on internal dependencies.
        """
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        else:
            # Choose a representative dependency if no clear subject is present
            # (set.pop() is arbitrary, so the chosen label can vary between runs)
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'

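    # Illustrative behavior of determine_dep_label (hypothetical inputs):
    #   {'det', 'nsubj', 'amod'} -> 'ROOT'            (a subject label wins)
    #   {'det', 'pobj'}          -> 'det' or 'pobj'   (arbitrary set.pop())
    #   set()                    -> 'unknown'
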
    def extract(self, sentence, action_verb):
        """
        Extract and return noun phrases with their dependency tags from the sentence.
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of any actionable verbs in the sentence
        found_verbs = [v for v in action_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Swap ROOT/dobj labels when an actionable verb is present
            for phrase, dep in list(result_dict.items()):  # Work on a copy of items to safely modify the dict
                if dep == 'ROOT':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'

        return result_dict

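# Illustrative usage (output shape only; the exact phrases and labels depend on
# the spaCy model version and parse):
#   extractor = NounExtractor(nlp=nlp)
#   extractor.extract("Widespread education increases public support.",
#                     ['affect', 'influence', 'increase', 'against'])
#   # e.g. -> {'Widespread education': 'dobj', 'public support': 'ROOT'}
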
def format_results(results):
    """Format the {phrase: label} dict as 'A <- B' / 'A -> B' relation strings around the root phrases."""
    formatted = []
    # Find all roots or central subjects to structure the phrases around them
    root_keys = [key for key, value in results.items() if value == 'ROOT' or value == 'nsubjpass']

    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            if value == 'nsubjpass':  # Passive subject (unreachable as written:
                # every 'nsubjpass' key is itself in root_keys and was skipped above)
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")

    # Remove duplicates and return the formatted results
    formatted = list(set(formatted))
    return formatted

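# Worked example (deterministic given this input dict):
#   format_results({'education': 'ROOT', 'public support': 'dobj'})
#   # -> ['education <- public support']
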
def wrap_label(label):
    """Helper function to wrap labels after every three words."""
    words = label.split()
    wrapped_label = '\n'.join(' '.join(words[i:i+3]) for i in range(0, len(words), 3))
    return wrapped_label

def visualize_cognitive_map(formatted_results):
    """Render the relation strings as a directed graph and return the matplotlib module for gr.Plot."""
    G = nx.DiGraph()  # Directed graph to show the direction of relationships

    # Add edges based on the formatted results
    for result in formatted_results:
        if '<-' in result:
            # Extract nodes and add the edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # Extract nodes and add the edge in the specified direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # Set up the plot with a larger figure size for better visibility
    plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)

    plt.show()  # Has no effect with a non-interactive backend (e.g. headless server); kept for local runs

    return plt

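# Assumption for headless deployments: force a non-interactive backend before
# pyplot is first used, e.g.:
#   import matplotlib
#   matplotlib.use("Agg")
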
extractor = NounExtractor(nlp=nlp)

# Cue lists used by the demo: markers that signal a causal expression, and
# verbs that trigger the ROOT/dobj swap in NounExtractor.extract
words_list = ["so", "because", "increase", "contribute", "due to"]
action_verb = ['affect', 'influence', 'increase', 'against']

# Define the callback function for the GUI
def CogMapAnalysis(text):
    if contains_words_or_phrases(words_list, text):
        result = extractor.extract(text, action_verb)
        formatted_result = format_results(result)
        plot = visualize_cognitive_map(formatted_result)
        return formatted_result, plot
    else:
        formatted_result = "❌ No causal expression was identified."
        plot = None  # Use None instead of an empty string for non-existent plots
        return formatted_result, plot

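# Illustrative call (the list renders as text in the Textbox; the plot shows in gr.Plot):
#   CogMapAnalysis("Public support grew due to widespread education.")
#   # e.g. -> (['widespread education <- Public support'], <module 'matplotlib.pyplot'>)
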
# Create the GUI using the 'gr' library
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown('<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div> <div style="text-align: center;"><h3></h3></div>')

    with gr.Row():
        inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
        examples = [
            "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
            "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
            "CogMap is a tool that lets you create cognitive maps from text."
        ]

    output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
    cogmap_plot = gr.Plot(label="Visualization")
    interface = gr.Interface(fn=CogMapAnalysis, examples=examples, inputs=inputs, outputs=[output, cogmap_plot])

    with gr.Row():
        gr.Markdown("⚠️ Feel free to flag me if you find any errors. :)")

    with gr.Column():
        gr.Markdown('<p style="text-align: center; ">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>')

if __name__ == "__main__":
    demo.launch(show_api=False, share=True)
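
Runtime note (an assumption from the imports, not part of this commit): the Space also needs, at minimum, gradio, spacy (with the en_core_web_sm model), pandas, openpyxl (for read_excel on .xlsx), networkx, and matplotlib installed, plus the link_cues.xlsx lexicon file in the repo root.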