ningrumdaud committed
Commit: 145dc69
Parent: b08e850

Create app.py

Files changed (1): app.py (+214, -0)

app.py (new file):

import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
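
# Note (assumptions, not stated in the repo): the app needs gradio, spacy, pandas,
# networkx, matplotlib, and openpyxl (used by pd.read_excel for .xlsx files), plus
# the spaCy model installed via `python -m spacy download en_core_web_sm`.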

# Initialize the spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Import the lexicon
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers']
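
# Note: list_causalmarkers is loaded above but never used; the demo below matches
# against the hard-coded words_list instead.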

def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    :param words_list: List of words or phrases to check
    :param sentence: The input sentence in which to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Normalize the sentence to lower case to make the search case-insensitive
    normalized_sentence = sentence.lower()

    # Check each word or phrase in the list
    for word_or_phrase in words_list:
        # Return True immediately if the word or phrase is found
        if word_or_phrase.lower() in normalized_sentence:
            return True

    return False  # None of the words or phrases were found
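
# Illustrative check (hypothetical input):
#   contains_words_or_phrases(["due to"], "Growth slowed due to inflation.")  # -> True
# Note that matching is by substring, so a short cue like "so" also fires inside
# words such as "social".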

class NounExtractor:
    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using dependency labels accurately.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they have been merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # List of dependency labels for each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue

            current = noun_phrases[i]
            # Collect dependency labels for the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge logic for prepositional constructions ('of', 'in', 'among', ...)
            if i + 1 < len(noun_phrases) and (doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for']):
                next_phrase = noun_phrases[i + 1]
                if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
                    # Fold the prepositional complement and the following phrase into one span
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))

        return merged_phrases

    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on its internal dependencies.
        """
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        else:
            # Choose a representative dependency if no clear subject is present
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'

    def extract(self, sentence, action_verb):
        """
        Extracts and returns noun phrases with their detailed dependency tags from the sentence.
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of any actionable verbs in the sentence
        found_verbs = [v for v in action_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Adjust dependency labels for noun phrases based on the presence of an actionable verb
            for phrase, dep in list(result_dict.items()):  # Work on a copy of the items to safely modify the dict
                if dep == 'ROOT':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'

        return result_dict
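
# Illustrative sketch (hypothetical sentence; actual labels depend on the spaCy
# model and on which dependency determine_dep_label happens to pop):
#   NounExtractor(nlp).extract("Stress increases blood pressure.", ['increase'])
# might return {'Stress': 'dobj', 'blood pressure': 'ROOT'} after the swap above.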

def format_results(results):
    formatted = []
    # Find all roots or central subjects to structure the phrases around them
    root_keys = [key for key, value in results.items() if value in ('ROOT', 'nsubjpass')]

    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            if value == 'nsubjpass':  # The dependency indicates a passive subject
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")

    # Remove duplicates and return the formatted results
    return list(set(formatted))
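
# Continuing the sketch above, {'Stress': 'dobj', 'blood pressure': 'ROOT'} would
# be rendered as ['blood pressure <- Stress'].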

def wrap_label(label):
    """Helper function to wrap labels after every three words."""
    words = label.split()
    return '\n'.join(' '.join(words[i:i + 3]) for i in range(0, len(words), 3))
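
# e.g. wrap_label("public support for laws") == "public support for\nlaws"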

def visualize_cognitive_map(formatted_results):
    G = nx.DiGraph()  # Directed graph to show the direction of relationships

    # Add edges based on the formatted results
    for result in formatted_results:
        if '<-' in result:
            # Extract nodes and add the edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # Extract nodes and add the edge in the specified direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # Set up the plot with a larger figure size for better visibility
    fig = plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)

    # Return the figure for gr.Plot; plt.show() would block or fail on a server
    return fig
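
# Note (assumption): on a headless deployment a non-interactive backend may also
# be needed, e.g. matplotlib.use('Agg') before importing pyplot.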

extractor = NounExtractor(nlp=nlp)

# Cue words and actionable verbs used by the demo
words_list = ["so", "because", "increase", "contribute", "due to"]
action_verb = ['affect', 'influence', 'increase', 'against']

# Define the callback function for the GUI
def CogMapAnalysis(text):
    if contains_words_or_phrases(words_list, text):
        result = extractor.extract(text, action_verb)
        formatted_result = format_results(result)
        plot = visualize_cognitive_map(formatted_result)
        return formatted_result, plot
    else:
        formatted_result = "❌ No causal expression was identified."
        plot = None  # Use None instead of an empty string for non-existent objects
        return formatted_result, plot

# Create the GUI using the 'gr' library
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown('<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div>')

    with gr.Row():
        inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
        examples = [
            "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
            "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
            "CogMap is a tool that lets you create cognitive maps from text."
        ]

        output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
        cogmap_plot = gr.Plot(label="Visualization")
        interface = gr.Interface(fn=CogMapAnalysis, examples=examples, inputs=inputs, outputs=[output, cogmap_plot])

    with gr.Row():
        gr.Markdown("⚠️ Feel free to flag me if you find any errors. :)")

    with gr.Column():
        gr.Markdown('<p style="text-align: center;">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>')

if __name__ == "__main__":
    demo.launch(show_api=False, share=True)