import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
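# Note: this app renders matplotlib figures server-side; on a headless host, forcing
# the non-interactive Agg backend (matplotlib.use("Agg") before importing pyplot)
# is a common safeguard if rendering fails.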
# Initialize spaCy NLP model
nlp = spacy.load("en_core_web_sm")
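# NOTE: the model must be available locally, e.g. installed via `python -m spacy download en_core_web_sm`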
# Load the lexicon of causal markers
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers'].dropna().tolist()
def contains_words_or_phrases(words_list, sentence):
"""
Check if any word or phrase from words_list is present in the sentence.
:param words_list: List of words or phrases to check
:param sentence: The input sentence where to look for words or phrases
:return: True if any word or phrase is found, otherwise False
"""
# Process the sentence with spaCy to obtain the lemmatized form of each token
processed_sentence = nlp(sentence.lower())
lemmatized_sentence = " ".join(token.lemma_ for token in processed_sentence)
# Process each word or phrase for lemmatization
for word_or_phrase in words_list:
# Process and create a lemma string for the word or phrase
processed_word_or_phrase = nlp(word_or_phrase.lower())
lemmatized_word_or_phrase = " ".join(token.lemma_ for token in processed_word_or_phrase)
# Check if the lemmatized word or phrase is in the lemmatized sentence
if lemmatized_word_or_phrase in lemmatized_sentence:
return True # Return True immediately if any word or phrase is found
return False # Return False if none of the words or phrases are found
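# Illustrative (hypothetical) usage:
#   contains_words_or_phrases(["because", "due to"], "Prices rose because of the drought.")
#   -> True, since "because" matches after lemmatization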
class NounExtractor:
def __init__(self, nlp):
"""
Initialize the NounExtractor with a pre-loaded spaCy NLP model.
"""
self.nlp = nlp
def process_text(self, text):
"""
Process the text using the spaCy NLP pipeline.
"""
return self.nlp(text)
def get_noun_phrases(self, doc):
"""
Extract and refine noun phrases from the spaCy doc, tracking and using dependency labels accurately.
"""
noun_phrases = list(doc.noun_chunks)
merged_phrases = []
skip_indexes = set() # Indexes to skip because they have been merged into another phrase
list_dep_labels = [token.dep_ for token in doc] # List of dependency labels for each token
for i in range(len(noun_phrases)):
if i in skip_indexes:
continue
current = noun_phrases[i]
# Collect dependency labels for the current noun phrase
deps_in_phrase = {list_dep_labels[tok.i] for tok in current}
            # Merge adjacent noun phrases joined by a prepositional connector (e.g., "impact of X")
            if i + 1 < len(noun_phrases) and doc[current.end].lower_ in ['of', 'in', 'among', 'on', 'towards', 'to', 'for', 'across']:
next_phrase = noun_phrases[i + 1]
if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
extended_phrase = doc[current.start:noun_phrases[i + 2].end]
skip_indexes.update({i + 1, i + 2})
extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
dep_label = self.determine_dep_label(extended_deps)
merged_phrases.append((extended_phrase.text, dep_label))
continue
else:
merged_phrase = doc[current.start:next_phrase.end]
skip_indexes.add(i + 1)
merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
dep_label = self.determine_dep_label(merged_deps)
merged_phrases.append((merged_phrase.text, dep_label))
continue
if i not in skip_indexes:
dep_label = self.determine_dep_label(deps_in_phrase)
merged_phrases.append((current.text, dep_label))
return merged_phrases
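    # Illustrative (hypothetical): for "the impact of education", the chunks
    # "the impact" and "education" are merged into "the impact of education".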
def determine_dep_label(self, deps_in_phrase):
"""
Determine the most appropriate dependency label for a phrase based on internal dependencies.
"""
if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
return 'ROOT'
        elif 'dobj' in deps_in_phrase:
            # Prefer 'dobj' so the causative ROOT/dobj swap in extract() fires reliably
            return 'dobj'
        else:
            # Choose a deterministic representative label (set.pop() would be arbitrary)
            return sorted(deps_in_phrase)[0] if deps_in_phrase else 'unknown'
def extract(self, sentence, causative_verb):
"""
Extracts and returns noun phrases with their detailed dependency tags from the sentence.
"""
doc = self.process_text(sentence)
noun_phrases = self.get_noun_phrases(doc)
result_dict = {phrase: dep for phrase, dep in noun_phrases}
        # Check for the presence of causative verbs (e.g., 'cause') in the sentence
found_verbs = [v for v in causative_verb if v.lower() in sentence.lower()]
if found_verbs:
            # Adjust dependency labels for noun phrases based on the presence of a causative verb
for phrase, dep in list(result_dict.items()): # Work on a copy of items to safely modify the dict
if dep == 'ROOT':
result_dict[phrase] = 'dobj'
elif dep == 'dobj':
result_dict[phrase] = 'ROOT'
return result_dict
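# Illustrative (hypothetical) usage:
#   extractor.extract("Higher taxes lead to lower consumption.", causative_verb)
#   returns noun phrases mapped to dependency labels, with ROOT and dobj swapped
#   because the causative verb 'lead' is present.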
def format_results(results):
formatted = []
# Find all roots or central subjects to structure the phrases around them
root_keys = [key for key, value in results.items() if value == 'ROOT' or value == 'nsubjpass']
for key, value in results.items():
if key in root_keys:
continue # Skip the roots themselves when adding to the formatted list
for root_key in root_keys:
if value == 'nsubjpass': # If the dependency indicates a passive subject
formatted.append(f"{key} -> {root_key}")
else:
formatted.append(f"{root_key} <- {key}")
    # Remove duplicates while preserving insertion order
    formatted = list(dict.fromkeys(formatted))
return formatted
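# Illustrative (hypothetical) output: "lower consumption <- higher taxes" reads as
# "higher taxes" (the cause) pointing to "lower consumption" (the ROOT/effect).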
def wrap_label(label):
"""Helper function to wrap labels after every three words."""
words = label.split()
wrapped_label = '\n'.join(' '.join(words[i:i+3]) for i in range(0, len(words), 3))
return wrapped_label
def visualize_cognitive_map(formatted_results):
G = nx.DiGraph() # Directed graph to show direction of relationships
# Add edges based on formatted results
for result in formatted_results:
if '<-' in result:
# Extract nodes and add edge in the reverse direction
nodes = result.split(' <- ')
G.add_edge(nodes[1], nodes[0])
elif '->' in result:
# Extract nodes and add edge in the specified direction
nodes = result.split(' -> ')
G.add_edge(nodes[0], nodes[1])
# Position nodes using the spring layout
pos = nx.spring_layout(G, k=0.50)
    # Set up the plot with a larger figure size for better visibility
    fig = plt.figure(figsize=(12, 8))
    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}
    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True, arrowstyle='-|>', arrowsize=30)
    # Return the figure object for Gradio to render; plt.show() is unnecessary in a web app
    return fig
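# Illustrative (hypothetical) usage:
#   fig = visualize_cognitive_map(["lower consumption <- higher taxes"])
#   draws a directed edge from "higher taxes" to "lower consumption".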
# Instantiate the extractor with the shared spaCy model
extractor = NounExtractor(nlp=nlp)
# Causative verbs/phrases; their presence flips the ROOT and dobj labels in extract()
causative_verb = ['affect', 'influence', 'increase', 'against', 'bring', 'lead', 'result', 'cause', 'is to', 'was to',
                  'are to', 'were to', 'contribute', 'make', 'attribute', 'provoke', 'spark']
# Define the callback function for the GUI
def CogMapAnalysis(text):
if contains_words_or_phrases(list_causalmarkers, text):
result = extractor.extract(text, causative_verb)
formatted_result = format_results(result)
plot = visualize_cognitive_map(formatted_result)
return formatted_result, plot
else:
formatted_result = "❌ No causal expression was identified."
plot = None # Use None instead of empty string for non-existent objects
return formatted_result, plot
# Create the GUI using the 'gr' library
with gr.Blocks() as demo:
gr.Markdown('# CogMApp')
gr.Markdown('### Generate cognitive maps from text with one click!')
with gr.Row():
with gr.Column():
inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
submit_button = gr.Button("Submit")
with gr.Column():
examples = gr.Examples(examples=[
"Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
"The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
"The weak law enforcement in this country is due to its citizens's ignorance.",
"CogMApp is a tool that lets you create cognitive maps from text."
], inputs=inputs)
with gr.Row():
output = gr.Textbox(label="Result", lines=5, placeholder=" ")
with gr.Row():
cogmap_plot = gr.Plot(label="Generated Cognitive Map")
with gr.Row():
gr.Markdown("⚠️ Feel free to flag me if you find any errors. 🙂")
with gr.Column():
gr.Markdown('Demo made with ❤ by P.K. Ningrum (2024) | Contact: [https://ningrumdaud.github.io/](https://ningrumdaud.github.io/)')
# Set up the button to execute the function when clicked
submit_button.click(CogMapAnalysis, inputs=[inputs], outputs=[output, cogmap_plot])
if __name__ == "__main__":
demo.launch(show_api=False, share=True)