import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the small English spaCy pipeline
# (install once with: python -m spacy download en_core_web_sm)
nlp = spacy.load("en_core_web_sm")

# Load the lexicon of causal cue words/phrases
# (note: the demo below uses a hardcoded words_list; the full lexicon column
# is kept here for reference)
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers']

def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    :param words_list: List of words or phrases to check
    :param sentence: The input sentence where to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Normalize the sentence to lower case to make the search case-insensitive.
    # Note: this is plain substring matching, so short cues such as "so" will
    # also fire inside longer words (e.g. "social").
    normalized_sentence = sentence.lower()

    # Check each word or phrase in the list
    for word_or_phrase in words_list:
        # Check if the word or phrase is in the normalized sentence
        if word_or_phrase.lower() in normalized_sentence:
            return True  # Return True immediately if any word or phrase is found

    return False  # Return False if none of the words or phrases are found
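# Quick sanity check (hypothetical strings, not taken from the lexicon file):
#   contains_words_or_phrases(["because", "due to"], "Prices rose due to inflation.")  # True
#   contains_words_or_phrases(["because"], "It rained all day.")                       # False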

class NounExtractor:
    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using dependency labels accurately.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they have been merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # List of dependency labels for each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue

            current = noun_phrases[i]
            # Collect dependency labels for the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge logic for prepositional constructions ('of', 'in', ...);
            # guard against indexing past the end of the doc
            if (i + 1 < len(noun_phrases) and current.end < len(doc)
                    and doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for']):
                next_phrase = noun_phrases[i + 1]
                if (i + 2 < len(noun_phrases) and next_phrase.end < len(doc)
                        and doc[next_phrase.end].dep_ == 'pcomp'):
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))

        return merged_phrases

    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on internal dependencies.
        """
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        else:
            # No clear subject: fall back to an arbitrary member of the set
            # (set.pop() gives no ordering guarantee)
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'
    
    def extract(self, sentence, action_verb):
        """
        Extracts and returns noun phrases with their detailed dependency tags from the sentence.
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of any actionable verbs in the sentence
        found_verbs = [v for v in action_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Adjust dependency labels for noun phrases based on the presence of an actionable verb.
            for phrase, dep in list(result_dict.items()):  # Work on a copy of items to safely modify the dict
                if dep == 'ROOT':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'
        
        return result_dict
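# Illustrative call (hypothetical sentence; exact labels depend on the spaCy
# model and the merge logic above):
#   extractor = NounExtractor(nlp=nlp)
#   extractor.extract("Education increases public support.", ['increase'])
#   # -> roughly {'Education': 'dobj', 'public support': 'ROOT'}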

def format_results(results):
    formatted = []
    # Find all roots or central subjects to structure the phrases around them
    root_keys = [key for key, value in results.items() if value == 'ROOT' or value == 'nsubjpass']

    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            if results[root_key] == 'nsubjpass':  # Passive root: arrow runs from the phrase to the root
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")

    # Remove duplicates and return the formatted results
    formatted = list(set(formatted))
    return formatted
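# Example of the expected shape (hypothetical labels):
#   format_results({'public support': 'ROOT', 'education': 'dobj'})
#   # -> ['public support <- education']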

def wrap_label(label):
    """Helper function to wrap labels after every three words."""
    words = label.split()
    wrapped_label = '\n'.join(' '.join(words[i:i+3]) for i in range(0, len(words), 3))
    return wrapped_label
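# e.g. wrap_label("public support for anti-discrimination laws")
#   -> "public support for\nanti-discrimination laws"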

def visualize_cognitive_map(formatted_results):
    G = nx.DiGraph()  # Directed graph to show direction of relationships

    # Add edges based on formatted results
    for result in formatted_results:
        if '<-' in result:
            # Extract nodes and add edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # Extract nodes and add edge in the specified direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # Set up the plot with a larger size for better visibility
    fig = plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)

    # Return the Figure object so Gradio can render it; plt.show() is a no-op
    # (or blocks) in a headless server context, so it is omitted here.
    return fig
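# Standalone usage sketch (outside the Gradio demo below):
#   fig = visualize_cognitive_map(['effect <- cause'])
#   fig.savefig('cogmap.png')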

extractor = NounExtractor(nlp=nlp)

# Cue words/phrases and actionable verbs used by the demo
words_list = ["so", "because", "increase", "contribute", "due to"]
action_verb = ['affect', 'influence', 'increase', 'against']

# Define the callback function for the GUI
def CogMapAnalysis(text):
    if contains_words_or_phrases(words_list, text):
        result = extractor.extract(text, action_verb)
        formatted_result = format_results(result)
        plot = visualize_cognitive_map(formatted_result)
        return formatted_result, plot
    else:
        formatted_result = "❌ No causal expression was identified."
        plot = None  # Use None instead of empty string for non-existent objects
        return formatted_result, plot
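# Smoke test (hypothetical input; handy before launching the UI):
#   text_out, fig = CogMapAnalysis("Support grew because education improved.")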

# Create the GUI using the 'gr' library
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown('<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div>')

    with gr.Row():
        inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
        examples = [
            "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
            "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
            "CogMap is a tool that lets you create cognitive maps from text."
        ]

        output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
        cogmap_plot = gr.Plot(label="Visualization")
        interface = gr.Interface(fn=CogMapAnalysis, examples=examples, inputs=inputs, outputs=[output, cogmap_plot])
    
    with gr.Row():
        gr.Markdown("⚠️  Feel free to flag me if you find any errors. :)")
    
    with gr.Column():
        gr.Markdown('<p style="text-align: center; ">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>')

if __name__ == "__main__":
    demo.launch(show_api=False, share=True)