Spaces:

elozano
/

news-analyzer

Runtime error

elozano committed on Feb 9, 2022

Commit

30ad188

•

1 Parent(s): deefcae

NER model added

Files changed (3) hide show

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
 from news_pipeline import NewsPipeline
 CATEGORY_EMOJIS = {
@@ -34,15 +34,34 @@ def app():
             with st.spinner("Analyzing article..."):
                 prediction = news_pipe(headline, content)
-            st.markdown(
-                f"{CATEGORY_EMOJIS[prediction['category']]} **Category**: {prediction['category']}"
-            )
-            st.markdown(
-                f"{FAKE_EMOJIS[prediction['fake']]} **Fake**: {'Yes' if prediction['fake'] == 'Fake' else 'No'}"
-            )
-            st.markdown(
-                f"{CLICKBAIT_EMOJIS[prediction['clickbait']]} **Clickbait**: {'Yes' if prediction['clickbait'] == 'Clickbait' else 'No'}"
-            )
 if __name__ == "__main__":

 import streamlit as st
+from annotated_text import annotated_text
 from news_pipeline import NewsPipeline
 CATEGORY_EMOJIS = {
             with st.spinner("Analyzing article..."):
                 prediction = news_pipe(headline, content)
+            col1, _, col2 = st.columns([2, 1, 6])
+            with col1:
+                st.subheader("Analysis:")
+                st.markdown(
+                    f"{CATEGORY_EMOJIS[prediction['category']]} **Category**: {prediction['category']}"
+                )
+                st.markdown(
+                    f"{FAKE_EMOJIS[prediction['fake']]} **Fake**: {'Yes' if prediction['fake'] == 'Fake' else 'No'}"
+                )
+                st.markdown(
+                    f"{CLICKBAIT_EMOJIS[prediction['clickbait']]} **Clickbait**: {'Yes' if prediction['clickbait'] == 'Clickbait' else 'No'}"
+                )
+            with col2:
+                st.subheader("Headline")
+                annotated_text(*parse_text(headline, prediction["ner"]["headline"]))
+                st.subheader("Content")
+                annotated_text(*parse_text(content, prediction["ner"]["content"]))
+def parse_text(text, prediction):
+    """Split ``text`` into plain segments and ``(span, label)`` entity tuples.
+
+    Args:
+        text: The string that the entity offsets index into.
+        prediction: Iterable of dicts, each providing ``"start"`` and
+            ``"end"`` character offsets into ``text`` and an
+            ``"entity_group"`` label. Assumed to be ordered by ``"start"``
+            and non-overlapping -- TODO confirm against the NER pipeline.
+
+    Returns:
+        A list of plain ``str`` segments and ``(entity_text, entity_group)``
+        tuples in document order. Empty plain segments are omitted.
+    """
+    parsed_text = []
+    cursor = 0
+    for entity in prediction:
+        begin, end = entity["start"], entity["end"]
+        # Only emit the gap before the entity when there is one; an entity at
+        # offset 0 or directly after the previous one has no leading text.
+        if begin > cursor:
+            parsed_text.append(text[cursor:begin])
+        # Slice the source text instead of using entity["word"], so the
+        # rendered span is character-for-character what the user typed.
+        # NOTE(review): "word" is presumably rebuilt from tokens by the
+        # pipeline and may differ in spacing/casing -- confirm.
+        parsed_text.append((text[begin:end], entity["entity_group"]))
+        cursor = end
+    # Trailing text after the last entity (or the whole text if none found).
+    if cursor < len(text):
+        parsed_text.append(text[cursor:])
+    return parsed_text
 if __name__ == "__main__":

news_pipeline.py CHANGED Viewed

@@ -2,8 +2,10 @@ from typing import Dict
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
     TextClassificationPipeline,
 )
@@ -29,6 +31,13 @@ class NewsPipeline:
             ),
             tokenizer=AutoTokenizer.from_pretrained("elozano/news-clickbait"),
         )
     def __call__(self, headline: str, content: str) -> Dict[str, str]:
         category_article_text = f" {self.category_tokenizer.sep_token} ".join(
@@ -41,4 +50,12 @@ class NewsPipeline:
             "category": self.category_pipeline(category_article_text)[0]["label"],
             "fake": self.fake_pipeline(fake_article_text)[0]["label"],
             "clickbait": self.clickbait_pipeline(headline)[0]["label"],
         }

 from transformers import (
     AutoModelForSequenceClassification,
+    AutoModelForTokenClassification,
     AutoTokenizer,
     TextClassificationPipeline,
+    TokenClassificationPipeline,
 )
             ),
             tokenizer=AutoTokenizer.from_pretrained("elozano/news-clickbait"),
         )
+        self.ner_pipeline = TokenClassificationPipeline(
+            tokenizer=AutoTokenizer.from_pretrained("dslim/bert-base-NER"),
+            model=AutoModelForTokenClassification.from_pretrained(
+                "dslim/bert-base-NER"
+            ),
+            aggregation_strategy="simple",
+        )
     def __call__(self, headline: str, content: str) -> Dict[str, str]:
         category_article_text = f" {self.category_tokenizer.sep_token} ".join(
             "category": self.category_pipeline(category_article_text)[0]["label"],
             "fake": self.fake_pipeline(fake_article_text)[0]["label"],
             "clickbait": self.clickbait_pipeline(headline)[0]["label"],
+            "ner": {
+                "headline": list(
+                    filter(lambda x: x["score"] > 0.8, self.ner_pipeline(headline))
+                ),
+                "content": list(
+                    filter(lambda x: x["score"] > 0.8, self.ner_pipeline(content))
+                ),
+            },
         }

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 transformers
 torch

 transformers
 torch
+st-annotated-text