Spaces:

amsterdamNLP
/

attention-rollout

Running

App Files Community

Martijn van Beers committed on Aug 25, 2022

Commit

9e7d7f8

•

1 Parent(s): 5b3ff3f

Add title and description

Browse files

Files changed (1) hide show

app.py +65 -35

app.py CHANGED Viewed

@@ -9,9 +9,7 @@ from BERT_explainability.ExplanationGenerator import Generator
 from BERT_explainability.roberta2 import RobertaForSequenceClassification
 from transformers import AutoTokenizer
-from captum.attr import (
-        visualization
-    )
 import torch
 # from https://discuss.pytorch.org/t/using-scikit-learns-scalers-for-torchvision/53455
@@ -19,11 +17,15 @@ class PyTMinMaxScalerVectorized(object):
     """
     Transforms each channel to the range [0, 1].
     """
     def __init__(self, dimension=-1):
         self.d = dimension
     def __call__(self, tensor):
         d = self.d
-        scale = 1.0 / (tensor.max(dim=d, keepdim=True)[0] - tensor.min(dim=d, keepdim=True)[0])
         tensor.mul_(scale).sub_(tensor.min(dim=d, keepdim=True)[0])
         return tensor
@@ -33,7 +35,9 @@ if torch.cuda.is_available():
 else:
     device = torch.device("cpu")
-model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(device)
 model.eval()
 tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
 # initialize the explanations generator
@@ -43,33 +47,33 @@ classifications = ["NEGATIVE", "POSITIVE"]
 # rule 5 from paper
 def avg_heads(cam, grad):
-    cam = (
-            (grad * cam)
-            .clamp(min=0)
-            .mean(dim=-3)
-        )
     # set negative values to 0, then average
-#    cam = cam.clamp(min=0).mean(dim=0)
     return cam
 # rule 6 from paper
 def apply_self_attention_rules(R_ss, cam_ss):
     R_ss_addition = torch.matmul(cam_ss, R_ss)
     return R_ss_addition
 def generate_relevance(model, input_ids, attention_mask, index=None, start_layer=0):
     output = model(input_ids=input_ids, attention_mask=attention_mask)[0]
     if index == None:
-        #index = np.expand_dims(np.arange(input_ids.shape[1])
         # by default explain the class with the highest score
         index = output.argmax(axis=-1).detach().cpu().numpy()
     # create a one-hot vector selecting class we want explanations for
-    one_hot = (torch.nn.functional
-            .one_hot(torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1))
-            .to(torch.float)
-            .requires_grad_(True)
-        ).to(device)
     print("ONE_HOT", one_hot.size(), one_hot)
     one_hot = torch.sum(one_hot * output)
     model.zero_grad()
@@ -90,6 +94,7 @@ def generate_relevance(model, input_ids, attention_mask, index=None, start_layer
         R += joint
     return output, R[:, 0, 1:-1]
 def visualize_text(datarecords, legend=True):
     dom = ["<table width: 100%>"]
     rows = [
@@ -111,7 +116,9 @@ def visualize_text(datarecords, legend=True):
                         )
                     ),
                     visualization.format_classname(datarecord.attr_class),
-                    visualization.format_classname("{0:.2f}".format(datarecord.attr_score)),
                     visualization.format_word_importances(
                         datarecord.raw_input_ids, datarecord.word_attributions
                     ),
@@ -143,9 +150,12 @@ def visualize_text(datarecords, legend=True):
     return html
 def show_explanation(model, input_ids, attention_mask, index=None, start_layer=0):
     # generate an explanation for the input
-    output, expl = generate_relevance(model, input_ids, attention_mask, index=index, start_layer=start_layer)
     print(output.shape, expl.shape)
     # normalize scores
     scaler = PyTMinMaxScalerVectorized()
@@ -154,7 +164,6 @@ def show_explanation(model, input_ids, attention_mask, index=None, start_layer=0
     # get the model classification
     output = torch.nn.functional.softmax(output, dim=-1)
     vis_data_records = []
     for record in range(input_ids.size(0)):
         classification = output[record].argmax(dim=-1).item()
@@ -164,25 +173,31 @@ def show_explanation(model, input_ids, attention_mask, index=None, start_layer=0
         # if the classification is negative, higher explanation scores are more negative
         # flip for visualization
         if class_name == "NEGATIVE":
-            nrm *= (-1)
-        tokens = tokenizer.convert_ids_to_tokens(input_ids[record].flatten())[1:0 - ((attention_mask[record] == 0).sum().item() + 1)]
         print([(tokens[i], nrm[i].item()) for i in range(len(tokens))])
-        vis_data_records.append(visualization.VisualizationDataRecord(
-                                    nrm,
-                                    output[record][classification],
-                                    classification,
-                                    classification,
-                                    index,
-                                    1,
-                                    tokens,
-                                    1))
     return visualize_text(vis_data_records)
 def run(input_text):
     text_batch = [input_text]
-    encoding = tokenizer(text_batch, return_tensors='pt')
-    input_ids = encoding['input_ids'].to(device)
-    attention_mask = encoding['attention_mask'].to(device)
     # true class is positive - 1
     true_class = 1
@@ -190,5 +205,20 @@ def run(input_text):
     html = show_explanation(model, input_ids, attention_mask)
     return html
-iface = gradio.Interface(fn=run, inputs="text", outputs="html", examples=[["This movie was the best movie I have ever seen! some scenes were ridiculous, but acting was great"], ["I really didn't like this movie. Some of the actors were good, but overall the movie was boring"]])
 iface.launch()

 from BERT_explainability.roberta2 import RobertaForSequenceClassification
 from transformers import AutoTokenizer
+from captum.attr import visualization
 import torch
 # from https://discuss.pytorch.org/t/using-scikit-learns-scalers-for-torchvision/53455
     """
     Transforms each channel to the range [0, 1].
     """
     def __init__(self, dimension=-1):
         self.d = dimension
     def __call__(self, tensor):
         d = self.d
+        scale = 1.0 / (
+            tensor.max(dim=d, keepdim=True)[0] - tensor.min(dim=d, keepdim=True)[0]
+        )
         tensor.mul_(scale).sub_(tensor.min(dim=d, keepdim=True)[0])
         return tensor
 else:
     device = torch.device("cpu")
+model = RobertaForSequenceClassification.from_pretrained(
+    "textattack/roberta-base-SST-2"
+).to(device)
 model.eval()
 tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
 # initialize the explanations generator
 # rule 5 from paper
 def avg_heads(cam, grad):
+    cam = (grad * cam).clamp(min=0).mean(dim=-3)
     # set negative values to 0, then average
+    #    cam = cam.clamp(min=0).mean(dim=0)
     return cam
 # rule 6 from paper
 def apply_self_attention_rules(R_ss, cam_ss):
     R_ss_addition = torch.matmul(cam_ss, R_ss)
     return R_ss_addition
 def generate_relevance(model, input_ids, attention_mask, index=None, start_layer=0):
     output = model(input_ids=input_ids, attention_mask=attention_mask)[0]
     if index == None:
+        # index = np.expand_dims(np.arange(input_ids.shape[1])
         # by default explain the class with the highest score
         index = output.argmax(axis=-1).detach().cpu().numpy()
     # create a one-hot vector selecting class we want explanations for
+    one_hot = (
+        torch.nn.functional.one_hot(
+            torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1)
+        )
+        .to(torch.float)
+        .requires_grad_(True)
+    ).to(device)
     print("ONE_HOT", one_hot.size(), one_hot)
     one_hot = torch.sum(one_hot * output)
     model.zero_grad()
         R += joint
     return output, R[:, 0, 1:-1]
 def visualize_text(datarecords, legend=True):
     dom = ["<table width: 100%>"]
     rows = [
                         )
                     ),
                     visualization.format_classname(datarecord.attr_class),
+                    visualization.format_classname(
+                        "{0:.2f}".format(datarecord.attr_score)
+                    ),
                     visualization.format_word_importances(
                         datarecord.raw_input_ids, datarecord.word_attributions
                     ),
     return html
 def show_explanation(model, input_ids, attention_mask, index=None, start_layer=0):
     # generate an explanation for the input
+    output, expl = generate_relevance(
+        model, input_ids, attention_mask, index=index, start_layer=start_layer
+    )
     print(output.shape, expl.shape)
     # normalize scores
     scaler = PyTMinMaxScalerVectorized()
     # get the model classification
     output = torch.nn.functional.softmax(output, dim=-1)
     vis_data_records = []
     for record in range(input_ids.size(0)):
         classification = output[record].argmax(dim=-1).item()
         # if the classification is negative, higher explanation scores are more negative
         # flip for visualization
         if class_name == "NEGATIVE":
+            nrm *= -1
+        tokens = tokenizer.convert_ids_to_tokens(input_ids[record].flatten())[
+            1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
+        ]
         print([(tokens[i], nrm[i].item()) for i in range(len(tokens))])
+        vis_data_records.append(
+            visualization.VisualizationDataRecord(
+                nrm,
+                output[record][classification],
+                classification,
+                classification,
+                index,
+                1,
+                tokens,
+                1,
+            )
+        )
     return visualize_text(vis_data_records)
 def run(input_text):
     text_batch = [input_text]
+    encoding = tokenizer(text_batch, return_tensors="pt")
+    input_ids = encoding["input_ids"].to(device)
+    attention_mask = encoding["attention_mask"].to(device)
     # true class is positive - 1
     true_class = 1
     html = show_explanation(model, input_ids, attention_mask)
     return html
+iface = gradio.Interface(
+    fn=run,
+    inputs="text",
+    outputs="html",
+    title="RoBERTa Explanability",
+    description="Quick demo of a version of [Hila Chefer's](https://github.com/hila-chefer) [Transformer-Explanability](https://github.com/hila-chefer/Transformer-Explainability/) but without the layerwise relevance propagation (as in [Transformer-MM_explainability](https://github.com/hila-chefer/Transformer-MM-Explainability/)) for a RoBERTa model.",
+    examples=[
+        [
+            "This movie was the best movie I have ever seen! some scenes were ridiculous, but acting was great"
+        ],
+        [
+            "I really didn't like this movie. Some of the actors were good, but overall the movie was boring"
+        ],
+    ],
+)
 iface.launch()