import sys
import gradio

sys.path.append("BERT_explainability")

import torch
from BERT_explainability.ExplanationGenerator import Generator
from BERT_explainability.roberta2 import RobertaForSequenceClassification
from transformers import AutoTokenizer
from captum.attr import visualization


# from https://discuss.pytorch.org/t/using-scikit-learns-scalers-for-torchvision/53455
class PyTMinMaxScalerVectorized(object):
    """
    Transforms each channel to the range [0, 1] (in place).
    """

    def __init__(self, dimension=-1):
        self.d = dimension

    def __call__(self, tensor):
        d = self.d
        scale = 1.0 / (
            tensor.max(dim=d, keepdim=True)[0] - tensor.min(dim=d, keepdim=True)[0]
        )
        tensor.mul_(scale).sub_(tensor.min(dim=d, keepdim=True)[0])
        return tensor


if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = RobertaForSequenceClassification.from_pretrained(
    "textattack/roberta-base-SST-2"
).to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
# initialize the explanations generator
explanations = Generator(model, "roberta")
classifications = ["NEGATIVE", "POSITIVE"]


# rule 5 from paper: elementwise-multiply attention by its gradient,
# set negative values to 0, then average over the heads dimension
def avg_heads(cam, grad):
    cam = (grad * cam).clamp(min=0).mean(dim=-3)
    return cam


# rule 6 from paper: propagate the accumulated relevance through one self-attention layer
def apply_self_attention_rules(R_ss, cam_ss):
    R_ss_addition = torch.matmul(cam_ss, R_ss)
    return R_ss_addition


def generate_relevance(model, input_ids, attention_mask, index=None, start_layer=0):
    output = model(input_ids=input_ids, attention_mask=attention_mask)[0]
    if index is None:
        # by default, explain the class with the highest score
        index = output.argmax(dim=-1).detach().cpu().numpy()

    # create a one-hot vector selecting the class we want explanations for
    one_hot = (
        torch.nn.functional.one_hot(
            torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1)
        )
        .to(torch.float)
        .requires_grad_(True)
    ).to(device)
    print("ONE_HOT", one_hot.size(), one_hot)
    one_hot = torch.sum(one_hot * output)
    model.zero_grad()
    # backpropagate to obtain attention gradients for the class we're interested in
    one_hot.backward(retain_graph=True)

    num_tokens = model.roberta.encoder.layer[0].attention.self.get_attn().shape[-1]
    print(input_ids.size(-1), num_tokens)
    # start from the identity: each token is initially only relevant to itself
    R = torch.eye(num_tokens).expand(output.size(0), -1, -1).clone().to(device)
    for i, blk in enumerate(model.roberta.encoder.layer):
        if i < start_layer:
            continue
        grad = blk.attention.self.get_attn_gradients()
        cam = blk.attention.self.get_attn()
        cam = avg_heads(cam, grad)
        joint = apply_self_attention_rules(R, cam)
        R += joint
    # relevance of each non-special token with respect to the <s> (classification) token
    return output, R[:, 0, 1:-1]
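
# Usage sketch (added for illustration, not part of the original app): shows how
# generate_relevance could be combined with the tokenizer and the scaler defined
# above to get per-token relevance scores for a single sentence. The helper name
# `demo_relevance` and the example sentence are assumptions.
def demo_relevance(text="This movie was great!"):
    encoding = tokenizer([text], return_tensors="pt").to(device)
    logits, relevance = generate_relevance(
        model, encoding["input_ids"], encoding["attention_mask"]
    )
    # scale each example's relevance to [0, 1] for easier comparison/display
    scores = PyTMinMaxScalerVectorized()(relevance.detach().clone())
    # R[:, 0, 1:-1] already drops the <s>/</s> positions, so align the tokens accordingly
    tokens = tokenizer.convert_ids_to_tokens(encoding["input_ids"][0])[1:-1]
    return logits.softmax(dim=-1).detach(), list(zip(tokens, scores[0].tolist()))
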
True Label | " "Predicted Label | " "Attribution Label | " "Attribution Score | " "Word Importance | " ] for datarecord in datarecords: rows.append( "".join( [ "
---|---|---|---|---|