chenxwh
/

AVeriTeC

Model card Files Files and versions Community

Chenxi Whitehouse committed on Apr 12

Commit

2b4f5ff

•

1 Parent(s): 5e94756

add src file for models

Browse files

Files changed (3) hide show

src/models/DualEncoderModule.py +143 -0
src/models/SequenceClassificationModule.py +178 -0
src/reranking/rerank_questions.py +1 -1

src/models/DualEncoderModule.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import pytorch_lightning as pl
+import torch
+from transformers.optimization import AdamW
+import torchmetrics
+class DualEncoderModule(pl.LightningModule):
+    def __init__(self, tokenizer, model, learning_rate=1e-3):
+        super().__init__()
+        self.tokenizer = tokenizer
+        self.model = model
+        self.learning_rate = learning_rate
+        self.train_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+        self.val_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+        self.test_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+    def forward(self, input_ids, **kwargs):
+        return self.model(input_ids, **kwargs)
+    def configure_optimizers(self):
+        optimizer = AdamW(self.parameters(), lr=self.learning_rate)
+        return optimizer
+    def training_step(self, batch, batch_idx):
+        pos_ids, pos_mask, neg_ids, neg_mask = batch
+        neg_ids = neg_ids.view(-1, neg_ids.shape[-1])
+        neg_mask = neg_mask.view(-1, neg_mask.shape[-1])
+        pos_outputs = self(
+            pos_ids,
+            attention_mask=pos_mask,
+            labels=torch.ones(pos_ids.shape[0], dtype=torch.uint8).to(
+                pos_ids.get_device()
+            ),
+        )
+        neg_outputs = self(
+            neg_ids,
+            attention_mask=neg_mask,
+            labels=torch.zeros(neg_ids.shape[0], dtype=torch.uint8).to(
+                neg_ids.get_device()
+            ),
+        )
+        loss_scale = 1.0
+        loss = pos_outputs.loss + loss_scale * neg_outputs.loss
+        pos_logits = pos_outputs.logits
+        pos_preds = torch.argmax(pos_logits, axis=1)
+        self.train_acc(
+            pos_preds.cpu(), torch.ones(pos_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        neg_logits = neg_outputs.logits
+        neg_preds = torch.argmax(neg_logits, axis=1)
+        self.train_acc(
+            neg_preds.cpu(), torch.zeros(neg_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        return {"loss": loss}
+    def validation_step(self, batch, batch_idx):
+        pos_ids, pos_mask, neg_ids, neg_mask = batch
+        neg_ids = neg_ids.view(-1, neg_ids.shape[-1])
+        neg_mask = neg_mask.view(-1, neg_mask.shape[-1])
+        pos_outputs = self(
+            pos_ids,
+            attention_mask=pos_mask,
+            labels=torch.ones(pos_ids.shape[0], dtype=torch.uint8).to(
+                pos_ids.get_device()
+            ),
+        )
+        neg_outputs = self(
+            neg_ids,
+            attention_mask=neg_mask,
+            labels=torch.zeros(neg_ids.shape[0], dtype=torch.uint8).to(
+                neg_ids.get_device()
+            ),
+        )
+        loss_scale = 1.0
+        loss = pos_outputs.loss + loss_scale * neg_outputs.loss
+        pos_logits = pos_outputs.logits
+        pos_preds = torch.argmax(pos_logits, axis=1)
+        self.val_acc(
+            pos_preds.cpu(), torch.ones(pos_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        neg_logits = neg_outputs.logits
+        neg_preds = torch.argmax(neg_logits, axis=1)
+        self.val_acc(
+            neg_preds.cpu(), torch.zeros(neg_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        self.log("val_acc", self.val_acc)
+        return {"loss": loss}
+    def test_step(self, batch, batch_idx):
+        pos_ids, pos_mask, neg_ids, neg_mask = batch
+        neg_ids = neg_ids.view(-1, neg_ids.shape[-1])
+        neg_mask = neg_mask.view(-1, neg_mask.shape[-1])
+        pos_outputs = self(
+            pos_ids,
+            attention_mask=pos_mask,
+            labels=torch.ones(pos_ids.shape[0], dtype=torch.uint8).to(
+                pos_ids.get_device()
+            ),
+        )
+        neg_outputs = self(
+            neg_ids,
+            attention_mask=neg_mask,
+            labels=torch.zeros(neg_ids.shape[0], dtype=torch.uint8).to(
+                neg_ids.get_device()
+            ),
+        )
+        pos_logits = pos_outputs.logits
+        pos_preds = torch.argmax(pos_logits, axis=1)
+        self.test_acc(
+            pos_preds.cpu(), torch.ones(pos_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        neg_logits = neg_outputs.logits
+        neg_preds = torch.argmax(neg_logits, axis=1)
+        self.test_acc(
+            neg_preds.cpu(), torch.zeros(neg_ids.shape[0], dtype=torch.uint8).cpu()
+        )
+        self.log("test_acc", self.test_acc)

src/models/SequenceClassificationModule.py ADDED Viewed

	@@ -0,0 +1,178 @@

+import pytorch_lightning as pl
+import torch
+from transformers.optimization import AdamW
+import torchmetrics
+from torchmetrics.classification import F1Score
+class SequenceClassificationModule(pl.LightningModule):
+    def __init__(
+        self, tokenizer, model, use_question_stance_approach=True, learning_rate=1e-3
+    ):
+        super().__init__()
+        self.tokenizer = tokenizer
+        self.model = model
+        self.learning_rate = learning_rate
+        self.train_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+        self.val_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+        self.test_acc = torchmetrics.Accuracy(
+            task="multiclass", num_classes=model.num_labels
+        )
+        self.train_f1 = F1Score(
+            task="multiclass", num_classes=model.num_labels, average="macro"
+        )
+        self.val_f1 = F1Score(
+            task="multiclass", num_classes=model.num_labels, average=None
+        )
+        self.test_f1 = F1Score(
+            task="multiclass", num_classes=model.num_labels, average=None
+        )
+        self.use_question_stance_approach = use_question_stance_approach
+    def forward(self, input_ids, **kwargs):
+        return self.model(input_ids, **kwargs)
+    def configure_optimizers(self):
+        optimizer = AdamW(self.parameters(), lr=self.learning_rate)
+        return optimizer
+    def training_step(self, batch, batch_idx):
+        x, x_mask, y = batch
+        outputs = self(x, attention_mask=x_mask, labels=y)
+        logits = outputs.logits
+        loss = outputs.loss
+        preds = torch.argmax(logits, axis=1)
+        self.log("train_loss", loss)
+        return {"loss": loss}
+    def validation_step(self, batch, batch_idx):
+        x, x_mask, y = batch
+        outputs = self(x, attention_mask=x_mask, labels=y)
+        logits = outputs.logits
+        loss = outputs.loss
+        preds = torch.argmax(logits, axis=1)
+        if not self.use_question_stance_approach:
+            self.val_acc(preds, y)
+            self.log("val_acc_step", self.val_acc)
+            self.val_f1(preds, y)
+            self.log("val_loss", loss)
+        return {"val_loss": loss, "src": x, "pred": preds, "target": y}
+    def validation_epoch_end(self, outs):
+        if self.use_question_stance_approach:
+            self.handle_end_of_epoch_scoring(outs, self.val_acc, self.val_f1)
+        self.log("val_acc_epoch", self.val_acc)
+        f1 = self.val_f1.compute()
+        self.val_f1.reset()
+        self.log("val_f1_epoch", torch.mean(f1))
+        class_names = ["supported", "refuted", "nei", "conflicting"]
+        for i, c_name in enumerate(class_names):
+            self.log("val_f1_" + c_name, f1[i])
+    def test_step(self, batch, batch_idx):
+        x, x_mask, y = batch
+        outputs = self(x, attention_mask=x_mask)
+        logits = outputs.logits
+        preds = torch.argmax(logits, axis=1)
+        if not self.use_question_stance_approach:
+            self.test_acc(preds, y)
+            self.log("test_acc_step", self.test_acc)
+            self.test_f1(preds, y)
+        return {"src": x, "pred": preds, "target": y}
+    def test_epoch_end(self, outs):
+        if self.use_question_stance_approach:
+            self.handle_end_of_epoch_scoring(outs, self.test_acc, self.test_f1)
+        self.log("test_acc_epoch", self.test_acc)
+        f1 = self.test_f1.compute()
+        self.test_f1.reset()
+        self.log("test_f1_epoch", torch.mean(f1))
+        class_names = ["supported", "refuted", "nei", "conflicting"]
+        for i, c_name in enumerate(class_names):
+            self.log("test_f1_" + c_name, f1[i])
+    def handle_end_of_epoch_scoring(self, outputs, acc_scorer, f1_scorer):
+        gold_labels = {}
+        question_support = {}
+        for out in outputs:
+            srcs = out["src"]
+            preds = out["pred"]
+            tgts = out["target"]
+            tokens = self.tokenizer.batch_decode(
+                srcs, skip_special_tokens=True, clean_up_tokenization_spaces=True
+            )
+            for src, pred, tgt in zip(tokens, preds, tgts):
+                claim_id = src.split("[ question ]")[0]
+                if claim_id not in gold_labels:
+                    gold_labels[claim_id] = tgt
+                    question_support[claim_id] = []
+                question_support[claim_id].append(pred)
+        for k, gold_label in gold_labels.items():
+            support = question_support[k]
+            has_unanswerable = False
+            has_true = False
+            has_false = False
+            for v in support:
+                if v == 0:
+                    has_true = True
+                if v == 1:
+                    has_false = True
+                if v in (
+                    2,
+                    3,
+                ):  # TODO very ugly hack -- we cant have different numbers of labels for train and test so we do this
+                    has_unanswerable = True
+            if has_unanswerable:
+                answer = 2
+            elif has_true and not has_false:
+                answer = 0
+            elif has_false and not has_true:
+                answer = 1
+            elif has_true and has_false:
+                answer = 3
+            # TODO this is very hacky and wont work if the device is literally anything other than cuda:0
+            acc_scorer(
+                torch.as_tensor([answer]).to("cuda:0"),
+                torch.as_tensor([gold_label]).to("cuda:0"),
+            )
+            f1_scorer(
+                torch.as_tensor([answer]).to("cuda:0"),
+                torch.as_tensor([gold_label]).to("cuda:0"),
+            )

src/reranking/rerank_questions.py CHANGED Viewed

@@ -17,7 +17,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "-i",
         "--top_k_qa_file",
-        default="data/dev_top_k_qa.json",
         help="Json file with claim and top k generated question-answer pairs.",
     )
     parser.add_argument(

     parser.add_argument(
         "-i",
         "--top_k_qa_file",
+        default="data_store/dev_top_k_qa.json",
         help="Json file with claim and top k generated question-answer pairs.",
     )
     parser.add_argument(