pororo-roberta-base-mrc / modeling_roberta.py
jinmang2's picture
Create modeling_roberta.py
1f878a8
raw
history blame
1.53 kB
import torch
import torch.nn as nn
from transformers import (
RobertaModel,
RobertaForQuestionAnswering,
)
class SpanPredictionHead(nn.Module):
    """Head for span prediction tasks.

    Can be viewed as a 2-class output layer that is applied to every
    position. The pipeline is: dropout -> linear -> tanh -> dropout -> linear.

    Args:
        input_dim: Size of the last dimension of the incoming features.
        inner_dim: Output size of the intermediate dense layer.
        num_classes: Number of outputs per position; must be exactly 2.
        pooler_dropout: Dropout probability used before each linear layer.

    Raises:
        ValueError: If ``num_classes`` is not 2.
    """

    def __init__(
        self,
        input_dim: int,
        inner_dim: int,
        num_classes: int,
        pooler_dropout: float,
    ) -> None:
        # Validate explicitly rather than with `assert`: assertions are removed
        # when Python runs with -O, which would let a wrong head size through.
        if num_classes != 2:
            raise ValueError(
                f"SpanPredictionHead requires num_classes == 2, got {num_classes!r}"
            )
        super().__init__()
        self.dense = nn.Linear(input_dim, inner_dim)
        # A single dropout module is shared by both dropout steps in forward().
        self.dropout = nn.Dropout(p=pooler_dropout)
        self.out_proj = nn.Linear(inner_dim, num_classes)

    def forward(self, features: torch.Tensor, **kwargs) -> torch.Tensor:
        """Project per-position features to ``num_classes`` scores.

        Args:
            features: Tensor whose last dimension is ``input_dim``; features
                across ALL positions are used (no pooling/selection is done).
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            Tensor with the same leading dimensions as ``features`` and a last
            dimension of ``num_classes`` (B x T x C for B x T x input_dim
            input). Softmax should be taken over T, not over C.
        """
        x = self.dropout(features)
        x = torch.tanh(self.dense(x))
        x = self.dropout(x)
        return self.out_proj(x)
class RobertaForPororoMRC(RobertaForQuestionAnswering):
    """RoBERTa question-answering model that uses ``SpanPredictionHead``.

    The encoder is a ``RobertaModel`` built without its pooling layer, and
    ``qa_outputs`` is a ``SpanPredictionHead`` sized from the config.

    The config must provide ``hidden_size``, ``span_head_inner_dim`` and
    ``span_head_dropout``. ``config.num_labels`` is overwritten with 2.
    """

    def __init__(self, config):
        # Start the initializer lookup *after* RobertaForQuestionAnswering in
        # the MRO, so the parent's own __init__ body is not executed here
        # (the original note: "Initialize on RobertaPreTrainedModel").
        super(RobertaForQuestionAnswering, self).__init__(config)

        # The span head only supports two outputs per position; force it on
        # the config and mirror the stored value on the model.
        config.num_labels = 2
        self.num_labels = config.num_labels

        # Encoder without the pooling layer.
        self.roberta = RobertaModel(config, add_pooling_layer=False)

        head_kwargs = {
            "input_dim": config.hidden_size,
            "inner_dim": config.span_head_inner_dim,
            "num_classes": config.num_labels,
            "pooler_dropout": config.span_head_dropout,
        }
        self.qa_outputs = SpanPredictionHead(**head_kwargs)

        self.init_weights()