Create modeling_roberta.py
Browse files- modeling_roberta.py +48 -0
modeling_roberta.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
from transformers import (
|
4 |
+
RobertaModel,
|
5 |
+
RobertaForQuestionAnswering,
|
6 |
+
)
|
7 |
+
|
8 |
+
|
9 |
+
class SpanPredictionHead(nn.Module):
    """Head for span prediction tasks.

    Can be viewed as a 2-class output layer that is applied to every position.

    Args:
        input_dim: Size of the last dimension of the incoming features.
        inner_dim: Width of the intermediate dense layer.
        num_classes: Number of output classes; only 2 is supported.
        pooler_dropout: Dropout probability, applied once before the dense
            layer and once before the output projection.

    Raises:
        ValueError: If ``num_classes`` is not 2.
    """

    def __init__(self, input_dim, inner_dim, num_classes, pooler_dropout):
        # Raise explicitly rather than `assert`: asserts are removed when
        # Python runs with -O, which would let an unsupported head size through.
        if num_classes != 2:
            raise ValueError(
                f"SpanPredictionHead requires num_classes == 2, got {num_classes!r}"
            )
        super().__init__()
        self.dense = nn.Linear(input_dim, inner_dim)
        self.dropout = nn.Dropout(p=pooler_dropout)
        self.out_proj = nn.Linear(inner_dim, num_classes)

    def forward(self, features, **kwargs):
        """Project per-position features to per-position class logits.

        Args:
            features: Tensor whose last dimension is ``input_dim``; every
                position is used (no pooling / first-token selection).
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            Tensor of logits, B x T x C. Softmax should be taken over T,
            not over C.
        """
        x = features  # take features across ALL positions
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        # The same dropout module is reused before the output projection.
        x = self.dropout(x)
        x = self.out_proj(x)
        return x  # B x T x C, but softmax should be taken over T
|
29 |
+
|
30 |
+
|
31 |
+
class RobertaForPororoMRC(RobertaForQuestionAnswering):
    """RoBERTa question-answering model that uses a ``SpanPredictionHead``.

    The constructor builds a ``RobertaModel`` without a pooling layer and
    installs a ``SpanPredictionHead`` as ``qa_outputs``. Besides the usual
    fields, ``config`` must provide ``hidden_size``, ``span_head_inner_dim``
    and ``span_head_dropout``. ``config.num_labels`` is overwritten with 2 on
    the object that was passed in.
    """

    def __init__(self, config):
        # Initialize on RobertaPreTrainedModel: start the MRO lookup *after*
        # RobertaForQuestionAnswering so that its own __init__ is skipped.
        super(RobertaForQuestionAnswering, self).__init__(config)

        # The span head only supports two output classes.
        config.num_labels = 2
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)

        head_kwargs = {
            "input_dim": config.hidden_size,
            "inner_dim": config.span_head_inner_dim,
            "num_classes": config.num_labels,
            "pooler_dropout": config.span_head_dropout,
        }
        self.qa_outputs = SpanPredictionHead(**head_kwargs)

        self.init_weights()
|