import math

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F

try:
    from torchvision.ops import roi_align, roi_pool
except ImportError:
    # Nothing in the heads defined below uses these ops, so a missing
    # torchvision should not prevent the heads from being imported.
    roi_align = roi_pool = None


def _apply_dropout(dropout, x):
    """Return ``dropout(x)``, or ``x`` unchanged when ``dropout`` is None.

    The heads store ``None`` in ``self.dropout`` when ``dropout_ratio == 0``;
    calling that attribute directly would raise ``TypeError``.
    """
    if dropout is None:
        return x
    return dropout(x)


class VQAHead(nn.Module):
    """MLP Regression Head for VQA.

    The "MLP" is two 1x1x1 ``nn.Conv3d`` layers with a GELU in between, so the
    input must be something ``nn.Conv3d`` accepts, with ``in_channels``
    channels. The output has a single channel.

    Args:
        in_channels: input channels for MLP
        hidden_channels: hidden channels for MLP
        dropout_ratio: the dropout ratio for features before the MLP
            (default 0.5). The same dropout is applied again before the last
            layer. ``0`` disables dropout and leaves ``self.dropout`` as None.
        pre_pool: whether pre-pool the features or not
            (True for Aesthetic Attributes, False for Technical Attributes)
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        in_channels=768,
        hidden_channels=64,
        dropout_ratio=0.5,
        pre_pool=False,
        **kwargs
    ):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.pre_pool = pre_pool
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1))
        self.fc_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1))
        self.gelu = nn.GELU()
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

    def forward(self, x, rois=None):
        """Compute the quality-score map for features ``x``.

        Args:
            x: feature tensor with ``in_channels`` channels.
            rois: unused; kept for interface compatibility.

        Returns:
            Single-channel tensor. Its non-channel dimensions are all 1 when
            ``pre_pool`` is True, otherwise they match those of ``x``.
        """
        if self.pre_pool:
            x = self.avg_pool(x)
        x = _apply_dropout(self.dropout, x)
        hidden = self.gelu(self.fc_hid(x))
        qlt_score = self.fc_last(_apply_dropout(self.dropout, hidden))
        return qlt_score


class VARHead(nn.Module):
    """Prediction head for Video Action Recognition.

    Applies dropout, global average pooling to 1x1x1, and a single 1x1x1
    ``nn.Conv3d`` that maps ``in_channels`` to ``out_channels``.

    Args:
        in_channels: input channels of the features
        out_channels: output channels of the head (default 400; presumably the
            number of action classes -- confirm against the training config)
        dropout_ratio: the dropout ratio for features before pooling
            (default 0.5). ``0`` disables dropout and leaves ``self.dropout``
            as None.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels=768, out_channels=400, dropout_ratio=0.5, **kwargs):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.out_channels = out_channels
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc = nn.Conv3d(self.in_channels, self.out_channels, (1, 1, 1))
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

    def forward(self, x, rois=None):
        """Return the pooled prediction for features ``x``.

        Args:
            x: feature tensor with ``in_channels`` channels.
            rois: unused; kept for interface compatibility.

        Returns:
            Tensor with ``out_channels`` channels and 1x1x1 pooled dimensions.
        """
        x = _apply_dropout(self.dropout, x)
        x = self.avg_pool(x)
        out = self.fc(x)
        return out


class IQAHead(nn.Module):
    """MLP Regression Head for IQA.

    Two ``nn.Linear`` layers with a GELU in between; the input's last
    dimension must be ``in_channels`` and the output's last dimension is 1.

    Args:
        in_channels: input channels for MLP
        hidden_channels: hidden channels for MLP
        dropout_ratio: the dropout ratio for features before the MLP
            (default 0.5). The same dropout is applied again before the last
            layer. ``0`` disables dropout and leaves ``self.dropout`` as None.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs
    ):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_hid = nn.Linear(self.in_channels, self.hidden_channels)
        self.fc_last = nn.Linear(self.hidden_channels, 1)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the quality score for features ``x``.

        Args:
            x: feature tensor whose last dimension is ``in_channels``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        x = _apply_dropout(self.dropout, x)
        hidden = self.gelu(self.fc_hid(x))
        qlt_score = self.fc_last(_apply_dropout(self.dropout, hidden))
        return qlt_score