# Copyright (c) OpenMMLab. All rights reserved.
import torch

import mmocr.utils as utils
from mmocr.models.builder import CONVERTORS
from .base import BaseConvertor


@CONVERTORS.register_module()
class AttnConvertor(BaseConvertor):
"""Convert between text, index and tensor for encoder-decoder based
pipeline.
Args:
dict_type (str): Type of dict, should be one of {'DICT36', 'DICT90'}.
dict_file (None|str): Character dict file path. If not none,
higher priority than dict_type.
dict_list (None|list[str]): Character list. If not none, higher
priority than dict_type, but lower than dict_file.
with_unknown (bool): If True, add `UKN` token to class.
max_seq_len (int): Maximum sequence length of label.
lower (bool): If True, convert original string to lower case.
start_end_same (bool): Whether use the same index for
start and end token or not. Default: True.
"""
    def __init__(self,
                 dict_type='DICT90',
                 dict_file=None,
                 dict_list=None,
                 with_unknown=True,
                 max_seq_len=40,
                 lower=False,
                 start_end_same=True,
                 **kwargs):
        super().__init__(dict_type, dict_file, dict_list)
        assert isinstance(with_unknown, bool)
        assert isinstance(max_seq_len, int)
        assert isinstance(lower, bool)

        self.with_unknown = with_unknown
        self.max_seq_len = max_seq_len
        self.lower = lower
        self.start_end_same = start_end_same
        self.update_dict()
    def update_dict(self):
        start_end_token = '<BOS/EOS>'
        unknown_token = '<UKN>'
        padding_token = '<PAD>'

        # unknown
        self.unknown_idx = None
        if self.with_unknown:
            self.idx2char.append(unknown_token)
            self.unknown_idx = len(self.idx2char) - 1

        # BOS/EOS
        self.idx2char.append(start_end_token)
        self.start_idx = len(self.idx2char) - 1
        if not self.start_end_same:
            self.idx2char.append(start_end_token)
        self.end_idx = len(self.idx2char) - 1

        # padding
        self.idx2char.append(padding_token)
        self.padding_idx = len(self.idx2char) - 1

        # update char2idx
        self.char2idx = {}
        for idx, char in enumerate(self.idx2char):
            self.char2idx[char] = idx
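
    # Illustration (not in the original source): with the defaults
    # (with_unknown=True, start_end_same=True) and a 36-character dictionary,
    # update_dict() yields unknown_idx=36, start_idx=end_idx=37 and
    # padding_idx=38, for 39 classes in total; with start_end_same=False the
    # end token gets its own index (end_idx=38, padding_idx=39).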
    def str2tensor(self, strings):
        """
        Convert text-string into tensor.
        Args:
            strings (list[str]): ['hello', 'world']
        Returns:
            dict (str: Tensor | list[tensor]):
                targets (list[Tensor]): [torch.Tensor([1,2,3,3,4]),
                    torch.Tensor([5,4,6,3,7])]
                padded_targets (Tensor(bsz * max_seq_len))
        """
        assert utils.is_type_list(strings, str)

        tensors, padded_targets = [], []
        indexes = self.str2idx(strings)
        for index in indexes:
            tensor = torch.LongTensor(index)
            tensors.append(tensor)
            # target tensor for loss: wrap the label with <BOS> and <EOS>
            src_target = torch.LongTensor(tensor.size(0) + 2).fill_(0)
            src_target[-1] = self.end_idx
            src_target[0] = self.start_idx
            src_target[1:-1] = tensor
            # pad (or truncate) to max_seq_len with <PAD>
            padded_target = (torch.ones(self.max_seq_len) *
                             self.padding_idx).long()
            char_num = src_target.size(0)
            if char_num > self.max_seq_len:
                padded_target = src_target[:self.max_seq_len]
            else:
                padded_target[:char_num] = src_target
            padded_targets.append(padded_target)
        padded_targets = torch.stack(padded_targets, 0).long()

        return {'targets': tensors, 'padded_targets': padded_targets}
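
    # Worked example (assumed indices, not in the original source): with the
    # 36-character layout sketched above (start_idx=end_idx=37,
    # padding_idx=38) and max_seq_len=8, the label 'ab' ('a'->10, 'b'->11)
    # becomes [37, 10, 11, 37, 38, 38, 38, 38]: BOS, the character indexes,
    # EOS, then padding up to max_seq_len.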
    def tensor2idx(self, outputs, img_metas=None):
        """
        Convert output tensor to text-index.
        Args:
            outputs (tensor): model outputs with size: N * T * C
            img_metas (list[dict]): Each dict contains one image info.
        Returns:
            indexes (list[list[int]]): [[1,2,3,3,4], [5,4,6,3,7]]
            scores (list[list[float]]): [[0.9,0.8,0.95,0.97,0.94],
                [0.9,0.9,0.98,0.97,0.96]]
        """
        batch_size = outputs.size(0)
        ignore_indexes = [self.padding_idx]
        indexes, scores = [], []
        for idx in range(batch_size):
            seq = outputs[idx, :, :]
            # greedy decoding: pick the best class at every time step
            max_value, max_idx = torch.max(seq, -1)
            str_index, str_score = [], []
            output_index = max_idx.cpu().detach().numpy().tolist()
            output_score = max_value.cpu().detach().numpy().tolist()
            for char_index, char_score in zip(output_index, output_score):
                if char_index in ignore_indexes:
                    continue
                if char_index == self.end_idx:
                    break
                str_index.append(char_index)
                str_score.append(char_score)
            indexes.append(str_index)
            scores.append(str_score)

        return indexes, scores
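
# Minimal usage sketch (not part of the original module; assumes mmocr is
# installed and that BaseConvertor provides the num_classes()/idx2str()
# helpers). str2tensor pads every label to max_seq_len, and tensor2idx
# greedily decodes logits, skipping <PAD> and stopping at the first
# <BOS/EOS>. Random logits stand in for a real decoder here.
if __name__ == '__main__':
    convertor = AttnConvertor(dict_type='DICT90', max_seq_len=40)
    batch = convertor.str2tensor(['hello', 'world'])
    print(batch['padded_targets'].shape)  # torch.Size([2, 40])

    fake_logits = torch.randn(2, 40, convertor.num_classes())
    indexes, scores = convertor.tensor2idx(fake_logits)
    print(convertor.idx2str(indexes))  # decoded (random) strings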