Spaces:
Runtime error
Runtime error
File size: 3,390 Bytes
2366e36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_adam_step_5e.py'
]
dict_file = 'data/chineseocr/labels/dict_printed_chinese_english_digits.txt'
label_convertor = dict(
type='AttnConvertor', dict_file=dict_file, with_unknown=True)
model = dict(
type='SARNet',
backbone=dict(type='ResNet31OCR'),
encoder=dict(
type='SAREncoder',
enc_bi_rnn=False,
enc_do_rnn=0.1,
enc_gru=False,
),
decoder=dict(
type='ParallelSARDecoder',
enc_bi_rnn=False,
dec_bi_rnn=False,
dec_do_rnn=0,
dec_gru=False,
pred_dropout=0.1,
d_k=512,
pred_concat=True),
loss=dict(type='SARLoss'),
label_convertor=label_convertor,
max_seq_len=30)
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=256,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=256,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'resize_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
train_prefix = 'data/chinese/'
train_ann_file = train_prefix + 'labels/train.txt'
train = dict(
type=dataset_type,
img_prefix=train_prefix,
ann_file=train_ann_file,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=None,
test_mode=False)
test_prefix = 'data/chineseocr/'
test_ann_file = test_prefix + 'labels/test.txt'
test = dict(
type=dataset_type,
img_prefix=test_prefix,
ann_file=test_ann_file,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=None,
test_mode=False)
data = dict(
samples_per_gpu=40,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='UniformConcatDataset', datasets=[train],
pipeline=train_pipeline),
val=dict(
type='UniformConcatDataset', datasets=[test], pipeline=test_pipeline),
test=dict(
type='UniformConcatDataset', datasets=[test], pipeline=test_pipeline))
evaluation = dict(interval=1, metric='acc')
|