
TypeError: __init__() got an unexpected keyword argument 'data_filter' #86

@Y-pandaman

Description


python tools/train.py configs/resnet_ctc_xg.py
2022-06-28 16:28:49,958 - INFO - Use CPU
2022-06-28 16:28:49,958 - INFO - Set cudnn deterministic False
2022-06-28 16:28:49,959 - INFO - Set cudnn benchmark True
2022-06-28 16:28:49,959 - INFO - Set seed 1111
2022-06-28 16:28:49,964 - INFO - Build model
2022-06-28 16:28:50,349 - INFO - GResNet init weights
2022-06-28 16:28:50,710 - INFO - CTCHead init weights
Traceback (most recent call last):
  File "tools/train.py", line 42, in <module>
    main()
  File "tools/train.py", line 37, in main
    runner = TrainRunner(train_cfg, deploy_cfg, common_cfg)
  File "tools/../vedastr/runners/train_runner.py", line 19, in __init__
    train_cfg['data']['train'])
  File "tools/../vedastr/runners/base.py", line 84, in _build_dataloader
    dataset = build_datasets(cfg['dataset'], dict(transform=transform))
  File "tools/../vedastr/datasets/builder.py", line 16, in build_datasets
    datasets = build_from_cfg(cfg, DATASETS, default_args)
  File "tools/../vedastr/utils/common.py", line 14, in build_from_cfg
    return obj_from_dict_registry(cfg, parent, default_args)
  File "tools/../vedastr/utils/common.py", line 79, in obj_from_dict_registry
    return obj_cls(**args)
  File "tools/../vedastr/datasets/concat_dataset.py", line 12, in __init__
    datasets = build_datasets(datasets, default_args=kwargs)
  File "tools/../vedastr/datasets/builder.py", line 13, in build_datasets
    ds = build_from_cfg(icfg, DATASETS, default_args)
  File "tools/../vedastr/utils/common.py", line 14, in build_from_cfg
    return obj_from_dict_registry(cfg, parent, default_args)
  File "tools/../vedastr/utils/common.py", line 79, in obj_from_dict_registry
    return obj_cls(**args)
  File "tools/../vedastr/datasets/concat_dataset.py", line 12, in __init__
    datasets = build_datasets(datasets, default_args=kwargs)
  File "tools/../vedastr/datasets/builder.py", line 13, in build_datasets
    ds = build_from_cfg(icfg, DATASETS, default_args)
  File "tools/../vedastr/utils/common.py", line 14, in build_from_cfg
    return obj_from_dict_registry(cfg, parent, default_args)
  File "tools/../vedastr/utils/common.py", line 79, in obj_from_dict_registry
    return obj_cls(**args)
TypeError: __init__() got an unexpected keyword argument 'data_filter'
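For context, vedastr builds every object in this config through a registry, so each key in a config dict (plus whatever default_args get merged into it) must match a parameter of the target class's __init__. A minimal standalone sketch of that pattern (illustrative only, not vedastr's actual code) reproduces the same TypeError:

# Minimal registry-style builder sketch; names here are hypothetical stand-ins.
class TxtDataset:
    def __init__(self, root):          # note: no 'data_filter' parameter
        self.root = root

REGISTRY = {'TxtDataset': TxtDataset}

def build_from_cfg(cfg, registry, default_args=None):
    args = dict(cfg)
    args.update(default_args or {})    # default_args are merged into the kwargs
    obj_cls = registry[args.pop('type')]
    return obj_cls(**args)             # any unexpected key fails right here

build_from_cfg(dict(type='TxtDataset', root='training/todg/'),
               REGISTRY,
               default_args=dict(data_filter=True))
# -> TypeError: __init__() got an unexpected keyword argument 'data_filter'

The config that triggers this follows.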

###############################################################################

# 1. deploy

size = (32, 100)
mean, std = 0.5, 0.5

sensitive = False

# superseded by the custom charset below
# character = 'abcdefghijklmnopqrstuvwxyz0123456789'

character = ('0123456789abcdefghijklmnopq'
             'rstuvwxyzABCDEFGHIJKLMNOPQRS'
             'TUVWXYZ-:')  # need character
test_sensitive = False
test_character = '0123456789abcdefghijklmnopqrstuvwxyz'
batch_max_length = 25

norm_cfg = dict(type='BN')
num_class = len(character) + 1
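The +1 in num_class reserves an index for the CTC blank token on top of the 64 charset symbols; a quick standalone check (plain Python, nothing vedastr-specific):

charset = ('0123456789abcdefghijklmnopq'
           'rstuvwxyzABCDEFGHIJKLMNOPQRS'
           'TUVWXYZ-:')
assert len(charset) + 1 == 65   # 64 symbols + 1 CTC blank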

deploy = dict(
    gpu_id='0',
    transform=[
        dict(type='Sensitive', sensitive=sensitive, need_character=character),
        dict(type='ToGray'),
        dict(type='Resize', size=size),
        dict(type='Normalize', mean=mean, std=std),
        dict(type='ToTensor'),
    ],
    converter=dict(
        type='CTCConverter',
        character=character,
        batch_max_length=batch_max_length,
    ),
    model=dict(
        type='GModel',
        need_text=False,
        body=dict(
            type='GBody',
            pipelines=[
                dict(
                    type='FeatureExtractorComponent',
                    from_layer='input',
                    to_layer='cnn_feat',
                    arch=dict(
                        encoder=dict(
                            backbone=dict(
                                type='GResNet',
                                layers=[
                                    ('conv', dict(type='ConvModule', in_channels=1, out_channels=32,
                                                  kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('conv', dict(type='ConvModule', in_channels=32, out_channels=64,
                                                  kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2, padding=0)),
                                    ('block', dict(block_name='BasicBlock', planes=128, blocks=1, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=128, out_channels=128,
                                                  kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2, padding=0)),
                                    ('block', dict(block_name='BasicBlock', planes=256, blocks=2, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=256, out_channels=256,
                                                  kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=(2, 1), padding=(0, 1))),
                                    ('block', dict(block_name='BasicBlock', planes=512, blocks=5, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512,
                                                  kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('block', dict(block_name='BasicBlock', planes=512, blocks=3, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512,
                                                  kernel_size=2, stride=(2, 1), padding=(0, 1), norm_cfg=norm_cfg)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512,
                                                  kernel_size=2, stride=1, padding=0, norm_cfg=norm_cfg)),
                                ],
                            ),
                        ),
                        collect=dict(type='CollectBlock', from_layer='c4'),
                    ),
                ),
            ],
        ),
        head=dict(
            type='CTCHead',
            from_layer='cnn_feat',
            num_class=num_class,
            in_channels=512,
            pool=dict(
                type='AdaptiveAvgPool2d',
                output_size=(1, None),
            ),
        ),
    ),
    postprocess=dict(
        sensitive=test_sensitive,
        character=test_character,
    ),
)
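For reference, tracing the spatial shape through the backbone above with the usual conv/pool formula floor((n + 2p - k) / s) + 1 (my own arithmetic, assuming the stride-1 BasicBlocks preserve resolution) shows the 32x100 input collapsing to a height-1, width-26 map, i.e. a 26-step sequence for the CTC head:

# Rough shape trace of the GResNet stack above; the k3/s1/p1 convs preserve H x W,
# so only the pools and the last two k2 convs change the spatial size.
def out(n, k, s, p):
    return (n + 2 * p - k) // s + 1

h, w = 32, 100                            # input size
h, w = out(h, 2, 2, 0), out(w, 2, 2, 0)   # pool 1 -> 16 x 50
h, w = out(h, 2, 2, 0), out(w, 2, 2, 0)   # pool 2 -> 8 x 25
h, w = out(h, 2, 2, 0), out(w, 2, 1, 1)   # pool 3, stride (2,1), pad (0,1) -> 4 x 26
h, w = out(h, 2, 2, 0), out(w, 2, 1, 1)   # k2 conv, stride (2,1), pad (0,1) -> 2 x 27
h, w = out(h, 2, 1, 0), out(w, 2, 1, 0)   # k2 conv, stride 1, pad 0 -> 1 x 26
print(h, w)                               # 1 26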

###############################################################################

# 2. common

common = dict(
    seed=1111,
    logger=dict(
        handlers=(
            dict(type='StreamHandler', level='INFO'),
            dict(type='FileHandler', level='INFO'),
        ),
    ),
    cudnn_deterministic=False,
    cudnn_benchmark=True,
    metric=dict(type='Accuracy'),
)
###############################################################################

dataset_params = dict(
    batch_max_length=batch_max_length,
    data_filter=True,
    character=character,
)

test_dataset_params = dict(
    batch_max_length=batch_max_length,
    data_filter=False,
    character=test_character,
)
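These dicts are splatted into the dataset configs below (**dataset_params / **test_dataset_params), and the traceback shows ConcatDatasets re-forwarding its leftover kwargs to every child via default_args. So, presumably, every dataset type reached through the nested tree, TxtDataset included, must accept data_filter in its __init__, which is where the build appears to fail. A hypothetical illustration of the propagation:

# Hypothetical illustration (not vedastr code) of how the extra kwargs travel down.
cfg = dict(
    type='ConcatDatasets',
    datasets=[dict(type='TxtDataset', root='training/todg/')],
    **dataset_params,   # injects batch_max_length, data_filter, character
)
# ConcatDatasets pops 'datasets' and passes the remaining kwargs on, so the
# innermost dataset is effectively constructed as:
#   TxtDataset(root='training/todg/', batch_max_length=25,
#              data_filter=True, character=character)
# which raises TypeError if TxtDataset.__init__ has no data_filter parameter.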

data_root = '/public/datasets_neo/deep-text-recognition-benchmark/'
data_root_xg = '/public/experiments/ypt/xg/'

###############################################################################

# 3. test

batch_size = 192

# data

test_root = data_root + 'evaluation/'

# test_folder_names = ['CUTE80']  # superseded by the full list below

test_folder_names = ['CUTE80', 'IC03_867', 'IC13_1015', 'IC15_2077',
                     'IIIT5k_3000', 'SVT', 'SVTP']
test_dataset = [dict(type='LmdbDataset', root=test_root + f_name,
                     **test_dataset_params) for f_name in test_folder_names]
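For the first folder, for example, the comprehension above expands (with test_dataset_params splatted in) to:

dict(type='LmdbDataset',
     root=test_root + 'CUTE80',
     batch_max_length=25,
     data_filter=False,
     character=test_character)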

test = dict(
    data=dict(
        dataloader=dict(
            type='DataLoader',
            batch_size=batch_size,
            num_workers=4,
            shuffle=False,
        ),
        dataset=test_dataset,
        transform=[
            dict(type='Sensitive', sensitive=test_sensitive, need_character=test_character),
            dict(type='ToGray'),
            dict(type='Resize', size=size),
            dict(type='Normalize', mean=mean, std=std),
            dict(type='ToTensor'),
        ],
    ),
    postprocess_cfg=dict(
        sensitive=test_sensitive,
        character=test_character,
    ),
)

###############################################################################

# 4. train

root_workdir = 'workdir'

# train data

train_root = data_root + 'training/'
train_root_xg = data_root_xg + 'training/'

# MJ dataset

train_root_mj = train_root + 'MJ/'
mj_folder_names = ['MJ_test', 'MJ_valid', 'MJ_train']

# ST dataset

train_root_st = train_root + 'SynthText/'

# TODG dataset

train_root_todg = train_root + 'todg/'

# TODG-plain dataset

train_root_todg_plain = train_root + 'todg_plain/'

# xg dataset

train_root_xg = train_root + 'todg_xg/'

train_dataset_mj = [dict(type='LmdbDataset', root=train_root_mj + folder_name)
                    for folder_name in mj_folder_names]
train_dataset_st = [dict(type='LmdbDataset', root=train_root_st)]
train_dataset_todg = [dict(type='TxtDataset', root=train_root_todg)]
train_dataset_todg_plain = [dict(type='TxtDataset', root=train_root_todg_plain)]
train_dataset_xg = [dict(type='TxtDataset', root=train_root_xg)]

# valid

valid_root = data_root + 'validation/'
valid_dataset = dict(type='LmdbDataset', root=valid_root, **test_dataset_params)

# train transforms

train_transforms = [
    dict(type='Sensitive', sensitive=sensitive, need_character=character),
    dict(type='ToGray'),
    dict(type='Resize', size=size),
    dict(type='Normalize', mean=mean, std=std),
    dict(type='ToTensor'),
]

max_iterations = 300000
milestones = [150000, 250000]

train = dict(
    data=dict(
        train=dict(
            dataloader=dict(
                type='DataLoader',
                batch_size=batch_size,
                num_workers=8,
            ),
            sampler=dict(
                type='BalanceSampler',
                batch_size=batch_size,
                shuffle=True,
                oversample=True,
            ),
            dataset=dict(
                type='ConcatDatasets',
                datasets=[
                    # dict(
                    #     type='ConcatDatasets',
                    #     datasets=train_dataset_mj,
                    # ),
                    # dict(
                    #     type='ConcatDatasets',
                    #     datasets=train_dataset_st,
                    # ),
                    dict(
                        type='ConcatDatasets',
                        datasets=train_dataset_todg,
                    ),
                    dict(
                        type='ConcatDatasets',
                        datasets=train_dataset_todg_plain,
                    ),
                    dict(
                        type='ConcatDatasets',
                        datasets=train_dataset_xg,
                    ),
                ],
                batch_ratio=[0.5, 0.5],
                **dataset_params,
            ),
            transform=train_transforms,
        ),
        val=dict(
            dataloader=dict(
                type='DataLoader',
                batch_size=batch_size,
                num_workers=8,
                shuffle=False,
            ),
            dataset=valid_dataset,
            transform=deploy['transform'],
        ),
    ),
    optimizer=dict(type='Adadelta', lr=1.0, rho=0.95, eps=1e-8),
    criterion=dict(type='CTCLoss', zero_infinity=True),
    lr_scheduler=dict(type='StepLR',
                      iter_based=True,
                      milestones=milestones,
                      ),
    max_iterations=max_iterations,
    log_interval=10,
    trainval_ratio=2000,
    snapshot_interval=20000,
    save_best=True,
    resume=None,
)
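Independent of the data_filter error, note that batch_ratio above has two entries while three ConcatDatasets are listed; samplers that mix sources by ratio usually expect one weight per dataset, so a guard like the following (my own sanity check, not a vedastr API) may be worth running against this config:

# Sanity check (not vedastr code): one ratio per top-level dataset, summing to ~1.
ds_cfg = train['data']['train']['dataset']
assert len(ds_cfg['batch_ratio']) == len(ds_cfg['datasets']), \
    'batch_ratio must list one weight per dataset'
assert abs(sum(ds_cfg['batch_ratio']) - 1.0) < 1e-6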
