Skip to content

Commit 00b3973

Browse files
committed
jump to 0.3.0
1 parent 21e6635 commit 00b3973

22 files changed

+34272
-5
lines changed

demo/table_recognition/datalist/PubTabNet_2.0.0_val.jsonl

Lines changed: 9115 additions & 0 deletions
Large diffs are not rendered by default.

demo/table_recognition/datalist/PubTabNet_train_datalist_examples.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# PubTabNet
2+
3+
### Dataset list:
4+
PubTabNet_2.0.0_val.jsonl: The original annotation of validation dataset in PubTabNet 2.0.0.
5+
6+
PubTabNet_train_datalist.json: The formatted annotation of the training dataset in PubTabNet 2.0.0, which is used in LGPMA training. Noisy samples are filtered out.
7+
8+
PubTabNet_train_datalist_examples.json: Examples of PubTabNet_train_datalist.json.
9+
10+
#### annotation example:
11+
``` markdown
12+
{
13+
"Images/train/PMC3348833_020_01.png": {
14+
"height": 90,
15+
"width": 395,
16+
"content_ann": {
17+
"bboxes": [
18+
[40, 4, 75, 20], # bbox of the text region in a cell. Empty cells are denoted as [].
19+
[144, 4, 163, 20],
20+
...
21+
],
22+
"cells": [
23+
[0,0,0,0], # start row, start column, end row and end column of cell
24+
[0,1,0,1],
25+
...
26+
],
27+
"labels": [
28+
[0], # label of cell. [0] means cell in head and [1] means cell in body
29+
[0],
30+
[0],
31+
[0],
32+
[1],
33+
[1],
34+
[1],
35+
[1],
36+
.....
37+
]
38+
}
39+
},
40+
}
41+
```
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
"""
2+
##################################################################################################
3+
# Copyright Info : Copyright (c) Davar Lab @ Hikvision Research Institute. All rights reserved.
4+
# Filename : lgpma_base.py
5+
# Abstract : Base model settings for LGPMA detector
6+
7+
# Current Version: 1.0.0
8+
# Date : 2021-09-18
9+
##################################################################################################
10+
"""
11+
12+
# Model definition for the LGPMA detector: a ResNet-50 backbone with an FPN
# neck, an RPN head, an LGPMARoIHead (bbox head + LPMA mask head) and a GPMA
# global segmentation head, followed by the train/test settings.
model = dict(
13+
type='LGPMA',
14+
pretrained='path/to/resnet50-19c8e357.pth',  # placeholder path; set before training
15+
backbone=dict(
16+
type='ResNet',
17+
depth=50,
18+
num_stages=4,
19+
out_indices=(0, 1, 2, 3),
20+
frozen_stages=1,
21+
style='pytorch'),
22+
neck=dict(
23+
type='FPN',
24+
in_channels=[256, 512, 1024, 2048],
25+
out_channels=256,
26+
num_outs=5),
27+
rpn_head=dict(
28+
type='RPNHead',
29+
in_channels=256,
30+
feat_channels=256,
31+
anchor_generator=dict(
32+
type='AnchorGenerator',
33+
scales=[4, 8, 16],
34+
# NOTE(review): the very small ratios (0.05-0.2) are presumably meant
# for wide, flat table cells - confirm against AnchorGenerator's
# ratio convention.
ratios=[0.05, 0.1, 0.2, 0.5, 1.0, 2.0],
35+
strides=[4, 8, 16, 32, 64]),  # five strides, matching the FPN's num_outs=5
36+
bbox_coder=dict(
37+
type='DeltaXYWHBBoxCoder',
38+
target_means=[.0, .0, .0, .0],
39+
target_stds=[1.0, 1.0, 1.0, 1.0]),
40+
loss_cls=dict(
41+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
42+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
43+
roi_head=dict(
44+
type='LGPMARoIHead',
45+
bbox_roi_extractor=dict(
46+
type='SingleRoIExtractor',
47+
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
48+
out_channels=256,
49+
featmap_strides=[4, 8, 16, 32]),  # only the first four FPN strides are listed here
50+
bbox_head=dict(
51+
type='Shared2FCBBoxHead',
52+
in_channels=256,
53+
fc_out_channels=1024,
54+
roi_feat_size=7,  # same value as the bbox RoIAlign output_size above
55+
num_classes=2,
56+
# num_classes=3,
57+
bbox_coder=dict(
58+
type='DeltaXYWHBBoxCoder',
59+
target_means=[0., 0., 0., 0.],
60+
target_stds=[0.1, 0.1, 0.2, 0.2]),
61+
reg_class_agnostic=False,
62+
loss_cls=dict(
63+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
64+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
65+
mask_roi_extractor=dict(
66+
type='SingleRoIExtractor',
67+
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
68+
out_channels=256,
69+
featmap_strides=[4, 8, 16, 32]),
70+
mask_head=dict(
71+
type='LPMAMaskHead',
72+
num_convs=4,
73+
in_channels=256,
74+
conv_out_channels=256,
75+
num_classes=2,  # kept equal to bbox_head.num_classes above
76+
# num_classes=3,
77+
loss_mask=dict(
78+
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),
79+
loss_lpma=dict(
80+
type='L1Loss', loss_weight=1.0))),
81+
global_seg_head=dict(
82+
type='GPMAMaskHead',
83+
in_channels=256,
84+
conv_out_channels=256,
85+
num_classes=1,
86+
loss_mask=dict(type='DiceLoss', loss_weight=1),
87+
loss_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=0.01, reduction='sum')),
88+
# model training and testing settings
89+
train_cfg=dict(
90+
rpn=dict(
91+
assigner=dict(
92+
type='MaxIoUAssigner',
93+
pos_iou_thr=0.7,
94+
neg_iou_thr=0.3,
95+
min_pos_iou=0.3,
96+
match_low_quality=True,
97+
ignore_iof_thr=-1),
98+
sampler=dict(
99+
type='RandomSampler',
100+
num=256,
101+
pos_fraction=0.5,
102+
neg_pos_ub=-1,
103+
add_gt_as_proposals=False),
104+
allowed_border=0,
105+
pos_weight=-1,
106+
debug=False),
107+
rpn_proposal=dict(
108+
nms_pre=2000,
109+
max_per_img=2000,
110+
nms_post=2000,
111+
nms=dict(type='nms', iou_threshold=0.5),
112+
min_bbox_size=0),
113+
rcnn=dict(
114+
assigner=dict(
115+
type='MaxIoUAssigner',
116+
pos_iou_thr=0.5,
117+
neg_iou_thr=0.5,
118+
min_pos_iou=0.5,
119+
match_low_quality=True,
120+
ignore_iof_thr=-1),
121+
sampler=dict(
122+
type='RandomSampler',
123+
num=512,
124+
pos_fraction=0.25,
125+
neg_pos_ub=-1,
126+
add_gt_as_proposals=True),
127+
mask_size=28,  # twice the mask RoIAlign output_size (14)
128+
pos_weight=-1,
129+
debug=False)),
130+
test_cfg=dict(
131+
rpn=dict(
132+
nms_pre=2000,
133+
nms_post=2000,
134+
max_per_img=2000,
135+
nms=dict(type='nms', iou_threshold=0.5),
136+
min_bbox_size=0),
137+
rcnn=dict(
138+
score_thr=0.05,
139+
nms=dict(type='nms', iou_threshold=0.1),
140+
max_per_img=1000,
141+
mask_thr_binary=0.5),
142+
# Post-processing step selected by type name; its behaviour is defined
# outside this file.
postprocess=dict(
143+
type="PostLGPMA"
144+
)
145+
),
146+
)
147+
148+
# Top-level train_cfg / test_cfg are left as None; the actual training and
# testing settings are nested inside `model` (model['train_cfg'] and
# model['test_cfg']).
train_cfg = None
149+
test_cfg = None
150+
# dataset settings
151+
dataset_type = 'DavarCustomDataset'
152+
data_root = ''  # empty here and not referenced by the dataset dicts in this file
153+
# Per-channel normalisation constants, shared by all three pipelines through
# dict(type='Normalize', **img_norm_cfg).
# NOTE(review): these look like the usual ImageNet mean/std - confirm.
img_norm_cfg = dict(
154+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
155+
# Training pipeline: load the image and table annotations, resize, normalise,
# pad, generate the GPMA data, then bundle and collect the training tensors.
train_pipeline = [
156+
dict(type='DavarLoadImageFromFile'),
157+
dict(type='DavarLoadTableAnnotations',
158+
with_bbox=True, # Bounding Rect
159+
with_enlarge_bbox=True, # Produce pseudo-bboxes for aligned cells
160+
with_label=True, # Bboxes' labels
161+
with_poly_mask=True, # Mask
162+
with_empty_bbox=True, # Produce pseudo-bboxes for empty cells
163+
),
164+
# NOTE(review): with multiscale_mode='range' the two img_scale entries are
# presumably the bounds of a randomly sampled scale - confirm in DavarResize.
dict(type='DavarResize', img_scale=[(360, 480), (960, 1080)], keep_ratio=True, multiscale_mode='range'),
165+
dict(type='Normalize', **img_norm_cfg),
166+
dict(type='Pad', size_divisor=32),
167+
dict(type='GPMADataGeneration'),
168+
dict(type='DavarDefaultFormatBundle'),
169+
# NOTE(review): 'gt_semantic_seg' is only collected in this pipeline;
# presumably it is produced by GPMADataGeneration - confirm.
dict(type='DavarCollect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
170+
]
171+
# Validation pipeline: unlike test_pipeline it also loads the table
# annotations and collects the gt boxes, labels and masks. Images go through
# a single MultiScaleFlipAug setting (scale_factor=1.5, no flipping).
val_pipeline = [
172+
dict(type='DavarLoadImageFromFile'),
173+
dict(type='DavarLoadTableAnnotations',
174+
with_bbox=True, # Bounding Rect
175+
with_enlarge_bbox=True, # Produce pseudo-bboxes for aligned cells
176+
with_label=True, # Bboxes' labels
177+
with_poly_mask=True, # Mask
178+
with_empty_bbox=True, # Produce pseudo-bboxes for empty cells
179+
),
180+
dict(
181+
type='MultiScaleFlipAug',
182+
scale_factor=1.5,
183+
flip=False,
184+
transforms=[
185+
dict(type='DavarResize', keep_ratio=True),
186+
dict(type='Normalize', **img_norm_cfg),
187+
dict(type='Pad', size_divisor=32),
188+
dict(type='DavarDefaultFormatBundle'),
189+
dict(type='DavarCollect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
190+
])
191+
]
192+
# Test pipeline: image only (no annotation loading). Uses the same
# MultiScaleFlipAug setting as validation (scale_factor=1.5, no flipping)
# and collects just 'img'.
test_pipeline = [
193+
dict(type='DavarLoadImageFromFile'),
194+
dict(
195+
type='MultiScaleFlipAug',
196+
scale_factor=1.5,
197+
flip=False,
198+
transforms=[
199+
dict(type='DavarResize', keep_ratio=True),
200+
dict(type='Normalize', **img_norm_cfg),
201+
dict(type='Pad', size_divisor=32),
202+
dict(type='DavarDefaultFormatBundle'),
203+
dict(type='DavarCollect', keys=['img']),
204+
])
205+
]
206+
# Dataset wiring for the train / val / test splits. ann_file and img_prefix
# are intentionally empty in this base config and must be supplied by a
# derived config before use.
data = dict(
207+
samples_per_gpu=3,
208+
workers_per_gpu=1,
209+
train=dict(
210+
type=dataset_type,
211+
ann_file='',
212+
img_prefix='',
213+
pipeline=train_pipeline),
214+
val=dict(
215+
type=dataset_type,
216+
ann_file='',
217+
img_prefix='',
218+
pipeline=val_pipeline),
219+
test=dict(
220+
type=dataset_type,
221+
ann_file='',
222+
img_prefix='',
223+
pipeline=test_pipeline))
224+
225+
# optimizer
226+
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
227+
# Gradient clipping by L2 norm (norm_type=2) with max_norm=35.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
228+
# learning policy
229+
# Step schedule with a linear warmup over the first 1000 iterations.
# NOTE(review): step=[6, 10] is presumably counted in epochs, given the
# epoch-based runner with max_epochs=12 below - confirm.
lr_config = dict(
230+
policy='step',
231+
warmup='linear',
232+
warmup_iters=1000,
233+
warmup_ratio=1.0 / 3,
234+
step=[6, 10])
235+
runner = dict(type='EpochBasedRunner', max_epochs=12)
236+
# '{}' in filename_tmpl is a format placeholder; with interval=1 a checkpoint
# is presumably written every epoch, with the epoch number filled in - confirm.
checkpoint_config = dict(interval=1, filename_tmpl='checkpoint/maskrcnn-lgpma-e{}.pth')
237+
# yapf:disable
238+
log_config = dict(
239+
interval=10,
240+
hooks=[
241+
dict(type='TextLoggerHook'),
242+
])
243+
244+
# yapf:enable
245+
# runtime settings
246+
247+
dist_params = dict(backend='nccl')
248+
log_level = 'INFO'
249+
work_dir = ''  # empty in the base config; expected to be set by a derived config
250+
251+
load_from = None
252+
resume_from = None
253+
workflow = [('train', 1)]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
##################################################################################################
3+
# Copyright Info : Copyright (c) Davar Lab @ Hikvision Research Institute. All rights reserved.
4+
# Filename : lgpma_pub.py
5+
# Abstract : Model settings for LGPMA detector on PubTabNet
6+
7+
# Current Version: 1.0.0
8+
# Date : 2021-09-18
9+
##################################################################################################
10+
"""
11+
12+
# Inherit the model, pipelines, optimizer and schedule from the base config;
# only the dataset locations and output settings are overridden below.
_base_ = "./lgpma_base.py"

# Dataset locations for PubTabNet. Every 'path/to/...' value is a placeholder
# that must be replaced with a real location before training or testing.
data = dict(
    samples_per_gpu=3,
    workers_per_gpu=1,
    train=dict(
        # File name matches the training datalist described in the dataset
        # README (PubTabNet_train_datalist.json).
        ann_file='path/to/PubTabNet_train_datalist.json',
        img_prefix='path/to/PubTabNet'),
    val=dict(
        ann_file='path/to/PubTabNet_2.0.0_val.jsonl',
        img_prefix='path/to/PubTabNet'),
    test=dict(
        # Testing uses a batch of one image per GPU and points directly at
        # the validation image folder.
        samples_per_gpu=1,
        ann_file='path/to/PubTabNet_2.0.0_val.jsonl',
        img_prefix='path/to/PubTabNet/Images/val/'),
)

# runtime settings

# Same checkpoint interval as the base config, with a PubTabNet-specific
# file name template ('{}' is a format placeholder).
checkpoint_config = dict(interval=1, filename_tmpl='checkpoint/maskrcnn-lgpma-pub-e{}.pth')

# Output directory for logs and checkpoints (placeholder).
work_dir = 'path/to/workdir'
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Launch distributed LGPMA training on PubTabNet with ./config/lgpma_pub.py.
# Validation during training is disabled (--no-validate).
#
# Optional environment overrides:
#   DAVAROCR_PATH   root of the DavarOCR checkout (default: /data1/davarocr/)
#   NPROC_PER_NODE  number of processes to launch on this node (default: 8)

export LANG=zh_CN.UTF-8
export LANGUAGE=zh_CN:zh:en_US:en
export PATH=/usr/local/miniconda3/bin/:$PATH

DAVAROCR_PATH="${DAVAROCR_PATH:-/data1/davarocr/}"
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"

# "${DAVAROCR_PATH%/}" strips one trailing slash so the script path does not
# contain '//'; the quoting keeps paths with spaces intact.
python -m torch.distributed.launch --nproc_per_node "$NPROC_PER_NODE" \
    "${DAVAROCR_PATH%/}/tools/train.py" ./config/lgpma_pub.py --no-validate --launcher pytorch

0 commit comments

Comments
 (0)