update mask_roi

qiaoliang6 · qiaoliang6 · commit 7588b6bf6b1a · 2021-09-22T10:05:22.000+08:00
diff --git a/davarocr/davarocr/davar_spotting/models/roi_extractors/mask_roi_extractor.py b/davarocr/davarocr/davar_spotting/models/roi_extractors/mask_roi_extractor.py
@@ -0,0 +1,124 @@
+"""
+##################################################################################################
+# Copyright Info :    Copyright (c) Davar Lab @ Hikvision Research Institute. All rights reserved.
+# Filename       :    mask_roi_extractor.py
+# Abstract       :    Extract RoI masking features from a single level feature map.
+
+# Current Version:    1.0.0
+# Date           :    2021-07-14
+##################################################################################################
+"""
+import numpy as np
+
+import torch
+from mmcv.runner import force_fp32
+
+from mmdet.models.builder import ROI_EXTRACTORS
+from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import SingleRoIExtractor
+
+
+@ROI_EXTRACTORS.register_module()
+class MaskRoIExtractor(SingleRoIExtractor):
+    """ RoI feature extractor that multiplies pooled RoI features by masks cropped to each RoI. """
+
+    def __init__(self,
+                 roi_layer,
+                 out_channels,
+                 featmap_strides,
+                 finest_scale=56):
+        """
+        Args:
+            roi_layer (dict): Specify RoI layer type and arguments.
+            out_channels (int): Output channels of RoI layers.
+            featmap_strides (List[int]): Strides of input feature maps.
+            finest_scale (int): Scale threshold of mapping to level 0. Default: 56.
+        """
+        # No extra state is set up here: every argument goes straight to the parent constructor.
+        super().__init__(roi_layer, out_channels, featmap_strides, finest_scale)
+
+    @force_fp32(apply_to=('feats', ), out_fp16=True)
+    def forward(self, feats, rois, masks, roi_scale_factor=None):
+        """ Pool RoI features from the feature maps, then multiply them by per-RoI cropped masks.
+
+        Args:
+            feats (list(Tensor)): original feature maps, one per level, in shape of [B x C x H x W]
+            rois (Tensor): region of interest, in shape of [num_roi x 5]; columns 1: are used to crop masks.
+            masks (list(BitmapMasks)): the mask corresponding to each img, or None to skip the masking step.
+            roi_scale_factor (tuple): scale factor that RoI will be multiplied by; None leaves RoIs unscaled.
+
+        Returns:
+            Tensor: extracted RoI masking feature maps, in shape of [num_roi x C x H x W]
+        """
+
+        out_size = self.roi_layers[0].output_size
+        num_levels = len(feats)
+        expand_dims = (-1, self.out_channels * out_size[0] * out_size[1])  # (any, flattened per-RoI size)
+        if torch.onnx.is_in_onnx_export():
+            # Work around to export mask-rcnn to onnx: derive the all-zero output buffer from rois itself
+            roi_feats = rois[:, :1].clone().detach()
+            roi_feats = roi_feats.expand(*expand_dims)
+            roi_feats = roi_feats.reshape(-1, self.out_channels, *out_size)
+            roi_feats = roi_feats * 0
+        else:
+            roi_feats = feats[0].new_zeros(
+                rois.size(0), self.out_channels, *out_size)
+
+        # TODO: remove this when parrots supports
+        if torch.__version__ == 'parrots':
+            roi_feats.requires_grad = True
+        # NOTE(review): the single-level path below returns before the mask multiplication — confirm intended.
+        if num_levels == 1:
+            if len(rois) == 0:
+                return roi_feats
+            return self.roi_layers[0](feats[0], rois)
+
+        target_lvls = self.map_roi_levels(rois, num_levels)
+
+        if roi_scale_factor is not None:
+            rois = self.roi_rescale(rois, roi_scale_factor)
+        # Pool every RoI with the RoI layer of the level that map_roi_levels assigned to it.
+        for i in range(num_levels):
+            mask = target_lvls == i
+            if torch.onnx.is_in_onnx_export():
+                # To keep all roi_align nodes exported to onnx
+                # and skip nonzero op
+                mask = mask.float().unsqueeze(-1).expand(*expand_dims).reshape(
+                    roi_feats.shape)
+                roi_feats_t = self.roi_layers[i](feats[i], rois)
+                roi_feats_t *= mask
+                roi_feats += roi_feats_t
+                continue
+            inds = mask.nonzero(as_tuple=False).squeeze(1)  # indices of the RoIs assigned to level i
+            if inds.numel() > 0:
+                rois_ = rois[inds]
+                roi_feats_t = self.roi_layers[i](feats[i], rois_)
+                roi_feats[inds] = roi_feats_t
+            else:
+                # Sometimes some pyramid levels will not be used for RoI
+                # feature extraction and this will cause an incomplete
+                # computation graph in one GPU, which is different from those
+                # in other GPUs and will cause a hanging error.
+                # Therefore, we add it to ensure each feature pyramid is
+                # included in the computation graph to avoid runtime bugs.
+                roi_feats += sum(
+                    x.view(-1)[0]
+                    for x in self.parameters()) * 0. + feats[i].sum() * 0.
+        # Multiply the pooled features element-wise by masks cropped and resized to each RoI.
+        if masks is not None:
+            left = 0  # [left, right) indexes the rois paired with the current image's masks
+            right = 0
+            output_size = self.roi_layers[0].output_size
+            crop_masks = []
+            for mask in masks:
+                num = mask.masks.shape[0]  # assumes one RoI per mask, grouped by image in order — TODO confirm
+                right += num
+                # Crop mask from gt_masks according to roi (column 0 of rois is dropped)
+                crop_mask = mask.crop_and_resize(rois[left:right, 1:], output_size, 
+                    np.array(range(num)), device=rois.device)
+                left += num
+                crop_masks_t = torch.tensor(crop_mask.masks).to(roi_feats.device)
+                crop_masks.append(crop_masks_t)
+            crop_masks = torch.cat(crop_masks)
+            crop_masks = crop_masks.unsqueeze(1)  # insert a size-1 dim at position 1 to broadcast over roi_feats' dim 1
+            roi_feats = roi_feats * crop_masks.detach()
+        return roi_feats
diff --git a/davarocr/davarocr/davar_spotting/models/roi_extractors/tps_roi_extractor.py b/davarocr/davarocr/davar_spotting/models/roi_extractors/tps_roi_extractor.py
@@ -76,30 +76,47 @@ def forward(self, feats, fiducial_points):
         Returns:
             Tensor: rectification feature of shape [K x C x output_size]
         """
+        roi_feats = []
+        scale_factor = 4
+
         # only using 4x feature
-        x = self.relu(self.bn(self.conv(feats[0])))
+        feats = self.relu(self.bn(self.conv(feats[0])))
+        _, _, height, width = feats.size()
 
-        roi_feats = []
-        for batch_id in range(len(x)):
-            batch_C_prime = fiducial_points[batch_id]
-            if len(batch_C_prime) == 0:
+        for feat, points in zip(feats, fiducial_points):
+            if len(points) == 0:
                 continue
-            # B x point_num x 2
-            batch_C_prime = torch.Tensor(batch_C_prime).cuda(device=x.device)
-            # B x C x H x W
-            batch_I = x[batch_id].unsqueeze(0).expand(len(batch_C_prime), -1, -1, -1)
-            # B x N (= output_size[0] x output_size[1]) x 2
-            build_P_prime = self.GridGenerator.build_P_prime(batch_C_prime)
-            # B x output_size x 2
-            build_P_prime_reshape = build_P_prime.reshape([build_P_prime.size(0),
-                                                           self.output_size[0],
-                                                           self.output_size[1],
-                                                           2])
-            # B x C x output_size
-            batch_I_r = F.grid_sample(batch_I,
-                                      build_P_prime_reshape,
-                                      padding_mode='border')
-            roi_feats.append(batch_I_r)
+            points = torch.Tensor(points).cuda(device=feat.device)
+            points = points / scale_factor
+            for point in points:
+                # Clip points
+                point[:, 0] = torch.clip(point[:, 0], 0, width)
+                point[:, 1] = torch.clip(point[:, 1], 0, height)
+
+                # Calculate points boundary
+                x1 = int(torch.min(point[:, 0]))
+                x2 = int(torch.max(point[:, 0])) + 1
+                y1 = int(torch.min(point[:, 1]))
+                y2 = int(torch.max(point[:, 1])) + 1
+
+                # Normalize points for tps
+                point[:, 0] = 2 * (point[:, 0] - x1) / (x2 - x1) - 1
+                point[:, 1] = 2 * (point[:, 1] - y1) / (y2 - y1) - 1
+
+                # B x N (= output_size[0] x output_size[1]) x 2
+                build_P_prime = self.GridGenerator.build_P_prime(point.unsqueeze(0))
+                # B x output_size x 2
+                build_P_prime_reshape = build_P_prime.reshape([build_P_prime.size(0),
+                                                               self.output_size[0],
+                                                               self.output_size[1],
+                                                               2])
+                # Crop feature according to points boundary
+                crop_feat = feat[:, y1:y2, x1:x2].unsqueeze(0)
+                # B x C x output_size
+                batch_I_r = F.grid_sample(crop_feat,
+                                          build_P_prime_reshape,
+                                          padding_mode='border')
+                roi_feats.append(batch_I_r)
         roi_feats = torch.cat(roi_feats)
         return roi_feats
 
@@ -151,23 +168,19 @@ def get_fiducial_points(self, imgs, polys):
             if len(batch_bboxes) > 0:
                 batch_fiducial_points = np.stack(batch_fiducial_points, axis=0)
 
-                # Normalize fiducial points
-                batch_fiducial_points[:, :, 0] = (2 * batch_fiducial_points[:, :, 0] - width) / width
-                batch_fiducial_points[:, :, 1] = (2 * batch_fiducial_points[:, :, 1] - height) / height
-
             fiducial_points.append(batch_fiducial_points)
         return fiducial_points
 
-    def normalize_fiducial_points(self, imgs, img_metas, fiducial_points):
-        """ Normalize the fiducial points coordinates to [0,1].
+    def rescale_fiducial_points(self, imgs, img_metas, fiducial_points):
+        """ Rescale the fiducial points coordinates.
 
         Args:
             imgs (Tensor): input image.
             img_metas (dict): image meta-info.
             fiducial_points list(np.array): tps fiducial points.
 
         Returns:
-            list(np.array): normalized points
+            list(np.array): Rescaled points
         """
         normalized_fiducial_points = []
         for img, img_meta, point in zip(imgs, img_metas, fiducial_points):
@@ -180,10 +193,7 @@ def normalize_fiducial_points(self, imgs, img_metas, fiducial_points):
                 point[:, :, 0] = point[:, :, 0] * scale_factor[0]
                 point[:, :, 1] = point[:, :, 1] * scale_factor[1]
 
-                # Normalize
-                point[:, :, 0] = (2 * point[:, :, 0] - width) / width
-                point[:, :, 1] = (2 * point[:, :, 1] - height) / height
-
+                # Change points order
                 point_num = int(point.shape[1] / 2)
                 point[:, point_num:, :] = point[:, point_num:, :][:, ::-1, :]
             normalized_fiducial_points.append(point)
diff --git a/davarocr/davarocr/davar_spotting/models/spotters/text_perceptron_spot.py b/davarocr/davarocr/davar_spotting/models/spotters/text_perceptron_spot.py
@@ -187,7 +187,7 @@ def simple_test(self,
             return results
 
         # Compute normalized fiducial points
-        fiducial_points = self.recog_roi_extractor.normalize_fiducial_points(img, img_meta, fiducial_points)
+        fiducial_points = self.recog_roi_extractor.rescale_fiducial_points(img, img_meta, fiducial_points)
 
         # Extract feature according to fiducial point
         recog_feats = self.recog_roi_extractor(feat[:self.recog_roi_extractor.num_inputs], fiducial_points)