hikopensource
diff --git a/‎davarocr/davar_common/apis/test.py‎
Lines changed: 18 additions & 6 deletions b/‎davarocr/davar_common/apis/test.py‎
Lines changed: 18 additions & 6 deletions
diff --git a/‎davarocr/davar_common/apis/train.py‎
Lines changed: 9 additions & 3 deletions b/‎davarocr/davar_common/apis/train.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎davarocr/davar_common/datasets/builder.py‎
Lines changed: 0 additions & 3 deletions b/‎davarocr/davar_common/datasets/builder.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎davarocr/davar_common/datasets/davar_custom.py‎
Lines changed: 17 additions & 4 deletions b/‎davarocr/davar_common/datasets/davar_custom.py‎
Lines changed: 17 additions & 4 deletions
diff --git a/‎davarocr/davar_common/datasets/davar_multi_dataset.py‎
Lines changed: 21 additions & 1 deletion b/‎davarocr/davar_common/datasets/davar_multi_dataset.py‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎davarocr/davar_common/datasets/pipelines/davar_loading.py‎
Lines changed: 113 additions & 5 deletions b/‎davarocr/davar_common/datasets/pipelines/davar_loading.py‎
Lines changed: 113 additions & 5 deletions
@@ -22,13 +22,16 @@
 from mmdet.core import encode_mask_results
 from mmdet.apis.test import collect_results_cpu, collect_results_gpu
 
+from davarocr.mmcv import DavarProgressBar
+
 
 def single_gpu_test(model,
                     data_loader,
                     show=False,
                     out_dir=None,
                     show_score_thr=0.3,
-                    model_type="DETECTOR"):
+                    model_type="DETECTOR",
+                    min_time_interval=1):
     """ Test model with single GPU, used for visualization.
 
     Args:
@@ -37,15 +40,16 @@ def single_gpu_test(model,
         show (boolean): whether to show visualization
         out_dir (str): visualization results saved path
         show_score_thr (float): the threshold to show visualization.
-        model_type(float): model type indicator, used to formalize final results.
+        model_type(str): model type indicator, used to formalize final results.
+        min_time_interval(int): progressbar minimal update unit
     Returns:
         dict: test results
     """
 
     model.eval()
     results = []
     dataset = data_loader.dataset
-    prog_bar = mmcv.ProgressBar(len(dataset))
+    prog_bar = DavarProgressBar(len(dataset), min_time_interval=min_time_interval)
     for _, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, rescale=True, **data)
@@ -95,7 +99,7 @@ def single_gpu_test(model,
                 result = list(zip(result["text"], result["length"]))
             else:
                 result = result["text"]
-            batch_size = len(result)
+            batch_size = len(result) if not isinstance(result[0], list) else len(result[0])
         elif model_type == "SPOTTER":
             pass
             # if isinstance(result[0], dict):
@@ -118,7 +122,8 @@ def multi_gpu_test(model,
                    data_loader,
                    tmpdir=None,
                    gpu_collect=False,
-                   model_type="DETECTOR"):
+                   model_type="DETECTOR",
+                   min_time_interval=1):
     """Test model with multiple gpus.
 
     This method tests model with multiple gpus and collects the results
@@ -133,6 +138,8 @@ def multi_gpu_test(model,
         tmpdir (str): Path of directory to save the temporary results from
             different gpus under cpu mode.
         gpu_collect (bool): Option to use either gpu or cpu to collect results.
+        model_type(str): model type indicator, used to formalize final results.
+        min_time_interval(int): progressbar minimal update unit
 
     Returns:
         list(dict): The prediction results.
@@ -142,7 +149,7 @@ def multi_gpu_test(model,
     dataset = data_loader.dataset
     rank, world_size = get_dist_info()
     if rank == 0:
-        prog_bar = mmcv.ProgressBar(len(dataset))
+        prog_bar = DavarProgressBar(len(dataset), min_time_interval=min_time_interval)
     time.sleep(2)  # This line can prevent deadlock problem in some cases.
     for _, data in enumerate(data_loader):
 
@@ -158,12 +165,17 @@ def multi_gpu_test(model,
             elif model_type == "RECOGNIZOR":
                 if "prob" in result:
                     result = result["text"]
+                    if isinstance(result[0], list):
+                        result = result[0]
                 elif "length" in result and "text" not in result:
                     result = result["length"]
                 elif "length" in result and "text" in result:
                     result = list(zip(result["text"], result["length"]))
                 else:
                     result = result["text"]
+                    if isinstance(result[0], list):
+                        result = result[0]
+
             elif model_type == "SPOTTER":
                 pass
                 # if isinstance(result[0], dict):
 
@@ -137,9 +137,15 @@ def train_model(model,
         # Support batch_size > 1 in validation
         val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
         if val_samples_per_gpu > 1:
-            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
-            cfg.data.val.pipeline = replace_ImageToTensor(
-                cfg.data.val.get("pipeline", cfg.data.val.dataset.get("pipeline", None)))
+            # in case the test dataset is concatenated
+            val_pipeline = cfg.data.val.get("pipeline", cfg.data.val.dataset.get("pipeline", None))
+            # supported multi dataset with different validation pipelines
+            if isinstance(val_pipeline[0], dict):
+                cfg.data.val.pipeline = replace_ImageToTensor(val_pipeline)
+            elif isinstance(val_pipeline[0], list):
+                cfg.data.val.pipeline = [
+                    replace_ImageToTensor(this_pipeline) for this_pipeline in val_pipeline]
+
         val_dataset = davar_build_dataset(cfg.data.val, dict(test_mode=True))
         val_dataloader = davar_build_dataloader(
             val_dataset,
 
@@ -254,9 +254,6 @@ def parameter_align(cfg):
     if isinstance(cfg["dataset"]["img_prefix"], str):
         cfg["dataset"]["img_prefix"] = cfg["dataset"]["img_prefix"].split('|')
 
-    assert len(batch_ratios) == len(cfg["dataset"]["ann_file"]),\
-        'the numbers of the batch ratios should equal to the numbers of the annotation files'
-
     dataset_num = len(batch_ratios)
 
     for key, item in cfg["dataset"].items():
 
@@ -303,7 +303,6 @@ def process_anns(self, idx):
         img_info = copy.deepcopy(self.data_infos[idx].get('ann', None))
         if self.classes_config is not None:
             img_info['labels'] = [per[0] for per in img_info['labels']]
-
             bboxes = []
             labels = []
             bboxes_ignore = []
@@ -313,11 +312,16 @@ def process_anns(self, idx):
                 cares = [1] * len(img_info['labels'])
 
             for i, care in enumerate(cares):
+                x_min = min(img_info['bboxes'][i][0::2])
+                x_max = max(img_info['bboxes'][i][0::2])
+                y_min = min(img_info['bboxes'][i][1::2])
+                y_max = max(img_info['bboxes'][i][1::2])
+                rect_box = [x_min, y_min, x_max, y_max]
                 if care:
-                    bboxes.append(img_info['bboxes'][i])
+                    bboxes.append(rect_box)
                     labels.append(self.classes_config['classes'].index(img_info['labels'][i]))
                 else:
-                    bboxes_ignore.append(img_info['bboxes'][i])
+                    bboxes_ignore.append(rect_box)
                     labels_ignore.append(self.classes_config['classes'].index(img_info['labels'][i]))
             bboxes = np.array(bboxes).reshape(-1, 4)
             bboxes_ignore = np.array(bboxes_ignore).reshape(-1, 4)
@@ -359,7 +363,16 @@ def evaluate(self,
         allowed_metrics = ['mAP', 'recall']
         if metric not in allowed_metrics:
             raise KeyError(f'metric {metric} is not supported')
-        # annotations = [self.get_ann_info(i) for i in range(len(self))]
+        if len(results) > 0 and isinstance(results[0], dict):
+            num_classes = len(self.classes_config['classes'])
+            tmp_results = []
+            for res in results:
+                points = np.array(res['points']).reshape(-1, 4)
+                scores = np.array(res['scores']).reshape(-1, 1)
+                labels = np.array(res['labels'])
+                bboxes = np.concatenate([points, scores], axis=-1)
+                tmp_results.append([bboxes[labels == i, :] for i in range(num_classes)])
+            results = tmp_results
         annotations = [self.process_anns(i) for i in range(len(self))]
         eval_results = OrderedDict()
         iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr
 
@@ -153,5 +153,25 @@ def evaluate(self,
             dict: model evaluation metric
 
         """
-        validation_result = self.datasets[0].evaluate(results, metric, logger, **eval_kwargs)
+
+        # use the group samples to validate
+        group_samples = self.flag["group_samples"]
+        start_idx = 0
+        validation_result = dict()
+        for dataset_idx, group_sample in enumerate(group_samples):
+            this_results = results[start_idx:start_idx + group_sample]
+            this_validation_result = self.datasets[dataset_idx].evaluate(
+                this_results, metric, logger, **eval_kwargs)
+            # record the each dataset info
+            for key, value in this_validation_result.items():
+                this_key = "{}_set{}".format(key, dataset_idx)
+                validation_result[this_key] = value
+                # calculate the average performance
+                if dataset_idx == 0:
+                    validation_result[key] = value / len(group_samples)
+                else:
+                    validation_result[key] += value / len(group_samples)
+            # update the sample index 
+            start_idx += group_sample
+
         return validation_result
@@ -17,6 +17,7 @@
 import cv2
 import pycocotools.mask as maskUtils
 import numpy as np
+
 from mmdet.datasets.builder import PIPELINES
 from mmdet.core import BitmapMasks, PolygonMasks
 
@@ -134,6 +135,7 @@ def __init__(self,
                  text_profile=None,
                  label_start_index=0,
                  poly2mask=True,
+                 only_quad=False
                  ):
         """ Parameter initialization
 
@@ -172,6 +174,7 @@ def __init__(self,
                                      according to `classes_config`. The start label will be added. e.g., for mmdet 1.x,
                                      this value is set to [1];  for mmdet 2.x, this will be set to [0].
             poly2mask (boolean):      Whether to convert the instance masks from polygons to bitmaps. Default: True.
+            only_quad (boolean): Whether only quad format annotation supported.
         """
         self.with_bbox = with_bbox
         self.with_poly_bbox = with_poly_bbox
@@ -182,9 +185,10 @@ def __init__(self,
         self.with_text = with_text
         self.bieo_labels = bieo_labels
         self.text_profile = text_profile
-        self.label_start_index=label_start_index
+        self.label_start_index = label_start_index
         self.with_cbbox = with_cbbox
         self.poly2mask = poly2mask
+        self.only_quad = only_quad
 
         assert not (self.with_label and self.with_multi_label), \
             "Only one of with_label and with_multi_label can be true"
@@ -324,10 +328,25 @@ def _load_poly_bboxes(self, results):
         gt_poly_bboxes = []
         gt_poly_bboxes_ignore = []
 
+        height, width = results['img_info']['height'], results['img_info']['width']
+
         for i, box in enumerate(tmp_gt_bboxes):
+            for cor_idx in range(0, len(box), 2):
+                box[cor_idx] = min(max(0, box[cor_idx]), width)
+                box[cor_idx + 1] = min(max(0, box[cor_idx + 1]), height)
+
             # If the bboxes are labeled in 2-point form, then transfer it into 4-point form.
             if len(box) == 4:
                 box = [box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]
+
+            if self.only_quad and len(box) != 8:
+                continue
+
+            if self.only_quad:
+                box = self.sorted_bbox_convex(box.copy())
+                if not self.is_convex(box.copy()):
+                    continue
+
             if cares[i] == 1:
                 gt_poly_bboxes.append(np.array(box))
             else:
@@ -390,6 +409,67 @@ def process_polygons(self, polygons):
                 valid_polygons.append(polygon)
         return valid_polygons
 
+    def is_convex(self, bbox, area=2):
+        """ Check the turning direction at every corner of a quadrilateral.
+
+        For each of the four vertices, the 2-D cross product of the edge
+        leaving that vertex and the following edge is computed. The box is
+        rejected as soon as one cross product is smaller than `area` or has
+        a sign opposite to the previously accepted one.
+
+        Args:
+            bbox (list[float]): flat coordinates [x0, y0, x1, y1, x2, y2, x3, y3]
+            area (int): lower bound every per-corner cross product must reach
+
+        Returns:
+            bool: True if all four corners pass the checks, False otherwise
+        """
+        num_points = 4
+        last_cross = 1
+        for first in range(num_points):
+            # Three consecutive vertices, wrapping around the quadrilateral.
+            second = (first + 1) % num_points
+            third = (first + 2) % num_points
+
+            edge_a_dx = bbox[2 * second] - bbox[2 * first]
+            edge_a_dy = bbox[2 * second + 1] - bbox[2 * first + 1]
+            edge_b_dx = bbox[2 * third] - bbox[2 * second]
+            edge_b_dy = bbox[2 * third + 1] - bbox[2 * second + 1]
+
+            cross = edge_a_dx * edge_b_dy - edge_b_dx * edge_a_dy
+
+            # Too small (or wrongly oriented) corner.
+            if cross < area:
+                return False
+            # Turning direction flipped with respect to the previous corner.
+            if cross * last_cross < 0:
+                return False
+            last_cross = cross
+        return True
+
+    def sorted_bbox_convex(self, bbox):
+        """ Reorder the four vertices of a quadrilateral.
+
+        The first output vertex is the upper one (smaller y) of the two
+        left-most points. The second is the top-most remaining point lying
+        strictly to the right of the first. The last two are the remaining
+        points ordered by descending x. For an axis-aligned rectangle this
+        yields top-left, top-right, bottom-right, bottom-left.
+
+        Args:
+            bbox (list[float]): flat coordinates [x0, y0, x1, y1, x2, y2, x3, y3]
+
+        Returns:
+            list[float]: sorted bbox, always containing 8 coordinates
+        """
+        assert len(bbox) == 8
+
+        points = [[bbox[idx], bbox[idx + 1]] for idx in range(0, 8, 2)]
+        remaining = sorted(points, key=lambda pt: pt[0])
+
+        # Of the two left-most points, start from the one with the smaller y.
+        first_idx = 0 if remaining[0][1] < remaining[1][1] else 1
+        first_point = remaining.pop(first_idx)
+
+        # Second vertex: top-most remaining point strictly right of the first.
+        remaining = sorted(remaining, key=lambda pt: pt[1])
+        # Degenerate boxes (e.g. all vertices clipped onto one vertical line)
+        # have no such point; fall back to the top-most remaining point so no
+        # vertex is dropped and the result keeps 8 coordinates.
+        second_idx = next(
+            (idx for idx, pt in enumerate(remaining) if pt[0] > first_point[0]), 0)
+        second_point = remaining.pop(second_idx)
+
+        # The last two vertices are taken right-to-left.
+        remaining.sort(key=lambda pt: pt[0], reverse=True)
+
+        ordered = [first_point, second_point] + remaining
+        return [coord for point in ordered for coord in point]
+
     def _load_polymasks(self, results):
         """Private function to load mask annotations.
 
@@ -407,21 +487,46 @@ def _load_polymasks(self, results):
         cares = results["cares"]
         polygons = ann.get('bboxes', [])
         valid_polygons = []
+        invalid_polygons = []
+
         for i, box in enumerate(polygons):
+            for cor_idx in range(0, len(box), 2):
+                box[cor_idx] = min(max(0, box[cor_idx]), width)
+                box[cor_idx + 1] = min(max(0, box[cor_idx + 1]), height)
+
+            # If the bboxes are labeled in 2-point form, then transfer it into 4-point form.
+            if len(box) == 4:
+                box = [box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]
+
+            if self.only_quad and len(box) != 8:
+                continue
+
+            if self.only_quad:
+                box = self.sorted_bbox_convex(box.copy())
+                if not self.is_convex(box.copy()):
+                    continue
+
             if cares[i] == 1:
-                # Handle the case of 2-point annotation
-                if len(box) == 4:
-                    box = [box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]
                 valid_polygons.append([np.array(box)])
+            else:
+                invalid_polygons.append([np.array(box)])
 
         if self.poly2mask:
             gt_masks = BitmapMasks(
                 [self._poly2mask(mask, height, width) for mask in valid_polygons], height, width)
+            gt_masks_ignore = BitmapMasks(
+                [self._poly2mask(mask, height, width) for mask in invalid_polygons], height, width)
         else:
             gt_masks = PolygonMasks(
                 [self.process_polygons(polygons) for polygons in valid_polygons], height, width)
+            gt_masks_ignore = PolygonMasks(
+                [self.process_polygons(polygons) for polygons in invalid_polygons], height, width)
+
         results['gt_masks'] = gt_masks
+        results['gt_masks_ignore'] = gt_masks_ignore
+
         results['mask_fields'].append('gt_masks')
+        results['mask_fields'].append('gt_masks_ignore')
         return results
 
     def _load_labels(self, results):
@@ -445,7 +550,10 @@ def _load_labels(self, results):
             self.label_start_index = self.label_start_index[0]
 
         # If there is no `labels` in annotation, set `label_start_index` as the default value for all bboxes.
-        if tmp_labels is None or len(tmp_labels)==0:
+        if tmp_labels is None:
+            tmp_labels = [[self.label_start_index]] * bboxes_length
+        # If `labels` in annotation are empty, set `label_start_index` as the default value for all bboxes.
+        elif len(tmp_labels) == 0:
             tmp_labels = [[self.label_start_index]] * bboxes_length
 
         gt_labels = []