open-mmlab · InakiRaba91 · Apr 1, 2023
diff --git a/mmseg/datasets/pipelines/transforms.py b/mmseg/datasets/pipelines/transforms.py
@@ -3,6 +3,7 @@
 import inspect
 
 import cv2
+import math
 import mmcv
 import numpy as np
 from mmcv.utils import deprecated_api_warning, is_tuple_of
@@ -662,6 +663,164 @@ def __repr__(self):
         return self.__class__.__name__ + f'(crop_size={self.crop_size})'
 
 
+@PIPELINES.register_module()
+class RandomResizedCrop(object):
+    """Crop the given image to random size and aspect ratio.
+    A crop of random size (default: of 0.08 to 1.0) of the original size and a
+    random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio
+    is made. This crop is finally resized to given size.
+    Args:
+        size (sequence | int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made.
+        scale (tuple): Range of the random size of the cropped image compared
+            to the original image. Defaults to (0.08, 1.0).
+        ratio (tuple): Range of the random aspect ratio of the cropped image
+            compared to the original image. Defaults to (3. / 4., 4. / 3.).
+        max_attempts (int): Maximum number of attempts before falling back to
+            Central Crop. Defaults to 10.
+        efficientnet_style (bool): Whether to use efficientnet style Random
+            ResizedCrop. Defaults to False.
+        min_covered (Number): Minimum ratio of the cropped area to the original
+             area. Only valid if efficientnet_style is true. Defaults to 0.1.
+        crop_padding (int): The crop padding parameter in efficientnet style
+            center crop. Only valid if efficientnet_style is true.
+            Defaults to 32.
+        interpolation (str): Interpolation method, accepted values are
+            'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to
+            'bilinear'.
+        backend (str): The image resize backend type, accepted values are
+            `cv2` and `pillow`. Defaults to `cv2`.
+    """
+
+    def __init__(
+        self,
+        size,
+        scale=(0.08, 1.0),
+        ratio=(3.0 / 4.0, 4.0 / 3.0),
+        max_attempts=10,
+        crop_padding=32,
+        interpolation="bilinear",
+        p=1.0,
+        backend="cv2",
+    ):
+        if isinstance(size, (tuple, list)):
+            self.size = size
+        else:
+            self.size = (size, size)
+        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+            raise ValueError("range should be of kind (min, max). " f"But received scale {scale} and rato {ratio}.")
+        assert isinstance(max_attempts, int) and max_attempts >= 0, "max_attempts mush be int and no less than 0."
+        assert interpolation in ("nearest", "bilinear", "bicubic", "area", "lanczos")
+        if backend not in ["cv2", "pillow"]:
+            raise ValueError(f"backend: {backend} is not supported for resize." 'Supported backends are "cv2", "pillow"')
+
+        self.scale = scale
+        self.ratio = ratio
+        self.max_attempts = max_attempts
+        self.crop_padding = crop_padding
+        self.interpolation = interpolation
+        self.backend = backend
+        self.p = p
+
+    @staticmethod
+    def get_params(img, scale, ratio, max_attempts=10):
+        """Get parameters for ``crop`` for a random sized crop.
+        Args:
+            img (ndarray): Image to be cropped.
+            scale (tuple): Range of the random size of the cropped image
+                compared to the original image size.
+            ratio (tuple): Range of the random aspect ratio of the cropped
+                image compared to the original image area.
+            max_attempts (int): Maximum number of attempts before falling back
+                to central crop. Defaults to 10.
+        Returns:
+            tuple: Params (ymin, xmin, ymax, xmax) to be passed to `crop` for
+                a random sized crop.
+        """
+        height = img.shape[0]
+        width = img.shape[1]
+        area = height * width
+
+        for _ in range(max_attempts):
+            target_area = random.uniform(*scale) * area
+            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
+            aspect_ratio = math.exp(random.uniform(*log_ratio))
+
+            target_width = int(round(math.sqrt(target_area * aspect_ratio)))
+            target_height = int(round(math.sqrt(target_area / aspect_ratio)))
+
+            if 0 < target_width <= width and 0 < target_height <= height:
+                ymin = random.randint(0, height - target_height)
+                xmin = random.randint(0, width - target_width)
+                ymax = ymin + target_height - 1
+                xmax = xmin + target_width - 1
+                return ymin, xmin, ymax, xmax
+
+        # Fallback to central crop
+        in_ratio = float(width) / float(height)
+        if in_ratio < min(ratio):
+            target_width = width
+            target_height = int(round(target_width / min(ratio)))
+        elif in_ratio > max(ratio):
+            target_height = height
+            target_width = int(round(target_height * max(ratio)))
+        else:  # whole image
+            target_width = width
+            target_height = height
+        ymin = (height - target_height) // 2
+        xmin = (width - target_width) // 2
+        ymax = ymin + target_height - 1
+        xmax = xmin + target_width - 1
+        return ymin, xmin, ymax, xmax
+
+    def crop_and_resize(self, img: np.ndarray, crop_bbox: np.ndarray):
+        """
+        Crop the image and resize it to the given size.
+        Args:
+            img (ndarray): Image to be cropped and resized.
+            crop_bbox (ndarray): The bounding box of the crop area.
+        Returns:
+            ndarray: The cropped and resized image.
+        """
+        img = mmcv.imcrop(img, bboxes=crop_bbox)
+        return mmcv.imresize(img, tuple(self.size[::-1]), interpolation=self.interpolation, backend=self.backend)
+
+    def __call__(self, results):
+        """Call function to randomly crop images, semantic segmentation maps.
+        Args:
+            results (dict): Result dict from loading pipeline.
+        Returns:
+            dict: Randomly cropped results, 'img_shape' key in result dict is
+                updated according to crop size.
+        """
+        if random.random() < self.p:
+            # crop the image
+            img = results["img"]
+            ymin, xmin, ymax, xmax = self.get_params(
+                img=img, scale=self.scale, ratio=self.ratio, max_attempts=self.max_attempts
+            )
+            crop_bbox_array = np.array([xmin, ymin, xmax, ymax])
+            results["img"] = self.crop_and_resize(img=img, crop_bbox=crop_bbox_array)
+
+            # crop semantic seg
+            for key in results.get("seg_fields", []):
+                results[key] = self.crop_and_resize(img=results[key], crop_bbox=crop_bbox_array)
+
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__ + f"(size={self.size}"
+        repr_str += f", scale={tuple(round(s, 4) for s in self.scale)}"
+        repr_str += f", ratio={tuple(round(r, 4) for r in self.ratio)}"
+        repr_str += f", max_attempts={self.max_attempts}"
+        repr_str += f", crop_padding={self.crop_padding}"
+        repr_str += f", interpolation={self.interpolation}"
+        repr_str += f", backend={self.backend})"
+        repr_str += f", p={self.p})"
+        return repr_str
+
+
 @PIPELINES.register_module()
 class RandomRotate(object):
     """Rotate the image & seg.