Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add random resized crop data augmentation strategy with tests #2824

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
159 changes: 159 additions & 0 deletions mmseg/datasets/pipelines/transforms.py
Expand Up @@ -3,6 +3,7 @@
import inspect

import cv2
import math
import mmcv
import numpy as np
from mmcv.utils import deprecated_api_warning, is_tuple_of
Expand Down Expand Up @@ -662,6 +663,164 @@ def __repr__(self):
return self.__class__.__name__ + f'(crop_size={self.crop_size})'


@PIPELINES.register_module()
class RandomResizedCrop(object):
    """Crop a random region of the image and resize it to a fixed size.

    A crop whose area is a random fraction (default: 0.08 to 1.0) of the
    original area and whose aspect ratio is drawn log-uniformly from
    ``ratio`` (default: 3/4 to 4/3) is taken and then resized to ``size``.
    Every map listed in ``results['seg_fields']`` receives the same crop
    and is resized with nearest-neighbour interpolation so that label
    values are never blended.

    Args:
        size (sequence | int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size)
            is made.
        scale (tuple): Range of the random size of the cropped image compared
            to the original image. Defaults to (0.08, 1.0).
        ratio (tuple): Range of the random aspect ratio (width / height) of
            the cropped image. Defaults to (3. / 4., 4. / 3.).
        max_attempts (int): Maximum number of attempts before falling back to
            a central crop. Defaults to 10.
        crop_padding (int): Stored and reported by ``__repr__`` only; the
            crop logic of this class does not read it. Defaults to 32.
        interpolation (str): Interpolation method used for the image,
            accepted values are 'nearest', 'bilinear', 'bicubic', 'area',
            'lanczos'. Defaults to 'bilinear'.
        p (float): Probability of applying the transform. Defaults to 1.0.
        backend (str): The image resize backend type, accepted values are
            `cv2` and `pillow`. Defaults to `cv2`.

    Raises:
        ValueError: If ``scale`` or ``ratio`` is not ordered as (min, max),
            or if ``backend`` is not supported.
        AssertionError: If ``max_attempts`` is not a non-negative int or
            ``interpolation`` is not one of the accepted values.
    """

    def __init__(
        self,
        size,
        scale=(0.08, 1.0),
        ratio=(3.0 / 4.0, 4.0 / 3.0),
        max_attempts=10,
        crop_padding=32,
        interpolation="bilinear",
        p=1.0,
        backend="cv2",
    ):
        if isinstance(size, (tuple, list)):
            self.size = size
        else:
            self.size = (size, size)
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            raise ValueError(
                "range should be of kind (min, max). "
                f"But received scale {scale} and ratio {ratio}."
            )
        assert isinstance(max_attempts, int) and max_attempts >= 0, (
            "max_attempts must be int and no less than 0."
        )
        assert interpolation in (
            "nearest", "bilinear", "bicubic", "area", "lanczos"
        )
        if backend not in ["cv2", "pillow"]:
            raise ValueError(
                f"backend: {backend} is not supported for resize. "
                'Supported backends are "cv2", "pillow"'
            )

        self.scale = scale
        self.ratio = ratio
        self.max_attempts = max_attempts
        self.crop_padding = crop_padding
        self.interpolation = interpolation
        self.backend = backend
        self.p = p

    @staticmethod
    def get_params(img, scale, ratio, max_attempts=10):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (ndarray): Image to be cropped; only ``img.shape[:2]`` is
                read.
            scale (tuple): Range of the random size of the cropped image
                compared to the original image area.
            ratio (tuple): Range of the random aspect ratio (width / height)
                of the cropped image.
            max_attempts (int): Maximum number of attempts before falling
                back to central crop. Defaults to 10.

        Returns:
            tuple: Params (ymin, xmin, ymax, xmax) to be passed to `crop` for
                a random sized crop. The max corner is inclusive.
        """
        height = img.shape[0]
        width = img.shape[1]
        area = height * width
        # Loop-invariant: the aspect ratio is sampled uniformly in log space.
        log_ratio = (math.log(ratio[0]), math.log(ratio[1]))

        for _ in range(max_attempts):
            target_area = np.random.uniform(*scale) * area
            aspect_ratio = math.exp(np.random.uniform(*log_ratio))

            target_width = int(round(math.sqrt(target_area * aspect_ratio)))
            target_height = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < target_width <= width and 0 < target_height <= height:
                # np.random.randint excludes its upper bound, hence the +1.
                # It is called through ``np`` explicitly so that the bound
                # semantics do not depend on which ``random`` module the
                # file imports (an exclusive bound without +1 would raise
                # when the crop spans a whole dimension).
                ymin = int(np.random.randint(0, height - target_height + 1))
                xmin = int(np.random.randint(0, width - target_width + 1))
                ymax = ymin + target_height - 1
                xmax = xmin + target_width - 1
                return ymin, xmin, ymax, xmax

        # Fallback to central crop, clamping the aspect ratio into range.
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            target_width = width
            target_height = int(round(target_width / min(ratio)))
        elif in_ratio > max(ratio):
            target_height = height
            target_width = int(round(target_height * max(ratio)))
        else:  # whole image
            target_width = width
            target_height = height
        ymin = (height - target_height) // 2
        xmin = (width - target_width) // 2
        ymax = ymin + target_height - 1
        xmax = xmin + target_width - 1
        return ymin, xmin, ymax, xmax

    def crop_and_resize(self, img: np.ndarray, crop_bbox: np.ndarray,
                        interpolation=None):
        """Crop the image and resize it to ``self.size``.

        Args:
            img (ndarray): Image to be cropped and resized.
            crop_bbox (ndarray): The bounding box of the crop area as
                (xmin, ymin, xmax, ymax).
            interpolation (str, optional): Interpolation method for the
                resize. Defaults to None, which uses ``self.interpolation``.

        Returns:
            ndarray: The cropped and resized image.
        """
        if interpolation is None:
            interpolation = self.interpolation
        img = mmcv.imcrop(img, bboxes=crop_bbox)
        # self.size is (h, w); mmcv.imresize is given (w, h).
        return mmcv.imresize(
            img,
            tuple(self.size[::-1]),
            interpolation=interpolation,
            backend=self.backend,
        )

    def __call__(self, results):
        """Call function to randomly crop images, semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Randomly cropped results, 'img_shape' key in result dict is
                updated according to crop size. With probability ``1 - p``
                the dict is returned untouched.
        """
        if np.random.random() >= self.p:
            return results

        # crop the image
        img = results["img"]
        ymin, xmin, ymax, xmax = self.get_params(
            img=img,
            scale=self.scale,
            ratio=self.ratio,
            max_attempts=self.max_attempts,
        )
        crop_bbox_array = np.array([xmin, ymin, xmax, ymax])
        results["img"] = self.crop_and_resize(
            img=img, crop_bbox=crop_bbox_array)
        results["img_shape"] = results["img"].shape

        # crop semantic seg with the same box; nearest-neighbour keeps the
        # label values intact instead of interpolating between class ids.
        for key in results.get("seg_fields", []):
            results[key] = self.crop_and_resize(
                img=results[key],
                crop_bbox=crop_bbox_array,
                interpolation="nearest",
            )

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__ + f"(size={self.size}"
        repr_str += f", scale={tuple(round(s, 4) for s in self.scale)}"
        repr_str += f", ratio={tuple(round(r, 4) for r in self.ratio)}"
        repr_str += f", max_attempts={self.max_attempts}"
        repr_str += f", crop_padding={self.crop_padding}"
        repr_str += f", interpolation={self.interpolation}"
        repr_str += f", backend={self.backend}"
        repr_str += f", p={self.p})"
        return repr_str


@PIPELINES.register_module()
class RandomRotate(object):
"""Rotate the image & seg.
Expand Down