Source code for augraphy.augmentations.geometric

import random

import cv2
import numpy as np

from augraphy.augmentations.lib import rotate_bounding_boxes
from augraphy.augmentations.lib import rotate_image_PIL
from augraphy.augmentations.lib import rotate_keypoints
from augraphy.augmentations.lib import update_mask_labels
from augraphy.base.augmentation import Augmentation


[docs] class Geometric(Augmentation): """Applies basic geometric transformations such as resizing, flips and rotation. :param scale: Pair of floats determining new scale of image. :type scale: tuple, optional :param translation: Pair of values determining x and y translation value. The translation value will be in percentage of the image size if the value is float and in between -1.0 - 1.0: x (int) = image width * x (float and -1.0 - 1.0); y (int) = image height * y (float and -1.0 - 1.0) :type translation: tuple, optional :param fliplr: Flag to flip image in left right direction. :type fliplr: int, optional :param flipud: Flag to flip image in up down direction. :type flipud: int, optional :param crop: Tuple of 4 (x0, y0, xn, yn) to crop section of image. The value will be in percentage of the image size if the value is float and in between 0.0 - 1.0: x0 (int) = image width * x0 (float and 0.0 - 1.0); y0 (int) = image height * y0 (float and 0.0 - 1.0); xn (int) = image width * xn (float and 0.0 - 1.0); yn (int) = image height * yn (float and 0.0 - 1.0) :type crop: tuple, optional :param rotate_range: Pair of ints or floats determining the range from which to sample the image rotation. :type rotate_range: tuple, optional :param randomize: Flag to apply random geometric transformations. :type randomize: int, optional :param padding: Padding amount on each (left, right, top, bottom) side. The padding amount will be in percentage of the image size if the value is float and in between 0.0 - 1.0: left (int) = image width * left (float and 0.0 - 1.0); right (int) = image height * right (float and 0.0 - 1.0); top (int) = image width * top (float and 0.0 - 1.0); bottom (int) = image height * bottom (float and 0.0 - 1.0) :type padding: list, optional :param padding_type: Padding methods, select from fill,duplicate and mirror. :type paddng_type: string, optional :param padding_value: Padding value (in BGR) for fill padding method. :type paddng_value: tuple, optional :param p: The probability that this Augmentation will be applied. :type p: float, optional """ def __init__( self, scale=(1, 1), translation=(0, 0), fliplr=0, flipud=0, crop=(), rotate_range=(0, 0), padding=[0, 0, 0, 0], padding_type="fill", padding_value=(255, 255, 255), randomize=0, p=1, ): """Constructor method""" super().__init__(p=p) self.scale = scale self.translation = translation self.fliplr = fliplr self.flipud = flipud self.crop = crop self.rotate_range = rotate_range self.randomize = randomize self.padding = padding self.padding_type = padding_type self.padding_value = padding_value # Constructs a string representation of this Augmentation. def __repr__(self): return f"Geometry(scale={self.scale}, translation={self.translation}, fliplr={self.fliplr}, flipud={self.flipud}, crop={self.crop}, rotate_range={self.rotate_range}, padding={self.padding}, padding_type={self.padding_type}, padding_value={self.padding_value}, randomize={self.randomize}, p={self.p})"
[docs] def randomize_parameters(self, image): """Randomize parameters for random geometrical effect. :param image: The input image. :type image: numpy array """ # randomize scale self.scale = (random.uniform(0.5, 1), random.uniform(1, 1.5)) # randomize translation value ysize, xsize = image.shape[:2] self.translation = (random.randint(0, int(xsize * 0.1)), random.randint(0, int(ysize * 0.1))) # randomize flip self.fliplr = random.choice([0, 1]) self.flipud = random.choice([0, 1]) # randomize crop cx1 = random.randint(0, int(xsize / 5)) cx2 = random.randint(int(xsize / 2), xsize - 1) cy1 = random.randint(0, int(ysize / 5)) cy2 = random.randint(int(ysize / 2), ysize - 1) self.crop = (cx1, cy1, cx2, cy2) # randomize rotate self.rotate_range = (-10, 10) # randomize padding self.padding = [ random.randint(0, int(xsize / 5)), random.randint(0, int(xsize / 5)), random.randint(0, int(ysize / 5)), random.randint(0, int(ysize / 5)), ] self.padding_value = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) self.padding_type = random.choice(["fill", "mirror", "duplicate"])
[docs] def run_crop(self, image, mask, keypoints, bounding_boxes): """Crop image based on the input cropping box. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ # make sure there's only 4 inputs, x0, y0, xn, yn if len(self.crop) == 4: ysize, xsize = image.shape[:2] xstart, ystart, xend, yend = self.crop # when value is float and in between 0-1, scale it with image size if xstart >= 0 and xstart <= 1 and isinstance(xstart, float): xstart = int(xstart * xsize) if ystart >= 0 and ystart <= 1 and isinstance(ystart, float): ystart = int(ystart * ysize) if xend >= 0 and xend <= 1 and isinstance(xend, float): xend = int(xend * xsize) if yend >= 0 and yend <= 1 and isinstance(yend, float): yend = int(yend * ysize) # when value is set to -1, it takes image size if yend == -1: yend = ysize if xend == -1: xend = xsize # condition to make sure cropping range is valid check_y = yend > ystart and ystart >= 0 check_x = xend > xstart and xstart >= 0 if check_y and check_x: # crop image image = image[ystart:yend, xstart:xend] # crop mask if mask is not None: mask = mask[ystart:yend, xstart:xend] # remove keypoints outside the cropping boundary if keypoints is not None: # check each keypoint, and remove them if it is outside the cropping area for name, points in keypoints.items(): remove_indices = [] # check and save the indices to be removed for i, (xpoint, ypoint) in enumerate(points): if xpoint < xstart or xpoint >= xend or ypoint < ystart or ypoint >= yend: remove_indices.append(i) # remove points while remove_indices: points.pop(remove_indices.pop()) # update points location after the cropping process for i, (xpoint, ypoint) in enumerate(points): xpoint -= xstart ypoint -= ystart points[i] = [xpoint, ypoint] # remove and limit bounding boxes to the cropped boundary if bounding_boxes is not None: # check each point, and remove them if it is outside the cropping area remove_indices = [] for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box # start point is outside the croped area, but end point is inside if (xspoint < xstart or xspoint >= xend or yspoint < ystart or yspoint >= yend) and ( xepoint >= xstart and xepoint < xend and yepoint >= ystart and yepoint < yend ): xspoint = min(max(xspoint, xstart), xend) yspoint = min(max(yspoint, ystart), yend) bounding_boxes[i] = [xspoint, yspoint, xepoint, yepoint] # end point is outside the croped area, but start point is inside elif (xepoint < xstart or xepoint >= xend or yepoint < ystart or yepoint >= yend) and ( xspoint >= xstart and xspoint < xend and yspoint >= ystart and yspoint < yend ): xepoint = min(max(xepoint, xstart), xend) yepoint = min(max(yepoint, ystart), yend) bounding_boxes[i] = [xspoint, yspoint, xepoint, yepoint] # start point and end point are outside the croped area, remove the whole box elif (xepoint < xstart or xepoint >= xend or yepoint < ystart or yepoint >= yend) and ( xspoint < xstart or xspoint >= xend or yspoint < ystart or yspoint >= yend ): remove_indices.append(i) # remove boxes while remove_indices: bounding_boxes.pop(remove_indices.pop()) # update points location after the cropping process for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box xspoint -= xstart yspoint -= ystart xepoint -= xstart yepoint -= ystart bounding_boxes[i] = [xspoint, yspoint, xepoint, yepoint] return image, mask
[docs] def run_padding(self, image, mask, keypoints, bounding_boxes): """Apply padding to image based on the input padding value. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ # convert from rgb to grayscale using their average if len(image.shape) < 3: padding_value = np.mean(self.padding_value) elif image.shape[2] == 3: padding_value = (self.padding_value[0], self.padding_value[1], self.padding_value[2]) elif image.shape[2] == 4: # add alpha value padding_value = (self.padding_value[0], self.padding_value[1], self.padding_value[2], 255) # padding on left side if self.padding[0] > 0: # get image size ysize, xsize = image.shape[:2] self.padding = list(self.padding) # convert percentage into pixel amount if self.padding[0] <= 1 and isinstance(self.padding[0], float): self.padding[0] = int(self.padding[0] * xsize) # different padding shape for grayscale and colored image if len(image.shape) > 2: padding_shape = (ysize, self.padding[0], image.shape[2]) else: padding_shape = (ysize, self.padding[0]) # create the padding image if self.padding_type == "duplicate": image_padding = image[:, -self.padding[0] :].copy() if mask is not None: mask_padding = mask[:, -self.padding[0] :].copy() elif self.padding_type == "mirror": image_padding = np.fliplr(image[:, : self.padding[0]].copy()) if mask is not None: mask_padding = np.fliplr(mask[:, : self.padding[0]].copy()) else: image_padding = np.full(padding_shape, fill_value=padding_value, dtype="uint8") if mask is not None: mask_padding = np.full(padding_shape[:2], fill_value=0, dtype="uint8") # combine padding image and original image image = np.concatenate([image_padding, image], axis=1) if mask is not None: mask = np.concatenate([mask_padding, mask], axis=1) # padding on right side if self.padding[1] > 0: # get image size ysize, xsize = image.shape[:2] # convert percentage into pixel amount if self.padding[1] <= 1 and isinstance(self.padding[1], float): self.padding[1] = int(self.padding[1] * xsize) # different padding shape for grayscale and colored image if len(image.shape) > 2: padding_shape = (ysize, self.padding[1], image.shape[2]) else: padding_shape = (ysize, self.padding[1]) # create the padding image if self.padding_type == "duplicate": image_padding = image[:, : self.padding[1]].copy() if mask is not None: mask_padding = mask[:, : self.padding[1]].copy() elif self.padding_type == "mirror": image_padding = np.fliplr(image[:, -self.padding[1] :].copy()) if mask is not None: mask_padding = np.fliplr(mask[:, -self.padding[1] :].copy()) else: image_padding = np.full(padding_shape, fill_value=padding_value, dtype="uint8") if mask is not None: mask_padding = np.full(padding_shape[:2], fill_value=0, dtype="uint8") # combine padding image and original image image = np.concatenate([image, image_padding], axis=1) if mask is not None: mask = np.concatenate([mask, mask_padding], axis=1) # padding on top side if self.padding[2] > 0: # get image size ysize, xsize = image.shape[:2] # convert percentage into pixel amount if self.padding[2] <= 1 and isinstance(self.padding[2], float): self.padding[2] = int(self.padding[2] * ysize) # different padding shape for grayscale and colored image if len(image.shape) > 2: padding_shape = (self.padding[2], xsize, image.shape[2]) else: padding_shape = (self.padding[2], xsize) # create the padding image if self.padding_type == "duplicate": image_padding = image[-self.padding[2] :, :].copy() if mask is not None: mask_padding = mask[-self.padding[2] :, :].copy() elif self.padding_type == "mirror": image_padding = np.flipud(image[: self.padding[2], :].copy()) if mask is not None: mask_padding = np.flipud(mask[: self.padding[2], :].copy()) else: image_padding = np.full(padding_shape, fill_value=padding_value, dtype="uint8") if mask is not None: mask_padding = np.full(padding_shape[:2], fill_value=0, dtype="uint8") # combine padding image and original image image = np.concatenate([image_padding, image], axis=0) if mask is not None: mask = np.concatenate([mask_padding, mask], axis=0) # padding on bottom side if self.padding[3] > 0: # get image size ysize, xsize = image.shape[:2] # convert percentage into pixel amount if self.padding[3] <= 1 and isinstance(self.padding[3], float): self.padding[3] = int(self.padding[3] * ysize) # different padding shape for grayscale and colored image if len(image.shape) > 2: padding_shape = (self.padding[3], xsize, image.shape[2]) else: padding_shape = (self.padding[3], xsize) # create the padding image if self.padding_type == "duplicate": image_padding = image[: self.padding[3], :].copy() if mask is not None: mask_padding = mask[: self.padding[3], :].copy() elif self.padding_type == "mirror": image_padding = np.flipud(image[-self.padding[3] :, :].copy()) if mask is not None: mask_padding = np.flipud(mask[-self.padding[3] :, :].copy()) else: image_padding = np.full(padding_shape, fill_value=padding_value, dtype="uint8") if mask is not None: mask_padding = np.full(padding_shape[:2], fill_value=0, dtype="uint8") # combine padding image and original image image = np.concatenate([image, image_padding], axis=0) if mask is not None: mask = np.concatenate([mask, mask_padding], axis=0) # update points location after the padding (we need to add x and y if there's padding on top and left) if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint + self.padding[0], ypoint + self.padding[2]] # # update bounding boxes location after the padding (we need to add x and y if there's padding on top and left) if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint + self.padding[0], yspoint + self.padding[2], xepoint + self.padding[0], yepoint + self.padding[2], ] return image, mask
[docs] def run_scale(self, image, mask, keypoints, bounding_boxes): """Scale image size based on the input scaling ratio. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ # resize based on scale # remove negative value (if any) self.scale = list(self.scale) self.scale[0] = abs(self.scale[0]) self.scale[1] = abs(self.scale[1]) if self.scale[1] != 1 and self.scale[0] != 1: scale = random.uniform(self.scale[0], self.scale[1]) if scale != 1: # scale image new_width = int(image.shape[1] * scale) new_height = int(image.shape[0] * scale) new_size = (new_width, new_height) image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA) # scale mask and update mask labels after the resize process if mask is not None: mask_labels = np.unique(mask).tolist() + [0] mask = cv2.resize(mask, new_size, interpolation=cv2.INTER_AREA) update_mask_labels(mask, mask_labels) # scale keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [round(xpoint * scale), round(ypoint * scale)] # scale bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ round(xspoint * scale), round(yspoint * scale), round(xepoint * scale), round(yepoint * scale), ] return image, mask
[docs] def run_translation(self, image, mask, keypoints, bounding_boxes): """Translate image based on the input translation value. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ ysize, xsize = image.shape[:2] if self.translation[0] <= 1 and self.translation[0] >= -1 and isinstance(self.translation[0], float): self.translation = list(self.translation) self.translation[0] = int(self.translation[0] * xsize) if self.translation[1] <= 1 and self.translation[1] >= -1 and isinstance(self.translation[1], float): self.translation = list(self.translation) self.translation[1] = int(self.translation[1] * ysize) image_new = np.full_like(image, fill_value=255, dtype="uint8") if mask is not None: mask_new = np.full((image.shape[0], image.shape[1]), fill_value=0, dtype="uint8") offset_x = self.translation[0] offset_y = self.translation[1] # x translation if offset_x > 0: image_new[:, offset_x:] = image[:, :-offset_x] image = image_new if mask is not None: mask_new[:, offset_x:] = mask[:, :-offset_x] mask = mask_new elif offset_x < 0: image_new[:, :offset_x] = image[:, abs(offset_x) :] image = image_new if mask is not None: mask_new[:, :offset_x] = mask[:, abs(offset_x) :] mask = mask_new image_new = np.full_like(image, fill_value=255, dtype="uint8") if mask is not None: mask_new = np.full((image.shape[0], image.shape[1]), fill_value=0, dtype="uint8") # y translation if offset_y > 0: image_new[offset_y:, :] = image[:-offset_y, :] image = image_new if mask is not None: mask_new[offset_y:, :] = mask[:-offset_y, :] mask = mask_new elif offset_y < 0: image_new[:offset_y, :] = image[abs(offset_y) :, :] image = image_new if mask is not None: mask_new[:offset_y, :] = mask[abs(offset_y) :, :] mask = mask_new # translate keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint + offset_x, ypoint + offset_y] # translate bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint + offset_x, yspoint + offset_y, xepoint + offset_x, yepoint + offset_y, ] return image, mask
[docs] def run_flip(self, image, mask, keypoints, bounding_boxes): """Flip image left-right or up-down based on the input flipping flags. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ # flip left right if self.fliplr: ysize, xsize = image.shape[:2] # flip left right on image image = np.fliplr(image) # flip left right on mask if mask is not None: mask = np.fliplr(mask) # flip left right on keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xsize - 1 - xpoint, ypoint] # flip left right on bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xsize - 1 - xspoint, yspoint, xsize - 1 - xepoint, yepoint, ] # flip up down if self.flipud: ysize, xsize = image.shape[:2] # flip up down on image image = np.flipud(image) # flip up down on mask if mask is not None: mask = np.flipud(mask) # flip up down on keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint, ysize - 1 - ypoint] # flip up down on bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint, ysize - 1 - yspoint, xepoint, ysize - 1 - yepoint, ] return image, mask
[docs] def run_rotation(self, image, mask, keypoints, bounding_boxes): """Rotate image based on the input rotation angle. :param image: The input image. :type image: numpy array :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8) :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list """ # generate random angle if (self.rotate_range[0] != 0) | (self.rotate_range[1] != 0): angle = random.uniform(self.rotate_range[0], self.rotate_range[1]) else: angle = 0 # rotate image if angle != 0: ysize, xsize = image.shape[:2] # rotate image image = rotate_image_PIL(image, angle, expand=1, background_value=self.padding_value) # rotate mask if mask is not None: mask_labels = np.unique(mask).tolist() + [0] mask = rotate_image_PIL(mask, angle, expand=1) update_mask_labels(mask, mask_labels) # rotate keypoints if keypoints is not None: # center of rotation cy = int(ysize / 2) cx = int(xsize / 2) # compute offset after rotation rysize, rxsize = image.shape[:2] y_offset = (rysize / 2) - cy x_offset = (rxsize / 2) - cx # apply rotation # use -angle because image are rotated anticlockwise rotate_keypoints(keypoints, cx, cy, x_offset, y_offset, -angle) # rotate bounding boxes if bounding_boxes is not None: # center of rotation cy = int(ysize / 2) cx = int(xsize / 2) # compute offset after rotation rysize, rxsize = image.shape[:2] y_offset = (rysize / 2) - cy x_offset = (rxsize / 2) - cx # use -angle because image are rotated anticlockwise rotate_bounding_boxes(bounding_boxes, cx, cy, x_offset, y_offset, -angle) return image, mask
# Applies the Augmentation to input data. def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False): if force or self.should_run(): image = image.copy() # check and randmize geometric transformations if self.randomize: self.randomize_parameters(image) # crop image if self.crop: image, mask = self.run_crop(image, mask, keypoints, bounding_boxes) # apply padding if any(self.padding): image, mask = self.run_padding(image, mask, keypoints, bounding_boxes) # apply scaling image, mask = self.run_scale(image, mask, keypoints, bounding_boxes) # translate image based on translation value if self.translation[0] != 0 or self.translation[1] != 0: image, mask = self.run_translation(image, mask, keypoints, bounding_boxes) # apply flipping image, mask = self.run_flip(image, mask, keypoints, bounding_boxes) # apply rotation image, mask = self.run_rotation(image, mask, keypoints, bounding_boxes) # check for additional output of mask, keypoints and bounding boxes outputs_extra = [] if mask is not None or keypoints is not None or bounding_boxes is not None: outputs_extra = [mask, keypoints, bounding_boxes] # returns additional mask, keypoints and bounding boxes if there is additional input if outputs_extra: # returns in the format of [image, mask, keypoints, bounding_boxes] return [image] + outputs_extra else: return image