Source code for augraphy.augmentations.folding

import random

import numpy as np

from augraphy.augmentations.lib import rotate_bounding_boxes
from augraphy.augmentations.lib import rotate_image_PIL
from augraphy.augmentations.lib import rotate_keypoints
from augraphy.augmentations.lib import update_mask_labels
from augraphy.augmentations.lib import warp_fold
from augraphy.base.augmentation import Augmentation


class Folding(Augmentation):
    """Emulates folding effect from perspective transformation.

    :param fold_x: X coordinate of the folding effect. When None, a random
        location is picked for every fold.
    :type fold_x: int, optional
    :param fold_deviation: Deviation (in pixels) of provided X coordinate location.
    :type fold_deviation: tuple, optional
    :param fold_count: Number of applied foldings.
    :type fold_count: int, optional
    :param fold_noise: Level of noise added to folding area. Range from value of 0 to 1.
    :type fold_noise: float, optional
    :param fold_angle_range: Tuple of ints determining the angle to rotate the image
        before applying a varying angle folding effect.
    :type fold_angle_range: tuple, optional
    :param gradient_width: Tuple (min, max) Measure of the space affected by fold
        prior to being warped (in units of percentage of width of page).
    :type gradient_width: tuple, optional
    :param gradient_height: Tuple (min, max) Measure of depth of fold
        (unit measured as percentage page height).
    :type gradient_height: tuple, optional
    :param backdrop_color: The backdrop color (BGR) of the folding effect.
    :type backdrop_color: tuple, optional
    :param p: The probability this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        fold_x=None,
        fold_deviation=(0, 0),
        fold_count=2,
        fold_noise=0.01,
        fold_angle_range=(0, 0),
        gradient_width=(0.1, 0.2),
        gradient_height=(0.01, 0.02),
        backdrop_color=(0, 0, 0),
        p=1,
    ):
        super().__init__(p=p)
        self.fold_x = fold_x
        self.fold_deviation = fold_deviation
        self.fold_count = fold_count
        self.fold_noise = fold_noise
        self.fold_angle_range = fold_angle_range
        self.gradient_width = gradient_width
        self.gradient_height = gradient_height
        self.backdrop_color = backdrop_color

    # Constructs a string representation of this Augmentation.
    def __repr__(self):
        return f"Folding(fold_x={self.fold_x}, fold_deviation={self.fold_deviation}, fold_count={self.fold_count}, fold_noise={self.fold_noise}, fold_angle_range={self.fold_angle_range}, gradient_width={self.gradient_width}, gradient_height={self.gradient_height}, backdrop_color={self.backdrop_color}, p={self.p})"

    def apply_folding(
        self,
        img,
        keypoints,
        bounding_boxes,
        ysize,
        xsize,
        fold_x,
        fold_width_one_side,
        fold_y_shift,
        fold_noise,
        fmask,
    ):
        """Apply perspective transform twice to get single folding effect.

        The left half of the fold is warped first, then the right half is
        warped on top of that result. When ``fmask`` is falsy, ``keypoints``
        and ``bounding_boxes`` are updated in place to follow the warp.

        :param img: The image to apply the function.
        :type img: numpy.array (numpy.uint8)
        :param keypoints: A dictionary of single or multiple labels where each label
            is a nested list of points coordinate.
        :type keypoints: dictionary
        :param bounding_boxes: A nested list where each nested list contains
            box location (x1, y1, x2, y2).
        :type bounding_boxes: list
        :param ysize: Height of the image.
        :type ysize: int
        :param xsize: Width of the image.
        :type xsize: int
        :param fold_x: The folding center x coordinate.
        :type fold_x: int
        :param fold_width_one_side: The warped width of folding effect on each
            side of fold_x.
        :type fold_width_one_side: int
        :param fold_y_shift: Depth of the folding effect.
        :type fold_y_shift: int
        :param fold_noise: Level of noise added to folding area.
        :type fold_noise: float
        :param fmask: Flag to identify if the input is mask instead of image.
        :type fmask: int
        :return: The folded image, or ``img`` unchanged when the fold cannot
            be applied (a message is printed in that case).
        """

        # test for valid folding center line
        if (xsize - fold_width_one_side - 1) < (fold_width_one_side + 1):
            print("Folding augmentation is not applied, please increase image size")
            return img

        # a fold needs both a non-zero width and a non-zero depth
        if fold_width_one_side == 0:
            print(
                "Folding augmentation is not applied, please increase gradient width or image size",
            )
            return img
        if fold_y_shift == 0:
            print(
                "Folding augmentation is not applied, please increase gradient height or image size",
            )
            return img

        img_fold_l = warp_fold(
            img,
            ysize,
            fold_noise,
            fold_x,
            fold_width_one_side,
            fold_y_shift,
            side="left",
            backdrop_color=self.backdrop_color,
            fmask=fmask,
        )
        img_fold_r = warp_fold(
            img_fold_l,
            ysize,
            fold_noise,
            fold_x,
            fold_width_one_side,
            fold_y_shift,
            side="right",
            backdrop_color=self.backdrop_color,
            fmask=fmask,
        )

        if not fmask:
            # x range of the warped area on each side of the folding center
            left_start = fold_x - fold_width_one_side
            right_end = fold_x + fold_width_one_side
            # y shifting value for single pixel
            y_shift_single_step = fold_y_shift / fold_width_one_side

            def y_shift_at(xpoint, ypoint):
                # the shift grows linearly towards the folding center and is
                # zero for points outside of the warped area
                if not 0 <= ypoint < ysize:
                    return 0
                if left_start <= xpoint < fold_x:
                    return round((xpoint - left_start) * y_shift_single_step)
                if fold_x <= xpoint < right_end:
                    return round((fold_width_one_side - (xpoint - fold_x)) * y_shift_single_step)
                return 0

            # warp keypoints
            if keypoints is not None:
                for points in keypoints.values():
                    for i, (xpoint, ypoint) in enumerate(points):
                        points[i] = [xpoint, ypoint + y_shift_at(xpoint, ypoint)]

            # warp bounding boxes, the box is moved as a whole based on its top left corner
            if bounding_boxes is not None:
                for i, (xspoint, yspoint, xepoint, yepoint) in enumerate(bounding_boxes):
                    width = xepoint - xspoint
                    height = yepoint - yspoint
                    yspoint += y_shift_at(xspoint, yspoint)
                    bounding_boxes[i] = [xspoint, yspoint, xspoint + width, yspoint + height]

        return img_fold_r

    def apply_rotate_and_folding(
        self,
        image_fold,
        fold_angle,
        fold_x=None,
        fold_width_one_side=None,
        fold_y_shift=None,
        keypoints=None,
        bounding_boxes=None,
        fmask=0,
    ):
        """Apply rotation and folding effect.

        The image is rotated by ``fold_angle``, folded, rotated back and then
        cropped to its input size. For a non mask input the folding parameters
        are generated randomly; for a mask the provided parameters are used so
        that the mask follows the fold of the image.

        :param image_fold: The image to apply the function.
        :type image_fold: numpy.array (numpy.uint8)
        :param fold_angle: The angle of rotation.
        :type fold_angle: int
        :param fold_x: The folding center x coordinate. Only used for mask.
        :type fold_x: int
        :param fold_width_one_side: The warped width of folding effect from the fold_x.
            Only used for mask.
        :type fold_width_one_side: int
        :param fold_y_shift: Depth of the folding effect. Only used for mask.
        :type fold_y_shift: int
        :param keypoints: A dictionary of single or multiple labels where each label
            is a nested list of points coordinate. Updated in place.
        :type keypoints: dictionary
        :param bounding_boxes: A nested list where each nested list contains
            box location (x1, y1, x2, y2). Updated in place.
        :type bounding_boxes: list
        :param fmask: Flag to identify if the input image is mask instead of image.
        :type fmask: int
        :return: Tuple of (image, fold_x, fold_width_one_side, fold_y_shift).
        """

        # input image size
        iysize, ixsize = image_fold.shape[:2]

        if fold_angle != 0:
            # rotate image before the folding
            image_fold = rotate_image_PIL(
                image_fold,
                angle=fold_angle,
                background_value=self.backdrop_color,
                expand=1,
            )

            if not fmask and (keypoints is not None or bounding_boxes is not None):
                # center of rotation
                cy = int(iysize / 2)
                cx = int(ixsize / 2)
                # compute offset after rotation
                rysize, rxsize = image_fold.shape[:2]
                y_offset = (rysize / 2) - cy
                x_offset = (rxsize / 2) - cx

                # use - fold_angle because image are rotated anticlockwise
                if keypoints is not None:
                    rotate_keypoints(keypoints, cx, cy, x_offset, y_offset, -fold_angle)
                if bounding_boxes is not None:
                    rotate_bounding_boxes(bounding_boxes, cx, cy, x_offset, y_offset, -fold_angle)

        # rotated size
        ysize, xsize = image_fold.shape[:2]

        # create folding parameters (for non mask, mask will be using input parameters)
        if not fmask:
            # folding width from left to center of folding, or from right to center of folding
            # int() is used instead of .astype() because min() returns the plain
            # int size when the clamp triggers, and that has no .astype()
            min_fold_x = int(min(np.ceil(self.gradient_width[0] * xsize), xsize))
            max_fold_x = int(min(np.ceil(self.gradient_width[1] * xsize), xsize))
            fold_width_one_side = int(
                random.randint(min_fold_x, max_fold_x) / 2,
            )

            # valid range for the center of folding
            lowest_fold_x = fold_width_one_side + 1
            highest_fold_x = xsize - fold_width_one_side - 1

            # center of folding
            if self.fold_x is None:
                if lowest_fold_x <= highest_fold_x:
                    fold_x = random.randint(lowest_fold_x, highest_fold_x)
                else:
                    # the fold doesn't fit into the image, randint would raise on
                    # the empty range; apply_folding tests for the same condition
                    # and skips the folding with a message
                    fold_x = lowest_fold_x
            else:
                deviation = random.randint(
                    self.fold_deviation[0],
                    self.fold_deviation[1],
                ) * random.choice([-1, 1])
                fold_x = min(
                    max(self.fold_x + deviation, lowest_fold_x),
                    highest_fold_x,
                )

            # y distortion in folding (support positive y value for now)
            fold_y_shift_min = int(min(np.ceil(self.gradient_height[0] * ysize), ysize))
            fold_y_shift_max = int(min(np.ceil(self.gradient_height[1] * ysize), ysize))
            fold_y_shift = random.randint(
                fold_y_shift_min,
                fold_y_shift_max,
            )

        # no noise for mask
        fold_noise = 0 if fmask else self.fold_noise

        image_fold = self.apply_folding(
            image_fold,
            keypoints,
            bounding_boxes,
            image_fold.shape[0],
            image_fold.shape[1],
            fold_x,
            fold_width_one_side,
            fold_y_shift,
            fold_noise,
            fmask,
        )

        if fold_angle != 0:
            # rotate back the image
            image_fold = rotate_image_PIL(
                image_fold,
                angle=-fold_angle,
                background_value=self.backdrop_color,
                expand=1,
            )
            # size after rotating back, it includes extra padding area
            rysize, rxsize = image_fold.shape[:2]

            if not fmask and (keypoints is not None or bounding_boxes is not None):
                # center of rotation
                cy = int(ysize / 2)
                cx = int(xsize / 2)
                # compute offset after rotation
                y_offset = (rysize / 2) - cy
                x_offset = (rxsize / 2) - cx

                if keypoints is not None:
                    rotate_keypoints(keypoints, cx, cy, x_offset, y_offset, fold_angle)
                if bounding_boxes is not None:
                    rotate_bounding_boxes(bounding_boxes, cx, cy, x_offset, y_offset, fold_angle)

            # get the image without the padding area, we will get extra padding area after the rotation
            start_x = int(rxsize / 2) - int(ixsize / 2)
            start_y = int(rysize / 2) - int(iysize / 2)
            end_x = start_x + ixsize
            end_y = start_y + iysize
            image_fold = image_fold[start_y:end_y, start_x:end_x]

            # remove padding area of keypoints and bounding boxes
            if not fmask:
                y_offset = (iysize / 2) - (rysize / 2)
                x_offset = (ixsize / 2) - (rxsize / 2)

                if keypoints is not None:
                    for points in keypoints.values():
                        for i, (xpoint, ypoint) in enumerate(points):
                            points[i] = [round(xpoint + x_offset), round(ypoint + y_offset)]

                if bounding_boxes is not None:
                    for i, (xspoint, yspoint, xepoint, yepoint) in enumerate(bounding_boxes):
                        bounding_boxes[i] = [
                            round(xspoint + x_offset),
                            round(yspoint + y_offset),
                            round(xepoint + x_offset),
                            round(yepoint + y_offset),
                        ]

        return image_fold, fold_x, fold_width_one_side, fold_y_shift

    # Applies the Augmentation to input data.
    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        """Apply ``fold_count`` folds to the image and its optional annotations.

        :param image: The image to apply the augmentation.
        :type image: numpy.array
        :param layer: Not used by this augmentation.
        :param mask: Optional mask, folded with the same parameters as the image.
        :type mask: numpy.array, optional
        :param keypoints: Optional keypoints, updated in place.
        :type keypoints: dictionary, optional
        :param bounding_boxes: Optional bounding boxes, updated in place.
        :type bounding_boxes: list, optional
        :param force: Apply the augmentation regardless of ``should_run()``.
        :type force: bool, optional
        :return: The folded image, or [image, mask, keypoints, bounding_boxes] when
            any of mask, keypoints or bounding_boxes is provided. Nothing is
            returned when the augmentation doesn't run.
        """
        if force or self.should_run():

            # get mask unique labels
            if mask is not None:
                mask_labels = np.unique(mask).tolist() + [0]

            # apply folding multiple times
            image_fold = image.copy()
            for _ in range(self.fold_count):
                # random fold angle
                fold_angle = random.randint(self.fold_angle_range[0], self.fold_angle_range[1])
                # apply folding to image
                image_fold, fold_x, fold_width_one_side, fold_y_shift = self.apply_rotate_and_folding(
                    image_fold,
                    fold_angle,
                    keypoints=keypoints,
                    bounding_boxes=bounding_boxes,
                    fmask=0,
                )
                # apply folding to mask, using a same folding parameter of image
                if mask is not None:
                    mask, _, _, _ = self.apply_rotate_and_folding(
                        mask,
                        fold_angle,
                        fold_x,
                        fold_width_one_side,
                        fold_y_shift,
                        fmask=1,
                    )

            # update new interpolated mask values into each mask labels
            if mask is not None:
                update_mask_labels(mask, mask_labels)

            # returns additional mask, keypoints and bounding boxes if there is additional input
            if mask is not None or keypoints is not None or bounding_boxes is not None:
                # returns in the format of [image, mask, keypoints, bounding_boxes]
                return [image_fold, mask, keypoints, bounding_boxes]
            return image_fold