Source code for augraphy.augmentations.folding

import random

import numpy as np

from augraphy.augmentations.lib import rotate_bounding_boxes
from augraphy.augmentations.lib import rotate_image_PIL
from augraphy.augmentations.lib import rotate_keypoints
from augraphy.augmentations.lib import update_mask_labels
from augraphy.augmentations.lib import warp_fold
from augraphy.base.augmentation import Augmentation



[docs]
class Folding(Augmentation):
    """Emulates folding effect from perspective transformation

    :param fold_x: X coordinate of the folding effect.
    :type fold_x: int, optional
    :param fold_deviation: Deviation (in pixels) of provided X coordinate location.
    :type fold_deviation: tuple, optional
    :param fold_count: Number of applied foldings
    :type fold_count: int, optional
    :param fold_noise: Level of noise added to folding area. Range from
        value of 0 to 1.
    :type fold_noise: float, optional
    :param fold_angle_range: Tuple of ints determining the angle to rotate the image
        before applying a varying angle folding effect.
    :type fold_angle_range: tuple, optional
    :param gradient_width: Tuple (min, max) Measure of the space affected
        by fold prior to being warped (in units of percentage of width of page).
    :type gradient_width: tuple, optional
    :param gradient_height: Tuple (min, max) Measure of depth of fold (unit
        measured as percentage page height)
    :type gradient_height: tuple, optional
    :param backdrop_color: The backdrop color (BGR) of the folding effect.
    :type backdrop_color: tuple, optional
    :param p: The probability this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        fold_x=None,
        fold_deviation=(0, 0),
        fold_count=2,
        fold_noise=0.01,
        fold_angle_range=(0, 0),
        gradient_width=(0.1, 0.2),
        gradient_height=(0.01, 0.02),
        backdrop_color=(0, 0, 0),
        p=1,
    ):
        """Store the folding configuration on the instance.

        Every argument is kept unchanged as an attribute of the same name;
        ``p`` is forwarded to the base class initializer.
        """
        super().__init__(p=p)

        # placement of the fold center and number of folds
        self.fold_x, self.fold_deviation = fold_x, fold_deviation
        self.fold_count = fold_count
        self.fold_angle_range = fold_angle_range

        # extent of each fold, expressed as (min, max) tuples
        self.gradient_width, self.gradient_height = gradient_width, gradient_height

        # noise level and backdrop color handed to the warping helpers
        self.fold_noise = fold_noise
        self.backdrop_color = backdrop_color

    # Constructs a string representation of this Augmentation.
    def __repr__(self):
        return f"Folding(fold_x={self.fold_x}, fold_deviation={self.fold_deviation}, fold_count={self.fold_count}, fold_noise={self.fold_noise}, fold_angle_range={self.fold_angle_range}, gradient_width={self.gradient_width}, gradient_height={self.gradient_height}, backdrop_color={self.backdrop_color}, p={self.p})"


[docs]
    def apply_folding(
        self,
        img,
        keypoints,
        bounding_boxes,
        ysize,
        xsize,
        fold_x,
        fold_width_one_side,
        fold_y_shift,
        fold_noise,
        fmask,
    ):
        """Apply perspective transform twice to get single folding effect.

        The left and then the right half of the fold are warped with
        ``warp_fold``. When the input is not a mask, keypoints and bounding
        boxes lying inside the fold region are shifted along y in place.

        :param img: The image to apply the function.
        :type img: numpy.array (numpy.uint8)
        :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate.
        :type keypoints: dictionary
        :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2).
        :type bounding_boxes: list
        :param ysize: Height of the image.
        :type ysize: int
        :param xsize: Width of the image.
        :type xsize: int
        :param fold_x: The folding center x coordinate.
        :type fold_x: int
        :param fold_width_one_side: The warped width of folding effect on each side of fold_x.
        :type fold_width_one_side: int
        :param fold_y_shift: Depth of the folding effect.
        :type fold_y_shift: int
        :param fold_noise: Level of noise added to folding area.
        :type fold_noise: float
        :param fmask: Flag to identify if the input is mask instead of image.
        :type fmask: int
        :return: The folded image, or the input unchanged when the fold cannot be applied.
        """

        # the fold needs room for its full width on both sides of the center line
        if (xsize - fold_width_one_side - 1) < (fold_width_one_side + 1):
            print("Folding augmentation is not applied, please increase image size")
            return img

        # a fold with no width or no depth has nothing to warp
        if fold_width_one_side == 0:
            print(
                "Folding augmentation is not applied, please increase gradient width or image size",
            )
            return img
        if fold_y_shift == 0:
            print(
                "Folding augmentation is not applied, please increase gradient height or image size",
            )
            return img

        # warp the left half first, then the right half of that result
        folded = img
        for fold_side in ("left", "right"):
            folded = warp_fold(
                folded,
                ysize,
                fold_noise,
                fold_x,
                fold_width_one_side,
                fold_y_shift,
                side=fold_side,
                backdrop_color=self.backdrop_color,
                fmask=fmask,
            )

        # labels are only moved together with the image, not with a mask
        if fmask:
            return folded

        left_edge = fold_x - fold_width_one_side
        right_edge = fold_x + fold_width_one_side

        def shifted_y(x_coord, y_coord):
            """Return y_coord moved by the fold's y shift at x_coord (unchanged outside the fold)."""
            if not (0 <= y_coord < ysize):
                return y_coord
            # y shifting value for single pixel
            shift_per_pixel = fold_y_shift / fold_width_one_side
            if left_edge <= x_coord < fold_x:
                # left box: shift grows from the left edge towards the fold center
                return y_coord + round((x_coord - left_edge) * shift_per_pixel)
            if fold_x <= x_coord < right_edge:
                # right box: shift shrinks from the fold center towards the right edge
                return y_coord + round((fold_width_one_side - (x_coord - fold_x)) * shift_per_pixel)
            return y_coord

        # warp keypoints, label by label
        if keypoints is not None:
            for label_points in keypoints.values():
                for index, (x_coord, y_coord) in enumerate(label_points):
                    label_points[index] = [x_coord, shifted_y(x_coord, y_coord)]

        # warp bounding boxes: the top-left corner decides the shift, the size is kept
        if bounding_boxes is not None:
            for index, (x_start, y_start, x_end, y_end) in enumerate(bounding_boxes):
                box_width = x_end - x_start
                box_height = y_end - y_start
                new_y_start = shifted_y(x_start, y_start)
                bounding_boxes[index] = [
                    x_start,
                    new_y_start,
                    x_start + box_width,
                    new_y_start + box_height,
                ]

        return folded



[docs]
    def apply_rotate_and_folding(
        self,
        image_fold,
        fold_angle,
        fold_x=None,
        fold_width_one_side=None,
        fold_y_shift=None,
        keypoints=None,
        bounding_boxes=None,
        fmask=0,
    ):
        """Apply rotation and folding effect.

        :param image_fold: The image to apply the function.
        :type image_gold: numpy.array (numpy.uint8)
        :param fold_angle: The angle of rotation.
        :type fold_angle: int
        :param fold_x: The folding center x coordinate
        :type fold_x: int
        :param fold_width_one_side: The warped width of folding effect from the fold_x.
        :type fold_width_one_side: int
        :param fold_y_shift: Depth of the folding effect.
        :type fold_y_shift: int
        :param fmask: Flag to identify if the input image is mask isntead of image.
        :type fmask: int
        """

        # input image size
        iysize, ixsize = image_fold.shape[:2]

        if fold_angle != 0:
            # rotate image before the folding
            image_fold = rotate_image_PIL(
                image_fold,
                angle=fold_angle,
                background_value=self.backdrop_color,
                expand=1,
            )

            if not fmask:
                # rotate keypoints
                if keypoints is not None:
                    # center of rotation
                    cy = int(iysize / 2)
                    cx = int(ixsize / 2)
                    # compute offset after rotation
                    rysize, rxsize = image_fold.shape[:2]
                    y_offset = (rysize / 2) - cy
                    x_offset = (rxsize / 2) - cx
                    # rotate keypoints
                    rotate_keypoints(keypoints, cx, cy, x_offset, y_offset, -fold_angle)

                # rotate bounding boxes
                if bounding_boxes is not None:
                    # center of rotation
                    cy = int(iysize / 2)
                    cx = int(ixsize / 2)
                    # compute offset after rotation
                    rysize, rxsize = image_fold.shape[:2]
                    y_offset = (rysize / 2) - cy
                    x_offset = (rxsize / 2) - cx
                    # rotate bounding boxes
                    # use - fold_angle because image are rotated anticlockwise
                    rotate_bounding_boxes(bounding_boxes, cx, cy, x_offset, y_offset, -fold_angle)

        # rotated size
        ysize, xsize = image_fold.shape[:2]

        # create folding parameters (for non mask, mask will be using input parameters)
        if not fmask:

            # folding width from left to center of folding, or from right to center of folding
            min_fold_x = min(np.ceil(self.gradient_width[0] * xsize), xsize).astype("int")
            max_fold_x = min(np.ceil(self.gradient_width[1] * xsize), xsize).astype("int")
            fold_width_one_side = int(
                random.randint(min_fold_x, max_fold_x) / 2,
            )

            # center of folding
            if self.fold_x is None:
                fold_x = random.randint(
                    fold_width_one_side + 1,
                    xsize - fold_width_one_side - 1,
                )
            else:
                deviation = random.randint(
                    self.fold_deviation[0],
                    self.fold_deviation[1],
                ) * random.choice([-1, 1])
                fold_x = min(
                    max(self.fold_x + deviation, fold_width_one_side + 1),
                    xsize - fold_width_one_side - 1,
                )

            # y distortion in folding (support positive y value for now)
            fold_y_shift_min = min(np.ceil(self.gradient_height[0] * ysize), ysize).astype("int")
            fold_y_shift_max = min(np.ceil(self.gradient_height[1] * ysize), ysize).astype("int")
            fold_y_shift = random.randint(
                fold_y_shift_min,
                fold_y_shift_max,
            )

        # no noise for mask
        if fmask:
            fold_noise = 0
        else:
            fold_noise = self.fold_noise

        image_fold = self.apply_folding(
            image_fold,
            keypoints,
            bounding_boxes,
            image_fold.shape[0],
            image_fold.shape[1],
            fold_x,
            fold_width_one_side,
            fold_y_shift,
            fold_noise,
            fmask,
        )

        if fold_angle != 0:
            # rotate back the image
            image_fold = rotate_image_PIL(
                image_fold,
                angle=-fold_angle,
                background_value=self.backdrop_color,
                expand=1,
            )

            # rotate keypoints
            if not fmask:
                if keypoints is not None:
                    # center of rotation
                    cy = int(ysize / 2)
                    cx = int(xsize / 2)
                    # compute offset after rotation
                    rysize, rxsize = image_fold.shape[:2]
                    y_offset = (rysize / 2) - cy
                    x_offset = (rxsize / 2) - cx
                    # rotate keypoints
                    rotate_keypoints(keypoints, cx, cy, x_offset, y_offset, fold_angle)

                # rotate bounding boxes
                if bounding_boxes is not None:
                    # center of rotation
                    cy = int(ysize / 2)
                    cx = int(xsize / 2)
                    # compute offset after rotation
                    rysize, rxsize = image_fold.shape[:2]
                    y_offset = (rysize / 2) - cy
                    x_offset = (rxsize / 2) - cx
                    # rotate bounding boxes
                    # use - fold_angle because image are rotated anticlockwise
                    rotate_bounding_boxes(bounding_boxes, cx, cy, x_offset, y_offset, fold_angle)

            # get the image without the padding area, we will get extra padding area after the rotation
            rysize, rxsize = image_fold.shape[:2]

            # center of x and y
            cx = int(rxsize / 2)
            cy = int(rysize / 2)
            rcx = int(ixsize / 2)
            rcy = int(iysize / 2)
            start_x = cx - rcx
            start_y = cy - rcy
            end_x = start_x + ixsize
            end_y = start_y + iysize
            image_fold = image_fold[start_y:end_y, start_x:end_x]

            # remove padding area of keypoints and bounding boxes
            if not fmask:
                if keypoints is not None:
                    y_offset = (iysize / 2) - (rysize / 2)
                    x_offset = (ixsize / 2) - (rxsize / 2)
                    for name, points in keypoints.items():
                        for i, (xpoint, ypoint) in enumerate(points):
                            points[i] = [round(xpoint + x_offset), round(ypoint + y_offset)]

                if bounding_boxes is not None:
                    y_offset = (iysize / 2) - (rysize / 2)
                    x_offset = (ixsize / 2) - (rxsize / 2)
                    for i, bounding_box in enumerate(bounding_boxes):
                        xspoint, yspoint, xepoint, yepoint = bounding_box
                        bounding_boxes[i] = [
                            round(xspoint + x_offset),
                            round(yspoint + y_offset),
                            round(xepoint + x_offset),
                            round(yepoint + y_offset),
                        ]

        return image_fold, fold_x, fold_width_one_side, fold_y_shift


    # Applies the Augmentation to input data.
    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        if force or self.should_run():

            # get mask unique labels
            if mask is not None:
                mask_labels = np.unique(mask).tolist() + [0]

            # get image dimension
            ysize, xsize = image.shape[:2]

            # apply folding multiple times
            image_fold = image.copy()
            for _ in range(self.fold_count):
                # random fold angle
                fold_angle = random.randint(self.fold_angle_range[0], self.fold_angle_range[1])

                # apply folding to image
                image_fold, fold_x, fold_width_one_side, fold_y_shift = self.apply_rotate_and_folding(
                    image_fold,
                    fold_angle,
                    keypoints=keypoints,
                    bounding_boxes=bounding_boxes,
                    fmask=0,
                )

                # apply folding to mask, using a same folding parameter of image
                if mask is not None:
                    mask, _, _, _ = self.apply_rotate_and_folding(
                        mask,
                        fold_angle,
                        fold_x,
                        fold_width_one_side,
                        fold_y_shift,
                        fmask=1,
                    )

            # update new interpolated mask values into each mask labels
            if mask is not None:
                update_mask_labels(mask, mask_labels)

            # check for additional output of mask, keypoints and bounding boxes
            outputs_extra = []
            if mask is not None or keypoints is not None or bounding_boxes is not None:
                outputs_extra = [mask, keypoints, bounding_boxes]

            # returns additional mask, keypoints and bounding boxes if there is additional input
            if outputs_extra:
                # returns in the format of [image, mask, keypoints, bounding_boxes]
                return [image_fold] + outputs_extra
            else:
                return image_fold