# Source code for augraphy.augmentations.sectionshift

import random

import cv2
import numpy as np

from augraphy.base.augmentation import Augmentation



# [docs]
class SectionShift(Augmentation):
    """Shift single or multiple sections of image in horizontal, vertical or both directions to create an effect of shifted image sections.

    :param section_shift_number_range: Tuple of ints determining the number of section shift operations.
    :type section_shift_number_range: tuple, optional
    :param section_shift_locations: A nested list containing the shifting boxes.
        Each box should be in format of [x0, y0, xn, yn].
        Use "random" for random location.
    :type section_shift_locations: list, optional
    :param section_shift_x_range: Tuple of ints determining the shifting value in the horizontal direction.
        The shifting value will be in percentage of the image width if the value is float and in between -1.0 - 1.0:
        shifting_x (int) = image width  * shifting_x (float and -1.0 - 1.0)
    :type section_shift_x_range: tuple, optional
    :param section_shift_y_range: Tuple of ints determining the shifting value in the vertical direction.
        The shifting value will be in percentage of the image height if the value is float and in between -1.0 - 1.0:
        shifting_y (int) = image height  * shifting_y (float and -1.0 - 1.0)
    :type section_shift_y_range: tuple, optional
    :param section_shift_fill_value: Tuple of values in BGR to fill in the shifted area.
        Use "-1" to not fill any value and the image default value will be used instead.
        Use "random" to fill random color.
    :type section_shift_fill_value: tuple, optional
    :param p: The probability that this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        section_shift_number_range=(3, 5),
        section_shift_locations="random",
        section_shift_x_range=(-10, 10),
        section_shift_y_range=(-10, 10),
        section_shift_fill_value=-1,
        p=1,
    ):
        """Store the section shift configuration on the instance.

        Every argument is kept exactly as given; no validation or conversion
        happens here. The probability ``p`` is forwarded to the parent
        constructor.
        """
        super().__init__(p=p)

        # which sections are shifted, and how many of them
        self.section_shift_locations = section_shift_locations
        self.section_shift_number_range = section_shift_number_range

        # how far each section is moved along each axis
        self.section_shift_y_range = section_shift_y_range
        self.section_shift_x_range = section_shift_x_range

        # what is written into the area a section was moved away from
        self.section_shift_fill_value = section_shift_fill_value

    # Constructs a string representation of this Augmentation.
    def __repr__(self):
        return f"SectionShift(section_shift_number_range={self.section_shift_number_range}, section_shift_locations={self.section_shift_locations}, section_shift_x_range={self.section_shift_x_range}, section_shift_y_range={self.section_shift_y_range},  section_shift_fill_value={self.section_shift_fill_value}, p={self.p})"


    # [docs]
    def apply_shift(self, image, mask, keypoints, bounding_boxes, shift_box, section_shift_x, section_shift_y):
        """Core function to shift section of image based on the input box and shifting values.

        :param image: The input image.
        :type image: numpy array
        :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255.
            Value of 0 will be assigned to the filled area after the transformation.
        :type mask: numpy array (uint8)
        :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate.
        :type keypoints: dictionary
        :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2).
        :type bounding_boxes: list
        :param shift_box: Tuple contains the box of the shifting location in format of x0, y0, xn, yn.
        :type shift_box: tuple
        :param section_shift_x: The shifting value in horizontal direction.
        :type section_shift_x: int
        :param section_shift_y: The shifting value in vertical direction.
        :type section_shift_y: int
        """

        ysize, xsize = image.shape[:2]
        x0, y0, xn, yn = shift_box

        # make sure doesn't exceed image boundary
        x0 = min(xsize - section_shift_x - 1, x0)
        y0 = min(ysize - section_shift_y - 1, y0)
        if x0 + section_shift_x < 0:
            x0 = x0 - section_shift_x
        if y0 + section_shift_y < 0:
            y0 = y0 - section_shift_y
        if xn + section_shift_x > xsize:
            xn = xsize - section_shift_x
        if yn + section_shift_y > ysize:
            yn = ysize - section_shift_y

        # the section of shifted image
        image_section = image[y0:yn, x0:xn].copy()
        if mask is not None:
            mask_section = mask[y0:yn, x0:xn].copy()

        # fill the shifted area with value
        if self.section_shift_fill_value != -1:
            if self.section_shift_fill_value == "random":
                fill_value = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            else:
                fill_value = self.section_shift_fill_value
            # add alpha value
            if image.shape[2] == 4:
                fill_value = (fill_value[0], fill_value[1], fill_value[2], 255)
            image[y0:yn, x0:xn] = fill_value
            # fill mask shifted area with 0
            if mask is not None:
                mask[y0:yn, x0:xn] = 0

        # shift the section of image
        image[y0 + section_shift_y : yn + section_shift_y, x0 + section_shift_x : xn + section_shift_x] = image_section

        # shift mask
        if mask is not None:
            mask[
                y0 + section_shift_y : yn + section_shift_y,
                x0 + section_shift_x : xn + section_shift_x,
            ] = mask_section

        # shift keypoints inside the shifting boundary
        if keypoints is not None:
            for name, points in keypoints.items():
                for i, (xpoint, ypoint) in enumerate(points):
                    if xpoint >= x0 and xpoint < xn and ypoint >= y0 and ypoint < yn:
                        points[i] = [xpoint + section_shift_x, ypoint + section_shift_y]

        # shift bounding boxes inside the shifting boundary
        if bounding_boxes is not None:
            for i, bounding_box in enumerate(bounding_boxes):
                xspoint, yspoint, xepoint, yepoint = bounding_box
                # start point is in the box
                if xspoint >= x0 and xspoint < xn and yspoint >= y0 and yspoint < yn:
                    xspoint += section_shift_x
                    yspoint += section_shift_y
                # end point is in the box
                if xepoint >= x0 and xepoint < xn and yepoint >= y0 and yepoint < yn:
                    xepoint += section_shift_x
                    yepoint += section_shift_y
                bounding_boxes[i] = [xspoint, yspoint, xepoint, yepoint]


    # Applies the Augmentation to input data.
    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        if force or self.should_run():
            image = image.copy()

            # convert and make sure image is color image
            if len(image.shape) > 2:
                is_gray = 0
            else:
                is_gray = 1
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

            ysize, xsize = image.shape[:2]

            # generate number of shifting operation
            if self.section_shift_locations == "random":
                section_shift_number = random.randint(
                    self.section_shift_number_range[0],
                    self.section_shift_number_range[1],
                )
            else:
                section_shift_number = len(self.section_shift_locations)

            for i in range(section_shift_number):

                # check input to scale it with image width
                if (
                    isinstance(self.section_shift_x_range[1], float)
                    and self.section_shift_x_range[1] <= 1
                    and self.section_shift_x_range[1] >= -1
                ):
                    section_shift_x = random.randint(
                        int(self.section_shift_x_range[0] * xsize),
                        int(self.section_shift_x_range[1] * xsize),
                    )
                else:
                    section_shift_x = random.randint(self.section_shift_x_range[0], self.section_shift_x_range[1])

                # check input to scale it with image height
                if (
                    isinstance(self.section_shift_y_range[1], float)
                    and self.section_shift_y_range[1] <= 1
                    and self.section_shift_y_range[1] >= -1
                ):
                    section_shift_y = random.randint(
                        int(self.section_shift_y_range[0] * ysize),
                        int(self.section_shift_y_range[1] * ysize),
                    )
                else:
                    section_shift_y = random.randint(self.section_shift_y_range[0], self.section_shift_y_range[1])

                if self.section_shift_locations == "random":
                    # for random section, generate random section width and height
                    section_shift_width_size = random.randint(int(xsize / 20), int(xsize / 5))
                    section_shift_height_size = random.randint(int(ysize / 20), int(ysize / 5))

                    # generate random box
                    start_x = random.randint(0, xsize - section_shift_x - section_shift_width_size - 1)
                    start_y = random.randint(0, ysize - section_shift_y - section_shift_height_size - 1)
                    end_x = start_x + section_shift_width_size
                    end_y = start_y + section_shift_height_size
                    shift_box = [start_x, start_y, end_x, end_y]
                else:
                    shift_box = list(self.section_shift_locations[i])

                # check if shifting location beyind image boundary
                # check x0
                if shift_box[0] + section_shift_width_size < 0:
                    shift_box[0] = shift_box[0] - section_shift_width_size
                elif shift_box[0] + section_shift_width_size > xsize:
                    shift_box[0] = xsize - abs(section_shift_width_size) - 1
                # check y0
                if shift_box[1] + section_shift_height_size < 0:
                    shift_box[1] = shift_box[1] - section_shift_height_size
                elif shift_box[1] + section_shift_height_size > ysize:
                    shift_box[1] = ysize - abs(section_shift_height_size) - 1
                # check xn
                if shift_box[2] + section_shift_width_size < 0:
                    shift_box[2] = shift_box[2] - section_shift_width_size
                elif shift_box[2] + section_shift_width_size > xsize:
                    shift_box[2] = xsize - abs(section_shift_width_size) - 1
                # check yn
                if shift_box[3] + section_shift_height_size < 0:
                    shift_box[3] = shift_box[3] - section_shift_height_size
                elif shift_box[3] + section_shift_height_size > ysize:
                    shift_box[3] = ysize - abs(section_shift_height_size) - 1

                # apply section shift
                self.apply_shift(image, mask, keypoints, bounding_boxes, shift_box, section_shift_x, section_shift_y)

            # return image follows the input image color channel
            if is_gray:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # check for additional output of mask, keypoints and bounding boxes
            outputs_extra = []
            if mask is not None or keypoints is not None or bounding_boxes is not None:
                outputs_extra = [mask, keypoints, bounding_boxes]

            # returns additional mask, keypoints and bounding boxes if there is additional input
            if outputs_extra:
                # returns in the format of [image, mask, keypoints, bounding_boxes]
                return [image] + outputs_extra
            else:
                return image