import random
import cv2
import numpy as np
from augraphy.base.augmentation import Augmentation
# (Sphinx "[docs]" source-link marker removed: HTML extraction residue, not code.)
class SectionShift(Augmentation):
    """Shift single or multiple sections of image in horizontal, vertical or both directions to create an effect of shifted image sections.

    :param section_shift_number_range: Tuple of ints determining the number of section shift operations.
        Only used when section_shift_locations is "random"; otherwise one shift is applied per provided box.
    :type section_shift_number_range: tuple, optional
    :param section_shift_locations: A nested list contains list of shifting boxes.
        Each box should be in format of [x0, y0, xn, yn].
        Use "random" for random location.
    :type section_shift_locations: list, optional
    :param section_shift_x_range: Tuple of ints determining the shifting value in horizontal direction.
        The shifting value will be in percentage of the image width if the value is float and in between -1.0 - 1.0:
        shifting_x (int) = image width  * shifting_x (float and -1.0 - 1.0)
    :type section_shift_x_range: tuple, optional
    :param section_shift_y_range: Tuple of ints determining the shifting value in vertical direction.
        The shifting value will be in percentage of the image height if the value is float and in between -1.0 - 1.0:
        shifting_y (int) = image height  * shifting_y (float and -1.0 - 1.0)
    :type section_shift_y_range: tuple, optional
    :param section_shift_fill_value: Tuple of values in BGR to fill in the shifted area.
        Use "-1" to not fill any value and the image default value will be used instead.
        Use "random" to fill random color.
    :type section_shift_fill_value: tuple, optional
    :param p: The probability that this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        section_shift_number_range=(3, 5),
        section_shift_locations="random",
        section_shift_x_range=(-10, 10),
        section_shift_y_range=(-10, 10),
        section_shift_fill_value=-1,
        p=1,
    ):
        """Constructor method"""
        super().__init__(p=p)
        self.section_shift_number_range = section_shift_number_range
        self.section_shift_locations = section_shift_locations
        self.section_shift_x_range = section_shift_x_range
        self.section_shift_y_range = section_shift_y_range
        self.section_shift_fill_value = section_shift_fill_value

    # Constructs a string representation of this Augmentation.
    def __repr__(self):
        return (
            f"SectionShift(section_shift_number_range={self.section_shift_number_range}, "
            f"section_shift_locations={self.section_shift_locations}, "
            f"section_shift_x_range={self.section_shift_x_range}, "
            f"section_shift_y_range={self.section_shift_y_range}, "
            f"section_shift_fill_value={self.section_shift_fill_value}, "
            f"p={self.p})"
        )

    @staticmethod
    def _sample_shift(shift_range, size):
        """Draw one shift value from ``shift_range``.

        When the upper bound is a float within [-1.0, 1.0], both bounds are
        treated as fractions of ``size`` and scaled to pixels first.
        """
        low, high = shift_range[0], shift_range[1]
        if isinstance(high, float) and -1 <= high <= 1:
            low, high = int(low * size), int(high * size)
        return random.randint(low, high)

    @staticmethod
    def _random_box(xsize, ysize, section_shift_x, section_shift_y):
        """Generate a random box [x0, y0, xn, yn] that lies inside the image."""
        width = random.randint(int(xsize / 20), int(xsize / 5))
        height = random.randint(int(ysize / 20), int(ysize / 5))

        # For positive shifts the start is limited so the shifted box stays in the image.
        # For negative shifts that bound would exceed the image, so it is capped at
        # (size - box size). The bound is never allowed to go below 0, which would make
        # random.randint raise on an empty range when the shift is larger than the image.
        max_start_x = max(0, min(xsize - width, xsize - section_shift_x - width - 1))
        max_start_y = max(0, min(ysize - height, ysize - section_shift_y - height - 1))

        start_x = random.randint(0, max_start_x)
        start_y = random.randint(0, max_start_y)
        return [start_x, start_y, start_x + width, start_y + height]

    def apply_shift(self, image, mask, keypoints, bounding_boxes, shift_box, section_shift_x, section_shift_y):
        """Core function to shift section of image based on the input box and shifting values.

        The image, mask, keypoints and bounding boxes are modified in place and nothing is returned.
        The box is first clipped so that both the source section and its shifted destination
        lie completely inside the image; if nothing is left after clipping, no shift is applied.

        :param image: The input image.
        :type image: numpy array
        :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255.
            Value of 0 will be assigned to the filled area after the transformation.
        :type mask: numpy array (uint8)
        :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate.
        :type keypoints: dictionary
        :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2).
        :type bounding_boxes: list
        :param shift_box: Tuple contains the box of the shifting location in format of x0, y0, xn, yn.
        :type shift_box: tuple
        :param section_shift_x: The shifting value in horizontal direction.
        :type section_shift_x: int
        :param section_shift_y: The shifting value in vertical direction.
        :type section_shift_y: int
        """

        ysize, xsize = image.shape[:2]
        x0, y0, xn, yn = shift_box

        # Clip the box so that source [x0, xn) and destination [x0 + shift, xn + shift)
        # are both inside [0, size). This keeps the two slices the same shape.
        x0 = max(x0, 0, -section_shift_x)
        y0 = max(y0, 0, -section_shift_y)
        xn = min(xn, xsize, xsize - section_shift_x)
        yn = min(yn, ysize, ysize - section_shift_y)

        # nothing left to shift after clipping
        if xn <= x0 or yn <= y0:
            return

        dest_x0, dest_xn = x0 + section_shift_x, xn + section_shift_x
        dest_y0, dest_yn = y0 + section_shift_y, yn + section_shift_y

        # the section of shifted image, copied because source and destination may overlap
        image_section = image[y0:yn, x0:xn].copy()
        mask_section = mask[y0:yn, x0:xn].copy() if mask is not None else None

        # fill the shifted area with value
        if self.section_shift_fill_value != -1:
            if self.section_shift_fill_value == "random":
                fill_value = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            else:
                fill_value = self.section_shift_fill_value

            # add alpha value
            if image.ndim == 3 and image.shape[2] == 4:
                fill_value = (fill_value[0], fill_value[1], fill_value[2], 255)

            image[y0:yn, x0:xn] = fill_value

            # fill mask shifted area with 0
            if mask is not None:
                mask[y0:yn, x0:xn] = 0

        # shift the section of image
        image[dest_y0:dest_yn, dest_x0:dest_xn] = image_section

        # shift mask
        if mask is not None:
            mask[dest_y0:dest_yn, dest_x0:dest_xn] = mask_section

        # shift keypoints inside the shifting boundary
        if keypoints is not None:
            for points in keypoints.values():
                for i, (xpoint, ypoint) in enumerate(points):
                    if x0 <= xpoint < xn and y0 <= ypoint < yn:
                        points[i] = [xpoint + section_shift_x, ypoint + section_shift_y]

        # shift bounding boxes inside the shifting boundary
        if bounding_boxes is not None:
            for i, bounding_box in enumerate(bounding_boxes):
                xspoint, yspoint, xepoint, yepoint = bounding_box
                # start point is in the box
                if x0 <= xspoint < xn and y0 <= yspoint < yn:
                    xspoint += section_shift_x
                    yspoint += section_shift_y
                # end point is in the box
                if x0 <= xepoint < xn and y0 <= yepoint < yn:
                    xepoint += section_shift_x
                    yepoint += section_shift_y
                bounding_boxes[i] = [xspoint, yspoint, xepoint, yepoint]

    # Applies the Augmentation to input data.
    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        """Apply the section shifts to a copy of the input image.

        The mask, keypoints and bounding boxes (when provided) are updated in place.

        :return: The shifted image, or [image, mask, keypoints, bounding_boxes] when any of
            mask, keypoints or bounding_boxes is provided. Nothing is returned when the
            augmentation is not run.
        """
        if force or self.should_run():
            image = image.copy()

            # convert and make sure image is color image
            is_gray = len(image.shape) <= 2
            if is_gray:
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

            ysize, xsize = image.shape[:2]

            # isinstance guard avoids comparing a list/array of boxes against a string
            use_random_locations = (
                isinstance(self.section_shift_locations, str) and self.section_shift_locations == "random"
            )

            # generate number of shifting operation
            if use_random_locations:
                section_shift_number = random.randint(
                    self.section_shift_number_range[0],
                    self.section_shift_number_range[1],
                )
            else:
                section_shift_number = len(self.section_shift_locations)

            for i in range(section_shift_number):
                # shifting values, scaled by image width / height when given as fractions
                section_shift_x = self._sample_shift(self.section_shift_x_range, xsize)
                section_shift_y = self._sample_shift(self.section_shift_y_range, ysize)

                if use_random_locations:
                    shift_box = self._random_box(xsize, ysize, section_shift_x, section_shift_y)
                else:
                    # copy so the user provided locations are never modified;
                    # boxes reaching beyond the image boundary are clipped in apply_shift
                    shift_box = list(self.section_shift_locations[i])

                # apply section shift
                self.apply_shift(image, mask, keypoints, bounding_boxes, shift_box, section_shift_x, section_shift_y)

            # return image follows the input image color channel
            if is_gray:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # returns additional mask, keypoints and bounding boxes if there is additional input
            if mask is not None or keypoints is not None or bounding_boxes is not None:
                # returns in the format of [image, mask, keypoints, bounding_boxes]
                return [image, mask, keypoints, bounding_boxes]
            return image