Source code for augraphy.augmentations.bookbinding

import random

import cv2
import numpy as np
from numba import config
from numba import jit

from augraphy.augmentations.lib import four_point_transform
from augraphy.augmentations.lib import load_image_from_cache
from augraphy.augmentations.lib import update_mask_labels
from augraphy.augmentations.pageborder import PageBorder
from augraphy.base.augmentation import Augmentation
from augraphy.utilities.overlaybuilder import OverlayBuilder


[docs] class BookBinding(Augmentation): """Creates a book binding effect by applying shadow and combining two bended images. The input image will be served as the right image. If cache image is set, the left image will be selected from cache images, else it will be using the input image. :param shadow_radius_range: The range of radius in pixels for the darken shadow effect. :type shadow_radius_range: tuple, optional :param curve_range_right: Tuple of ints determing the amount of curving down pixels in the right image. :type curve_range_right: tuple, optional :param curve_range_left: Tuple of ints determing the amount of curving down pixels in the left image. :type curve_range_left: tuple, optional :param curve_ratio_right: Tuple of floats determing the percentage of right image (in width) should be squeezed horizontally. :type curve_ratio_right: tuple, optional :param curve_ratio_left: Tuple of floats determing the percentage of left image (in width) should be squeezed horizontally. :type curve_ratio_left: tuple, optional :param mirror_range: Tuple of floats to determine percentage of image to be mirrored. :type mirror_range: Tuple, optional :param binding_align: Flag to determine whether the pages in the binding effect should be properly aligned or not. :type binding_align int, optional :param binding_pages: Tuple of ints determing the number of pages in the book binding effect. :type binding_pages tuple, optional :param curling_direction: The direction of page curling, -1: random, 0: up, 1: down. :type curling_direction: int, optional :param backdrop_color: The backdrop color (BGR) of the book binding effect. :type backdrop_color: tuple, optional :param enable_shadow: Flag to enable shadow on top of the book binding effect. :type enable_shadow: int, optional :param use_cache_images: Flag to enable the usage of cache images in creating book binding effect. :type use_cache_images: int, optional :param numba_jit: The flag to enable numba jit to speed up the processing in the augmentation. :type numba_jit: int, optional :param p: The probability that this Augmentation will be applied. :type p: float, optional """ def __init__( self, shadow_radius_range=(30, 100), curve_range_right=(50, 100), curve_range_left=(200, 300), curve_ratio_right=(0.05, 0.1), curve_ratio_left=(0.5, 0.6), mirror_range=(1.0, 1.0), binding_align="random", binding_pages=(5, 10), curling_direction=-1, backdrop_color=(0, 0, 0), enable_shadow=1, use_cache_images=1, numba_jit=1, p=1, ): super().__init__(p=p, numba_jit=numba_jit) self.shadow_radius_range = shadow_radius_range self.curve_range_right = curve_range_right self.curve_range_left = curve_range_left self.curve_ratio_right = curve_ratio_right self.curve_ratio_left = curve_ratio_left self.mirror_range = mirror_range self.binding_align = binding_align self.binding_pages = binding_pages self.curling_direction = curling_direction self.backdrop_color = backdrop_color self.enable_shadow = enable_shadow self.use_cache_images = use_cache_images self.numba_jit = numba_jit config.DISABLE_JIT = bool(1 - numba_jit) def __repr__(self): return f"BookBinding(shadow_radius_range={self.shadow_radius_range}, curve_range_right={self.curve_range_right}, curve_range_left={self.curve_range_left}, curve_ratio_right={self.curve_ratio_right}, curve_ratio_left={self.curve_ratio_left}, mirror_range={self.mirror_range}, binding_align={self.binding_align}, binding_pages={self.binding_pages}, curling_direction={self.curling_direction}, backdrop_color={self.backdrop_color}, enable_shadow={self.enable_shadow}, use_cache_images={self.use_cache_images}, numba_jit={self.numba_jit}, p={self.p})"
[docs] def add_book_shadow(self, img, radius, angle=30): """Add shadow effect in the input image. :param img: The image to apply the function. :type img: numpy.array (numpy.uint8) :param radius: Radius of the shadow effect. :type radius: int :param angle: Angle value to generate shadow effect. :type angle: int """ rows = img.shape[0] cols = img.shape[1] # compute mask of shadow img_dist = np.repeat(np.arange(cols), rows) img_dist = np.transpose(img_dist.reshape(cols, rows)) img_d = img_dist + (radius * (1 - np.cos(np.deg2rad(angle)))) img_mask = (img_dist / img_d) ** 2 min_value = np.min(img_mask) max_value = np.max(img_mask) # rescale mask to 0.2 - 1 (prevent total black area) min_intensity = 0.2 max_intensity = 1.0 img_mask = ((img_mask - min_value) / (max_value - min_value)) * (max_intensity - min_intensity) + min_intensity # overlay mask of shadow to input image ob = OverlayBuilder( "darken", (img_mask * 255).astype("uint8"), img, 1, (1, 1), "center", 0, 1, ) img_output = ob.build_overlay() return img_output.astype("uint8")
[docs] def curve_page(self, img, curve_value, backdrop_color, mask=None, keypoints=None, bounding_boxes=None): """Generate curvy effect in the input image. :param img: The image to apply the function. :type img: numpy.array (numpy.uint8) :param curve_value: Pixel number of the page text should be curved. :type curve_value: int :param backdrop_color: The color of the filled background. :type backdrop_color: tuple :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8), optional :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary, optional :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list, optional """ rows = img.shape[0] cols = img.shape[1] # curve value not more than image height curve_value = min(rows, curve_value) if len(img.shape) > 2: channels = img.shape[2] img_output = np.full( (rows + curve_value, cols, channels), fill_value=backdrop_color, dtype=img.dtype, ) else: img_output = np.full((rows + curve_value, cols), fill_value=np.mean(self.backdrop_color), dtype=img.dtype) curve_factor = (cols * 2) + (cols * 2 * curve_value / rows) self.curve_page_processing(img, img_output, curve_value, rows, cols, curve_factor) # apply curve to mask if mask is not None: mask_output = np.full((rows + curve_value, cols), fill_value=0, dtype=mask.dtype) self.curve_page_processing(mask, mask_output, curve_value, rows, cols, curve_factor) mask = mask_output # apply curve processing to keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): offset_y = int(curve_value * np.sin(2 * 3.14 * xpoint / curve_factor)) points[i] = [xpoint, ypoint + offset_y] # apply curve processing to bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box height = yepoint - yspoint offset_y = int(curve_value * np.sin(2 * 3.14 * xspoint / curve_factor)) bounding_boxes[i] = [xspoint, yspoint + offset_y, xepoint, yspoint + height + offset_y] if mask is not None or keypoints is not None or bounding_boxes is not None: return [img_output, mask, keypoints, bounding_boxes] else: return img_output
[docs] @staticmethod @jit(nopython=True, cache=True) def curve_page_processing(img, img_output, curve_value, rows, cols, curve_factor): """Function to apply sin function to create wavy image. :param img: The image to apply the function. :type img: numpy.array (numpy.uint8) :param img_output: The output image from the function. :type img_output: numpy.array (numpy.uint8) :param curve_value: Pixel number of the page text should be curved. :type curve_value: int :param rows: Number of rows in input image. :type rows: int :param cols: Number of columns in input image. :type cols: int :param curve_factor: The curve factor determines the opening size of book binding effect :type curve_factor: float """ for y in range(rows): for x in range(cols): offset_y = int(curve_value * np.sin(2 * 3.14 * x / curve_factor)) offseted_y = y + offset_y img_output[offseted_y, x] = img[y, x] return img_output
[docs] @staticmethod @jit(nopython=True, cache=True) def check_backdrop_color(image_output, image_mask, backdrop_color): """Function to compute mask of background :param image_output: The image with BookBinding effect. :type image_output: numpy.array (numpy.uint8) :param image_mask: The mask of background. :type image_mask: numpy.array (numpy.uint8) :param backdrop_color: The backdrop color in BGR format. :type backdrop_color: tuple """ for x in range(image_mask.shape[1]): # check top down for y in range(image_mask.shape[0]): image_mask[y, x] = backdrop_color color_eval = ( image_output[y, x][0] == backdrop_color[0] and image_output[y, x][1] == backdrop_color[1] and image_output[y, x][2] == backdrop_color[2] ) if not color_eval: break # # check bottom up : reverse loop for y in range(image_mask.shape[0] - 1, -1, -1): image_mask[y, x] = backdrop_color color_eval = ( image_output[y, x][0] == backdrop_color[0] and image_output[y, x][1] == backdrop_color[1] and image_output[y, x][2] == backdrop_color[2] ) if not color_eval: break for y in range(image_mask.shape[0]): # check left right for x in range(image_mask.shape[1]): image_mask[y, x] = backdrop_color color_eval = ( image_output[y, x][0] == backdrop_color[0] and image_output[y, x][1] == backdrop_color[1] and image_output[y, x][2] == backdrop_color[2] ) if not color_eval: break # # check right left : reverse loop for x in range(image_mask.shape[1] - 1, -1, -1): image_mask[y, x] = backdrop_color color_eval = ( image_output[y, x][0] == backdrop_color[0] and image_output[y, x][1] == backdrop_color[1] and image_output[y, x][2] == backdrop_color[2] ) if not color_eval: break
[docs] def curve_transform( self, image, curve_ratio_range, image_direction, mask=None, keypoints=None, bounding_boxes=None, ): """Bend image further in horizontal direction using perspective transform. :param image: The image to apply the function. :type image: numpy.array (numpy.uint8) :param curve_ratio: The ratio of transformation in horizontal direction.. :type curve_ratio: float :param image_direction: Flag to identify left or right side of image. :type image_direction: string :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8), optional :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary, optional :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list, optional """ curve_ratio = random.uniform(curve_ratio_range[0], curve_ratio_range[1]) ysize, xsize = image.shape[:2] # start and end points for left and right side of images if image_direction == "left": new_curve_ratio = curve_ratio source_points = np.float32([[0, 0], [xsize, 0], [xsize, ysize], [0, ysize]]) target_points = np.float32( [[int(xsize * new_curve_ratio), 0], [xsize, 0], [xsize, ysize], [int(xsize * new_curve_ratio), ysize]], ) else: new_curve_ratio = 1 - curve_ratio source_points = np.float32([[0, 0], [xsize, 0], [xsize, ysize], [0, ysize]]) target_points = np.float32( [[0, 0], [int(xsize * new_curve_ratio), 0], [int(xsize * new_curve_ratio), ysize], [0, ysize]], ) # perspective transform to further bend image in x direction only image_transformed = four_point_transform(image, source_points, target_points, xsize, ysize) # transform mask if mask is not None: mask_labels = np.unique(mask).tolist() + [0] mask = four_point_transform(mask, source_points, target_points, xsize, ysize) update_mask_labels(mask, mask_labels) # crop the blank area after the transform if image_direction == "left": image_transformed = image_transformed[:, int(xsize * curve_ratio) :] if mask is not None: mask = mask[:, int(xsize * curve_ratio) :] else: image_transformed = image_transformed[:, : int(xsize * (1 - curve_ratio))] if mask is not None: mask = mask[:, : int(xsize * (1 - curve_ratio))] # transform keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [int(xpoint * new_curve_ratio), ypoint] # transform bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ int(xspoint * new_curve_ratio), yspoint, int(xepoint * new_curve_ratio), yepoint, ] if mask is not None or keypoints is not None or bounding_boxes is not None: return [image_transformed.astype("uint8"), mask, keypoints, bounding_boxes] else: return image_transformed.astype("uint8")
[docs] def curve_processing(self, image, image_left, mask=None, keypoints=None, bounding_boxes=None): """Core function for curvy effect processing. :param image: The right image of the book binding effect. :type image: numpy.array (numpy.uint8) :param image_left: The left image of the book binding effect. :type image_left: numpy.array (numpy.uint8) :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255. Value of 0 will be assigned to the filled area after the transformation. :type mask: numpy array (uint8), optional :param keypoints: A dictionary of single or multiple labels where each label is a nested list of points coordinate. :type keypoints: dictionary, optional :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2). :type bounding_boxes: list, optional """ # min value is 1, to differentiate between background backdrop_color = ( max(1, self.backdrop_color[0]), max(1, self.backdrop_color[1]), max(1, self.backdrop_color[2]), ) if self.curling_direction == -1 or self.curling_direction == "random": curve_down = random.choice([0, 1]) else: curve_down = self.curling_direction # generate parameters trim_sides = (1, 0, 0, 1) radius = random.randint(self.shadow_radius_range[0], self.shadow_radius_range[1]) curve_value_right = random.randint(self.curve_range_right[0], self.curve_range_right[1]) curve_value_left = random.randint(self.curve_range_left[0], self.curve_range_left[1]) if self.binding_align == "random": binding_align = random.choice([0, 1]) else: binding_align = self.binding_align if binding_align: angle = random.randint(2, 5) page_rotation = (angle, angle) else: page_rotation = (1, 5) page_number = random.randint(self.binding_pages[0], self.binding_pages[0]) curve_frequency = (0, 1) curve_height = (1, 3) curve_length_one_side = (10, 30) if self.use_cache_images == "random": use_cache_images = random.choice([0, 1]) else: use_cache_images = self.use_cache_images # page borders value added_border_height = int(max(image.shape[:2]) / 20) width_height = (int(added_border_height / 2), -added_border_height) # right side of image # create borders page_border = PageBorder( page_border_width_height=width_height, page_border_color=(0, 0, 0), page_border_background_color=backdrop_color, page_border_use_cache_images=use_cache_images, page_border_trim_sides=trim_sides, page_numbers=random.randint(page_number, page_number), page_rotate_angle_in_order=binding_align, page_rotation_angle_range=page_rotation, curve_frequency=curve_frequency, curve_height=curve_height, curve_length_one_side=curve_length_one_side, same_page_border=0, numba_jit=1, p=1, ) if not curve_down: iysize, ixsize = image.shape[:2] # flip image image = np.flipud(image) # flip mask if mask is not None: mask = np.flipud(mask) # flip keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint, iysize - 1 - ypoint] # flip bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint, iysize - 1 - yspoint, xepoint, iysize - 1 - yepoint, ] image_shadow = self.add_book_shadow(image, radius) image_added_border_right = page_border( image=image_shadow, mask=mask, keypoints=keypoints, bounding_boxes=bounding_boxes, ) if mask is not None or keypoints is not None or bounding_boxes is not None: image_added_border_right, mask, keypoints, bounding_boxes = image_added_border_right # left side of image # create borders page_border = PageBorder( page_border_width_height=width_height, page_border_color=(0, 0, 0), page_border_background_color=backdrop_color, page_border_use_cache_images=use_cache_images, page_border_trim_sides=trim_sides, page_numbers=random.randint(page_number, page_number), page_rotate_angle_in_order=binding_align, page_rotation_angle_range=page_rotation, curve_frequency=curve_frequency, curve_height=curve_height, curve_length_one_side=curve_length_one_side, same_page_border=0, numba_jit=1, p=1, ) if not curve_down: image_left = np.flipud(image_left) image_shadow_left = self.add_book_shadow(np.fliplr(image_left), radius) image_added_border_left = np.fliplr(page_border(image_shadow_left)) # pad image so that both images are aligned pad_value = abs(image_added_border_left.shape[0] - image_added_border_right.shape[0]) if curve_down: pad_value_top = pad_value pad_value_bottom = 0 else: pad_value_top = 0 pad_value_bottom = pad_value if image_added_border_left.shape[0] > image_added_border_right.shape[0]: # pad image image_added_border_right = np.pad( image_added_border_right, pad_width=((pad_value_top, pad_value_bottom), (0, 0), (0, 0)), mode="constant", constant_values=0, ) if pad_value_top: image_added_border_right[:pad_value_top, :] = backdrop_color else: image_added_border_right[-pad_value_bottom:, :] = backdrop_color # pad mask if mask is not None: mask = np.pad( mask, pad_width=((pad_value_top, pad_value_bottom), (0, 0)), mode="constant", constant_values=0, ) # pad keypoints if keypoints is not None and pad_value_top != 0: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint, ypoint + pad_value_top] # pad bounding boxes if bounding_boxes is not None and pad_value_top != 0: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint, yspoint + pad_value_top, xepoint, yepoint + pad_value_top, ] elif image_added_border_right.shape[0] > image_added_border_left.shape[0]: image_added_border_left = np.pad( image_added_border_left, pad_width=((pad_value_top, pad_value_bottom), (0, 0), (0, 0)), mode="constant", constant_values=0, ) if pad_value_top: image_added_border_left[:pad_value_top, :] = backdrop_color else: image_added_border_left[-pad_value_bottom:, :] = backdrop_color # apply curvy effect # right image_right = self.curve_page( image_added_border_right, curve_value_right, backdrop_color, mask, keypoints, bounding_boxes, ) if mask is not None or keypoints is not None or bounding_boxes is not None: image_right, mask, keypoints, bounding_boxes = image_right if not curve_down: iysize, ixsize = image_right.shape[:2] # flip image image_right = np.flipud(image_right) # flip mask if mask is not None: mask = np.flipud(mask) # flip keypoints if keypoints is not None: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint, iysize - 1 - ypoint] # flip bounding boxes if bounding_boxes is not None: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint, iysize - 1 - yspoint, xepoint, iysize - 1 - yepoint, ] # left image_left = np.fliplr(self.curve_page(np.fliplr(image_added_border_left), curve_value_left, backdrop_color)) if not curve_down: image_left = np.flipud(image_left) # further perspective transform image_right = self.curve_transform( image=image_right, curve_ratio_range=self.curve_ratio_right, image_direction="right", mask=mask, keypoints=keypoints, bounding_boxes=bounding_boxes, ) if mask is not None or keypoints is not None or bounding_boxes is not None: image_right, mask, keypoints, bounding_boxes = image_right image_left = self.curve_transform( image=image_left, curve_ratio_range=self.curve_ratio_left, image_direction="left", ) # pad image so that both images are aligned on the top portion if not curve_down: pad_value = abs(image_left.shape[0] - image_right.shape[0]) pad_value_top = pad_value if image_left.shape[0] > image_right.shape[0]: # pad image image_right = np.pad( image_right, pad_width=((pad_value_top, 0), (0, 0), (0, 0)), mode="constant", constant_values=0, ) image_right[:pad_value_top, :] = backdrop_color # pad mask if mask is not None: mask = np.pad( mask, pad_width=((pad_value_top, 0), (0, 0)), mode="constant", constant_values=0, ) # pad keypoints if keypoints is not None and pad_value_top != 0: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint, ypoint + pad_value_top] # pad bounding boxes if bounding_boxes is not None and pad_value_top != 0: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint, yspoint + pad_value_top, xepoint, yepoint + pad_value_top, ] elif image_right.shape[0] > image_left.shape[0]: image_left = np.pad( image_left, pad_width=((pad_value_top, 0), (0, 0), (0, 0)), mode="constant", constant_values=0, ) image_left[:pad_value_top, :] = backdrop_color # further bend left image by using perspective transform ysize, xsize = image_left.shape[:2] # generate range of mirror and crop image based on mirror size mirror_range = np.random.uniform(self.mirror_range[0], self.mirror_range[1]) image_left = image_left[:, image_left.shape[1] - int(image_left.shape[1] * mirror_range) :] # get new y and x size of left image ysize, xsize = image_left.shape[:2] cysize, cxsize = image_right.shape[:2] # create new image with original size + mirror size image_output = np.full( (max(ysize, cysize), xsize + cxsize, image.shape[2]), fill_value=backdrop_color, dtype="uint8", ) # merged left image and right image image_output[:ysize, :xsize] = image_left image_output[:cysize, xsize:] = image_right # add image left to mask, keypoints and bounding boxes by ading offset or padding # pad mask with image left xsize if mask is not None: pad_y = 0 if cysize < ysize: pad_y = ysize - cysize mask = np.pad( mask, pad_width=((0, pad_y), (xsize, 0)), mode="constant", constant_values=0, ) # add image left xsize as offset to keypoints if keypoints is not None and pad_value_top != 0: for name, points in keypoints.items(): for i, (xpoint, ypoint) in enumerate(points): points[i] = [xpoint + xsize, ypoint] # add image left xsize as offset to bounding boxes if bounding_boxes is not None and pad_value_top != 0: for i, bounding_box in enumerate(bounding_boxes): xspoint, yspoint, xepoint, yepoint = bounding_box bounding_boxes[i] = [ xspoint + xsize, yspoint, xepoint + xsize, yepoint, ] # add shadow effect if self.enable_shadow: # get mask for shadow effect image_mask = np.zeros_like(image_output, dtype="uint8") self.check_backdrop_color(image_output, image_mask, backdrop_color) mysize, mxsize = image_mask.shape[:2] # extended value ext = 100 emysize, emxsize = ( mysize + (ext * 2), mxsize + (ext * 2), ) image_mask_extend = np.full((emysize, emxsize, 3), fill_value=backdrop_color, dtype="uint8") image_mask_extend[ext:-ext, ext:-ext] = image_mask image_shadow = cv2.resize( image_mask_extend, (max(50, int(emxsize / 10)), max(50, int(emysize / 10))), interpolation=cv2.INTER_AREA, ) image_shadow = cv2.GaussianBlur(image_shadow, (255, 255), cv2.BORDER_DEFAULT) image_shadow = cv2.resize(image_shadow, (emxsize, emysize), interpolation=cv2.INTER_LINEAR) image_shadow = image_shadow[ext:-ext, ext:-ext] image_output[image_mask > 0] = image_shadow[image_mask > 0] # replace with input backdrop_color for i in range(3): if backdrop_color[i] != self.backdrop_color[i]: indices = np.logical_and(image_output[:, :, i] == backdrop_color[i], image_mask[:, :, i] > 0) image_output[:, :, i][indices] = self.backdrop_color[i] return image_output, mask, keypoints, bounding_boxes
def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False): if force or self.should_run(): image = image.copy() # convert and make sure image is color image has_alpha = 0 if len(image.shape) > 2: is_gray = 0 if image.shape[2] == 4: has_alpha = 1 image, image_alpha = image[:, :, :3], image[:, :, 3] else: is_gray = 1 image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) # load left side of image from cache if self.use_cache_images: image_left = load_image_from_cache(random_image=1) else: image_left = None if image_left is not None: # resize foreground image_left = cv2.resize( image_left, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_AREA, ) # consistent color channel if len(image_left) < 3: image_left = cv2.cvtColor(image_left, cv2.COLOR_GRAY2BGR) else: image_left = image.copy() image_output, mask, keypoints, bounding_boxes = self.curve_processing( image=image, image_left=image_left, mask=mask, keypoints=keypoints, bounding_boxes=bounding_boxes, ) # return image follows the input image color channel if is_gray: image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2GRAY) if has_alpha: ysize, xsize = image_output.shape[:2] if ysize != image_alpha.shape[0] or xsize != image_alpha.shape[1]: image_alpha = np.full((ysize, xsize), fill_value=255, dtype="uint8") image_output = np.dstack((image_output, image_alpha)) # check for additional output of mask, keypoints and bounding boxes outputs_extra = [] if mask is not None or keypoints is not None or bounding_boxes is not None: outputs_extra = [mask, keypoints, bounding_boxes] # returns additional mask, keypoints and bounding boxes if there is additional input if outputs_extra: # returns in the format of [image, mask, keypoints, bounding_boxes] return [image_output] + outputs_extra else: return image_output