Source code for augraphy.augmentations.markup

import math
import os
import random
from pathlib import Path

import cv2
import numpy as np

from augraphy.augmentations.brightness import Brightness
from augraphy.augmentations.lib import add_noise as lib_add_noise
from augraphy.augmentations.lib import generate_average_intensity
from augraphy.augmentations.lib import smooth
from augraphy.augmentations.lib import sobel
from augraphy.base.augmentation import Augmentation
from augraphy.utilities import *
from augraphy.utilities.inkgenerator import InkGenerator


class Markup(Augmentation):
    """Uses contours detection to detect text lines and add a smooth text strikethrough, highlight or underline effect.

    :param num_lines_range: Pair of ints determining the number of added markup effect.
    :type num_lines_range: int tuple, optional
    :param markup_length_range: Pair of floats between 0 to 1, to determine the length of added markup effect.
        Overwritten with (1, 1) the first time the augmentation runs with ``single_word_mode`` enabled.
    :type markup_length_range: float tuple, optional
    :param markup_thickness_range: Pair of ints, to determine the thickness of added markup effect.
        For the "highlight" markup type both bounds are increased by 5.
    :type markup_thickness_range: int tuple, optional
    :param markup_type: Choice of markup "strikethrough", "highlight", "underline" or "crossed".
        Use "random" to pick one of the four at each call.
    :type markup_type: string
    :param markup_ink: Types of markup ink, choose from "random", "pencil", "pen", "marker" or "highlighter".
    :type markup_ink: string, optional
    :param markup_color: BGR color tuple, "random" for a random color at each call, or "contrast" for the
        inverse of the image's area-averaged color.
    :type markup_color: tuple or string
    :param repetitions: Determine how many time a single markup effect should be drawn.
    :type repetitions: int
    :param large_word_mode: Set true to draw markup on large words, else large word will be ignored.
        Use "random" to choose True or False at each call.
    :type large_word_mode: boolean or string
    :param single_word_mode: Set true to draw markup on a single word only.
    :type single_word_mode: boolean
    :param p: The probability that this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        num_lines_range=(2, 7),
        markup_length_range=(0.5, 1),
        markup_thickness_range=(1, 3),
        markup_type="random",
        markup_ink="random",
        markup_color="random",
        large_word_mode="random",
        single_word_mode=False,
        repetitions=1,
        p=1,
    ):
        super().__init__(p=p)
        self.num_lines_range = num_lines_range
        self.markup_length_range = markup_length_range
        self.markup_thickness_range = markup_thickness_range
        self.markup_type = markup_type
        self.markup_ink = markup_ink
        self.markup_color = markup_color
        self.repetitions = repetitions
        self.large_word_mode = large_word_mode
        self.single_word_mode = single_word_mode

    def __repr__(self):
        return (
            f"Markup(num_lines_range={self.num_lines_range}, markup_length_range={self.markup_length_range}, "
            f"markup_thickness_range={self.markup_thickness_range}, markup_type={self.markup_type}, "
            f"markup_ink={self.markup_ink}, markup_color={self.markup_color}, repetitions={self.repetitions}, "
            f"large_word_mode={self.large_word_mode}, single_word_mode={self.single_word_mode}, p={self.p})"
        )

    def distribute_line(self, starting_point, ending_point, offset):
        """Create smoothed line from the provided starting and ending point.

        The x coordinates are spread evenly between the two points. Every y
        coordinate is the *starting* point's y plus a random jitter; the y of
        ``ending_point`` is not used.

        :param starting_point: Starting point (x, y) of the line.
        :type starting_point: tuple
        :param ending_point: Ending point (x, y) of the line.
        :type ending_point: tuple
        :param offset: Offset value to randomize point position.
        :type offset: int
        :return: The smoothed points as returned by ``smooth``.
        """
        # dividing the line into a random number of control points
        points_count = random.randint(3, 6)
        points_x = np.linspace(starting_point[0], ending_point[0], points_count)
        points_y = [starting_point[1] + random.uniform(-offset, offset) for _ in points_x]

        # adding a smoothing effect in points using chaikin's algorithm
        control_points = np.column_stack((points_x, points_y)).astype("float")
        return smooth(control_points, 6)

    def _preprocess(self, image):
        """Preprocess image with binarization, dilation and erosion.

        :param image: Image to be binarized; 3 and 4 channel inputs are converted to grayscale first.
        :type image: numpy.array
        :return: Inverted Otsu-thresholded image after horizontal dilation and one erosion pass.
        """
        blurred = cv2.blur(image, (5, 5)).astype("uint8")
        if len(blurred.shape) > 2 and blurred.shape[2] == 3:
            blurred = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
        elif len(blurred.shape) > 2 and blurred.shape[2] == 4:
            blurred = cv2.cvtColor(blurred, cv2.COLOR_BGRA2GRAY)

        _, binarized = cv2.threshold(
            blurred,
            0,
            255,
            cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV,
        )

        # get kernel for dilation: a narrower kernel is used in single word mode
        if self.single_word_mode is False:
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
        else:
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 1))
            # NOTE(review): this permanently overwrites the configured range on
            # the instance, so the full length is used from now on even if
            # single_word_mode is switched off later.
            self.markup_length_range = (1, 1)

        # dilating the threshold image to combine horizontal lines
        dilation = cv2.dilate(
            binarized,
            kernel,
            iterations=2,
        )
        dilation = cv2.erode(
            dilation,
            None,
            iterations=1,
        )
        return dilation

    def draw_line(self, p1, p2, markup_mask, markup_thickness, markup_color, reverse):
        """Draw line across two provided points.

        A random intermediate point is inserted between the two end points and
        the three points are smoothed before the segments are drawn with
        ``cv2.line`` onto ``markup_mask``. Nothing is returned.

        :param p1: Starting point (x, y) of the line.
        :type p1: tuple
        :param p2: Ending point (x, y) of the line.
        :type p2: tuple
        :param markup_mask: Mask of markup effect.
        :type markup_mask: numpy.array (numpy.uint8)
        :param markup_thickness: Thickness of the line.
        :type markup_thickness: int
        :param markup_color: Color of the line in BGR format.
        :type markup_color: tuple
        :param reverse: Reverse the order of line points distribution. When truthy
            the x coordinates run from minimum to maximum, otherwise from maximum to minimum.
        :type reverse: int
        """
        # get min and max of points
        min_x = min(p2[0], p1[0])
        max_x = max(p2[0], p1[0])
        min_y = min(p2[1], p1[1])
        max_y = max(p2[1], p1[1])

        # set point x in ascending or descending order based on direction;
        # the x sample is drawn before the y sample in both directions
        mid_x = random.randint(min_x, max_x)
        if reverse:
            points_x = [min_x, mid_x, max_x]
        else:
            points_x = [max_x, mid_x, min_x]
        points_y = [min_y, random.randint(min_y, max_y), max_y]

        # smooth points
        points = smooth(np.column_stack((points_x, points_y)).astype("float"), 6)

        # draw curvy lines, one segment per pair of consecutive points
        for (point1_x, point1_y), (point2_x, point2_y) in zip(points[:-1], points[1:]):
            markup_mask = cv2.line(
                markup_mask,
                (int(point1_x), int(point1_y)),
                (int(point2_x), int(point2_y)),
                markup_color,
                markup_thickness,
                lineType=cv2.LINE_AA,
            )

    def _jitter_point(self, point_x, point_y, offset, xsize, ysize):
        """Return (x, y) randomly displaced by up to 5*offset in x and offset in y, clipped to [0, xsize] and [0, ysize]."""
        # the x displacement is sampled before the y displacement
        jittered_x = np.clip(point_x + random.randint(-offset * 5, offset * 5), 0, xsize)
        jittered_y = np.clip(point_y + random.randint(-offset * 1, offset * 1), 0, ysize)
        return (jittered_x, jittered_y)

    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        """Detect text-line contours in the image and draw the markup effect over some of them.

        :param image: Input image; grayscale, BGR or BGRA. Grayscale input is converted to BGR.
        :type image: numpy.array
        :param layer: Not used by this method.
        :param mask: Optional mask, returned unchanged.
        :param keypoints: Optional keypoints, returned unchanged.
        :param bounding_boxes: Optional bounding boxes, returned unchanged.
        :param force: Not used by this method.
        :type force: boolean
        :return: The augmented image, or ``[image, mask, keypoints, bounding_boxes]`` when any of
            mask, keypoints or bounding_boxes is provided.
        """
        # NOTE(review): neither ``force`` nor ``self.p`` is consulted here, so the
        # effect is applied on every call - confirm whether a probability gate
        # from the base class is expected to wrap this body.

        # change to 3 channels BGR format
        has_alpha = 0
        if len(image.shape) < 3:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        elif image.shape[2] == 4:
            has_alpha = 1
            image, image_alpha = image[:, :, :3], image[:, :, 3]

        markup_image = image.copy()

        if self.markup_color == "random":
            markup_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        elif self.markup_color == "contrast":
            # shrink the image to a single pixel and invert that color
            single_color = cv2.resize(image, (1, 1), interpolation=cv2.INTER_AREA)
            markup_color = (255 - single_color[0][0]).tolist()
        else:
            markup_color = self.markup_color

        if self.large_word_mode == "random":
            large_word_mode = random.choice([True, False])
        else:
            large_word_mode = self.large_word_mode

        if self.markup_type == "random":
            markup_type = random.choice(["strikethrough", "crossed", "underline", "highlight"])
        else:
            markup_type = self.markup_type

        num_lines = random.randint(self.num_lines_range[0], self.num_lines_range[1])

        # Applying dilate operation to connect text lines horizontaly.
        binary_image = self._preprocess(image)
        # Each line is detected as a contour.
        contours, _ = cv2.findContours(
            binary_image,
            cv2.RETR_LIST,
            cv2.CHAIN_APPROX_NONE,
        )

        heights = [cv2.boundingRect(cnt)[3] for cnt in contours]

        # get average of character height from the most populated height bin
        bins = np.unique(heights)
        hist, bin_edges = np.histogram(heights, bins=bins, density=False)
        if len(bin_edges) > 1 and np.max(hist) > 20:
            peak = np.argmax(hist)
            character_height_min = bin_edges[peak]
            character_height_max = bin_edges[peak + 1]
            character_height_average = int((character_height_max + character_height_min) / 2)
            height_range = ((character_height_max - character_height_min) / 2) + 1
        else:
            # -1 marks "no dominant height found"; a fixed threshold is used below
            character_height_average = -1
            height_range = -1

        # initialize coordinates of lines
        lines_coordinates = []

        # shuffle contours to get randomize location to apply augmentation
        if len(contours) > 0:
            contours = list(contours)
            random.shuffle(contours)

        # offset to interpolate markup effect up/down
        offset = 6
        ysize, xsize = markup_image.shape[:2]

        for cnt in contours:
            # adding randomization.
            choice = random.choice([False, True])
            x, y, w, h = cv2.boundingRect(cnt)

            if character_height_average == -1:
                check_height = h > 10
            else:
                check_height = (h > character_height_average - height_range) and (
                    h < character_height_average + height_range
                )

            if large_word_mode:
                conditions = check_height
            else:
                conditions = (
                    choice
                    and (w > h * 2)
                    and (w * h < (markup_image.shape[0] * markup_image.shape[1]) / 10)
                    and w < int(markup_image.shape[1] / 5)
                    and check_height
                )

            if not conditions:
                continue
            if num_lines == 0:
                break
            num_lines = num_lines - 1

            markup_length = random.uniform(
                self.markup_length_range[0],
                self.markup_length_range[1],
            )
            # adjusting width according to markup length
            w = int(w * markup_length)
            # adjusting starting-point according to markup length
            x = int(x + (1 - markup_length) * w)

            # for strikethrough and highlight, we need center points
            if markup_type == "strikethrough" or markup_type == "highlight":
                starting_point = [x, int(y + (h / 2))]
                ending_point = [x + w, int(y + (h / 2))]
            # for crossed-off we need points representing primary diagonal
            elif markup_type == "crossed":
                starting_point = [x, y]
                ending_point = [x + w, y + h]
            # for underline, we need points corresponding to bottom part of text
            else:
                starting_point = [x, y + h]
                ending_point = [x + w, y + h]

            for _ in range(self.repetitions):
                if markup_type == "crossed":
                    # primary diagonal
                    p1 = self._jitter_point(starting_point[0], starting_point[1], offset, xsize, ysize)
                    p2 = self._jitter_point(ending_point[0], ending_point[1], offset, xsize, ysize)
                    lines_coordinates.append(np.array([p1, p2]))

                    # secondary diagonal
                    p1 = self._jitter_point(ending_point[0], starting_point[1], offset, xsize, ysize)
                    p2 = self._jitter_point(starting_point[0], ending_point[1], offset, xsize, ysize)
                    lines_coordinates.append(np.array([p1, p2]))
                else:
                    # dividing the line into points to mimic a smoothing effect
                    points_list = self.distribute_line(
                        starting_point,
                        ending_point,
                        offset,
                    ).astype("int")
                    lines_coordinates.append(points_list)

        # prevent empty lines
        if lines_coordinates:
            # random select ink
            if self.markup_ink == "random":
                markup_ink = random.choice(["pencil", "pen", "marker", "highlighter"])
            else:
                markup_ink = self.markup_ink

            # for highlight, the ink should be thicker; test the resolved type so
            # that a randomly selected highlight is thickened as well
            if markup_type == "highlight":
                markup_thickness_range = (self.markup_thickness_range[0] + 5, self.markup_thickness_range[1] + 5)
            else:
                markup_thickness_range = self.markup_thickness_range

            # create ink generator
            ink_generator = InkGenerator(
                ink_type=markup_ink,
                ink_draw_method="lines",
                ink_draw_iterations=(1, 1),
                ink_location="random",
                ink_background=markup_image,
                ink_background_size=None,
                ink_background_color=None,
                ink_color=markup_color,
                ink_min_brightness=1,
                ink_min_brightness_value_range=(150, 200),
                ink_draw_size_range=None,
                ink_thickness_range=markup_thickness_range,
                ink_brightness_change=[0],
                ink_skeletonize=0,
                ink_skeletonize_iterations_range=(1, 1),
                ink_text=None,
                ink_text_font=None,
                ink_text_rotate_range=None,
                ink_lines_coordinates=lines_coordinates,
                ink_lines_stroke_count_range=(1, 1),
            )

            markup_image = ink_generator.generate_ink()

        # restore the alpha channel that was split off at the start
        if has_alpha:
            markup_image = np.dstack((markup_image, image_alpha))

        # returns additional mask, keypoints and bounding boxes if there is additional input
        if mask is not None or keypoints is not None or bounding_boxes is not None:
            # returns in the format of [image, mask, keypoints, bounding_boxes]
            return [markup_image, mask, keypoints, bounding_boxes]
        return markup_image