# Source code for augraphy.augmentations.markup

import math
import os
import random
from pathlib import Path

import cv2
import numpy as np

from augraphy.augmentations.brightness import Brightness
from augraphy.augmentations.lib import add_noise as lib_add_noise
from augraphy.augmentations.lib import generate_average_intensity
from augraphy.augmentations.lib import smooth
from augraphy.augmentations.lib import sobel
from augraphy.base.augmentation import Augmentation
from augraphy.utilities import *
from augraphy.utilities.inkgenerator import InkGenerator



# [docs]
class Markup(Augmentation):
    """Uses contours detection to detect text lines and add a smooth text strikethrough, highlight or underline effect.

    :param num_lines_range: Pair of ints determining the number of added markup effect.
    :type num_lines_range: int tuple, optional
    :param markup_length_range: Pair of floats between 0 to 1, to determine the length of added markup effect.
            It is overwritten with (1, 1) during preprocessing when single_word_mode is not False.
    :type markup_length_range: float tuple, optional
    :param markup_thickness_range: Pair of ints, to determine the thickness of added markup effect.
    :type markup_thickness_range: int tuple, optional
    :param markup_type: Choice of markup "strikethrough", "highlight", "underline" or "crossed".
            Use "random" to pick one of the four types on every call.
    :type markup_type: string
    :param markup_ink: Types of markup ink, choose from "random", "pencil", "pen", "marker" or "highlighter".
    :type markup_ink: string, optional
    :param markup_color: BGR color tuple. Use "random" for a random color on every call, or "contrast"
            for the inverse of the image's average color.
    :type markup_color: tuple or string
    :param large_word_mode: Set true to draw markup on large words, else large word will be ignored.
            Use "random" to pick True or False on every call.
    :type large_word_mode: boolean or string
    :param single_word_mode: Set true to draw markup on a single word only.
    :type single_word_mode: boolean
    :param repetitions: Determine how many time a single markup effect should be drawn.
    :type repetitions: int
    :param p: The probability that this Augmentation will be applied.
    :type p: float, optional
    """

    def __init__(
        self,
        num_lines_range=(2, 7),
        markup_length_range=(0.5, 1),
        markup_thickness_range=(1, 3),
        markup_type="random",
        markup_ink="random",
        markup_color="random",
        large_word_mode="random",
        single_word_mode=False,
        repetitions=1,
        p=1,
    ):
        """Store the markup configuration as given; no validation is performed here."""

        super().__init__(p=p)
        self.num_lines_range = num_lines_range
        self.markup_length_range = markup_length_range
        self.markup_thickness_range = markup_thickness_range
        self.markup_type = markup_type
        self.markup_ink = markup_ink
        self.markup_color = markup_color
        self.repetitions = repetitions
        self.large_word_mode = large_word_mode
        self.single_word_mode = single_word_mode

    def __repr__(self):
        """Return a constructor-style summary of the configured parameters."""
        # every field is rendered as "name=value"; markup_type previously lacked the "="
        return (
            f"Markup(num_lines_range={self.num_lines_range}, markup_length_range={self.markup_length_range}, "
            f"markup_thickness_range={self.markup_thickness_range}, markup_type={self.markup_type}, "
            f"markup_ink={self.markup_ink}, markup_color={self.markup_color}, repetitions={self.repetitions}, "
            f"large_word_mode={self.large_word_mode}, single_word_mode={self.single_word_mode}, p={self.p})"
        )


    # [docs]
    def distribute_line(self, starting_point, ending_point, offset):
        """Build a wavy, roughly horizontal poly-line between two x positions.

        Only the x value of ``ending_point`` is used; every control point takes its
        y value from ``starting_point`` plus a random vertical jitter.

        :param starting_point: Starting point (x, y) of the line.
        :type starting_point: tuple
        :param ending_point: Ending point (x, y) of the line.
        :type ending_point: tuple
        :param offset: Maximum vertical jitter (in either direction) applied to each control point.
        :type offset: int
        :return: The result of ``smooth`` applied to the jittered control points.
        """

        # pick how many control points the line is divided into (3 to 6 inclusive)
        node_count = random.randint(3, 6)

        # x positions are evenly spaced between the two end points
        xs = np.linspace(starting_point[0], ending_point[0], node_count)

        # each control point gets its own vertical jitter around the starting y
        baseline_y = starting_point[1]
        ys = []
        for _ in range(node_count):
            ys.append(baseline_y + random.uniform(-offset, offset))

        control_points = np.column_stack((xs, ys)).astype("float")

        # smoothing pass over the control points (noted by the original author as Chaikin's algorithm)
        return smooth(control_points, 6)


    def _preprocess(self, image):
        """Turn the image into a binary mask in which horizontally adjacent text is merged.

        The image is blurred, converted to grayscale if it has 3 or 4 channels,
        thresholded with inverted Otsu binarization, dilated with a wide one-pixel-high
        kernel and finally eroded once.

        Side effect: when ``self.single_word_mode`` is anything other than ``False``,
        ``self.markup_length_range`` is overwritten with ``(1, 1)``.

        :param image: Input image.
        :type image: numpy.array
        :return: The dilated-then-eroded binary image.
        """
        smoothed = cv2.blur(image, (5, 5)).astype("uint8")

        # collapse color images to a single channel before thresholding
        if smoothed.ndim > 2:
            channel_count = smoothed.shape[2]
            if channel_count == 3:
                smoothed = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
            elif channel_count == 4:
                smoothed = cv2.cvtColor(smoothed, cv2.COLOR_BGRA2GRAY)

        # inverted Otsu threshold; only the thresholded image is needed
        binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)[1]

        # a narrower kernel is used in single word mode than in the default mode
        if self.single_word_mode is False:
            kernel_width = 20
        else:
            kernel_width = 10
            # single word mode always marks the full detected width
            self.markup_length_range = (1, 1)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_width, 1))

        # dilate horizontally to join neighbouring glyphs, then erode once with the default kernel
        merged = cv2.dilate(binary, kernel, iterations=2)
        return cv2.erode(merged, None, iterations=1)


    # [docs]
    def draw_line(self, p1, p2, markup_mask, markup_thickness, markup_color, reverse):
        """Draw a smoothed, slightly curved line spanning the box defined by two points.

        Nothing is returned; the segments are drawn with ``cv2.line`` onto ``markup_mask``.

        :param p1: Starting point (x, y) of the line.
        :type p1: tuple
        :param p2: Ending point (x, y) of the line.
        :type p2: tuple
        :param markup_mask: Mask of markup effect.
        :type markup_mask: numpy.array (numpy.uint8)
        :param markup_thickness: Thickness of the line.
        :type markup_thickness: int
        :param markup_color: Color of the line in BGR format.
        :type markup_color: tuple
        :param reverse: Truthy to run x from min to max, falsy to run x from max to min.
        :type reverse: int
        """

        # bounding extents of the two points
        x_low = min(p2[0], p1[0])
        x_high = max(p2[0], p1[0])
        y_low = min(p2[1], p1[1])
        y_high = max(p2[1], p1[1])

        # three control points: both extremes plus a random point in between;
        # the x control point is drawn before the y one
        x_mid = random.randint(x_low, x_high)
        xs = [x_low, x_mid, x_high] if reverse else [x_high, x_mid, x_low]
        ys = [y_low, random.randint(y_low, y_high), y_high]

        # smooth the control points into a denser curve
        curve = smooth(np.column_stack((xs, ys)).astype("float"), 6)

        # draw the curve as consecutive anti-aliased straight segments
        for (from_x, from_y), (to_x, to_y) in zip(curve[:-1], curve[1:]):
            segment_start = (int(from_x), int(from_y))
            segment_end = (int(to_x), int(to_y))
            markup_mask = cv2.line(
                markup_mask,
                segment_start,
                segment_end,
                markup_color,
                markup_thickness,
                lineType=cv2.LINE_AA,
            )


    def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
        """Detect text-like regions in ``image`` and draw the configured markup over some of them.

        :param image: Input image. A 2-D (grayscale) image is converted to BGR; for a 4-channel
            image the 4th channel is split off and re-attached unchanged to the output.
        :type image: numpy.array
        :param layer: Not used by this method.
        :param mask: Optional mask, passed through unchanged.
        :param keypoints: Optional keypoints, passed through unchanged.
        :param bounding_boxes: Optional bounding boxes, passed through unchanged.
        :param force: Not used by this method.
        :return: The image with markup, or ``[image, mask, keypoints, bounding_boxes]`` when any of
            ``mask``, ``keypoints`` or ``bounding_boxes`` is not None.
        """

        # change to 3 channels BGR format, remembering the alpha plane so it can be restored
        has_alpha = False
        if len(image.shape) < 3:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        elif image.shape[2] == 4:
            has_alpha = True
            image, image_alpha = image[:, :, :3], image[:, :, 3]

        markup_image = image.copy()

        # resolve the per-call color
        if self.markup_color == "random":
            markup_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        elif self.markup_color == "contrast":
            # shrink the image to one pixel (area interpolation) and invert that color
            single_color = cv2.resize(image, (1, 1), interpolation=cv2.INTER_AREA)
            markup_color = (255 - single_color[0][0]).tolist()
        else:
            markup_color = self.markup_color

        # resolve the per-call large word mode
        if self.large_word_mode == "random":
            large_word_mode = random.choice([True, False])
        else:
            large_word_mode = self.large_word_mode

        # resolve the per-call markup type; this local value (not self.markup_type) drives everything below
        if self.markup_type == "random":
            markup_type = random.choice(["strikethrough", "crossed", "underline", "highlight"])
        else:
            markup_type = self.markup_type

        num_lines = random.randint(self.num_lines_range[0], self.num_lines_range[1])

        # binarize and dilate so that text is connected horizontally;
        # in single word mode this call also resets self.markup_length_range to (1, 1)
        binary_image = self._preprocess(image)

        # each connected region of the dilated image is detected as one contour
        contours, _ = cv2.findContours(
            binary_image,
            cv2.RETR_LIST,
            cv2.CHAIN_APPROX_NONE,
        )

        # bounding-box height of every contour
        heights = [cv2.boundingRect(cnt)[3] for cnt in contours]

        # estimate the typical character height from the most populated height bin
        bins = np.unique(heights)
        hist, bin_edges = np.histogram(heights, bins=bins, density=False)
        if len(bin_edges) > 1 and np.max(hist) > 20:
            peak = np.argmax(hist)
            character_height_min = bin_edges[peak]
            character_height_max = bin_edges[peak + 1]
            character_height_average = int((character_height_max + character_height_min) / 2)
            height_range = ((character_height_max - character_height_min) / 2) + 1
        else:
            # not enough evidence for a dominant height; fall back to a fixed threshold below
            character_height_average = -1
            height_range = -1

        # initialize coordinates of lines
        lines_coordinates = []

        # shuffle contours so the markup lands on random regions
        contours = list(contours)
        random.shuffle(contours)

        ysize, xsize = markup_image.shape[:2]

        # offset to interpolate markup effect up/down
        offset = 6

        def jittered(value, spread, upper):
            # shift by a random integer in [-spread, spread], then clamp to [0, upper]
            return np.clip(value + random.randint(-spread, spread), 0, upper)

        for cnt in contours:
            # adding randomization.
            choice = random.choice([False, True])
            x, y, w, h = cv2.boundingRect(cnt)

            if character_height_average == -1:
                check_height = h > 10
            else:
                check_height = (h > character_height_average - height_range) and (
                    h < character_height_average + height_range
                )

            if large_word_mode:
                conditions = check_height
            else:
                # additionally require a wide, not-too-large region and a coin flip
                conditions = (
                    choice
                    and (w > h * 2)
                    and (w * h < (ysize * xsize) / 10)
                    and w < int(xsize / 5)
                    and check_height
                )

            if not conditions:
                continue

            # stop once the requested number of marked regions is reached
            if num_lines == 0:
                break
            num_lines -= 1

            markup_length = random.uniform(
                self.markup_length_range[0],
                self.markup_length_range[1],
            )
            # adjusting width according to markup length
            w = int(w * markup_length)
            # adjusting starting-point according to markup length (uses the already shortened width)
            x = int(x + (1 - markup_length) * w)

            # for strikethrough and highlight, we need center points
            if markup_type == "strikethrough" or markup_type == "highlight":
                starting_point = [x, int(y + (h / 2))]
                ending_point = [x + w, int(y + (h / 2))]
            # for crossed-off we need points representing primary diagonal
            elif markup_type == "crossed":
                starting_point = [x, y]
                ending_point = [x + w, y + h]
            else:
                # underline (and any other value): points along the bottom of the region
                starting_point = [x, y + h]
                ending_point = [x + w, y + h]

            for _ in range(self.repetitions):
                if markup_type == "crossed":
                    # primary diagonal: top-left to bottom-right, each end jittered independently
                    primary = (
                        (jittered(starting_point[0], offset * 5, xsize), jittered(starting_point[1], offset, ysize)),
                        (jittered(ending_point[0], offset * 5, xsize), jittered(ending_point[1], offset, ysize)),
                    )
                    lines_coordinates.append(np.array(primary))

                    # secondary diagonal: top-right to bottom-left
                    secondary = (
                        (jittered(ending_point[0], offset * 5, xsize), jittered(starting_point[1], offset, ysize)),
                        (jittered(starting_point[0], offset * 5, xsize), jittered(ending_point[1], offset, ysize)),
                    )
                    lines_coordinates.append(np.array(secondary))
                else:
                    # dividing the line into points to mimic a smoothing effect
                    points_list = self.distribute_line(
                        starting_point,
                        ending_point,
                        offset,
                    ).astype("int")
                    lines_coordinates.append(points_list)

        # prevent empty lines
        if lines_coordinates:
            # random select ink
            if self.markup_ink == "random":
                markup_ink = random.choice(["pencil", "pen", "marker", "highlighter"])
            else:
                markup_ink = self.markup_ink

            # for highlight, the ink should be thicker; test the resolved type so that a
            # "random" markup_type that resolved to "highlight" is thickened as well
            if markup_type == "highlight":
                markup_thickness_range = (self.markup_thickness_range[0] + 5, self.markup_thickness_range[1] + 5)
            else:
                markup_thickness_range = self.markup_thickness_range

            # create ink generator
            ink_generator = InkGenerator(
                ink_type=markup_ink,
                ink_draw_method="lines",
                ink_draw_iterations=(1, 1),
                ink_location="random",
                ink_background=markup_image,
                ink_background_size=None,
                ink_background_color=None,
                ink_color=markup_color,
                ink_min_brightness=1,
                ink_min_brightness_value_range=(150, 200),
                ink_draw_size_range=None,
                ink_thickness_range=markup_thickness_range,
                ink_brightness_change=[0],
                ink_skeletonize=0,
                ink_skeletonize_iterations_range=(1, 1),
                ink_text=None,
                ink_text_font=None,
                ink_text_rotate_range=None,
                ink_lines_coordinates=lines_coordinates,
                ink_lines_stroke_count_range=(1, 1),
            )

            markup_image = ink_generator.generate_ink()

        # re-attach the untouched alpha channel
        if has_alpha:
            markup_image = np.dstack((markup_image, image_alpha))

        # check for additional output of mask, keypoints and bounding boxes
        outputs_extra = []
        if mask is not None or keypoints is not None or bounding_boxes is not None:
            outputs_extra = [mask, keypoints, bounding_boxes]

        # returns additional mask, keypoints and bounding boxes if there is additional input
        if outputs_extra:
            # returns in the format of [image, mask, keypoints, bounding_boxes]
            return [markup_image] + outputs_extra
        return markup_image