Shortcuts

Source code for mmedit.utils.trans_utils

# Copyright (c) OpenMMLab. All rights reserved.

import logging
import math

import cv2
import numpy as np
import torch
from mmengine.logging import print_log
from mmengine.utils import is_tuple_of
from PIL import Image, ImageDraw


[docs]def make_coord(shape, ranges=None, flatten=True): """Make coordinates at grid centers. Args: shape (tuple): shape of image. ranges (tuple): range of coordinate value. Default: None. flatten (bool): flatten to (n, 2) or Not. Default: True. Returns: coord (Tensor): coordinates. """ coord_seqs = [] for i, n in enumerate(shape): if ranges is None: v0, v1 = -1, 1 else: v0, v1 = ranges[i] r = (v1 - v0) / (2 * n) seq = v0 + r + (2 * r) * torch.arange(n).float() coord_seqs.append(seq) if 'indexing' in torch.meshgrid.__code__.co_varnames: coord = torch.meshgrid(*coord_seqs, indexing='ij') else: coord = torch.meshgrid(*coord_seqs) coord = torch.stack(coord, dim=-1) if flatten: coord = coord.view(-1, coord.shape[-1]) return coord
[docs]def bbox2mask(img_shape, bbox, dtype='uint8'): """Generate mask in np.ndarray from bbox. The returned mask has the shape of (h, w, 1). '1' indicates the hole and '0' indicates the valid regions. We prefer to use `uint8` as the data type of masks, which may be different from other codes in the community. Args: img_shape (tuple[int]): The size of the image. bbox (tuple[int]): Configuration tuple, (top, left, height, width) np.dtype (str): Indicate the data type of returned masks. Default: 'uint8' Returns: mask (np.ndarray): Mask in the shape of (h, w, 1). """ height, width = img_shape[:2] mask = np.zeros((height, width, 1), dtype=dtype) mask[bbox[0]:bbox[0] + bbox[2], bbox[1]:bbox[1] + bbox[3], :] = 1 return mask
[docs]def brush_stroke_mask(img_shape, num_vertices=(4, 12), mean_angle=2 * math.pi / 5, angle_range=2 * math.pi / 15, brush_width=(12, 40), max_loops=4, dtype='uint8'): """Generate free-form mask. The method of generating free-form mask is in the following paper: Free-Form Image Inpainting with Gated Convolution. When you set the config of this type of mask. You may note the usage of `np.random.randint` and the range of `np.random.randint` is [left, right). We prefer to use `uint8` as the data type of masks, which may be different from other codes in the community. TODO: Rewrite the implementation of this function. Args: img_shape (tuple[int]): Size of the image. num_vertices (int | tuple[int]): Min and max number of vertices. If only give an integer, we will fix the number of vertices. Default: (4, 12). mean_angle (float): Mean value of the angle in each vertex. The angle is measured in radians. Default: 2 * math.pi / 5. angle_range (float): Range of the random angle. Default: 2 * math.pi / 15. brush_width (int | tuple[int]): (min_width, max_width). If only give an integer, we will fix the width of brush. Default: (12, 40). max_loops (int): The max number of for loops of drawing strokes. Default: 4. np.dtype (str): Indicate the data type of returned masks. Default: 'uint8'. Returns: mask (np.ndarray): Mask in the shape of (h, w, 1). """ img_h, img_w = img_shape[:2] if isinstance(num_vertices, int): min_num_vertices, max_num_vertices = num_vertices, num_vertices + 1 elif isinstance(num_vertices, tuple): min_num_vertices, max_num_vertices = num_vertices else: raise TypeError('The type of num_vertices should be int' f'or tuple[int], but got type: {num_vertices}') if isinstance(brush_width, tuple): min_width, max_width = brush_width elif isinstance(brush_width, int): min_width, max_width = brush_width, brush_width + 1 else: raise TypeError('The type of brush_width should be int' f'or tuple[int], but got type: {brush_width}') average_radius = math.sqrt(img_h * img_h + img_w * img_w) / 8 mask = Image.new('L', (img_w, img_h), 0) loop_num = np.random.randint(1, max_loops) num_vertex_list = np.random.randint( min_num_vertices, max_num_vertices, size=loop_num) angle_min_list = np.random.uniform(0, angle_range, size=loop_num) angle_max_list = np.random.uniform(0, angle_range, size=loop_num) for loop_n in range(loop_num): num_vertex = num_vertex_list[loop_n] angle_min = mean_angle - angle_min_list[loop_n] angle_max = mean_angle + angle_max_list[loop_n] angles = [] vertex = [] # set random angle on each vertex angles = np.random.uniform(angle_min, angle_max, size=num_vertex) reverse_mask = (np.arange(num_vertex, dtype=np.float32) % 2) == 0 angles[reverse_mask] = 2 * math.pi - angles[reverse_mask] h, w = mask.size # set random vertices vertex.append((np.random.randint(0, w), np.random.randint(0, h))) r_list = np.random.normal( loc=average_radius, scale=average_radius // 2, size=num_vertex) for i in range(num_vertex): r = np.clip(r_list[i], 0, 2 * average_radius) new_x = np.clip(vertex[-1][0] + r * math.cos(angles[i]), 0, w) new_y = np.clip(vertex[-1][1] + r * math.sin(angles[i]), 0, h) vertex.append((int(new_x), int(new_y))) # draw brush strokes according to the vertex and angle list draw = ImageDraw.Draw(mask) width = np.random.randint(min_width, max_width) draw.line(vertex, fill=1, width=width) for v in vertex: draw.ellipse((v[0] - width // 2, v[1] - width // 2, v[0] + width // 2, v[1] + width // 2), fill=1) # randomly flip the mask if np.random.normal() > 0: mask.transpose(Image.FLIP_LEFT_RIGHT) if np.random.normal() > 0: mask.transpose(Image.FLIP_TOP_BOTTOM) mask = np.array(mask).astype(dtype=getattr(np, dtype)) mask = mask[:, :, None] return mask
[docs]def random_bbox(img_shape, max_bbox_shape, max_bbox_delta=40, min_margin=20): """Generate a random bbox for the mask on a given image. In our implementation, the max value cannot be obtained since we use `np.random.randint`. And this may be different with other standard scripts in the community. Args: img_shape (tuple[int]): The size of a image, in the form of (h, w). max_bbox_shape (int | tuple[int]): Maximum shape of the mask box, in the form of (h, w). If it is an integer, the mask box will be square. max_bbox_delta (int | tuple[int]): Maximum delta of the mask box, in the form of (delta_h, delta_w). If it is an integer, delta_h and delta_w will be the same. Mask shape will be randomly sampled from the range of `max_bbox_shape - max_bbox_delta` and `max_bbox_shape`. Default: (40, 40). min_margin (int | tuple[int]): The minimum margin size from the edges of mask box to the image boarder, in the form of (margin_h, margin_w). If it is an integer, margin_h and margin_w will be the same. Default: (20, 20). Returns: tuple[int]: The generated box, (top, left, h, w). """ if not isinstance(max_bbox_shape, tuple): max_bbox_shape = (max_bbox_shape, max_bbox_shape) if not isinstance(max_bbox_delta, tuple): max_bbox_delta = (max_bbox_delta, max_bbox_delta) if not isinstance(min_margin, tuple): min_margin = (min_margin, min_margin) assert is_tuple_of(max_bbox_shape, int) assert is_tuple_of(max_bbox_delta, int) assert is_tuple_of(min_margin, int) img_h, img_w = img_shape[:2] max_mask_h, max_mask_w = max_bbox_shape max_delta_h, max_delta_w = max_bbox_delta margin_h, margin_w = min_margin if max_mask_h > img_h or max_mask_w > img_w: raise ValueError(f'mask shape {max_bbox_shape} should be smaller than ' f'image shape {img_shape}') if (max_delta_h // 2 * 2 >= max_mask_h or max_delta_w // 2 * 2 >= max_mask_w): raise ValueError(f'mask delta {max_bbox_delta} should be smaller than' f'mask shape {max_bbox_shape}') if img_h - max_mask_h < 2 * margin_h or img_w - max_mask_w < 2 * margin_w: raise ValueError(f'Margin {min_margin} cannot be satisfied for img' f'shape {img_shape} and mask shape {max_bbox_shape}') # get the max value of (top, left) max_top = img_h - margin_h - max_mask_h max_left = img_w - margin_w - max_mask_w # randomly select a (top, left) top = np.random.randint(margin_h, max_top) left = np.random.randint(margin_w, max_left) # randomly shrink the shape of mask box according to `max_bbox_delta` # the center of box is fixed delta_top = np.random.randint(0, max_delta_h // 2 + 1) delta_left = np.random.randint(0, max_delta_w // 2 + 1) top = top + delta_top left = left + delta_left h = max_mask_h - delta_top w = max_mask_w - delta_left return (top, left, h, w)
[docs]def random_irregular_mask(img_shape, num_vertices=(4, 8), max_angle=4, length_range=(10, 100), brush_width=(10, 40), dtype='uint8'): """Generate random irregular masks. This is a modified version of free-form mask implemented in 'brush_stroke_mask'. We prefer to use `uint8` as the data type of masks, which may be different from other codes in the community. TODO: Rewrite the implementation of this function. Args: img_shape (tuple[int]): Size of the image. num_vertices (int | tuple[int]): Min and max number of vertices. If only give an integer, we will fix the number of vertices. Default: (4, 8). max_angle (float): Max value of angle at each vertex. Default 4.0. length_range (int | tuple[int]): (min_length, max_length). If only give an integer, we will fix the length of brush. Default: (10, 100). brush_width (int | tuple[int]): (min_width, max_width). If only give an integer, we will fix the width of brush. Default: (10, 40). np.dtype (str): Indicate the data type of returned masks. Default: 'uint8' Returns: mask (np.ndarray): Mask in the shape of (h, w, 1). """ h, w = img_shape[:2] mask = np.zeros((h, w), dtype=dtype) if isinstance(length_range, int): min_length, max_length = length_range, length_range + 1 elif isinstance(length_range, tuple): min_length, max_length = length_range else: raise TypeError('The type of length_range should be int' f'or tuple[int], but got type: {length_range}') if isinstance(num_vertices, int): min_num_vertices, max_num_vertices = num_vertices, num_vertices + 1 elif isinstance(num_vertices, tuple): min_num_vertices, max_num_vertices = num_vertices else: raise TypeError('The type of num_vertices should be int' f'or tuple[int], but got type: {num_vertices}') if isinstance(brush_width, int): min_brush_width, max_brush_width = brush_width, brush_width + 1 elif isinstance(brush_width, tuple): min_brush_width, max_brush_width = brush_width else: raise TypeError('The type of brush_width should be int' f'or tuple[int], but got type: {brush_width}') num_v = np.random.randint(min_num_vertices, max_num_vertices) for i in range(num_v): start_x = np.random.randint(w) start_y = np.random.randint(h) # from the start point, randomly setlect n \in [1, 6] directions. direction_num = np.random.randint(1, 6) angle_list = np.random.randint(0, max_angle, size=direction_num) length_list = np.random.randint( min_length, max_length, size=direction_num) brush_width_list = np.random.randint( min_brush_width, max_brush_width, size=direction_num) for direct_n in range(direction_num): angle = 0.01 + angle_list[direct_n] if i % 2 == 0: angle = 2 * math.pi - angle length = length_list[direct_n] brush_w = brush_width_list[direct_n] # compute end point according to the random angle end_x = (start_x + length * np.sin(angle)).astype(np.int32) end_y = (start_y + length * np.cos(angle)).astype(np.int32) cv2.line(mask, (start_y, start_x), (end_y, end_x), 1, brush_w) start_x, start_y = end_x, end_y mask = np.expand_dims(mask, axis=2) return mask
[docs]def get_irregular_mask(img_shape, area_ratio_range=(0.15, 0.5), **kwargs): """Get irregular mask with the constraints in mask ratio. Args: img_shape (tuple[int]): Size of the image. area_ratio_range (tuple(float)): Contain the minimum and maximum area ratio. Default: (0.15, 0.5). Returns: mask (np.ndarray): Mask in the shape of (h, w, 1). """ mask = random_irregular_mask(img_shape, **kwargs) min_ratio, max_ratio = area_ratio_range while not min_ratio < (np.sum(mask) / (img_shape[0] * img_shape[1])) < max_ratio: mask = random_irregular_mask(img_shape, **kwargs) return mask
[docs]_integer_types = ( np.byte, np.ubyte, # 8 bits np.short, np.ushort, # 16 bits np.intc, np.uintc, # 16 or 32 or 64 bits np.int_, np.uint, # 32 or 64 bits np.longlong, np.ulonglong) # 64 bits
[docs]_integer_ranges = { t: (np.iinfo(t).min, np.iinfo(t).max) for t in _integer_types
}
[docs]dtype_range = { np.bool_: (False, True), np.bool8: (False, True), np.float16: (-1, 1), np.float32: (-1, 1), np.float64: (-1, 1)
} dtype_range.update(_integer_ranges)
[docs]def dtype_limits(image, clip_negative=False): """Return intensity limits, i.e. (min, max) tuple, of the image's dtype. This function is adopted from skimage: https://github.com/scikit-image/scikit-image/blob/ 7e4840bd9439d1dfb6beaf549998452c99f97fdd/skimage/util/dtype.py#L35 Args: image (np.ndarray): Input image. clip_negative (bool, optional): If True, clip the negative range (i.e. return 0 for min intensity) even if the image dtype allows negative values. Default: False. Returns tuple: Lower and upper intensity limits. """ imin, imax = dtype_range[image.dtype.type] if clip_negative: imin = 0 return imin, imax
[docs]def adjust_gamma(image, gamma=1, gain=1): """Performs Gamma Correction on the input image. This function is adopted from skimage: https://github.com/scikit-image/scikit-image/blob/ 7e4840bd9439d1dfb6beaf549998452c99f97fdd/skimage/exposure/ exposure.py#L439-L494 Also known as Power Law Transform. This function transforms the input image pixelwise according to the equation ``O = I**gamma`` after scaling each pixel to the range 0 to 1. Args: image (np.ndarray): Input image. gamma (float, optional): Non negative real number. Defaults to 1. gain (float, optional): The constant multiplier. Defaults to 1. Returns: np.ndarray: Gamma corrected output image. """ if np.any(image < 0): raise ValueError('Image Correction methods work correctly only on ' 'images with non-negative values. Use ' 'skimage.exposure.rescale_intensity.') dtype = image.dtype.type if gamma < 0: raise ValueError('Gamma should be a non-negative real number.') scale = float(dtype_limits(image, True)[1] - dtype_limits(image, True)[0]) out = ((image / scale)**gamma) * scale * gain return out.astype(dtype)
[docs]def add_gaussian_noise(img: np.ndarray, mu, sigma): """Add Gaussian Noise on the input image. Args: img (np.ndarray): Input image. mu (float): The mu value of the Gaussian function. sigma (float): The sigma value of the Gaussian function. Returns: noisy_img (np.ndarray): Gaussian noisy output image. """ img = img.astype(np.float32) gauss_noise = np.random.normal(mu, sigma, img.shape) noisy_img = img + gauss_noise noisy_img = np.clip(noisy_img, 0, 255) return noisy_img
[docs]def random_choose_unknown(unknown, crop_size): """Randomly choose an unknown start (top-left) point for a given crop_size. Args: unknown (np.ndarray): The binary unknown mask. crop_size (tuple[int]): The given crop size. Returns: tuple[int]: The top-left point of the chosen bbox. """ h, w = unknown.shape crop_h, crop_w = crop_size delta_h = center_h = crop_h // 2 delta_w = center_w = crop_w // 2 # mask out the validate area for selecting the cropping center mask = np.zeros_like(unknown) mask[delta_h:h - delta_h, delta_w:w - delta_w] = 1 if np.any(unknown & mask): center_h_list, center_w_list = np.where(unknown & mask) elif np.any(unknown): center_h_list, center_w_list = np.where(unknown) else: print_log('No unknown pixels found!', level=logging.WARNING) center_h_list = [center_h] center_w_list = [center_w] num_unknowns = len(center_h_list) rand_ind = np.random.randint(num_unknowns) center_h = center_h_list[rand_ind] center_w = center_w_list[rand_ind] # make sure the top-left point is valid top = np.clip(center_h - delta_h, 0, h - crop_h) left = np.clip(center_w - delta_w, 0, w - crop_w) return top, left
Read the Docs v: latest
Versions
master
latest
stable
zyh-re-docs
zyh-doc-notfound-extend
zyh-api-rendering
Downloads
pdf
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.