# Source code for crowdcount.transforms.transforms

# -*- coding:utf-8 -*-
from PIL import Image
import numpy as np
import random
import collections
import numbers

# Names exported by a star-import of this module.
__all__ = [
    "SingleCompose",
    "ComplexCompose",
    "ResizeShrink",
    "LabelEnlarge",
    "TransposeFlip",
    "RandomCrop",
    "Scale",
]


class SingleCompose(object):
    """Chain several transforms that each take and return a single object.

    Use this for transforms that act on only one input (an image or a
    density map), such as ``ResizeShrink`` and ``LabelEnlarge``.

    Args:
        cc_transforms (list of callables): single-input transforms, applied
            in list order; each one receives the previous one's output.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> cc_transforms.SingleCompose([
        ...     cc_transforms.ResizeShrink(8),
        ...     cc_transforms.LabelEnlarge(10),
        ... ])
    """

    def __init__(self, cc_transforms):
        self.cc_transforms = cc_transforms

    def __call__(self, img):
        """Run ``img`` through every transform in order and return the result."""
        result = img
        for transform in self.cc_transforms:
            result = transform(result)
        return result

    def __repr__(self):
        # One line per contained transform, wrapped in "ClassName(" ... ")".
        body = ''.join(
            '\n' + ' {0}'.format(transform) for transform in self.cc_transforms
        )
        return self.__class__.__name__ + '(' + body + '\n)'
class ComplexCompose(object):
    """Chain several transforms that act on an image and its density map together.

    Use this for paired transforms such as ``TransposeFlip``, ``RandomCrop``
    and ``Scale``, which must apply the same operation to both inputs.

    Args:
        cc_transforms (list of callables): two-input transforms, applied in
            list order; each one is called as ``t(img, den)`` and must return
            an ``(img, den)`` pair that is fed to the next one.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> cc_transforms.ComplexCompose([
        ...     cc_transforms.TransposeFlip(),
        ...     cc_transforms.RandomCrop([512, 512]),
        ...     cc_transforms.Scale([512, 512]),
        ... ])
    """

    def __init__(self, cc_transforms):
        self.cc_transforms = cc_transforms

    def __call__(self, img, den):
        """Run the ``(img, den)`` pair through every transform in order."""
        pair = (img, den)
        for transform in self.cc_transforms:
            # Each transform must hand back exactly two values.
            first, second = transform(*pair)
            pair = (first, second)
        return pair

    def __repr__(self):
        # One line per contained transform, wrapped in "ClassName(" ... ")".
        body = ''.join(
            '\n' + ' {0}'.format(transform) for transform in self.cc_transforms
        )
        return self.__class__.__name__ + '(' + body + '\n)'
class ResizeShrink(object):
    """Shrink a density map by an integer factor (to match a pooled network output).

    The map is resized with bicubic interpolation and its values are then
    multiplied by the ratio of input to output pixel counts, so the sum of
    the map is roughly preserved.

    Args:
        scale_factor (int): Desired reduction factor. Each side of the output
            is the input side floor-divided by ``scale_factor``. With a
            factor of 8 and an input of shape (20, 10), the output shape is
            (20 // 8, 10 // 8) = (2, 1).

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> resize_shrink = cc_transforms.ResizeShrink(8)
        >>> density_map = np.random.rand(20, 10)
        >>> density_map.shape
        (20, 10)
        >>> resize_shrink(density_map)
        array([[52.175777],
               [46.061344]], dtype=float32)
    """

    def __init__(self, scale_factor):
        self.scale_factor = scale_factor

    def __call__(self, den):
        """
        Args:
            den (PIL Image or numpy.ndarray): density map to be shrunk

        Returns:
            numpy.ndarray: the shrunk, value-rescaled density map

        Raises:
            ValueError: if either side of ``den`` is smaller than
                ``scale_factor``, which would give an empty output.
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        w, h = den.size
        h_trans = h // self.scale_factor
        w_trans = w // self.scale_factor
        # Fail early with a clear message instead of an opaque error from
        # PIL's resize (or a division by zero) on a zero-sized target.
        if h_trans < 1 or w_trans < 1:
            raise ValueError(
                'density map of size (h={0}, w={1}) is too small to shrink '
                'by scale_factor={2}'.format(h, w, self.scale_factor)
            )
        shrunk = np.asarray(den.resize((w_trans, h_trans), Image.BICUBIC))
        # Compensate for the reduced number of pixels.
        return shrunk * (h * w) / (h_trans * w_trans)

    def __repr__(self):
        # self.__class__ (not the bare __class__ cell) so subclasses report
        # their own name, matching SingleCompose / ComplexCompose.
        return self.__class__.__name__ + '()'
class LabelEnlarge(object):
    """Multiply a density map by a constant factor.

    Training trick from the `"C^3 Framework..." <https://arxiv.org/abs/1907.02724>`_
    paper: the authors report faster convergence and lower estimation error
    when the density map is multiplied by a large integer.

    Args:
        number (int): the magnification of the density map. Default is 100.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> label_enlarge = cc_transforms.LabelEnlarge(100)
        >>> density_map = np.random.rand(4, 4)
        >>> density_map
        array([[0.62494003, 0.35120895, 0.21002026, 0.52596833],
               [0.45540917, 0.41721004, 0.45287173, 0.35665398],
               [0.63187118, 0.25588367, 0.44660365, 0.0367272 ],
               [0.86808967, 0.11982928, 0.44544907, 0.81409479]])
        >>> label_enlarge(density_map)
        array([[62.49400293, 35.12089511, 21.00202647, 52.59683332],
               [45.54091666, 41.72100355, 45.28717272, 35.66539753],
               [63.18711774, 25.58836725, 44.66036518,  3.67272008],
               [86.8089669 , 11.98292805, 44.54490721, 81.40947874]])
    """

    def __init__(self, number=100):
        self.number = number

    def __call__(self, den):
        """
        Args:
            den (PIL Image or numpy.ndarray): density map to be enlarged

        Returns:
            numpy.ndarray: enlarged density map
        """
        # ndarrays multiply directly. A PIL image has no ``*`` operator, so
        # it is converted to an array first.
        if not isinstance(den, np.ndarray) and isinstance(den, Image.Image):
            den = np.asarray(den)
        return den * self.number

    def __repr__(self):
        # self.__class__ (not the bare __class__ cell) so subclasses report
        # their own name, matching SingleCompose / ComplexCompose.
        return self.__class__.__name__ + '()'
class TransposeFlip(object):
    """Randomly flip both the image and the density map left-to-right.

    The flip is applied to both inputs or to neither, so they stay aligned.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> transpose_flip = cc_transforms.TransposeFlip()
        >>> img, density_map = transpose_flip(img, density_map)
    """

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be flipped
            den (PIL Image or numpy.ndarray): density map to be flipped

        Returns:
            (PIL Image, numpy.ndarray): the image and the density map
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        # A single draw decides the flip for both inputs.
        if random.random() > 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            den = den.transpose(Image.FLIP_LEFT_RIGHT)
        return img, np.asarray(den)

    def __repr__(self):
        # self.__class__ (not the bare __class__ cell) so subclasses report
        # their own name, matching SingleCompose / ComplexCompose.
        return self.__class__.__name__ + '()'
class RandomCrop(object):
    """Randomly crop the same window out of an image and its density map.

    Intended for multi-batch training on irregular datasets (like
    ShanghaiTech Part A, where images have different shapes): cropping every
    sample to one fixed size makes them stackable.

    Args:
        size (sequence or int): Desired output size of the crop. If size is
            a number instead of a sequence like (h, w), a square crop
            (size, size) is made.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> random_crop = cc_transforms.RandomCrop([2, 2])
        >>> img, density_map = random_crop(img, density_map)
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (size, size)
        else:
            self.size = size

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be cropped
            den (PIL Image or numpy.ndarray): density map to be cropped

        Returns:
            (PIL Image, numpy.ndarray): the cropped image and density map
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        width, height = img.size
        h, w = self.size
        # NOTE(review): random.random() is in [0, 1) and int() truncates, so
        # the largest offset (height - h / width - w) is practically never
        # drawn unless it is 0. Left as-is to keep seeded runs reproducible.
        height_start = int(random.random() * (height - h))
        width_start = int(random.random() * (width - w))
        # The same box is used for both inputs so they stay aligned.
        box = (width_start, height_start, width_start + w, height_start + h)
        return img.crop(box), np.asarray(den.crop(box))

    def __repr__(self):
        # self.__class__ (not the bare __class__ cell) so subclasses report
        # their own name, matching SingleCompose / ComplexCompose.
        return self.__class__.__name__ + '()'
class Scale(object):
    """Resize both the image and the density map to a given size.

    Intended for multi-batch training on irregular datasets (like
    ShanghaiTech Part A, where images have different shapes).

    Args:
        size (sequence or int): Desired output size. A sequence is read as
            (h, w). If size is an int, the shorter side of the image is
            resized to ``size`` and the other side is scaled by the same
            ratio.
        interpolation (int): PIL resampling filter used for both inputs.
            Default is ``Image.BILINEAR``.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> scale = cc_transforms.Scale([2, 2])
        >>> img, density_map = scale(img, density_map)
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``. Imported here because ``import collections``
        # alone does not guarantee that the ``abc`` submodule is loaded.
        from collections.abc import Iterable

        assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2), \
            'size must be an int or a sequence of two elements (h, w)'
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be resized
            den (PIL Image or numpy.ndarray): density map to be resized

        Returns:
            (PIL Image, numpy.ndarray): the resized image and density map
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)

        if isinstance(self.size, int):
            w, h = img.size
            # Shorter side already matches: nothing to resize.
            if (w <= h and w == self.size) or (h <= w and h == self.size):
                return img, np.asarray(den)
            if w < h:
                ow = self.size
                oh = int(self.size * h / w)
            else:
                oh = self.size
                ow = int(self.size * w / h)
            target = (ow, oh)
        else:
            # self.size is (h, w); PIL expects (w, h).
            target = self.size[::-1]

        # NOTE(review): unlike ResizeShrink, the density values are not
        # rescaled after resizing, so the map's sum may change with the
        # output size - confirm this is intended.
        return img.resize(target, self.interpolation), \
            np.asarray(den.resize(target, self.interpolation))

    def __repr__(self):
        # self.__class__ (not the bare __class__ cell) so subclasses report
        # their own name, matching SingleCompose / ComplexCompose.
        return self.__class__.__name__ + '()'