# Source code for crowdcount.transforms.transforms

# -*- coding:utf-8 -*-
from PIL import Image
import numpy as np
import random
import collections
import numbers

# Public API of this module: the two compose helpers and the individual
# transform classes defined below.
__all__ = ["SingleCompose", "ComplexCompose", "ResizeShrink",
           "LabelEnlarge", "TransposeFlip", "RandomCrop",
           "Scale"]


class SingleCompose(object):
    """Compose several transforms which each transform a single input
    (an image or a density map).

    Args:
        cc_transforms (list of callables taking and returning one object,
            e.g. [``ResizeShrink``, ``LabelEnlarge``]): transforms to apply,
            in order.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> cc_transforms.SingleCompose([
        ...     cc_transforms.ResizeShrink(8),
        ...     cc_transforms.LabelEnlarge(10),
        ... ])
    """
    def __init__(self, cc_transforms):
        self.cc_transforms = cc_transforms

    def __call__(self, img):
        """Feed ``img`` through every transform in order and return the result."""
        for t in self.cc_transforms:
            img = t(img)
        return img

    def __repr__(self):
        # One indented line per transform, wrapped in "ClassName(" ... ")".
        body = ''.join('\n    {0}'.format(t) for t in self.cc_transforms)
        return self.__class__.__name__ + '(' + body + '\n)'


class ComplexCompose(object):
    """Compose several transforms which each transform both the image and
    the density map.

    Args:
        cc_transforms (list of callables taking and returning an
            ``(image, density map)`` pair, e.g. [``TransposeFlip``,
            ``RandomCrop``, ``Scale``]): transforms to apply, in order.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> cc_transforms.ComplexCompose([
        ...     cc_transforms.TransposeFlip(),
        ...     cc_transforms.RandomCrop([512, 512]),
        ...     cc_transforms.Scale([512, 512]),
        ... ])
    """

    def __init__(self, cc_transforms):
        self.cc_transforms = cc_transforms

    def __call__(self, img, den):
        """Feed the ``(img, den)`` pair through every transform in order."""
        for t in self.cc_transforms:
            img, den = t(img, den)
        return img, den

    def __repr__(self):
        # One indented line per transform, wrapped in "ClassName(" ... ")".
        body = ''.join('\n    {0}'.format(t) for t in self.cc_transforms)
        return self.__class__.__name__ + '(' + body + '\n)'


class ResizeShrink(object):
    """Shrink the density map by ``scale_factor`` (to match a network output
    that has been pooled).

    Args:
        scale_factor (int): Desired reduction factor. Each side of the output
            is the input side floor-divided by scale_factor. If scale_factor
            is 8 and the input size is (20, 10), the output size is
            (20 // 8, 10 // 8) = (2, 1).

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> resize_shrink = cc_transforms.ResizeShrink(8)
        >>> density_map = np.random.rand(20, 10)
        >>> density_map.shape
        (20, 10)
        >>> resize_shrink(density_map).shape
        (2, 1)
    """

    def __init__(self, scale_factor):
        self.scale_factor = scale_factor

    def __call__(self, den):
        """
        Args:
            den (PIL Image or numpy.ndarray): density map to be shrunk

        Returns:
            numpy.ndarray: shrunk density map, with values multiplied by the
            ratio of the input area to the output area

        Raises:
            ValueError: if a side of the input is smaller than scale_factor,
                which would give an empty output.
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        w, h = den.size
        h_trans = h // self.scale_factor
        w_trans = w // self.scale_factor
        if h_trans < 1 or w_trans < 1:
            raise ValueError(
                'density map of size (h={0}, w={1}) is too small to be shrunk '
                'by scale_factor={2}'.format(h, w, self.scale_factor))
        shrunk = np.asarray(den.resize((w_trans, h_trans), Image.BICUBIC))
        # Fewer pixels now cover the same scene; scaling by the area ratio
        # keeps the sum of the map roughly unchanged.
        return shrunk * (h * w) / (h_trans * w_trans)

    def __repr__(self):
        return '{0}(scale_factor={1})'.format(self.__class__.__name__, self.scale_factor)


class LabelEnlarge(object):
    """ Training trick from the
    `"C^3 Framework..." <https://arxiv.org/abs/1907.02724>`_ paper.
    They find that a neural network can converge faster and reach a lower
    estimation error when the density map is multiplied by a large value.

    Args:
        number (int): the magnification of density map. default is 100.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> label_enlarge = cc_transforms.LabelEnlarge(100)
        >>> density_map = np.random.rand(4, 4)
        >>> density_map
        array([[0.62494003, 0.35120895, 0.21002026, 0.52596833],
               [0.45540917, 0.41721004, 0.45287173, 0.35665398],
               [0.63187118, 0.25588367, 0.44660365, 0.0367272 ],
               [0.86808967, 0.11982928, 0.44544907, 0.81409479]])
        >>> label_enlarge(density_map)
        array([[62.49400293, 35.12089511, 21.00202647, 52.59683332],
               [45.54091666, 41.72100355, 45.28717272, 35.66539753],
               [63.18711774, 25.58836725, 44.66036518,  3.67272008],
               [86.8089669 , 11.98292805, 44.54490721, 81.40947874]])
    """

    def __init__(self, number=100):
        self.number = number

    def __call__(self, den):
        """
        Args:
            den (PIL Image or numpy.ndarray): density map to be enlarged

        Returns:
            numpy.ndarray: enlarged density map
        """
        # PIL images do not support arithmetic, so convert them first.
        # ndarrays (the common case) are checked first and pass through as is.
        if not isinstance(den, np.ndarray) and isinstance(den, Image.Image):
            den = np.asarray(den)
        return den * self.number

    def __repr__(self):
        return '{0}(number={1})'.format(self.__class__.__name__, self.number)


class TransposeFlip(object):
    """ Randomly flip both the image and the density map left-right.

    The flip is applied when ``random.random() > 0.5`` and always to both
    inputs together, so they stay aligned.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> transpose_flip = cc_transforms.TransposeFlip()
        >>> img, density_map = transpose_flip(img, density_map)
    """

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be flipped
            den (PIL Image or numpy.ndarray): density map to be flipped

        Returns:
            (PIL Image, numpy.ndarray): the image and the density map,
            either both flipped or both unchanged
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        # A single draw decides for both inputs so they cannot get out of sync.
        if random.random() > 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            den = den.transpose(Image.FLIP_LEFT_RIGHT)
        return img, np.asarray(den)

    def __repr__(self):
        return self.__class__.__name__ + '()'


class RandomCrop(object):
    """ To allow multi-batch training on irregular datasets (like ShanghaiTech
    Part A, where images have different shapes), randomly crop both the image
    and the density map to the given size.

    Args:
        size (sequence or int): Desired output size of the crop as (h, w).
            If size is a single number, a square crop (size, size) is made.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> random_crop = cc_transforms.RandomCrop([2, 2])
        >>> img, density_map = random_crop(img, density_map)
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (size, size)
        else:
            self.size = size

    @staticmethod
    def _random_offset(slack):
        """Pick a random start offset for a crop window, inclusive at both ends.

        ``slack`` is the image side minus the crop side. When it is positive
        the offset lies in [0, slack]; when it is negative (crop larger than
        the image) the offset lies in [slack, 0], so the window still covers
        the image and the overhang is padded by ``Image.crop``.
        """
        slack = int(slack)
        return random.randint(min(slack, 0), max(slack, 0))

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be cropped
            den (PIL Image or numpy.ndarray): density map to be cropped

        Returns:
            (PIL Image, numpy.ndarray): the image and the density map cropped
            with the same window
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        width, height = img.size
        h, w = self.size
        # randint is inclusive, so the last valid window (touching the bottom
        # or right edge) can be selected too.
        height_start = self._random_offset(height - h)
        width_start = self._random_offset(width - w)
        # One box for both inputs keeps image and density map aligned.
        box = (width_start, height_start, width_start + w, height_start + h)
        return img.crop(box), np.asarray(den.crop(box))

    def __repr__(self):
        return '{0}(size={1})'.format(self.__class__.__name__, self.size)


class Scale(object):
    """To allow multi-batch training on irregular datasets (like ShanghaiTech
    Part A, where images have different shapes), resize both the image and
    the density map to the given size.

    Args:
        size (sequence or int): Desired output size. If size is a sequence
            like (h, w), the output is resized to exactly that size. If size
            is an int, the shorter side is resized to ``size`` and the longer
            side is scaled to keep the aspect ratio.
        interpolation (int, optional): PIL resampling filter used for both
            inputs. Default is ``Image.BILINEAR``.

    Example:
        >>> import crowdcount.transforms as cc_transforms
        >>> import numpy as np
        >>> img = np.random.randn(4, 4)
        >>> density_map = np.random.randn(4, 4)
        >>> scale = cc_transforms.Scale([2, 2])
        >>> img, density_map = scale(img, density_map)
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.
        from collections.abc import Iterable
        assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img, den):
        """
        Args:
            img (PIL Image or numpy.ndarray): image to be resized
            den (PIL Image or numpy.ndarray): density map to be resized

        Returns:
            (PIL Image, numpy.ndarray): the resized image and density map
        """
        if not isinstance(den, Image.Image):
            den = Image.fromarray(den)
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        if isinstance(self.size, int):
            w, h = img.size
            # Shorter side already matches: nothing to resize.
            if (w <= h and w == self.size) or (h <= w and h == self.size):
                return img, np.asarray(den)
            # Pin the shorter side to ``size`` and scale the other one.
            if w < h:
                target = (self.size, int(self.size * h / w))
            else:
                target = (int(self.size * w / h), self.size)
        else:
            # ``size`` is (h, w) but PIL expects (w, h).
            target = tuple(self.size[::-1])
        # NOTE(review): unlike ResizeShrink, the density map values are not
        # rescaled by the area ratio here, so the sum of the map changes with
        # the resize -- confirm this is intended.
        return img.resize(target, self.interpolation), \
            np.asarray(den.resize(target, self.interpolation))

    def __repr__(self):
        return '{0}(size={1}, interpolation={2})'.format(
            self.__class__.__name__, self.size, self.interpolation)