Source code for miprometheus.utils.problems_utils.generate_feature_maps

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# MIT License
#
# Copyright (c) 2018 Kim Seonghyeon
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ------------------------------------------------------------------------------
#
# Copyright (C) IBM Corporation 2018
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
generate_feature_maps.py: This file contains 1 class:

    - GenerateFeatureMaps: This class instantiates a specified pretrained CNN model to extract feature maps from\
     images stored in the indicated directory. It is also a ``Dataset`` over these images, so that a DataLoader\
     can be used to generate batches of them.

This class is used in problems.image_text_to_class.CLEVR.generate_feature_maps_file.

"""
__author__ = "Vincent Marois"
import os
import torchvision
from torchvision import transforms
import torch
from PIL import Image

from torch.utils.data import Dataset


class GenerateFeatureMaps(Dataset):
    """
    Dataset over the images of a directory, bundled with a truncated pretrained CNN.

    Indexing the dataset returns one transformed image. The truncated CNN is built in the constructor and
    exposed as ``self.model``; it is not applied by ``__getitem__``, so the caller is expected to feed the
    batches of images through ``self.model`` to obtain the feature maps.
    """

    def __init__(self, image_dir, cnn_model, num_blocks, filename_template, set='train', transform=None):
        """
        Creates the pretrained CNN model & move it to CUDA if available.

        :param image_dir: Directory path to the images to extract features from. A leading ``~`` is expanded.
        :type image_dir: str

        :param cnn_model: Name of the pretrained CNN model to use. Must be in ``torchvision.models.`` and expose\
         the attributes ``conv1``, ``bn1``, ``relu``, ``maxpool`` and ``layer<i>`` (e.g. the ResNet family).
        :type cnn_model: str

        :param num_blocks: Exclusive upper bound on the ``layer<i>`` blocks kept from the cnn_model: the blocks\
         ``layer1`` .. ``layer(num_blocks - 1)`` are stacked after the first layers. **This is dependent on the\
         specified cnn_model, please check this value beforehand.**
        :type num_blocks: int

        :param filename_template: The template followed by the filenames in ``image_dir``. It should indicate with\
         brackets where the index is located, e.g.

            >>> filename_template = 'CLEVR_train_{}.png'

        The index will be filled up on 6 characters.
        :type filename_template: str

        :param set: The dataset split to use. e.g. ``train``, ``val`` etc. Only stored on the instance here.
        :type set: str, optional.

        :param transform: Callable applied on the (RGB) PIL images before passing them to the CNN model.\
         If ``None`` (default), the following is used:

            >>> transform = transforms.ToTensor()

        For convenience, a transform *class* (e.g. ``transforms.ToTensor``) is also accepted and is instantiated\
        without arguments.

        :type transform: callable, optional.

        """
        # call base constructor
        super(GenerateFeatureMaps, self).__init__()

        # A transform must be a callable *instance*: calling the class itself on an image would try to
        # construct it with the image as argument (``transforms.ToTensor(img)`` raises a TypeError).
        if transform is None:
            transform = transforms.ToTensor()
        elif isinstance(transform, type):
            transform = transform()

        # parse params
        self.image_dir = image_dir
        self.set = set
        self.cnn_model = cnn_model
        self.num_blocks = num_blocks
        self.transform = transform
        self.filename_template = filename_template

        # Get specified pretrained cnn model
        cnn = getattr(torchvision.models, self.cnn_model)(pretrained=True)

        # First layers of the network (take the 3-channel image as input)
        layers = [
            cnn.conv1,
            cnn.bn1,
            cnn.relu,
            cnn.maxpool,
        ]

        # get subsequent layers (layer1 .. layer(num_blocks - 1)): May not work for all torchvision.models!
        layers.extend(getattr(cnn, 'layer%d' % i) for i in range(1, self.num_blocks))

        # build pretrained cnn cut at specified layer
        self.model = torch.nn.Sequential(*layers)

        # move it to CUDA (if available) & specify evaluation behavior
        if torch.cuda.is_available():
            self.model.cuda()
        self.model.eval()

        # set the dataset size as the number of entries in the folder.
        # NOTE: every directory entry is counted, whether or not it matches ``filename_template``.
        self.length = len(os.listdir(os.path.expanduser(self.image_dir)))

    def _image_path(self, index):
        """
        Builds the path of the image associated to ``index``.

        The same ``~`` expansion as the one used to compute the dataset length is applied, so that both agree
        on the directory being read.

        :param index: index of the sample.

        :return: path to the image file (str).

        """
        filename = self.filename_template.format(str(index).zfill(6))
        return os.path.join(os.path.expanduser(self.image_dir), filename)

    def __getitem__(self, index):
        """
        Gets an image from the ``image_dir`` and applies the transform on it.

        :param index: index of the sample to get.

        :return: output of ``self.transform`` on the RGB image. With the default ``transforms.ToTensor()``,\
         this is a tensor of shape [3, height, width].

        """
        # open image; the context manager releases the file handle once the RGB copy has been made.
        with Image.open(self._image_path(index)) as raw_img:
            img = raw_img.convert('RGB')

        # apply transform & return the result.
        return self.transform(img)

    def __len__(self):
        """
        :return: length of dataset.
        """
        return self.length