Source code for miprometheus.models.vision.alexnet_wrapper

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) IBM Corporation 2018
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
alexnet_wrapper.py: Contains a small wrapper class to the AlexNet model available in ``TorchVision``.

"""
__author__ = "Tomasz Kornuta, Younes Bouhadjar, Vincent Marois"

import torch
import numpy as np
from torchvision.models import alexnet

from miprometheus.models.model import Model



class AlexnetWrapper(Model):
    """
    Wrapper class to the AlexNet model from ``TorchVision``.
    """

    def __init__(self, params, problem_default_values_={}):
        """
        Constructor for the AlexNet wrapper. Simply instantiates the AlexNet model \
        from ``torchvision.models``.

        .. note::

            The model expects input images normalized as follows: \
            mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least `224`. \
            The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] \
            and std = [0.229, 0.224, 0.225].

        :param params: dictionary of parameters (read from the ``.yaml`` configuration file).

        :param problem_default_values_: default values coming from the ``Problem`` class.
        :type problem_default_values_: dict

        """
        # Call base constructor.
        super(AlexnetWrapper, self).__init__(params, problem_default_values_)

        try:
            # Number of output nodes.
            self.num_classes = problem_default_values_['num_classes']
        except KeyError:
            self.logger.warning("Couldn't retrieve the number of classes from problem_default_values_")
            exit(-1)

        # Set model from torchvision.
        self.model = alexnet(pretrained=params['pretrained'], num_classes=self.num_classes)

        self.name = 'AlexNetWrapper'

        self.data_definitions = {'images': {'size': [-1, -1, 224, 224], 'type': [torch.Tensor]},
                                 'targets': {'size': [-1, 1], 'type': [torch.Tensor]}
                                 }
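
    # The note in the constructor's docstring describes the preprocessing AlexNet expects.
    # A minimal sketch of such a pipeline, assuming the standard ``torchvision.transforms``
    # API (the exact transforms applied by a given ``Problem`` class may differ):
    #
    #   from torchvision import transforms
    #   preprocess = transforms.Compose([
    #       transforms.Resize(256),
    #       transforms.CenterCrop(224),
    #       transforms.ToTensor(),  # converts to a [0, 1] float tensor of shape (3, H, W)
    #       transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                            std=[0.229, 0.224, 0.225]),
    #   ])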

    def forward(self, data_dict):
        """
        Main forward pass of the AlexNet wrapper.

        :param data_dict: DataDict({'images',**}), where:

            - images: [batch_size, num_channels, width, height]

        :return: Predictions [batch_size, num_classes]

        """
        images = data_dict['images']

        # Check if the number of channels is different than 3 (e.g. for MNIST).
        if images.size(1) != 3:
            # inputs_size = (batch_size, num_channels, num_columns, num_rows)
            num_channel = 3
            inputs_size = (images.size(0), num_channel, images.size(2), images.size(3))
            inputs = torch.zeros(inputs_size).type(self.app_state.dtype)

            # Replicate the single input channel into each of the 3 RGB channels.
            for i in range(num_channel):
                inputs[:, None, i, :, :] = images

            # Pass the transformed images through the model.
            outputs = self.model(inputs)
        else:
            # Pass the images directly through the model.
            outputs = self.model(images)

        return outputs
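
    # The replication loop in ``forward`` above fills every RGB channel with the same
    # single-channel image. A minimal sketch of an equivalent formulation using tensor
    # broadcasting, assuming a single-channel input of shape [batch_size, 1, H, W]:
    #
    #   if images.size(1) == 1:
    #       inputs = images.expand(-1, 3, -1, -1)  # view repeating the channel 3 times
    #       outputs = self.model(inputs)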

    def plot(self, data_dict, predictions, sample_number=0):
        """
        Simple plot - shows the ``Problem``'s images with the target & actual predicted class.

        :param data_dict: DataDict({'images','targets', 'targets_label'})
        :type data_dict: utils.DataDict

        :param predictions: Predictions of the ``AlexnetWrapper``.
        :type predictions: torch.tensor

        :param sample_number: Index of the sample in batch (DEFAULT: 0).
        :type sample_number: int

        """
        # Check if we are supposed to visualize at all.
        if not self.app_state.visualize:
            return False

        import matplotlib.pyplot as plt

        # Unpack data_dict.
        images = data_dict['images']
        targets = data_dict['targets']

        # Get sample.
        image = images[sample_number].cpu().detach().numpy()
        target = targets[sample_number].cpu().detach().numpy()
        prediction = predictions[sample_number].cpu().detach().numpy()

        # Reshape image.
        if image.shape[0] == 1:
            # This is a single channel image - get rid of that dimension.
            image = np.squeeze(image, axis=0)
        else:
            # More channels - move channels to axis 2.
            # (X : array_like, shape (n, m) or (n, m, 3) or (n, m, 4))
            image = image.transpose(1, 2, 0)

        # Show data.
        plt.title('Prediction: Class # {} (Target: Class # {})'.format(
            np.argmax(prediction), target))
        plt.imshow(image, interpolation='nearest', aspect='auto')

        # Plot!
        plt.show()


if __name__ == '__main__':
    # Set visualization.
    from miprometheus.utils.app_state import AppState
    AppState().visualize = True

    from miprometheus.utils.param_interface import ParamInterface
    from torch.utils.data import DataLoader
    from miprometheus.problems import CIFAR10

    problem_params = ParamInterface()
    problem_params.add_config_params({'use_train_data': True,
                                      'root_dir': '~/data/cifar10',
                                      'padding': [0, 0, 0, 0],
                                      'up_scaling': True})
    batch_size = 64

    # Create problem.
    problem = CIFAR10(problem_params)
    print('Problem {} instantiated.'.format(problem.name))

    # Instantiate DataLoader object.
    dataloader = DataLoader(problem, batch_size=batch_size, collate_fn=problem.collate_fn)

    # Test base model.
    model_params = ParamInterface()
    model_params.add_config_params({'pretrained': False})

    # Model.
    model = AlexnetWrapper(model_params, problem.default_values)
    print('Model {} instantiated.'.format(model.name))

    # Perform handshaking between the model and the problem.
    model.handshake_definitions(problem.data_definitions)

    # Generate batches.
    for i_batch, sample in enumerate(dataloader):
        print('Sample # {} - {}'.format(i_batch, sample['images'].shape), type(sample))
        logits = model(sample)
        print(logits.shape)

        # Plot it and check whether window was closed or not.
        if model.plot(sample, logits):
            break
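

# A minimal, self-contained smoke-test sketch without the CIFAR10 problem, assuming only
# that the base ``Model`` constructor accepts the two arguments shown above and that
# ``{'num_classes': ...}`` is a sufficient set of default values (hypothetical
# configuration, for illustration only):
#
#   from miprometheus.utils.param_interface import ParamInterface
#   params = ParamInterface()
#   params.add_config_params({'pretrained': False})
#   model = AlexnetWrapper(params, problem_default_values_={'num_classes': 10})
#   fake_batch = {'images': torch.rand(4, 3, 224, 224)}
#   print(model(fake_batch).shape)  # expected: torch.Size([4, 10])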