Source code for miprometheus.problems.image_text_to_class.image_text_to_class_problem

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) IBM Corporation 2018
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""image_text_to_class_problem.py: contains abstract base class for Visual Question Answering problems."""
__author__ = "Tomasz Kornuta & Vincent Marois"


import torch
import torch.nn as nn
from miprometheus.problems.problem import Problem


class ObjectRepresentation(object):
    """
    Class storing some features representing an object being present in a given scene.

    Used in ShapeColorQuery and SortOfCLEVR.

    """

    def __init__(self, x, y, color, shape):
        """
        Represents an object.

        :param x: x coordinate.
        :param y: y coordinate.
        :param color: Color of the object.
        :param shape: Shape of the object.

        """
        self.x = x
        self.y = y
        self.color = color
        self.shape = shape


[docs]class ImageTextToClassProblem(Problem): """ Abstract base class for VQA (`Visual Question Answering`) problems. Problem classes like CLEVR inherits from it. Provides some basic features useful in all problems of such type. """
[docs] def __init__(self, params): """ Initializes problem: - Calls ``problems.problem.Problem`` class constructor, - Sets loss function to ``CrossEntropy``, - sets ``self.data_definitions`` to: >>> self.data_definitions = {'texts': {'size': [-1, -1], 'type': [torch.Tensor]}, >>> 'images': {'size': [-1, -1, -1, 3], 'type': [torch.Tensor]}, >>> 'targets': {'size': [-1, 1], 'type': [torch.Tensor]} >>> } :param params: Dictionary of parameters (read from configuration ``.yaml`` file). """ # Call base class constructors. super(ImageTextToClassProblem, self).__init__(params) # set default loss function self.loss_function = nn.CrossEntropyLoss() # set default data_definitions dict self.data_definitions = {'texts': {'size': [-1, -1], 'type': [torch.Tensor]}, 'images': {'size': [-1, -1, -1, 3], 'type': [torch.Tensor]}, 'targets': {'size': [-1, 1], 'type': [torch.Tensor]} } # "Default" problem name. self.name = 'ImageTextToClassProblem'
[docs] def calculate_accuracy(self, data_dict, logits): """ Calculates the accuracy as the mean number of correct answers in a given batch. :param data_dict: DataDict containing the targets. :type data_dict: DataDict :param logits: Predictions of the model. :return: Accuracy. """ # Get the index of the max log-probability. pred = logits.max(1, keepdim=True)[1] correct = pred.eq(data_dict['targets'].view_as(pred)).sum().item() # Calculate the accuracy. batch_size = logits.size(0) accuracy = correct / batch_size return accuracy
[docs] def add_statistics(self, stat_col): """ Add accuracy statistic to ``StatisticsCollector``. :param stat_col: ``StatisticsCollector``. """ # Add basic statistics. super(ImageTextToClassProblem, self).add_statistics(stat_col) stat_col.add_statistic('acc', '{:12.10f}') stat_col.add_statistic('batch_size', '{:06d}')
[docs] def collect_statistics(self, stat_col, data_dict, logits): """ Collects accuracy. :param stat_col: ``StatisticsCollector``. :param data_dict: DataDict containing the targets and the mask. :type data_dict: DataDict :param logits: Predictions of the model. """ # Collect basic statistics. super(ImageTextToClassProblem, self).collect_statistics(stat_col, data_dict, logits) stat_col['acc'] = self.calculate_accuracy(data_dict, logits) stat_col['batch_size'] = logits.shape[0] # Batch major.
[docs] def add_aggregators(self, stat_agg): """ Adds problem-dependent statistical aggregators to ``StatisticsAggregator``. :param stat_agg: ``StatisticsAggregator``. """ # Add basic aggregators. super(ImageTextToClassProblem, self).add_aggregators(stat_agg) stat_agg.add_aggregator('acc', '{:12.10f}') # represents the average accuracy stat_agg.add_aggregator('acc_min', '{:12.10f}') stat_agg.add_aggregator('acc_max', '{:12.10f}') stat_agg.add_aggregator('acc_std', '{:12.10f}') stat_agg.add_aggregator('samples_aggregated', '{:06d}')
[docs] def aggregate_statistics(self, stat_col, stat_agg): """ Aggregates the statistics collected by ``StatisticsCollector`` and adds the results to ``StatisticsAggregator``. :param stat_col: ``StatisticsCollector``. :param stat_agg: ``StatisticsAggregator``. """ # Aggregate base statistics. super(ImageTextToClassProblem, self).aggregate_statistics(stat_col, stat_agg) stat_agg['acc_min'] = min(stat_col['acc']) stat_agg['acc_max'] = max(stat_col['acc']) stat_agg['acc'] = torch.mean(torch.tensor(stat_col['acc'])) stat_agg['acc_std'] = 0.0 if len(stat_col['acc']) <= 1 else torch.std(torch.tensor(stat_col['acc'])) stat_agg['samples_aggregated'] = sum(stat_col['batch_size'])
if __name__ == '__main__': from miprometheus.utils.param_interface import ParamInterface sample = ImageTextToClassProblem(ParamInterface())[0] # equivalent to ImageTextToClassProblem(params={}).__getitem__(index=0) print(repr(sample))