Source code for miprometheus.models.lstm.lstm_model

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) IBM Corporation 2018
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""lstm_model.py: File containing Long Short-Term Memory model class."""
__author__ = "Alexis Asseman, Tomasz Kornuta"

import torch
from torch import nn

from miprometheus.models.sequential_model import SequentialModel


class LSTM(SequentialModel):
    """
    Class implementing the Long Short-Term Memory model.
    """
    def __init__(self, params, problem_default_values_={}):
        """
        Constructor. Initializes parameters on the basis of dictionary passed
        as argument.

        :param params: Local view to the Parameter Registry ``model`` section.

        :param problem_default_values_: Dictionary containing key-values received from problem.

        """
        super(LSTM, self).__init__(params)

        # Parse default values received from problem.
        self.params.add_default_params({
            'input_item_size': problem_default_values_['input_item_size'],
            'output_item_size': problem_default_values_['output_item_size']
        })

        self.input_item_size = params["input_item_size"]
        self.output_item_size = params["output_item_size"]

        self.hidden_state_size = params["hidden_state_size"]
        self.num_layers = params["num_layers"]
        assert self.num_layers > 0, "Number of LSTM layers should be > 0"

        # Create the stacked LSTM.
        self.lstm_layers = nn.ModuleList()
        # First layer.
        self.lstm_layers.append(nn.LSTMCell(
            self.input_item_size, self.hidden_state_size))
        # Following, stacked layers.
        self.lstm_layers.extend(
            [nn.LSTMCell(self.hidden_state_size, self.hidden_state_size)
             for _ in range(1, self.num_layers)])

        # Output linear layer.
        self.linear = nn.Linear(self.hidden_state_size, self.output_item_size)
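
    # Illustrative sketch (assumption, not part of the original module): a
    # minimal ``model`` configuration section consistent with the keys read by
    # the constructor above could look as follows (values are arbitrary
    # examples):
    #
    #   model:
    #       hidden_state_size: 512
    #       num_layers: 1
    #
    # ``input_item_size`` and ``output_item_size`` need not be set there, as
    # they are taken from ``problem_default_values_`` supplied by the problem.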
    def forward(self, data_dict):
        """
        Forward function requires that the data_dict contains at least "sequences".

        :param data_dict: DataDict containing at least:
            - "sequences": a tensor of input data of size [BATCH_SIZE x LENGTH_SIZE x INPUT_SIZE]

        :returns: Predictions (logits) being a tensor of size [BATCH_SIZE x LENGTH_SIZE x OUTPUT_SIZE].

        """
        # Get dtype.
        dtype = self.app_state.dtype

        # Unpack dict.
        inputs_BxSxI = data_dict['sequences']

        # Get batch size.
        batch_size = inputs_BxSxI.size(0)

        # Create the hidden state tensors.
        h = [torch.zeros(batch_size, self.hidden_state_size,
                         requires_grad=False).type(dtype)
             for _ in range(self.num_layers)]

        # Create the internal (cell) state tensors.
        c = [torch.zeros(batch_size, self.hidden_state_size,
                         requires_grad=False).type(dtype)
             for _ in range(self.num_layers)]

        outputs = []
        # Process items one-by-one.
        for item in inputs_BxSxI.chunk(inputs_BxSxI.size(1), dim=1):
            # First layer consumes the input item.
            h[0], c[0] = self.lstm_layers[0](item.squeeze(1), (h[0], c[0]))
            # Remaining layers consume the hidden state of the layer below.
            for i in range(1, self.num_layers):
                h[i], c[i] = self.lstm_layers[i](h[i - 1], (h[i], c[i]))

            out = self.linear(h[-1])
            outputs += [out]

        outputs = torch.stack(outputs, 1)

        return outputs
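

if __name__ == '__main__':
    # Illustrative sketch (assumption, not part of the original module): the
    # same stacked-LSTMCell unrolling pattern used by LSTM.forward() above,
    # written in plain PyTorch so it can be run without the Mi-Prometheus
    # parameter registry or problem objects. All sizes below are arbitrary.
    batch_size, seq_len = 4, 10
    input_size, hidden_size, output_size = 8, 32, 8
    num_layers = 2

    # First cell maps inputs to the hidden size; further cells are stacked.
    layers = nn.ModuleList(
        [nn.LSTMCell(input_size, hidden_size)] +
        [nn.LSTMCell(hidden_size, hidden_size) for _ in range(1, num_layers)])
    linear = nn.Linear(hidden_size, output_size)

    inputs = torch.randn(batch_size, seq_len, input_size)
    h = [torch.zeros(batch_size, hidden_size) for _ in range(num_layers)]
    c = [torch.zeros(batch_size, hidden_size) for _ in range(num_layers)]

    outputs = []
    for item in inputs.chunk(seq_len, dim=1):
        h[0], c[0] = layers[0](item.squeeze(1), (h[0], c[0]))
        for i in range(1, num_layers):
            h[i], c[i] = layers[i](h[i - 1], (h[i], c[i]))
        outputs.append(linear(h[-1]))

    predictions = torch.stack(outputs, dim=1)
    print(predictions.shape)  # torch.Size([4, 10, 8])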