Source code for elm.elmr

# -*- coding: utf-8 -*-

"""
    This file contains the ELMRandom class and all developed methods.
"""

# Python2 support
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

from .mltools import *

import numpy as np
import optunity
import ast

import sys
if sys.version_info < (3, 0):
    import ConfigParser as configparser
else:
    import configparser

try:
    from scipy.special import expit
except ImportError:
    _SCIPY = 0
else:
    _SCIPY = 1

# Find configuration file
from pkg_resources import Requirement, resource_filename
_ELMR_CONFIG = resource_filename(Requirement.parse("elm"), "elm/elmr.cfg")


class ELMRandom(MLTools):
    """
        A Python implementation of ELM Random Neurons defined by Huang[1].

        An ELM is a single-hidden-layer feedforward network (SLFN) proposed
        by Huang in 2006; in 2012 the author revised his previous work and
        introduced the concept of using kernel functions.

        This implementation currently accepts both methods proposed in 2012,
        random neurons and kernel functions, to estimate classifier/regression
        functions.

        Let the dimensionality "d" of the problem be the sum of "t" size
        (number of targets per pattern) and "f" size (number of features per
        pattern). So, d = t + f

        The data will be set as Pattern = (Target | Features).

        If the database has *N* patterns, its size is *Nxd*.

        Note:
            [1] Paper reference: Huang, 2012, "Extreme Learning Machine for
            Regression and Multiclass Classification"

        Attributes:
            input_weight (numpy.ndarray): a random matrix (*Lx(d-1)*) needed
                to calculate H(**x**).
            output_weight (numpy.ndarray): a column vector (*Lx1*) calculated
                after training, represents :math:`\\beta`.
            bias_of_hidden_neurons (numpy.ndarray): a random column vector
                (*Lx1*) needed to calculate H(**x**).
            param_function (str): function that will be used for training.
            param_c (float): regularization coefficient (*C*) used for
                training.
            param_l (int): number of neurons that will be used for training.
            param_opt (bool): a boolean that enables an optimization when the
                number of training patterns is much larger than the number of
                neurons (N >> L).

        Other Parameters:
            regressor_name (str): The name of classifier/regressor.
            available_functions (list of str): List with all available
                functions.
            default_param_function (str): Default function if not set at
                class constructor.
            default_param_c (float): Default parameter c value if not set at
                class constructor.
            default_param_l (int): Default number of neurons if not set at
                class constructor.
            default_param_opt (bool): Default boolean optimization flag.

        Note:
            * **regressor_name**: defaults to "elmr".
            * **default_param_function**: defaults to "sigmoid".
            * **default_param_c**: defaults to 2 ** -6.
            * **default_param_l**: defaults to 500.
            * **default_param_opt**: defaults to False.

    """
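    # Illustrative note (not part of the original source): for one target
    # and four features, d = 1 + 4 = 5, so a database of N = 200 patterns
    # is a 200x5 matrix whose first column holds the targets:
    #
    #     >>> import numpy as np
    #     >>> features = np.random.rand(200, 4)
    #     >>> targets = np.random.rand(200, 1)
    #     >>> database = np.hstack((targets, features))  # (Target | Features)
    #     >>> database.shape
    #     (200, 5)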
    def __init__(self, params=[]):
        """
            Class constructor.

            Arguments:
                params (list): first argument (*str*) is an available
                    function, second argument (*float*) is the coefficient
                    *C* of regularization, the third is the number of hidden
                    neurons and the last argument is an optimization boolean.

            Example:

                >>> import elm
                >>> params = ["sigmoid", 1, 500, False]
                >>> elmr = elm.ELMRandom(params)

        """
        super(ELMRandom, self).__init__()

        self.available_functions = ["sigmoid", "multiquadric"]

        self.regressor_name = "elmr"

        self.default_param_function = "sigmoid"
        self.default_param_c = 2 ** -6
        self.default_param_l = 500
        self.default_param_opt = False

        self.input_weight = []
        self.output_weight = []
        self.bias_of_hidden_neurons = []

        # Initialize parameter values
        if not params:
            self.param_function = self.default_param_function
            self.param_c = self.default_param_c
            self.param_l = self.default_param_l
            self.param_opt = self.default_param_opt
        else:
            self.param_function = params[0]
            self.param_c = params[1]
            self.param_l = params[2]
            self.param_opt = params[3]

    # ########################
    # Private Methods
    # ########################
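    # Illustrative sketch (not part of the original source): for the default
    # "sigmoid" function, the two methods below amount to drawing a weight
    # matrix W ~ U(-1, 1) of shape (L, d-1) and a bias b ~ U(0, 1) of shape
    # (L, 1), then computing the (L, N) hidden layer output
    #
    #     H = expit(np.dot(W, X.T) + b)
    #
    # where X is the (N, d-1) feature matrix and expit(z) = 1 / (1 + exp(-z)).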
    def __set_random_weights(self, number_of_hidden_nodes,
                             number_of_attributes):
        """
            Initialize the random input weights and biases needed to
            calculate the hidden layer output.

            Arguments:
                number_of_hidden_nodes (int): number of neurons.
                number_of_attributes (int): number of features.

        """
        self.input_weight = np.random.rand(number_of_hidden_nodes,
                                           number_of_attributes) * 2 - 1
        self.bias_of_hidden_neurons = np.random.rand(number_of_hidden_nodes,
                                                     1)

    def __map_hidden_layer(self, function_type, number_hidden_nodes, data):
        """
            Map argument "data" to the hidden layer feature space.

            Arguments:
                function_type (str): function used to map input data to the
                    feature space.
                number_hidden_nodes (int): number of hidden neurons.
                data (numpy.ndarray): data to be mapped to the feature space.

            Returns:
                numpy.ndarray: mapped data.

        """
        number_of_data = data.shape[0]

        if function_type == "sigmoid" or function_type == "sig" or \
                function_type == "sin" or function_type == "sine" or \
                function_type == "hardlim" or \
                function_type == "tribas":

            temp = np.dot(self.input_weight, data.conj().T)
            bias_matrix = np.tile(self.bias_of_hidden_neurons,
                                  number_of_data)
            temp = temp + bias_matrix

        elif function_type == "mtquadric" or function_type == "multiquadric":
            temph1 = np.tile(np.sum(data ** 2, axis=1).reshape(-1, 1),
                             number_hidden_nodes)
            temph2 = \
                np.tile(np.sum(self.input_weight ** 2, axis=1).reshape(-1, 1),
                        number_of_data)
            temp = temph1 + temph2.conj().T \
                - 2 * np.dot(data, self.input_weight.conj().T)
            temp = temp.conj().T + \
                np.tile(self.bias_of_hidden_neurons ** 2, number_of_data)

        elif function_type == "gaussian" or function_type == "rbf":
            temph1 = np.tile(np.sum(data ** 2, axis=1).reshape(-1, 1),
                             number_hidden_nodes)
            temph2 = \
                np.tile(np.sum(self.input_weight ** 2, axis=1).reshape(-1, 1),
                        number_of_data)
            temp = temph1 + temph2.conj().T \
                - 2 * np.dot(data, self.input_weight.conj().T)
            temp = \
                np.multiply(temp.conj().T,
                            np.tile(self.bias_of_hidden_neurons,
                                    number_of_data))

        else:
            print("Error: Invalid function type")
            return

        if function_type == "sigmoid" or function_type == "sig":
            if _SCIPY:
                h_matrix = expit(temp)
            else:
                h_matrix = 1 / (1 + np.exp(-temp))
        elif function_type == "sine" or function_type == "sin":
            h_matrix = np.sin(temp)
        elif function_type == "mtquadric" or function_type == "multiquadric":
            h_matrix = np.sqrt(temp)
        elif function_type == "gaussian" or function_type == "rbf":
            h_matrix = np.exp(temp)
        else:
            print("Error: Invalid function type")
            return

        return h_matrix

    def _local_train(self, training_patterns, training_expected_targets,
                     params):

        # If params not provided, use parameter values set at initialization
        if params:
            self.param_function = params[0]
            self.param_c = params[1]
            self.param_l = params[2]
            self.param_opt = params[3]

        number_of_attributes = training_patterns.shape[1]

        self.__set_random_weights(self.param_l, number_of_attributes)

        h_train = self.__map_hidden_layer(self.param_function, self.param_l,
                                          training_patterns)

        # If N >> L, param_opt should be True
        if self.param_opt:
            self.output_weight = np.linalg.solve(
                (np.eye(h_train.shape[0]) / self.param_c) +
                np.dot(h_train, h_train.conj().T),
                np.dot(h_train, training_expected_targets))
        else:
            self.output_weight = np.dot(h_train, np.linalg.solve(
                (np.eye(h_train.shape[1]) / self.param_c) +
                np.dot(h_train.conj().T, h_train),
                training_expected_targets))

        training_predicted_targets = np.dot(h_train.conj().T,
                                            self.output_weight)

        return training_predicted_targets
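    # Illustrative note (not part of the original source): with H of shape
    # (L, N) and targets T of shape (N, 1), the two branches above compute
    # the same regularized least-squares output weights
    #
    #     beta = H (I_N / C + H^T H)^{-1} T     (default branch, N x N solve)
    #     beta = (I_L / C + H H^T)^{-1} H T     (param_opt branch, L x L solve)
    #
    # which are equal by the push-through identity; the L x L form is
    # cheaper when N >> L.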
    def _local_test(self, testing_patterns, testing_expected_targets,
                    predicting):

        h_test = self.__map_hidden_layer(self.param_function, self.param_l,
                                         testing_patterns)

        testing_predicted_targets = np.dot(h_test.conj().T,
                                           self.output_weight)

        return testing_predicted_targets

    # ########################
    # Public Methods
    # ########################
    def search_param(self, database, dataprocess=None,
                     path_filename=("", ""), save=False, cv="ts", of="rmse",
                     f=None, eval=50):
        """
            Search for the best hyperparameters for classifier/regressor
            based on optunity algorithms.

            Arguments:
                database (numpy.ndarray): a matrix containing all patterns
                    that will be used for training/testing at some
                    cross-validation method.
                dataprocess (DataProcess): an object that will pre-process
                    database before training. Defaults to None.
                path_filename (tuple): *TODO*.
                save (bool): *TODO*.
                cv (str): Cross-validation method. Defaults to "ts".
                of (str): Objective function to be minimized at
                    optunity.minimize. Defaults to "rmse".
                f (list of str): a list of functions to be used by the
                    search. Defaults to None, which sets all available
                    functions.
                eval (int): Number of steps (evaluations) for the optunity
                    algorithm.

            Each set of hyperparameters will perform a cross-validation
            method chosen by param cv.

            Available *cv* methods:
                - "ts" :func:`mltools.time_series_cross_validation()`
                    Perform a time-series cross-validation suggested by
                    Hyndman.

                - "kfold" :func:`mltools.kfold_cross_validation()`
                    Perform a k-fold cross-validation.

            Available *of* functions:
                - "accuracy", "rmse", "mape", "me".

            See Also:
                http://optunity.readthedocs.org/en/latest/user/index.html

        """

        if f is None:
            search_functions = self.available_functions
        elif type(f) is list:
            search_functions = f
        else:
            raise Exception("Invalid format for argument 'f'.")

        print(self.regressor_name)
        print("##### Start search #####")

        config = configparser.ConfigParser()

        if sys.version_info < (3, 0):
            config.readfp(open(_ELMR_CONFIG))
        else:
            config.read_file(open(_ELMR_CONFIG))

        best_function_error = 99999.9
        temp_error = best_function_error
        best_param_function = ""
        best_param_c = 0
        best_param_l = 0
        for function in search_functions:

            if sys.version_info < (3, 0):
                elmr_c_range = ast.literal_eval(config.get("DEFAULT",
                                                           "elmr_c_range"))
                neurons = config.getint("DEFAULT", "elmr_neurons")
            else:
                function_config = config["DEFAULT"]
                elmr_c_range = \
                    ast.literal_eval(function_config["elmr_c_range"])
                neurons = ast.literal_eval(function_config["elmr_neurons"])

            param_ranges = [[elmr_c_range[0][0], elmr_c_range[0][1]]]

            def wrapper_opt(param_c):
                """
                    Wrapper for optunity.
                """

                if cv == "ts":
                    cv_tr_error, cv_te_error = \
                        time_series_cross_validation(self, database,
                                                     params=[function,
                                                             2 ** param_c,
                                                             neurons,
                                                             False],
                                                     number_folds=10,
                                                     dataprocess=dataprocess)

                elif cv == "kfold":
                    cv_tr_error, cv_te_error = \
                        kfold_cross_validation(self, database,
                                               params=[function,
                                                       2 ** param_c,
                                                       neurons,
                                                       False],
                                               number_folds=10,
                                               dataprocess=dataprocess)

                else:
                    raise Exception("Invalid type of cross-validation.")

                if of == "accuracy":
                    util = 1 / cv_te_error.get_accuracy()
                else:
                    util = cv_te_error.get(of)

                # print("c:", param_c, "util: ", util)
                return util

            optimal_pars, details, _ = \
                optunity.minimize(wrapper_opt,
                                  solver_name="cma-es",
                                  num_evals=eval,
                                  param_c=param_ranges[0])

            # Save best function result
            if details[0] < temp_error:
                temp_error = details[0]

                if of == "accuracy":
                    best_function_error = 1 / temp_error
                else:
                    best_function_error = temp_error

                best_param_function = function
                best_param_c = optimal_pars["param_c"]
                best_param_l = neurons

            if of == "accuracy":
                print("Function: ", function,
                      " best cv value: ", 1 / details[0])
            else:
                print("Function: ", function,
                      " best cv value: ", details[0])

        # MLTools attribute
        self.cv_best_rmse = best_function_error

        # elmr attributes
        self.param_function = best_param_function
        self.param_c = best_param_c
        self.param_l = best_param_l

        print("##### Search complete #####")
        self.print_parameters()

        return None
    def print_parameters(self):
        """
            Print current parameters.
        """

        print()
        print("Regressor Parameters")
        print()
        print("Regularization coefficient: ", self.param_c)
        print("Function: ", self.param_function)
        print("Hidden Neurons: ", self.param_l)
        print()
        print("CV error: ", self.cv_best_rmse)
        print("")
        print()
    def get_available_functions(self):
        """
            Return available functions.
        """

        return self.available_functions
    def train(self, training_matrix, params=[]):
        """
            Calculate output_weight values needed to test/predict data.

            If params is provided, this method will use it at the training
            phase. Otherwise, it will use the default values provided at
            object initialization.

            Arguments:
                training_matrix (numpy.ndarray): a matrix containing all
                    patterns that will be used for training.
                params (list): a list of parameters defined at
                    :func:`ELMRandom.__init__`

            Returns:
                :class:`Error`: training error object containing expected,
                    predicted targets and all error metrics.

            Note:
                Training matrix must have target variables as the first
                column.
        """

        return self._ml_train(training_matrix, params)
    def test(self, testing_matrix, predicting=False):
        """
            Calculate test predicted values based on previous training.

            Args:
                testing_matrix (numpy.ndarray): a matrix containing all
                    patterns that will be used for testing.
                predicting (bool): internal flag used by the prediction
                    routines; leave at the default False.

            Returns:
                :class:`Error`: testing error object containing expected,
                    predicted targets and all error metrics.

            Note:
                Testing matrix must have target variables as the first
                column.
        """

        return self._ml_test(testing_matrix, predicting)
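    # Illustrative usage sketch (not part of the original source),
    # assuming "data" is a numpy matrix in the (Target | Features) layout;
    # the 80/20 split is an arbitrary choice for the example:
    #
    #     >>> elmr = elm.ELMRandom()
    #     >>> tr_set, te_set = elm.split_sets(data, training_percent=.8)
    #     >>> tr_error = elmr.train(tr_set)
    #     >>> te_error = elmr.test(te_set)
    #     >>> te_error.get("rmse")  # doctest: +SKIP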
    @copy_doc_of(MLTools._ml_predict)
    def predict(self, horizon=1):
        return self._ml_predict(horizon)
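    # Illustrative sketch (not part of the original source): after training
    # on a time series, predict() forecasts the next "horizon" values via
    # the inherited MLTools routine:
    #
    #     >>> elmr.train(series_matrix)            # doctest: +SKIP
    #     >>> forecast = elmr.predict(horizon=24)  # doctest: +SKIP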
    @copy_doc_of(MLTools._ml_train_iterative)
    def train_iterative(self, database_matrix, params=[], sliding_window=168,
                        k=1):
        return self._ml_train_iterative(database_matrix, params,
                                        sliding_window, k)
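
# ----------------------------------------------------------------------
# Minimal end-to-end sketch (not part of the original module). It builds
# a synthetic regression database in the (Target | Features) layout and
# runs a plain train/test split; the 160/40 split, the parameter list and
# the linear target rule are arbitrary choices for the demonstration.
if __name__ == "__main__":
    # 200 patterns, 1 target + 4 features => a 200x5 database
    demo_features = np.random.rand(200, 4)
    demo_targets = np.dot(demo_features,
                          np.array([[1.0], [-2.0], [0.5], [3.0]]))
    demo_database = np.hstack((demo_targets, demo_features))

    demo_elmr = ELMRandom(["sigmoid", 2 ** -6, 50, False])
    demo_tr_error = demo_elmr.train(demo_database[:160])
    demo_te_error = demo_elmr.test(demo_database[160:])

    # Error objects expose metrics through get(), mirroring the
    # cv_te_error.get(of) call inside search_param above.
    print("train rmse: ", demo_tr_error.get("rmse"))
    print("test rmse: ", demo_te_error.get("rmse"))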