Source code for itea._base._BaseITExpr

# Author:  Guilherme Aldeia
# Contact: guilherme.aldeia@ufabc.edu.br
# Version: 1.0.2
# Last modified: 06-25-2021 by Guilherme Aldeia


"""Base class to represent an IT expression.
"""

import numpy as np

from sklearn.base             import BaseEstimator
from sklearn.utils.validation import check_array


class BaseITExpr(BaseEstimator):
    """This class describes the structure that an ``ITExpr`` should have,
    and implements only the methods that behave the same way for
    classification and regression.

    The ITEA implementations for classification and regression create a
    population of ``ITExpr`` instances and evolve this population to find
    a final best solution, ``itea.bestsol_``.

    The best solution is a scikit-learn estimator and can be used with
    many scikit-learn utilities. It can also be used to create ICE and PDP
    plots, which are particularly interesting to complement the
    explanations given by the ``ITExpr_explainer``.

    Methods that should be specialized are declared here as virtual
    methods. In practice, this class should never be instantiated.
    """

    def __init__(self, *, expr, tfuncs, labels=[], **kwargs):
        r"""Constructor method.

        Parameters
        ----------
        expr : list of Tuple[Transformation, Interaction]
            list of IT terms to create an IT expression. It **must** be a
            python built-in list.

            An IT term is the tuple :math:`(t, p)`, where
            :math:`t : \mathbb{R} \rightarrow \mathbb{R}` is a unary
            function called the **transformation** function, and
            :math:`p \in \mathbb{R}^d` is a vector of size :math:`d`,
            where :math:`d` is the number of variables of the problem.

            The tuple contains the information to create an expression:

            :math:`ITterm(x) = t \circ p(x),`

            where :math:`p` is the **interaction** of the variables:

            :math:`p(x) = \prod_{i=1}^{d} x_{i}^{k_{i}}.`

            Each IT term is a tuple containing the name of the
            transformation function and a list of exponents to be used in
            the interaction function.

            The whole expression is evaluated as

            :math:`f(x) = \sum_{i=1}^{n} w_i \cdot t_i \circ p_i(x),`

            with :math:`w_i` being a coefficient adjusted by the ``fit()``
            method, and :math:`n` the number of terms.

        tfuncs : dict
            dict where the keys are the names of the transformation
            functions and the values are unary vectorized functions (for
            example, numpy functions). For user-defined functions, see
            ``numpy.vectorize`` for more information on how to vectorize
            your transformation functions.

        labels : list of strings, default=[]
            list containing the labels of the variables. When the list of
            labels is empty, the variables are named
            :math:`x_0, x_1, \cdots`.
        """

        self.expr    = expr
        self.labels  = labels
        self.tfuncs  = tfuncs
        self.n_terms = len(expr)

        # attributes that are changed by the fit method
        self._is_fitted = False
        self._fitness   = np.inf
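    # A minimal sketch of the ``expr``/``tfuncs`` encoding (illustrative
    # only; the names below are made up for the example). The term list
    # encodes f(x) = w_0*id(a * b^2) + w_1*sqrt(b). Subclasses are used in
    # practice, but the base class accepts the same arguments:
    #
    #   >>> import numpy as np
    #   >>> expr   = [("id", [1, 2]), ("sqrt", [0, 1])]
    #   >>> tfuncs = {"id": lambda x: x, "sqrt": np.sqrt}
    #   >>> itexpr = BaseITExpr(expr=expr, tfuncs=tfuncs, labels=["a", "b"])
    #   >>> itexpr.n_terms
    #   2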
    def to_str(self, places=3, term_separator=None):
        r"""Method to represent the IT expression as a string.

        The variable names used are the ones given to ``labels`` in the
        constructor.

        Some simplifications are made to omit trivial operations:

        - if a variable has zero as exponent, it is omitted (since it
          evaluates to 1 regardless of the value of x);
        - if the coefficient (or all coefficients, in the multi-class
          task) is zero, the whole term is omitted.

        Parameters
        ----------
        places : int, default=3
            number of decimal places to round the coefficients to when
            printing the expression.

        term_separator : string or None, default=None
            string used to concatenate the terms. Suggestions are
            ``['\n', ' + ', ', ']``. If set to None, the separator used
            is ``' + '``.
        """

        if term_separator is None:
            term_separator = " + "

        # If not fitted, use placeholder coefficients and intercept
        coefs     = np.ones(self.n_terms)
        intercept = np.array(0.0)

        if self._is_fitted:
            coefs     = self.coef_
            intercept = self.intercept_

        str_terms = []
        for w, (fi_str, ti) in zip(coefs.T, self.expr):
            if np.all(w == 0):
                continue

            w_str = f"{w.round(places)}*"

            t_str = " * ".join([
                f"placeholder_{i}" + (f"^{t}" if t != 1 else "")
                for i, t in enumerate(ti) if t != 0
            ])

            str_terms.append(f"{w_str}{fi_str}({t_str})")

        expr_str = term_separator.join(str_terms)

        if len(self.labels) > 0:
            for i, l in enumerate(self.labels):
                expr_str = expr_str.replace(f"placeholder_{i}", l)
        else:
            expr_str = expr_str.replace("placeholder_", "x_")

        return expr_str + f"{term_separator}{intercept.round(places)}"
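    # Before ``fit()``, ``to_str()`` uses placeholder coefficients equal
    # to 1 and an intercept of 0 (a sketch continuing the hypothetical
    # ``itexpr`` built in the constructor example above):
    #
    #   >>> print(itexpr.to_str())
    #   1.0*id(a * b^2) + 1.0*sqrt(b) + 0.0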
    def _eval(self, X):
        r"""Method to evaluate each IT term on a given data set, mapping
        from the original variable space to the IT expression space.

        Parameters
        ----------
        X : numpy.array of shape (n_samples, n_features)
            data set to be evaluated

        Returns
        -------
        Z : numpy.array of shape (n_samples, n_terms)
            the Z matrix has one column for each IT term in the
            expression, where the column ``Z[:, i]`` is the evaluation of
            the i-th term for all samples. This translates to:

            :math:`Z_{(:, i)} = t_i \circ p_i(x).`
        """

        Z = np.zeros( (len(X), self.n_terms) )

        for i, (fi, ti) in enumerate(self.expr):
            Z[:, i] = self.tfuncs[fi]( np.prod(np.power(X, ti), axis=1) )

        return Z


    def __str__(self):
        """Overload of the ``__str__`` method. Calls the ``to_str()``
        method with the default values for the arguments.
        """

        return self.to_str()
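    # A quick sketch of the IT-space mapping performed by ``_eval`` (using
    # the hypothetical ``itexpr`` from the constructor example). Each
    # column of Z is one term evaluated on all samples:
    #
    #   >>> X = np.array([[2.0, 3.0],
    #   ...               [1.0, 4.0]])
    #   >>> itexpr._eval(X)    # column 0: a*b^2, column 1: sqrt(b)
    #   array([[18.        ,  1.73205081],
    #          [16.        ,  2.        ]])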
    def complexity(self):
        """Method to calculate the size of the IT expression as if it were
        an expression tree, the conventional representation in symbolic
        regression.

        The same simplifications made in ``to_str()`` are applied here, so
        the complexity value corresponds to the string returned by that
        method.

        Returns
        -------
        complexity : int
            the number of nodes that a symbolic tree would have if the IT
            expression were converted to one.
        """

        coefs = np.ones(self.n_terms)
        if hasattr(self, "coef_"):
            coefs = self.coef_

        tlen = 0
        for coef, (fi, ti) in zip(coefs, self.expr):
            if np.all(coef == 0):
                continue

            # coefficient, multiplication and transformation function
            tlen += 3

            # exponents other than 0 and 1 are always a 3-node subtree
            tlen += sum([3 for t in ti if t not in [0, 1]])

            # when the exponent is 1, x^1 = x and we count only 1 node
            tlen += sum([1 for t in ti if t == 1])

            # exponents equal to 0 are discarded, since x^0 = 1

            # multiplications between the variables in the interaction
            tlen += sum([1 for t in ti if t != 0]) - 1

        # sums between the terms and the intercept
        return tlen + self.n_terms + 1
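    # A worked node count for the hypothetical expression above,
    # f(x) = w0*id(a * b^2) + w1*sqrt(b), before fitting:
    #
    #   term 1, id(a * b^2): 3 (coef, *, id) + 3 (b^2 subtree)
    #                        + 1 (a alone) + 1 (interaction *)  =  8
    #   term 2, sqrt(b):     3 (coef, *, sqrt) + 1 (b alone)    =  4
    #   sums + intercept:    n_terms + 1                        =  3
    #
    #   >>> itexpr.complexity()
    #   15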
    def gradient(self, X, tfuncs_dx, logit=False):
        r"""Method to evaluate the gradient of the IT expression for all
        data points in ``X``.

        The gradients are used by the ``ITExpr_explainer`` class, which
        calculates feature importances and generates plots from the
        gradient information.

        Parameters
        ----------
        X : numpy.array of shape (n_samples, n_features)
            points where the gradients are evaluated.

        tfuncs_dx : dict
            dictionary like ``tfuncs``, where each key is the name of a
            function (it should contain the derivative of every function
            in ``tfuncs``) and each value is a vectorized function
            representing its derivative.

        logit : boolean, default=False
            boolean variable indicating whether the IT expression is being
            used as a linear model or as the linear part of a logistic
            regression predictor. When True, the derivative of the
            logistic regression must be taken into account.

            Let :math:`it(x)` be the IT expression used in a logit model:

            :math:`logit(x) = \frac{1}{1 + e^{-it(x)}}.`

            The partial derivative needed to calculate the gradient is:

            :math:`\frac{\partial}{\partial x_i} logit(x) =
            \frac{e^{it(x)} it'(x)}{(e^{it(x)} + 1)^2}.`

        Returns
        -------
        nabla : numpy.array of shape (n_classes, n_samples, n_features)
            a 3-dimensional array. For regression and binary
            classification, ``n_classes=1``. Each row of ``nabla[i]`` is
            the gradient evaluated at the corresponding sample in X. To
            illustrate:

            - gradient of observation i for regression:
              ``gradient(X, tfuncs_dx)[0, i, :]``;
            - gradient of observation i according to the coefficients used
              to classify class j in multi-class classification:
              ``gradient(X, tfuncs_dx, logit=True)[j, i, :]``.
        """

        X = check_array(X)

        # the gradients can be calculated even before fit
        intercept = [0.0]
        if hasattr(self, "intercept_"):
            intercept = self.intercept_

        coefs = np.ones(self.n_terms)
        if hasattr(self, "coef_"):
            coefs = self.coef_

        # if coefs.ndim == 1, then it is a regression ITExpr. Make it 2d
        if coefs.ndim == 1:
            coefs = coefs.reshape(-1, 1).T

        # Storing the gradient of each term
        term_gradients = []

        # i iterates over terms, j over variables
        for j in range(X.shape[1]):

            # evaluating the partial derivative w.r.t. each variable
            g_partialx = np.zeros( (len(X), self.n_terms) )

            for i, (fi, ti) in enumerate(self.expr):
                ti_aux = np.copy(ti)
                ti_aux[j] = ti_aux[j] - 1

                pi = np.prod(np.power(X, ti), axis=1)

                # avoid multiplying zero and nan, which would result in nan
                if ti[j] == 0:
                    pi_partialx = np.zeros_like(X[:, j])
                else:
                    pi_partialx = ti[j] * np.prod(
                        np.power(X, ti_aux), axis=1)

                g_partialx[:, i] = tfuncs_dx[fi](pi) * pi_partialx

            term_gradients.append(g_partialx)

        gradients = np.array(
            [np.dot(term_gradients, coef).T for coef in coefs])

        if logit:
            it_eval = np.dot(self._eval(X), coefs.T) + intercept

            for c_idx in range(gradients.shape[0]):      # number of classes
                for x_idx in range(gradients.shape[2]):  # number of variables
                    gradients[c_idx, :, x_idx] = np.divide(
                        np.exp(it_eval)[:, c_idx]
                            * gradients[c_idx, :, x_idx],
                        np.power(np.exp(it_eval)[:, c_idx] + 1.0, 2)
                    )

        return gradients
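    # A sketch of a gradient evaluation (hypothetical ``itexpr`` as above;
    # ``tfuncs_dx`` holds the derivative of each transformation function).
    # For f(x) = a*b^2 + sqrt(b), the gradient at (2, 3) is
    # (b^2, 2*a*b + 1/(2*sqrt(b))) = (9, 12.2887...):
    #
    #   >>> tfuncs_dx = {"id":   lambda x: np.ones_like(x),
    #   ...              "sqrt": lambda x: 1.0 / (2.0 * np.sqrt(x))}
    #   >>> itexpr.gradient(np.array([[2.0, 3.0]]), tfuncs_dx)
    #   array([[[ 9.        , 12.28867513]]])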
    def covariance_matrix(self, X, y):
        """Virtual method to estimate the covariance matrix. Should be
        overridden by subclasses.
        """

        raise NotImplementedError()
    def fit(self, X, y):
        """Virtual fit method. Should be overridden by subclasses.

        It takes a dictionary of named arguments, so subclasses can define
        parameters specific to their fit method.
        """

        raise NotImplementedError()
    def predict(self, X):
        """Virtual predict method. Should be overridden by subclasses.
        """

        raise NotImplementedError()
    def predict_proba(self, X):
        """Virtual predict_proba method. Should be overridden by
        subclasses.
        """

        raise NotImplementedError()
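# A minimal sketch of a regression subclass filling in the virtual methods
# (illustrative only; the actual estimators shipped with the package live
# in its regression and classification modules):
#
#   >>> class TinyITExpr(BaseITExpr):
#   ...     def fit(self, X, y):
#   ...         # least-squares fit of coefficients and intercept on the
#   ...         # IT-space matrix Z returned by _eval
#   ...         Z = np.hstack([self._eval(X), np.ones((len(X), 1))])
#   ...         w, *_ = np.linalg.lstsq(Z, y, rcond=None)
#   ...         self.coef_, self.intercept_ = w[:-1], w[-1]
#   ...         self._is_fitted = True
#   ...         return self
#   ...     def predict(self, X):
#   ...         return np.dot(self._eval(X), self.coef_) + self.intercept_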