# Author: Guilherme Aldeia
# Contact: guilherme.aldeia@ufabc.edu.br
# Version: 1.0.2
# Last modified: 06-25-2021 by Guilherme Aldeia
"""Base class to represent an IT expression.
"""
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_array
class BaseITExpr(BaseEstimator):
"""This class describes the structure that an ``ITExpr`` should have, and
implements only the methods that have similar behavior for classification
and regression.
The ITEA implementations for classification and regression will create
a population of ``ITExpr`` instances and evolve this population to find a
final best solution ``itea.bestsol_``.
    The best solution will be a scikit-learn estimator and can be used with
    many scikit-learn utilities. It can also be used to create ICE and PDP
    plots, which are particularly interesting to complement the explanations
    given by the ``ITExpr_explainer``.
    Methods that should be specialized are declared as virtual methods.
    In practice, this class should never be instantiated directly.
"""
def __init__(self, *, expr, tfuncs, labels=[], **kwargs):
r"""Constructor method.
Parameters
----------
expr : list of Tuple[Transformation, Interaction]
            list of IT terms to create an IT expression. It **must** be a
            Python built-in list.
An IT term is the tuple :math:`(t, p)`, where
:math:`t : \mathbb{R} \rightarrow \mathbb{R}` is a unary function
called **transformation** function, and :math:`p \in \mathbb{R}^d`
is a vector of size :math:`d`, where :math:`d` is the number of
variables of the problem. The tuple contains the information to
create an expression:
:math:`ITterm(x) = t \circ p(x),`
and :math:`p` is the **interaction** of the variables:
:math:`p(x) = \prod_{i=1}^{d} x_{i}^{k_{i}}.`
Each IT term is a tuple containing the name of the transformation
function and a list of exponents to be used in the interaction
function.
The whole expression is evaluated as
:math:`f(x) = \sum_{i=1}^{n} w_i \cdot t_i \circ p_i(x),`
with :math:`w_i` being a coefficient to be adjusted with the
``fit()`` method, and :math:`n` the number of terms.
tfuncs : dict
should always be a dict where the
keys are the names of the transformation functions and
the values are unary vectorized functions (for example,
numpy functions). For user-defined functions, see
numpy.vectorize for more information on how to vectorize
your transformation functions.
labels : list of strings, default=[]
list containing the labels of the variables that will be used.
When the list of labels is empty, the variables are named
:math:`x_0, x_1, \cdots`.
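
        Examples
        --------
        A minimal sketch of the expected argument format (``BaseITExpr`` is
        not meant to be instantiated in practice, and the transformation
        functions below are illustrative):

        >>> tfuncs = {'log': np.log, 'cube': np.vectorize(lambda x: x**3)}
        >>> itexpr = BaseITExpr(
        ...     expr=[('log', [1, 0]), ('cube', [2, 1])],
        ...     tfuncs=tfuncs, labels=['x_0', 'x_1'])
        >>> itexpr.n_terms
        2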
"""
self.expr = expr
self.labels = labels
self.tfuncs = tfuncs
self.n_terms = len(expr)
# attributes that are changed by fit method
self._is_fitted = False
self._fitness = np.inf
    def to_str(self, places=3, term_separator=None):
r"""Method to represent the IT expression as a string.
The variable names used are the ones given to ``labels`` in the
constructor.
        Some simplifications are made to omit trivial operations:
        - if a variable has zero as its exponent, it is omitted (since it
          evaluates to 1 regardless of the value of x);
        - if the coefficient (or every coefficient, in the multi-class task)
          is zero, the whole term is omitted.
Parameters
----------
places : int, default=3
Number of decimal places to round the coefficients when printing
the expression.
        term_separator : string or None, default=None
            string that will be used to concatenate each term. Suggestions
            are ``['\n', ' + ', ', ']``. If set to None, then the separator
            used is ``' + '``.
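
        Examples
        --------
        A minimal sketch with placeholder values (the expression below is
        not fitted, so every coefficient defaults to 1.0 and the intercept
        to 0.0; the terms are illustrative):

        >>> itexpr = BaseITExpr(
        ...     expr=[('log', [1, 0]), ('sqrt', [0, 2])],
        ...     tfuncs={'log': np.log, 'sqrt': np.sqrt})
        >>> itexpr.to_str()
        '1.0*log(x_0) + 1.0*sqrt(x_1^2) + 0.0'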
"""
        if term_separator is None:
term_separator = " + "
        # If not fitted, the method uses placeholder coefficients and intercept
coefs = np.ones(self.n_terms)
intercept = np.array(0.0)
if self._is_fitted:
coefs = self.coef_
intercept = self.intercept_
str_terms = []
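        # coefs.T yields one entry per term: a scalar for regression or a
        # vector with one coefficient per class for classification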
for w, (fi_str, ti) in zip(coefs.T, self.expr):
if np.all(w == 0):
continue
w_str = f"{w.round(places)}*"
t_str = " * ".join([
f"placeholder_{i}" + (f"^{t}" if t!=1 else "")
for i, t in enumerate(ti) if t!=0
])
str_terms.append(f"{w_str}{fi_str}({t_str})")
expr_str = term_separator.join(str_terms)
if len(self.labels)>0:
for i, l in enumerate(self.labels):
expr_str = expr_str.replace(f"placeholder_{i}", l)
else:
            expr_str = expr_str.replace("placeholder_", "x_")
return expr_str + f"{term_separator}{intercept.round(places)}"
def _eval(self, X):
r"""Method to evaluate each IT term on a given data set.
This makes the mapping from the original variable space to the
IT expression space.
Parameters
----------
X : numpy.array of shape (n_samples, n_features)
data set to be evaluated
Returns
-------
Z : numpy.array of shape (n_samples, n_terms)
the Z matrix will have one column for each IT term in the
expression, where the column ``Z[:, i]`` is the evaluation of
the i-th term for all samples.
This translates to:
:math:`Z_{(:, i)} = t_i \circ p_i(x).`
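
        Examples
        --------
        A small sketch of the mapping (the term and data are illustrative):

        >>> itexpr = BaseITExpr(
        ...     expr=[('id', [1, 1])], tfuncs={'id': lambda x: x})
        >>> itexpr._eval(np.array([[1., 2.], [3., 4.]]))
        array([[ 2.],
               [12.]])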
"""
Z = np.zeros( (len(X), self.n_terms) )
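        # each column of Z holds t_i(p_i(x)): the interaction (product of
        # powers) followed by the transformation function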
for i, (fi, ti) in enumerate( self.expr ):
Z[:, i] = self.tfuncs[fi]( np.prod(np.power(X, ti), axis=1) )
return Z
    def __str__(self):
        """Overload of the ``__str__`` method. Calls the ``to_str()``
        method with the default values for its arguments.
"""
return self.to_str()
    def complexity(self):
        """Method to calculate the size of the IT expression as if it were an
        expression tree, like the conventional representation used in symbolic
        regression.
        The same simplifications made in ``to_str()`` are applied here, so the
        complexity value corresponds to the string returned by that method.
Returns
-------
complexity : int
the number of nodes that a symbolic tree would have if the
IT expression was converted to it.
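
        Examples
        --------
        A quick sketch (the term is illustrative): the unfitted expression
        ``1.0*log(x_0 * x_1^2) + 0.0`` counts 3 nodes for the coefficient,
        its multiplication and ``log``; 1 node for ``x_0``; 3 nodes for the
        ``x_1^2`` subtree; 1 node for the product inside the interaction;
        and 2 nodes for the final sum and intercept:

        >>> itexpr = BaseITExpr(expr=[('log', [1, 2])], tfuncs={'log': np.log})
        >>> itexpr.complexity()
        10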
"""
coefs = np.ones(self.n_terms)
if hasattr(self, "coef_"):
coefs = self.coef_
tlen = 0
        # iterate over per-term coefficients (columns of coef_), matching
        # the simplifications made in to_str()
        for coef, (fi, ti) in zip(coefs.T, self.expr):
if np.all(coef == 0):
continue
# coef, multiplication and transformation function
tlen += 3
            # exponents other than 0 and 1 add a 3-node subtree (^, x, k)
            tlen += sum([3 for t in ti if t not in [0, 1]])
            # when the exponent is 1, then x^1 = x and we count a single node
            tlen += sum([1 for t in ti if t == 1])
            # exponents equal to 0 are discarded, since x^0 = 1
            # multiplications between the variables in the interaction
            tlen += sum([1 for t in ti if t != 0]) - 1
# sum between terms and the intercept
return tlen + self.n_terms + 1
    def gradient(self, X, tfuncs_dx, logit=False):
        r"""Method to evaluate the gradient of the IT expression for all
        data points in ``X``. The gradients are useful to the
        ``ITExpr_explainer`` class, which calculates feature importances
        and generates plots using the gradient information.
Parameters
----------
X : numpy.array of shape (n_samples, n_features)
points to evaluate the gradients.
        tfuncs_dx : dict
            dictionary analogous to ``tfuncs``, where each key is the name
            of a function (it should contain the derivative of every
            function in ``tfuncs``) and each value is a vectorized function
            representing that derivative.
        logit : boolean, default=False
            boolean variable indicating whether the IT expression is being
            used as a standalone linear model or as the linear part of a
            logistic regression predictor. When True, we must take the
            derivative of the logistic function into account.
            Let :math:`it(x)` be the IT expression used inside a logit model:
            :math:`logit(x) = \frac{1}{1 + e^{-it(x)}}.`
            The partial derivative needed to calculate the gradient is then:
            :math:`\frac{\partial}{\partial x_i} logit(x) =
            \frac{e^{it(x)}\, it'(x)}{(e^{it(x)} + 1)^2}.`
Returns
-------
        nabla : numpy.array of shape (n_classes, n_samples, n_features)
            returns a 3-dimensional array. For regression and binary
            classification, ``n_classes=1``. Each row of the matrix
            ``nabla[i]`` is the gradient evaluated at the corresponding
            sample in ``X``.
            To illustrate:
            - Gradient of observation ``i`` for regression:
              ``gradient(X, tfuncs_dx)[0, i, :]``
            - Gradient of observation ``i`` according to the coefficients
              used to classify class ``j`` in multi-class classification:
              ``gradient(X, tfuncs_dx, logit=True)[j, i, :]``
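
        Examples
        --------
        A minimal sketch of the expected format for ``tfuncs_dx`` (the
        functions and data are illustrative):

        >>> tfuncs    = {'sin': np.sin}
        >>> tfuncs_dx = {'sin': np.cos}
        >>> itexpr = BaseITExpr(expr=[('sin', [1, 0])], tfuncs=tfuncs)
        >>> itexpr.gradient(np.array([[0.0, 1.0]]), tfuncs_dx).shape
        (1, 1, 2)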
"""
X = check_array(X)
# the gradients can be calculated even before fit
intercept = [0.0]
if hasattr(self, "intercept_"):
intercept = self.intercept_
coefs = np.ones(self.n_terms)
if hasattr(self, "coef_"):
coefs = self.coef_
# if coefs.ndim==1, then it is a regression ITExpr. Let's make it 2d
if coefs.ndim == 1:
coefs = coefs.reshape(-1, 1).T
# Storing the gradient of each term
term_gradients = []
# i iterates over terms, j over variables
for j in range(X.shape[1]):
# evaluating the partial derivative w.r.t each variable
g_partialx = np.zeros( (len(X), self.n_terms) )
for i, (fi, ti) in enumerate( self.expr ):
ti_aux = np.copy(ti)
ti_aux[j] = ti_aux[j] - 1
pi = np.prod(np.power(X, ti), axis=1)
# avoid multiply zero and nan, would result in nan
if ti[j] == 0:
pi_partialx = np.zeros_like(X[:, j])
else:
pi_partialx = ti[j] * np.prod(np.power(X, ti_aux), axis=1)
g_partialx[:, i] = tfuncs_dx[fi](pi)*pi_partialx
term_gradients.append(g_partialx)
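        # term_gradients stacks to shape (n_features, n_samples, n_terms);
        # dotting with one class's coefficient vector gives
        # (n_features, n_samples), transposed and stacked per class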
gradients = np.array([np.dot(term_gradients, coef).T for coef in coefs])
if logit:
it_eval = np.dot(self._eval(X), coefs.T) + intercept
for c_idx in range(gradients.shape[0]): # number of classes
for x_idx in range(gradients.shape[2]): # number of variables
gradients[c_idx, :, x_idx] = np.divide(
np.exp(it_eval)[:, c_idx] * gradients[c_idx, :, x_idx],
np.power(np.exp(it_eval)[:, c_idx] + 1.0, 2)
)
return gradients
    def covariance_matrix(self, X, y):
        """Virtual method to estimate the covariance matrix.
        Should be overridden by sub-classes.
"""
raise NotImplementedError()
    def fit(self, X, y):
        """Virtual fit method. Should be overridden by sub-classes.
        Sub-classes may take additional named arguments to support
        parameters specific to their fit method.
"""
raise NotImplementedError()
    def predict(self, X):
        """Virtual predict method. Should be overridden by sub-classes.
"""
raise NotImplementedError()
    def predict_proba(self, X):
        """Virtual predict_proba method. Should be overridden by sub-classes.
"""
raise NotImplementedError()