import numpy as np
from .module import Module
__all__ = [
class TanH(Module):
    r"""Hyperbolic tangent activation layer.

    .. math::
        \text{TanH}(x) &= \tanh(x) \\
        &= \frac{\sinh x}{\cosh x} \\
        &= \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)} \\
        &= \frac{e^{2x} - 1}{e^{2x} + 1}
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def forward(self, X):
        """Return ``np.tanh`` applied element-wise to ``X``."""
        return np.tanh(X)

    def zero_grad(self):
        """Do nothing: TanH stores no gradient."""

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in TanH."""

    def backward_delta(self, input, delta):
        r"""Scale ``delta`` by the derivative of tanh evaluated at ``input``.

        .. math:: \frac{\partial M}{\partial z^h} = 1 - \tanh (z^h)^2
        """
        # Presumably Module.__call__ dispatches to forward() -- confirm.
        activation = self(input)
        local_slope = 1 - activation ** 2
        return delta * local_slope

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in TanH."""
class Sigmoid(Module):
    r"""Sigmoid activation layer (direct formula).

    .. math:: \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def zero_grad(self):
        """Do nothing: Sigmoid stores no gradient."""

    def forward(self, X):
        """Return ``1 / (1 + exp(-X))`` element-wise."""
        # For very negative X, np.exp(-X) overflows to +inf; the quotient
        # still evaluates to 0 but NumPy reports an overflow warning.
        # Open question left by the author: should X be normalised first?
        denominator = 1 + np.exp(-X)
        return 1 / denominator

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in Sigmoid."""

    def backward_delta(self, input, delta):
        r"""Scale ``delta`` by the sigmoid derivative evaluated at ``input``.

        .. math::
            \frac{\partial M}{\partial z^h} = \sigma(z^h) * (1 - \sigma(z^h))
        """
        # Presumably Module.__call__ dispatches to forward() -- confirm.
        activation = self(input)
        return delta * activation * (1 - activation)

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in Sigmoid."""
class StableSigmoid(Module):
    r"""Numerically stable Sigmoid activation function.

    .. math::
        \sigma(x) = \begin{cases}
            \frac{1}{1 + e^{-x}} & \text{if } x \geq 0 \\
            \frac{e^{x}}{1 + e^{x}} & \text{otherwise}
        \end{cases}

    Both cases only need :math:`e^{-|x|}`, which lies in ``[0, 1]`` and
    therefore cannot overflow.
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def zero_grad(self):
        """Do nothing: StableSigmoid stores no gradient."""

    def forward(self, X):
        """Return the sigmoid of ``X`` element-wise without overflowing.

        ``np.where`` evaluates both of its value arguments on every element,
        so computing ``np.exp(-X)`` and ``np.exp(X)`` separately would still
        overflow (and warn) on the branch that is then discarded. A single
        ``exp(-|X|)`` feeds both branches instead.
        """
        decay = np.exp(-np.abs(X))  # exp(-X) where X >= 0, exp(X) where X < 0
        return np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in StableSigmoid."""

    def backward_delta(self, input, delta):
        r"""Scale ``delta`` by the sigmoid derivative evaluated at ``input``.

        .. math::
            \frac{\partial M}{\partial z^h} = \sigma(z^h) * (1 - \sigma(z^h))
        """
        # Presumably Module.__call__ dispatches to forward() -- confirm.
        sig_X = self(input)
        return delta * sig_X * (1 - sig_X)

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in StableSigmoid."""
class Softmax(Module):
    r"""Softmax activation function.

    Commonly used along with a cross entropy loss. See the articles
    "Softmax and cross-entropy loss" and "Derivative of Cross Entropy Loss
    with Softmax".

    .. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def zero_grad(self):
        """Do nothing: Softmax stores no gradient."""

    def forward(self, X):
        """Return the softmax of ``X`` taken along its last axis.

        The maximum of each slice along the last axis is subtracted before
        exponentiating, which keeps ``np.exp`` from overflowing and so avoids
        NaN. See "Computing softmax and numerical stability".
        """
        exp_X = np.exp(X - np.max(X, axis=-1, keepdims=True))
        return exp_X / np.sum(exp_X, axis=-1, keepdims=True)

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in Softmax."""

    def backward_delta(self, input, delta):
        r"""Back-propagate ``delta`` through the softmax.

        Writing :math:`s = M^h(x)`, the Jacobian of the softmax is

        .. math::
            \frac{\partial s_i}{\partial x_j} = \begin{cases}
                s_i (1 - s_i) & \text{if } i = j \\
                - s_i s_j & \text{if } i \neq j
            \end{cases}

        so the gradient with respect to the input is

        .. math::
            \sum_i \delta_i \frac{\partial s_i}{\partial x_j}
            = s_j \left( \delta_j - \sum_i \delta_i s_i \right)

        The off-diagonal terms are included; keeping only the diagonal
        ``s * (1 - s)`` treats the outputs as independent, which they are not.
        """
        # Presumably Module.__call__ dispatches to forward() -- confirm.
        softmax = self(input)
        weighted_delta = np.sum(delta * softmax, axis=-1, keepdims=True)
        return softmax * (delta - weighted_delta)

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in Softmax."""
class LogSoftmax(Module):
    r"""LogSoftmax activation layer.

    .. math::
        \text{LogSoftmax}(x_{i}) =
        \log \left( \frac{\exp(x_i)}{\sum_j \exp(x_j)} \right)
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def forward(self, X):
        """Return the log-softmax of ``X`` taken along its last axis."""
        # Shift by the per-slice maximum so that np.exp cannot overflow.
        row_max = np.max(X, axis=-1, keepdims=True)
        centered = X - row_max
        log_normaliser = np.log(np.sum(np.exp(centered), axis=-1, keepdims=True))
        return centered - log_normaliser

    def zero_grad(self):
        """Do nothing: LogSoftmax stores no gradient."""

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in LogSoftmax."""

    def backward_delta(self, input, delta):
        """Back-propagate ``delta`` through the log-softmax.

        Computes ``delta - softmax(input) * sum(delta)``, the sum being taken
        along the last axis.
        """
        # exp(log-softmax) recovers the softmax probabilities.
        # Presumably Module.__call__ dispatches to forward() -- confirm.
        probabilities = np.exp(self(input))
        delta_total = np.sum(delta, axis=-1, keepdims=True)
        return delta - probabilities * delta_total

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in LogSoftmax."""
class ReLU(Module):
    r"""ReLU (rectified linear unit) activation layer.

    .. math:: \text{ReLU}(x) = x^+ = \max(0, x)
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def zero_grad(self):
        """Do nothing: ReLU stores no gradient."""

    def forward(self, X):
        """Return the element-wise maximum of 0 and ``X``."""
        floor = 0
        return np.maximum(floor, X)

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in ReLU."""

    def backward_delta(self, input, delta):
        r"""Pass ``delta`` through where ``input`` is strictly positive.

        .. math::
            \frac{\partial M}{\partial z^h} =
            1 \text{ if } x > 0 \text{ else } 0
        """
        active = input > 0
        return delta * active

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in ReLU."""
class LeakyReLU(Module):
    r"""Leaky ReLU activation function.

    .. math::
        \text{LeakyReLU}(x) =
        \begin{cases}
            x, & \text{ if } x \geq 0 \\
            \alpha \times x, & \text{ otherwise }
        \end{cases}

    For :math:`\alpha \leq 1` this equals :math:`\max(\alpha x, x)`.
    """

    def __init__(self, alpha=0.01):
        """Store ``alpha``, the slope applied to non-positive inputs."""
        # NOTE(review): no base-class call is visible in this view; it is
        # assumed to exist as in the sibling layers -- confirm against Module.
        super().__init__()
        self.alpha = alpha

    def zero_grad(self):
        """Do nothing: LeakyReLU stores no gradient."""

    def forward(self, X):
        """Return ``X`` where it is positive and ``alpha * X`` elsewhere.

        Selecting on the sign of ``X`` (rather than taking
        ``max(alpha * X, X)``) keeps the forward pass consistent with
        ``backward_delta`` even when ``alpha`` is greater than 1; for
        ``alpha <= 1`` both forms give the same result.
        """
        return np.where(X > 0, X, self.alpha * X)

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in LeakyReLU."""

    def backward_delta(self, input, delta):
        r"""Scale ``delta`` by the local slope of the activation.

        .. math::
            \frac{\partial M}{\partial z^h} = \begin{cases}
                1 & \text{if } x>0, \\
                \alpha & \text{otherwise}.
            \end{cases}
        """
        values = np.asarray(input)
        # The slope array must be able to hold a fractional alpha: inheriting
        # an integer dtype from ``input`` would truncate alpha to 0.
        if np.issubdtype(values.dtype, np.inexact):
            slope_dtype = values.dtype
        else:
            slope_dtype = np.float64
        dx = np.ones_like(values, dtype=slope_dtype)
        dx[values <= 0] = self.alpha
        return delta * dx

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in LeakyReLU."""
class Softplus(Module):
    r"""Smooth approximation of the ReLU activation function.

    .. math:: \text{Softplus}(x) = \ln(1 + e^x)
    """

    def __init__(self) -> None:
        # NOTE(review): the body is not visible in this view; delegating to
        # the base class is assumed -- confirm against Module.
        super().__init__()

    def zero_grad(self):
        """Do nothing: Softplus stores no gradient."""

    def forward(self, X):
        """Return ``ln(1 + exp(X))`` element-wise.

        Evaluated as ``np.logaddexp(0, X)``, i.e. ``log(exp(0) + exp(X))``,
        so that large ``X`` yields approximately ``X`` instead of
        overflowing to ``inf``.
        """
        return np.logaddexp(0, X)

    def backward_update_gradient(self, input, delta):
        """Do nothing: there is no gradient to update in Softplus."""

    def backward_delta(self, input, delta):
        r"""Scale ``delta`` by the sigmoid of ``input``.

        .. math::
            \frac{\partial M}{\partial z^h} = \sigma (x) = \frac{1}{1 + e^{-x}}
        """
        # sigma(x) = exp(-ln(1 + exp(-x))); going through logaddexp avoids
        # the overflow of exp(-x) for very negative inputs.
        sigmoid = np.exp(-np.logaddexp(0, -input))
        return delta * sigmoid

    def update_parameters(self, learning_rate):
        """Do nothing: there are no parameters to update in Softplus."""