Source code for src.activation

import numpy as np
from .module import Module

__all__ = [
    "TanH",
    "Sigmoid",
    "StableSigmoid",
    "Softmax",
    "LogSoftmax",
    "ReLU",
    "LeakyReLU",
    "Softplus",
]


class TanH(Module):
    r"""Hyperbolic Tangent activation function.

    .. math::

        \begin{align*}
        \text{TanH}(x) &= \tanh(x) \\
        &= \frac{\sinh x}{\cosh x} \\
        &= \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)} \\
        &= \frac{e^{2x} - 1}{e^{2x} + 1}
        \end{align*}
    """

    def __init__(self) -> None:
        super().__init__()

    def forward(self, X):
        return np.tanh(X)

    def zero_grad(self):
        pass

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in TanH

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M}{\partial z^h} = 1 - \tanh(z^h)^2
        """
        return delta * (1 - self(input) ** 2)

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in TanH

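A quick way to sanity-check an activation's backward_delta is a central finite-difference comparison against forward. The sketch below is illustrative only, assuming the package is importable as src.activation and that Module makes instances callable (as the use of self(input) above suggests):

    import numpy as np
    from src.activation import TanH

    rng = np.random.default_rng(0)
    X = rng.normal(size=(4, 3))
    eps = 1e-6

    act = TanH()
    analytic = act.backward_delta(X, np.ones_like(X))  # upstream gradient of ones
    numeric = (act.forward(X + eps) - act.forward(X - eps)) / (2 * eps)

    assert np.allclose(analytic, numeric, atol=1e-6)

The same check applies to the other element-wise activations below (away from the kink at zero for ReLU and LeakyReLU).
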
class Sigmoid(Module):
    r"""Sigmoid activation function.

    .. math::

        \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
    """

    def __init__(self) -> None:
        super().__init__()

    def zero_grad(self):
        pass

    def forward(self, X):
        # Large negative values of X make np.exp(-X) overflow to +inf;
        # see StableSigmoid below for a safer variant.
        return 1 / (1 + np.exp(-X))

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in Sigmoid

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M}{\partial z^h} = \sigma(z^h) (1 - \sigma(z^h))
        """
        sig_X = self(input)
        return delta * sig_X * (1 - sig_X)

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in Sigmoid

class StableSigmoid(Module):
    r"""Numerically stable Sigmoid activation function."""

    def __init__(self) -> None:
        super().__init__()

    def zero_grad(self):
        pass

    def forward(self, X):
        # Note: np.where evaluates both branches, so overflow warnings can
        # still be emitted; the selected values are finite in every case.
        return np.where(
            X >= 0,
            1 / (1 + np.exp(-X)),
            np.exp(X) / (1 + np.exp(X)),
        )

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in StableSigmoid

    def backward_delta(self, input, delta):
        sig_X = self(input)
        return delta * sig_X * (1 - sig_X)

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in StableSigmoid

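Both sigmoid variants should agree numerically; the point of StableSigmoid is that its selected branch never exponentiates a large positive argument. A small comparison on extreme inputs, assuming src.activation is importable (warnings are silenced because, as noted above, np.where evaluates both branches):

    import numpy as np
    from src.activation import Sigmoid, StableSigmoid

    X = np.array([-1000.0, -5.0, 0.0, 5.0, 1000.0])

    with np.errstate(over="ignore", invalid="ignore"):
        naive = Sigmoid().forward(X)
        stable = StableSigmoid().forward(X)

    assert np.all(np.isfinite(stable))
    assert np.allclose(naive, stable)
    print(stable)  # approx. [0.  0.0067  0.5  0.9933  1.]
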
class Softmax(Module):
    r"""Softmax activation function.

    Commonly used along with a cross-entropy loss. See
    [Softmax and cross-entropy loss](https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/)
    and
    [Derivative of Cross Entropy Loss with Softmax](https://www.parasdahal.com/softmax-crossentropy).

    .. math::

        \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
    """

    def __init__(self) -> None:
        super().__init__()

    def zero_grad(self):
        pass

    def forward(self, X):
        """
        Implemented by subtracting the row-wise maximum before exponentiating
        (the usual log-sum-exp trick) to avoid overflow and NaN. See
        [Computing softmax and numerical stability](https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/).
        """
        exp_X = np.exp(X - np.max(X, axis=-1, keepdims=True))
        return exp_X / np.sum(exp_X, axis=-1, keepdims=True)

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in Softmax

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M^h(x_i)}{\partial x_i} = M^h(x_i) (1 - M^h(x_i))

        More precisely

        .. math::

            \frac{\partial M^h(x_i)}{\partial x_j} =
            \begin{cases}
                M^h(x_i) (1 - M^h(x_j)) & \text{if } i = j \\
                - M^h(x_j) M^h(x_i) & \text{if } i \neq j
            \end{cases}
        """
        # Only the diagonal terms of the Jacobian are kept here;
        # the cross terms (i != j) are neglected.
        softmax = self(input)
        return delta * (softmax * (1 - softmax))

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in Softmax

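For reference, the full Jacobian-vector product of a row-wise softmax needs only one extra reduction. This is a sketch of that alternative in plain NumPy (the helper name softmax_backward_full is hypothetical), not what the class above implements:

    import numpy as np

    def softmax_backward_full(input, delta):
        """Full Jacobian-vector product for a row-wise softmax."""
        exp_X = np.exp(input - np.max(input, axis=-1, keepdims=True))
        s = exp_X / np.sum(exp_X, axis=-1, keepdims=True)
        # (J^T delta)_i = s_i * (delta_i - sum_j s_j * delta_j)
        return s * (delta - np.sum(delta * s, axis=-1, keepdims=True))

    rng = np.random.default_rng(0)
    X = rng.normal(size=(2, 5))
    delta = rng.normal(size=(2, 5))
    print(softmax_backward_full(X, delta).shape)  # (2, 5)
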
class LogSoftmax(Module):
    r"""LogSoftmax activation function.

    .. math::

        \text{LogSoftmax}(x_{i}) = \log \left( \frac{\exp(x_i)}{\sum_j \exp(x_j)} \right)
    """

    def __init__(self) -> None:
        super().__init__()

    def forward(self, X):
        X_shifted = X - np.max(X, axis=-1, keepdims=True)
        return X_shifted - np.log(np.sum(np.exp(X_shifted), axis=-1, keepdims=True))

    def zero_grad(self):
        pass

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in LogSoftmax

    def backward_delta(self, input, delta):
        softmax = np.exp(self(input))
        return delta - softmax * np.sum(delta, axis=-1, keepdims=True)

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in LogSoftmax

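A quick consistency check between the two softmax modules: exponentiating the LogSoftmax output should recover the Softmax probabilities, and each row should sum to one. A sketch, assuming src.activation is importable:

    import numpy as np
    from src.activation import LogSoftmax, Softmax

    rng = np.random.default_rng(0)
    X = rng.normal(size=(3, 4))

    log_probs = LogSoftmax().forward(X)
    probs = Softmax().forward(X)

    assert np.allclose(np.exp(log_probs), probs)
    assert np.allclose(probs.sum(axis=-1), 1.0)
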
class ReLU(Module):
    r"""ReLU (rectified linear unit) activation function.

    .. math::

        \text{ReLU}(x) = x^+ = \max(0, x)
    """

    def __init__(self) -> None:
        super().__init__()

    def zero_grad(self):
        pass

    def forward(self, X):
        return np.maximum(0, X)

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in ReLU

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M}{\partial z^h} = 1 \text{ if } x > 0 \text{ else } 0
        """
        return delta * (input > 0)

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in ReLU

class LeakyReLU(Module):
    r"""Leaky ReLU activation function.

    .. math::

        \text{LeakyReLU}(x) = \max(\alpha x, x) =
        \begin{cases}
            x, & \text{if } x \geq 0 \\
            \alpha \times x, & \text{otherwise}
        \end{cases}
    """

    def __init__(self, alpha=0.01):
        super().__init__()
        self.alpha = alpha

    def zero_grad(self):
        pass

    def forward(self, X):
        return np.maximum(self.alpha * X, X)

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in Leaky ReLU

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M}{\partial z^h} =
            \begin{cases}
                1 & \text{if } x > 0 \\
                \alpha & \text{otherwise}
            \end{cases}
        """
        dx = np.ones_like(input)
        dx[input <= 0] = self.alpha
        return delta * dx

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in Leaky ReLU

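A short illustration of the effect of alpha (the max(αx, x) form above assumes 0 < α < 1), assuming src.activation is importable; the input values are hypothetical:

    import numpy as np
    from src.activation import LeakyReLU

    X = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])

    print(LeakyReLU(alpha=0.01).forward(X))  # [-0.02  -0.005  0.  0.5  2.]
    print(LeakyReLU(alpha=0.2).forward(X))   # [-0.4   -0.1    0.  0.5  2.]

    # backward_delta uses slope alpha for inputs <= 0 and 1 otherwise.
    print(LeakyReLU(alpha=0.2).backward_delta(X, np.ones_like(X)))
    # [0.2 0.2 0.2 1.  1. ]
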
class Softplus(Module):
    r"""Smooth approximation of the ReLU activation function.

    .. math::

        \text{Softplus}(x) = \ln(1 + e^x)
    """

    def __init__(self) -> None:
        super().__init__()

    def zero_grad(self):
        pass

    def forward(self, X):
        return np.log(1 + np.exp(X))

    def backward_update_gradient(self, input, delta):
        pass  # No gradient to update in Softplus

    def backward_delta(self, input, delta):
        r"""
        .. math::

            \frac{\partial M}{\partial z^h} = \sigma(x) = \frac{1}{1 + e^{-x}}
        """
        return delta / (1 + np.exp(-input))

    def update_parameters(self, learning_rate):
        pass  # No parameters to update in Softplus
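
As the backward_delta docstring above states, the derivative of Softplus is the sigmoid; a central finite-difference check makes that concrete. A sketch, assuming src.activation is importable:

    import numpy as np
    from src.activation import Sigmoid, Softplus

    rng = np.random.default_rng(0)
    X = rng.normal(size=(4, 3))
    eps = 1e-6

    sp = Softplus()
    analytic = sp.backward_delta(X, np.ones_like(X))
    numeric = (sp.forward(X + eps) - sp.forward(X - eps)) / (2 * eps)

    assert np.allclose(analytic, numeric, atol=1e-6)
    assert np.allclose(analytic, Sigmoid().forward(X))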