Source code for src.linear

from typing import Literal
import numpy as np
from .module import Module


class Linear(Module):
    r"""Linear module.

    Parameters
    ----------
    input_size : int
        Size of each input sample.
    output_size : int
        Size of each output sample.
    bias : bool, optional, default=True
        If True, adds a learnable bias to the output.
    init_type : str, optional, default="he_normal"
        Initialization scheme for the parameters.

    Shape
    -----
    - Input : ndarray (batch, input_size)
    - Output : ndarray (batch, output_size)
    - Weight : ndarray (input_size, output_size)
    - Bias : ndarray (1, output_size)
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        bias: bool = True,
        init_type: Literal[
            "normal",
            "uniform",
            "zeros",
            "ones",
            "he_normal",
            "he_uniform",
            "xavier_normal",
            "xavier_uniform",
        ] = "he_normal",
    ):
        super().__init__()
        if not isinstance(input_size, int):
            raise ValueError(
                f"Input size of Linear module must be int, not {type(input_size)}"
            )
        if not isinstance(output_size, int):
            raise ValueError(
                f"Output size of Linear module must be int, not {type(output_size)}"
            )
        self.input_size = input_size
        self.output_size = output_size
        self.include_bias = bias
        self.__init_params(init_type)

    def __init_params(self, init_type):
        gain = self.calculate_gain()
        if init_type == "normal":
            # Standard normal N(0, 1).
            self._parameters["weight"] = np.random.randn(
                self.input_size, self.output_size
            )
            self._parameters["bias"] = np.random.randn(1, self.output_size)
        elif init_type == "uniform":
            # Uniform on [0, 1).
            self._parameters["weight"] = np.random.uniform(
                0.0, 1.0, (self.input_size, self.output_size)
            )
            self._parameters["bias"] = np.random.uniform(
                0.0, 1.0, (1, self.output_size)
            )
        elif init_type == "zeros":
            self._parameters["weight"] = np.zeros((self.input_size, self.output_size))
            self._parameters["bias"] = np.zeros((1, self.output_size))
        elif init_type == "ones":
            self._parameters["weight"] = np.ones((self.input_size, self.output_size))
            self._parameters["bias"] = np.ones((1, self.output_size))
        elif init_type == "he_normal":
            # He normal: std = gain * sqrt(2 / fan_in).
            std_dev = gain * np.sqrt(2 / self.input_size)
            self._parameters["weight"] = np.random.normal(
                0, std_dev, (self.input_size, self.output_size)
            )
            self._parameters["bias"] = np.random.normal(
                0, std_dev, (1, self.output_size)
            )
        elif init_type == "he_uniform":
            # He uniform: limit = gain * sqrt(6 / fan_in).
            limit = gain * np.sqrt(6 / self.input_size)
            self._parameters["weight"] = np.random.uniform(
                -limit, limit, (self.input_size, self.output_size)
            )
            self._parameters["bias"] = np.random.uniform(
                -limit, limit, (1, self.output_size)
            )
        elif init_type == "xavier_normal":
            # Xavier normal: std = gain * sqrt(2 / (fan_in + fan_out)).
            std_dev = gain * np.sqrt(2 / (self.input_size + self.output_size))
            self._parameters["weight"] = np.random.normal(
                0, std_dev, (self.input_size, self.output_size)
            )
            self._parameters["bias"] = np.random.normal(
                0, std_dev, (1, self.output_size)
            )
        elif init_type == "xavier_uniform":
            # Xavier uniform: limit = gain * sqrt(6 / (fan_in + fan_out)).
            limit = gain * np.sqrt(6 / (self.input_size + self.output_size))
            self._parameters["weight"] = np.random.uniform(
                -limit, limit, (self.input_size, self.output_size)
            )
            self._parameters["bias"] = np.random.uniform(
                -limit, limit, (1, self.output_size)
            )
        else:
            raise ValueError(f"Unknown initialization type: {init_type}")

        self._gradient["weight"] = np.zeros_like(self._parameters["weight"])
        self._gradient["bias"] = np.zeros_like(self._parameters["bias"])

        if not self.include_bias:
            self._parameters["bias"] = None
            self._gradient["bias"] = None
    def forward(self, X):
        r"""Forward pass.

        Notes
        -----
        X @ W = (batch, input_size) @ (input_size, output_size)
              = (batch, output_size)
        """
        assert X.shape[1] == self.input_size, (
            "X must be of shape (batch_size, input_size)"
        )
        self.output = X @ self._parameters["weight"]
        if self.include_bias:
            self.output += self._parameters["bias"]
        return self.output
    def backward_update_gradient(self, input, delta):
        assert input.shape[1] == self.input_size
        assert delta.shape[1] == self.output_size
        # delta : ndarray (batch, output_size)
        # input.T @ delta : (input_size, batch) @ (batch, output_size)
        #                 = (input_size, output_size)
        self._gradient["weight"] += input.T @ delta
        if self.include_bias:
            # Sum over the batch axis: (output_size,) broadcasts to (1, output_size).
            self._gradient["bias"] += delta.sum(axis=0)
    def backward_delta(self, input, delta):
        assert input.shape[1] == self.input_size
        assert delta.shape[1] == self.output_size
        # delta @ W.T : (batch, output_size) @ (output_size, input_size)
        #             = (batch, input_size)
        self.d_out = delta @ self._parameters["weight"].T
        return self.d_out
    def zero_grad(self):
        self._gradient["weight"] = np.zeros((self.input_size, self.output_size))
        if self.include_bias:
            self._gradient["bias"] = np.zeros((1, self.output_size))
    def update_parameters(self, learning_rate=0.001):
        self._parameters["weight"] -= learning_rate * self._gradient["weight"]
        if self.include_bias:
            self._parameters["bias"] -= learning_rate * self._gradient["bias"]
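
For reference, a minimal usage sketch of a single SGD step with this module. The loss choice is an assumption made for the example (per-sample squared error averaged over the batch), as is the import path; neither is part of src.linear.

# Minimal usage sketch (illustrative, not part of the module): one SGD step
# on random data. Assumed loss: L = (1/batch) * sum((y_pred - y) ** 2),
# so dL/dy_pred = 2 * (y_pred - y) / batch.
import numpy as np
from src.linear import Linear  # assumed import path

rng = np.random.default_rng(0)
layer = Linear(4, 3, bias=True, init_type="he_normal")

X = rng.normal(size=(8, 4))              # (batch, input_size)
y = rng.normal(size=(8, 3))              # (batch, output_size)

y_pred = layer.forward(X)                # (batch, output_size)
delta = 2.0 * (y_pred - y) / X.shape[0]  # dL/dy_pred under the assumed loss

layer.zero_grad()
layer.backward_update_gradient(X, delta)  # accumulates dL/dW and dL/db
d_in = layer.backward_delta(X, delta)     # dL/dX, passed to the previous layer
layer.update_parameters(learning_rate=0.01)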
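
Continuing the sketch above, a finite-difference check that the analytic weight gradient matches a numerical one. The `loss` helper, `eps`, and the tolerance are illustrative, and the test reads the private `_parameters`/`_gradient` dicts directly, which is fine for a check but not for normal use.

# Finite-difference check of dL/dW (continues the sketch above). Here the
# loss is the elementwise mean, so delta = 2 * (y_pred - y) / y_pred.size.
def loss(layer, X, y):
    return np.mean((layer.forward(X) - y) ** 2)

eps = 1e-6
W = layer._parameters["weight"]
num_grad = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        W[i, j] += eps
        loss_plus = loss(layer, X, y)
        W[i, j] -= 2 * eps
        loss_minus = loss(layer, X, y)
        W[i, j] += eps  # restore the original value
        num_grad[i, j] = (loss_plus - loss_minus) / (2 * eps)

layer.zero_grad()
delta = 2.0 * (layer.forward(X) - y) / y.size
layer.backward_update_gradient(X, delta)
assert np.allclose(num_grad, layer._gradient["weight"], atol=1e-5)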