Source code for myoverse.models.components.activation_functions

"""Custom activation functions for neural networks."""

from __future__ import annotations

import torch
from torch import nn


class PSerf(nn.Module):
    """PSerf activation function from Biswas et al.

    Parameters
    ----------
    gamma : float, optional
        The gamma parameter, by default 1.0.
    sigma : float, optional
        The sigma parameter, by default 1.25.
    stabilisation_term : float, optional
        The stabilisation term, by default 1e-12.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2021. ErfAct and PSerf:
    Non-monotonic smooth trainable Activation Functions. arXiv:2109.04386 [cs].
    """
    def __init__(
        self,
        gamma: float = 1.0,
        sigma: float = 1.25,
        stabilisation_term: float = 1e-12,
    ):
        super().__init__()

        self.gamma = nn.Parameter(torch.tensor(gamma), requires_grad=True)
        self.sigma = nn.Parameter(torch.tensor(sigma), requires_grad=True)
        self.stabilisation_term = torch.tensor(stabilisation_term)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (
            x * torch.erf(self.gamma * torch.log(1 + torch.exp(self.sigma * x)))
            + self.stabilisation_term
        )
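
# For reference (comment added for this listing, not in the original source):
# the forward pass above implements
#     PSerf(x) = x * erf(gamma * ln(1 + exp(sigma * x))) + eps,
# where eps is the small stabilisation term. A quick usage sketch with arbitrary
# example shapes:
#
#     act = PSerf()
#     y = act(torch.randn(8, 16))   # element-wise, so y.shape == (8, 16)
#     y.sum().backward()            # gamma and sigma both receive gradients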

class SAU(nn.Module):
    """SAU activation function from Biswas et al.

    Parameters
    ----------
    alpha : float, optional
        The alpha parameter, by default 0.15.
    n : int, optional
        The n parameter, by default 20000.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2021. SAU: Smooth
    activation function using convolution with approximate identities.
    arXiv:2109.13210 [cs].
    """
    def __init__(self, alpha: float = 0.15, n: int = 20000):
        super().__init__()

        self.alpha = nn.Parameter(torch.tensor(alpha), requires_grad=True)

        # Register constants as buffers so they move with the module to different devices
        self.register_buffer("n", torch.tensor(n, dtype=torch.float32))
        self.register_buffer("sqrt_2_over_pi", torch.sqrt(torch.tensor(2.0 / torch.pi)))
        self.register_buffer("sqrt_2", torch.sqrt(torch.tensor(2.0)))
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        n_squared = self.n * self.n
        return (
            self.sqrt_2_over_pi * torch.exp(-(n_squared * x * x) / 2) / (2 * self.n)
            + (1 + self.alpha) / 2 * x
            + (1 - self.alpha) / 2 * x * torch.erf(self.n * x / self.sqrt_2)
        )
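
# For reference (comment added for this listing, not in the original source):
# the forward pass above evaluates the closed-form SAU expression
#     SAU(x) = exp(-n^2 * x^2 / 2) / (n * sqrt(2 * pi))
#              + (1 + alpha) / 2 * x
#              + (1 - alpha) / 2 * x * erf(n * x / sqrt(2)),
# with n fixed as a buffer and alpha trainable. Note that
# sqrt(2 / pi) / (2 * n) == 1 / (n * sqrt(2 * pi)), so the leading term in the
# code matches the expression above.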

class SMU(nn.Module):
    """SMU activation function from Biswas et al.

    Parameters
    ----------
    alpha : float, optional
        The alpha parameter, by default 0.01.
    mu : float, optional
        The mu parameter, by default 2.5.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2022. SMU: smooth
    activation function for deep networks using smoothing maximum technique.
    arXiv:2111.04682 [cs].

    Notes
    -----
    This version also makes alpha trainable.
    """
    def __init__(self, alpha: float = 0.01, mu: float = 2.5):
        super().__init__()

        self.alpha = nn.Parameter(torch.tensor(alpha), requires_grad=True)
        self.mu = nn.Parameter(torch.tensor(mu), requires_grad=True)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (
            (1 + self.alpha) * x
            + (1 - self.alpha) * x * torch.erf(self.mu * (1 - self.alpha) * x)
        ) / 2
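

# Illustrative sanity check (an addition for this listing, not part of the
# published module): verifies that each activation is shape-preserving and that
# its trainable parameters receive gradients. Shapes are arbitrary example values.
if __name__ == "__main__":
    x = torch.randn(4, 32)
    for act in (PSerf(), SAU(), SMU()):
        y = act(x)
        assert y.shape == x.shape
        y.sum().backward()
        print(type(act).__name__, "output mean:", y.mean().item())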