Source code for doc_octopy.models.components.activation_functions

import torch
from torch import nn


class PSerf(nn.Module):
    """PSerf activation function from Biswas et al.

    Parameters
    ----------
    gamma : float, optional
        The gamma parameter, by default 1.0.
    sigma : float, optional
        The sigma parameter, by default 1.25.
    stabilisation_term : float, optional
        The stabilisation term, by default 1e-12.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2021. ErfAct and PSerf:
    Non-monotonic smooth trainable Activation Functions. arXiv:2109.04386 [cs].
    """

    def __init__(
        self, gamma: float = 1.0, sigma: float = 1.25, stabilisation_term: float = 1e-12
    ):
        super(PSerf, self).__init__()
        self.gamma = nn.Parameter(torch.tensor(gamma), requires_grad=True)
        self.sigma = nn.Parameter(torch.tensor(sigma), requires_grad=True)
        self.stabilisation_term = torch.tensor(stabilisation_term)

    def forward(self, x) -> torch.Tensor:
        return (
            x * torch.erf(self.gamma * torch.log(1 + torch.exp(self.sigma * x)))
            + self.stabilisation_term
        )
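
# Usage sketch (not part of the original module): PSerf is a standard nn.Module,
# so it can be applied directly to a tensor or dropped into nn.Sequential. The
# helper name `_example_pserf` and the tensor shape are illustrative only.
def _example_pserf() -> torch.Tensor:
    activation = PSerf()
    x = torch.randn(4, 8)   # arbitrary batch of 4 samples with 8 features
    return activation(x)    # element-wise activation, output shape stays (4, 8)
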
class SAU(nn.Module):
    """SAU activation function from Biswas et al.

    Parameters
    ----------
    alpha : float, optional
        The alpha parameter, by default 0.15.
    n : int, optional
        The n parameter, by default 20000.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2021. SAU: Smooth
    activation function using convolution with approximate identities.
    arXiv:2109.13210 [cs].
    """

    def __init__(self, alpha=0.15, n=20000):
        super(SAU, self).__init__()
        self.alpha = nn.Parameter(torch.tensor(alpha), requires_grad=True)
        self.n = torch.tensor(n)

    def forward(self, x) -> torch.Tensor:
        return (
            torch.sqrt(torch.tensor(2 / torch.pi))
            * torch.exp(-(torch.pow(self.n, 2) * torch.pow(x, 2)) / 2)
            / (2 * self.n)
            + (1 + self.alpha) / 2 * x
            + (1 - self.alpha) / 2 * x * torch.erf(self.n * x / torch.sqrt(torch.tensor(2)))
        )
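
# Usage sketch (not part of the original module): with the default n = 20000 the
# Gaussian term is negligible, so SAU behaves like a smooth leaky ReLU, returning
# roughly x for positive inputs and roughly alpha * x for negative inputs. The
# helper name `_example_sau` is illustrative only.
def _example_sau() -> torch.Tensor:
    activation = SAU(alpha=0.15)
    x = torch.linspace(-3.0, 3.0, steps=7)
    return activation(x)    # approximately [-0.45, -0.3, -0.15, 0.0, 1.0, 2.0, 3.0]
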
class SMU(nn.Module):
    """SMU activation function from Biswas et al.

    Parameters
    ----------
    alpha : float, optional
        The alpha parameter, by default 0.01.
    mu : float, optional
        The mu parameter, by default 2.5.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2022. SMU: smooth
    activation function for deep networks using smoothing maximum technique.
    arXiv:2111.04682 [cs].

    Notes
    -----
    This version also makes alpha trainable.
    """

    def __init__(self, alpha=0.01, mu=2.5):
        super(SMU, self).__init__()
        self.alpha = nn.Parameter(torch.tensor(alpha), requires_grad=True)
        self.mu = nn.Parameter(torch.tensor(mu), requires_grad=True)

    def forward(self, x) -> torch.Tensor:
        return (
            (1 + self.alpha) * x
            + (1 - self.alpha) * x * torch.erf(self.mu * (1 - self.alpha) * x)
        ) / 2
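
# Usage sketch (not part of the original module): both alpha and mu are
# registered as nn.Parameters, so an optimiser updates them together with the
# rest of the network. The model below and the helper name `_example_smu` are
# illustrative only.
def _example_smu() -> torch.Tensor:
    model = nn.Sequential(nn.Linear(8, 8), SMU())
    x = torch.randn(2, 8)
    return model(x)    # shape (2, 8); model.parameters() includes SMU's alpha and mu
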
class SMU_old(nn.Module):
    """Older version of the SMU activation function from Biswas et al.

    Warnings
    --------
    This is an older version of the SMU activation function and should not be
    used.

    Parameters
    ----------
    alpha : float, optional
        The alpha parameter, by default 0.01.
    mu : float, optional
        The mu parameter, by default 2.5.

    References
    ----------
    Biswas, K., Kumar, S., Banerjee, S., Pandey, A.K., 2022. SMU: smooth
    activation function for deep networks using smoothing maximum technique.
    arXiv:2111.04682 [cs].
    """

    def __init__(self, alpha=0.01, mu=2.5):
        super(SMU_old, self).__init__()
        self.alpha = torch.tensor(alpha)
        self.mu = nn.Parameter(torch.tensor(mu), requires_grad=True)

    def forward(self, x) -> torch.Tensor:
        return (
            (1 + self.alpha) * x
            + (1 - self.alpha) * x * torch.erf(self.mu * (1 - self.alpha) * x)
        ) / 2
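
# Comparison sketch (not part of the original module): the forward pass of
# SMU_old is identical to SMU; the only difference is that alpha is a plain
# tensor here, so only mu is trainable. The helper name
# `_compare_smu_parameter_counts` is illustrative only.
def _compare_smu_parameter_counts():
    n_new = sum(1 for _ in SMU().parameters())      # 2 trainable scalars: alpha and mu
    n_old = sum(1 for _ in SMU_old().parameters())  # 1 trainable scalar: mu only
    return n_new, n_old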