# Source code for nets.autograd.parameter
"""
A parameter is a trainable tensor.
"""
import numpy as np
from nets.tensor import Tensor
class Parameter(Tensor):
    r"""
    Instantiate a parameter, made of trainable data. A trainable data is a value that will be updated during
    the back-propagation, usually it refers to ``weights`` and ``biases`` of a layer.

    A ``Parameter`` is always built with ``requires_grad=True``.
    """

    def __init__(self, data=None, shape=None):
        r"""Build a ``Parameter`` from explicit ``data`` or from a ``shape``.

        Args:
            data (array-like, optional): values of the parameter. When given, ``shape`` is ignored.
            shape (tuple, optional): shape used to draw the values from a standard normal
                distribution when ``data`` is ``None``.

        Raises:
            ValueError: if both ``data`` and ``shape`` are ``None``.
        """
        # First check if there is enough information to build the Parameter
        if data is None and shape is None:
            raise ValueError('you must specify the shape or data to create a Parameter')
        # If there is no data, draw it from a standard normal distribution
        # (``np.random.randn`` is N(0, 1), not uniform).
        if shape is not None and data is None:
            data = np.random.randn(*shape)
        # Create the Tensor
        super().__init__(data, requires_grad=True)

    @classmethod
    def scaled_weight(cls, input_dim, output_dim):
        r"""Scaled initialization from He et al.

        Values are drawn from a normal distribution with mean ``0`` and variance ``2 / input_dim``.

        Args:
            input_dim (int): dimension of the input layer
            output_dim (int): dimension of the output layer

        Returns:
            Parameter: parameter of shape ``(input_dim, output_dim)``
        """
        mu = 0
        var = 2 / input_dim
        sigma = np.sqrt(var)
        weight_shape = (input_dim, output_dim)
        data = np.random.normal(loc=mu, scale=sigma, size=weight_shape)
        # Use ``cls`` so that subclasses get an instance of their own type.
        return cls(data=data)

    @classmethod
    def zeros(cls, shape):
        r"""Generate a zero-Parameter

        Args:
            shape (tuple): shape of the ``Parameter``

        Returns:
            Parameter
        """
        return cls(data=np.zeros(shape))

    @classmethod
    def normal(cls, shape, mu=0, sigma=1):
        r"""Generate a ``Parameter`` following a normal distribution centered at ``mu`` with a standard deviation of
        ``sigma``.

        Args:
            shape (tuple): shape of the ``Parameter``
            mu (scalar): mean of the normal distribution. Default is ``0``.
            sigma (scalar): standard deviation of the normal distribution. Default is ``1``.

        Returns:
            Parameter
        """
        data = np.random.normal(mu, sigma, shape)
        return cls(data=data)

    @classmethod
    def orthogonal(cls, shape):
        r"""Initializes weight parameters orthogonally.

        From the `exercise 02456 from DTU course <https://github.com/DeepLearningDTU/02456-deep-learning-with-PyTorch>`_.

        .. note::
            Refer to `this paper <https://arxiv.org/abs/1312.6120>`_ for an explanation of this initialization.

        Shapes with more than two dimensions are handled by flattening the trailing dimensions into a
        ``(shape[0], prod(shape[1:]))`` matrix, orthogonalizing it, and reshaping the result back to ``shape``.

        Args:
            shape (tuple): shape with at least 2 dimensions (weight matrix)

        Returns:
            Parameter: parameter of shape ``shape``

        Raises:
            ValueError: if ``shape`` has fewer than 2 dimensions.
        """
        if len(shape) < 2:
            raise ValueError("only parameters with 2 or more dimensions are supported.")
        # Collapse every trailing dimension into the columns so that shapes with
        # more than two dimensions are supported, as the check above promises.
        rows = shape[0]
        cols = int(np.prod(shape[1:]))
        data = np.random.randn(rows, cols)
        # Work on a matrix with at least as many rows as columns, so that the
        # reduced QR factorization returns a Q of the same shape.
        if rows < cols:
            data = data.T
        # Compute QR factorization
        q, r = np.linalg.qr(data)
        # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
        diag = np.diag(r, 0)
        sign = np.sign(diag)
        q *= sign
        # Undo the transposition to get back a (rows, cols) matrix
        if rows < cols:
            q = q.T
        # Restore the requested shape (no-op for a 2-dimensional shape)
        data = q.reshape(shape)
        return cls(data=data)