Module torchtt.nn
Implements a basic TT layer for constructing deep TT networks.
Expand source code
"""
Implements a basic TT layer for constructing deep TT networks.
"""
import torch as tn
import torch.nn as nn
import torchtt
from ._aux_ops import dense_matvec
from .errors import *
class LinearLayerTT(nn.Module):
    """
    Basic class for TT layers. See [Tensorizing Neural Networks](https://arxiv.org/abs/1509.06569) for a detailed description.
    It can be used similarly to any layer from `torch.nn`.
    The output of the layer is \(\\mathcal{LTT}(\\mathsf{x}) =\\mathsf{Wx}+\\mathsf{b}\), where the tensor operator \(\\mathsf{W}\) is represented in the TT format (with a fixed prescribed rank).
    """
    def __init__(self, size_in, size_out, rank, dtype = tn.float32, initializer = 'He'):
        """
        The constructor of the TT layer class takes as arguments the input shape and the output shape for the layer, the rank as well as the dtype and the initializer.
        Possible initializers are:

        * `'He'` for He Normal (He-et-al) initialization.
        * `'Glo'` for Glorot initialization.

        Args:
            size_in (list[int]): the size of the input tensor.
            size_out (list[int]): the size of the output tensor.
            rank (list[int]): the rank of the tensor operator.
            dtype (torch.dtype, optional): the dtype of the layer. Defaults to torch.float32.
            initializer (str, optional): the initializer for the weights and biases. Defaults to 'He'.

        Raises:
            InvalidArguments: Initializer not defined. Possible choices are 'He' and 'Glo'.
        """
        super().__init__()
        self.size_in, self.size_out, self.rank = size_in, size_out, rank

        # Total fan-in / fan-out of the layer (products of the mode sizes),
        # used by both variance formulas below.
        fan_in = tn.prod(tn.tensor(list(size_in)))
        fan_out = tn.prod(tn.tensor(list(size_out)))

        # Only the target variance differs between the initializers; the rest
        # of the construction is shared (the original duplicated it per branch).
        if initializer == 'He':
            var = 2 / fan_in
        elif initializer == 'Glo':
            var = 1 / (fan_in + fan_out)
        else:
            raise InvalidArguments('Initializer not defined. Possible choices are \'He\' and \'Glo\'.')

        # Random TT operator with modes (out_k, in_k) and the prescribed rank;
        # each TT core becomes a trainable parameter of the layer.
        t = torchtt.randn([(s2, s1) for s1, s2 in zip(size_in, size_out)], rank, dtype=dtype, var=var)
        self.cores = nn.ParameterList([nn.Parameter(c) for c in t.cores])

        # Bias is a dense tensor of the output shape, initialized to zero.
        self.bias = nn.Parameter(tn.zeros(size_out, dtype=dtype))

    @tn.jit.export
    def forward(self, x):
        """
        Computes the output of the layer for the given input.

        Supports trailing dimension broadcasting. If the input of the layer is set to `[M1,...,Md]` and a tensor of shape `[...,M1,...,Md]` is provided then the multiplication is performed along the last d dimensions.

        Args:
            x (torch.tensor): input of the layer.

        Returns:
            torch.tensor: output of the layer.
        """
        # Append a trailing rank-1 axis so the TT rank dimension can be
        # threaded through the successive core contractions.
        result = tn.unsqueeze(x, -1)
        d = len(self.size_in)
        D = len(x.shape)
        # Contract each core over the current leading input mode (axis D-d)
        # and the running TT-rank axis (axis -1).
        for c in self.cores:
            result = tn.tensordot(result, c, ([D - d, -1], [2, 0]))
        # Drop the final rank-1 axis left over from the last core.
        result = tn.squeeze(result, -1)
        return result + self.bias
Classes
class LinearLayerTT (size_in, size_out, rank, dtype=torch.float32, initializer='He')
-
Basic class for TT layers. See Tensorizing Neural Networks for a detailed description. It can be used similarly to any layer from
torch.nn
. The output of the layer is \mathcal{LTT}(\mathsf{x}) =\mathsf{Wx}+\mathsf{b}, where the tensor operator \mathsf{W} is represented in the TT format (with a fixed prescribed rank).The constructor of the TT layer class takes as arguments the input shape and the output shape for the layer, the rank as well as the dtype and the initializer.
Possible initializers are:
'He'
for He Normal (He-et-al) initialization.'Glo'
for Glorot initialization.
Args
size_in
:list[int]
- the size of the input tensor.
size_out
:list[int]
- the size of the output tensor.
rank
:list[int]
- the rank of the tensor operator.
dtype
:torch.dtype
, optional- the dtype of the layer. Defaults to torch.float32.
initializer
:str
, optional- the initializer for the weights and biases. Defaults to 'He'.
Raises
InvalidArguments
- Initializer not defined. Possible choices are 'He' and 'Glo'.
Expand source code
class LinearLayerTT(nn.Module): """ Basic class for TT layers. See [Tensorizing Neural Networks](https://arxiv.org/abs/1509.06569) for a detailed description. It can be used similarily to any layer from `torch.nn`. The output of the layer is \(\\mathcal{LTT}(\\mathsf{x}) =\\mathsf{Wx}+\\mathsf{b}\), where the tensor operator \(\\mathsf{W}\) is represented in the TT format (with a fixed prescribed rank). """ def __init__(self, size_in, size_out, rank, dtype = tn.float32, initializer = 'He'): """ The constructor of the TT layer class takes as arguments the input shape and the output shape for the layer, the rank as well as the dtype and the initializer. Possible initializers are: * `'He'` for He Normal (He-et-al) initialization. * `'Glo'` for Glorot initialization. Args: size_in (list[int]): the size of the input tensor. size_out (list[int]): the size of the output tensor. rank (list[int]): the rank of the tensor operator. dtype (torch.dtype, optional): the dtype of the layer. Defaults to torch.float32. initializer (str, optional): the initializer for the weights and biases. Defaults to 'He'. Raises: InvalidArguments: Initializer not defined. Possible choices are 'He' and 'Glo'. 
""" super().__init__() self.size_in, self.size_out, self.rank = size_in, size_out, rank if initializer=='He': t = torchtt.randn([(s2,s1) for s1,s2 in zip(size_in,size_out)], rank, dtype=dtype, var = 2/tn.prod(tn.tensor([s1 for s1 in size_in]))) #self.cores = [nn.Parameter(tn.Tensor(c.clone())) for c in t.cores] self.cores = nn.ParameterList([nn.Parameter(c) for c in t.cores]) #bias bias = tn.zeros(size_out, dtype = dtype) self.bias = nn.Parameter(bias) elif initializer=='Glo': t = torchtt.randn([(s2,s1) for s1,s2 in zip(size_in,size_out)], rank, dtype=dtype, var = 1/(tn.prod(tn.tensor([s1 for s1 in size_in]))+tn.prod(tn.tensor([s1 for s1 in size_out]))) ) #self.cores = [nn.Parameter(tn.Tensor(c.clone())) for c in t.cores] self.cores = nn.ParameterList([nn.Parameter(c) for c in t.cores]) #bias bias = tn.zeros(size_out, dtype = dtype) self.bias = nn.Parameter(bias) else: raise InvalidArguments('Initializer not defined. Possible choices are \'He\' and \'Glo\'.') @tn.jit.export def forward(self, x): """ Computes the output of the layer for the given input. Supports trailing dimensiond broadcasting. If the input of the layer is set to `[M1,...,Md]` and a tensor od shape `[...,M1,...,Md]` is provided then the multiplication is performed along the last d dimensions. Args: x (torch.tensor): input of the layer. Returns: torch.tensor: output of the layer. """ # return dense_matvec(self.cores,x) + self.bias result = tn.unsqueeze(x,-1) d = len(self.size_in) D = len(x.shape) for c in self.cores: result = tn.tensordot(result,c,([D-d,-1],[2,0])) result = tn.squeeze(result,-1) return result+self.bias
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x) ‑> Callable[..., Any]
-
Computes the output of the layer for the given input.
Supports trailing dimension broadcasting. If the input of the layer is set to
[M1,…,Md]
and a tensor of shape[…,M1,…,Md]
is provided then the multiplication is performed along the last d dimensions.Args
x
:torch.tensor
- input of the layer.
Returns
torch.tensor
- output of the layer.
Expand source code
@tn.jit.export def forward(self, x): """ Computes the output of the layer for the given input. Supports trailing dimensiond broadcasting. If the input of the layer is set to `[M1,...,Md]` and a tensor od shape `[...,M1,...,Md]` is provided then the multiplication is performed along the last d dimensions. Args: x (torch.tensor): input of the layer. Returns: torch.tensor: output of the layer. """ # return dense_matvec(self.cores,x) + self.bias result = tn.unsqueeze(x,-1) d = len(self.size_in) D = len(x.shape) for c in self.cores: result = tn.tensordot(result,c,([D-d,-1],[2,0])) result = tn.squeeze(result,-1) return result+self.bias