# Source code for packnet_sfm.networks.layers.packnet.layers01

# Copyright 2020 Toyota Research Institute.  All rights reserved.

import torch
import torch.nn as nn
from functools import partial
import torch.nn.functional as F

########################################################################################################################

class Conv2D(nn.Module):
    """
    2D convolution with GroupNorm and ELU

    Parameters
    ----------
    in_channels : int
        Number of input channels
    out_channels : int
        Number of output channels
    kernel_size : int
        Kernel size
    stride : int
        Stride
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.conv_base = nn.Conv2d(
            in_channels, out_channels, kernel_size=kernel_size, stride=stride)
        self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0)
        self.normalize = torch.nn.GroupNorm(16, out_channels)
        self.activ = nn.ELU(inplace=True)

    def forward(self, x):
        """Runs the Conv2D layer."""
        x = self.conv_base(self.pad(x))
        return self.activ(self.normalize(x))
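
# Usage sketch (illustrative only; the _example_* helper name and the shapes
# below are assumptions chosen for demonstration). With stride 1 the
# ConstantPad2d of kernel_size // 2 keeps H and W unchanged; out_channels
# must be divisible by 16 for GroupNorm(16, out_channels) to be valid.
def _example_conv2d():
    conv = Conv2D(in_channels=3, out_channels=32, kernel_size=3, stride=1)
    x = torch.rand(2, 3, 64, 64)         # [B,C,H,W]
    y = conv(x)
    assert y.shape == (2, 32, 64, 64)    # spatial size preserved by the padding
    return y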

class ResidualConv(nn.Module):
    """2D Convolutional residual block with GroupNorm and ELU"""
    def __init__(self, in_channels, out_channels, stride, dropout=None):
        """
        Initializes a ResidualConv object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        stride : int
            Stride
        dropout : float
            Dropout value
        """
        super().__init__()
        self.conv1 = Conv2D(in_channels, out_channels, 3, stride)
        self.conv2 = Conv2D(out_channels, out_channels, 3, 1)
        self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        self.normalize = torch.nn.GroupNorm(16, out_channels)
        self.activ = nn.ELU(inplace=True)
        if dropout:
            self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout))

    def forward(self, x):
        """Runs the ResidualConv layer."""
        x_out = self.conv1(x)
        x_out = self.conv2(x_out)
        shortcut = self.conv3(x)
        return self.activ(self.normalize(x_out + shortcut))

def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None):
    """
    Returns a ResidualBlock with various ResidualConv layers.

    Parameters
    ----------
    in_channels : int
        Number of input channels
    out_channels : int
        Number of output channels
    num_blocks : int
        Number of residual blocks
    stride : int
        Stride
    dropout : float
        Dropout value
    """
    layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout)]
    for i in range(1, num_blocks):
        layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout))
    return nn.Sequential(*layers)
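
# Usage sketch (illustrative only; names and shapes are assumptions). A
# ResidualBlock is an nn.Sequential of ResidualConv layers in which only the
# first one applies the stride; channel counts are chosen as multiples of 16
# so that GroupNorm(16, ...) is valid.
def _example_residual_block():
    block = ResidualBlock(in_channels=32, out_channels=64,
                          num_blocks=2, stride=2, dropout=0.1)
    x = torch.rand(2, 32, 64, 64)
    y = block(x)
    assert y.shape == (2, 64, 32, 32)    # stride 2 halves H and W once
    return y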

class InvDepth(nn.Module):
    """Inverse depth layer"""
    def __init__(self, in_channels, out_channels=1, min_depth=0.5):
        """
        Initializes an InvDepth object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        min_depth : float
            Minimum depth value to calculate
        """
        super().__init__()
        self.min_depth = min_depth
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1)
        self.pad = nn.ConstantPad2d([1] * 4, value=0)
        self.activ = nn.Sigmoid()

    def forward(self, x):
        """Runs the InvDepth layer."""
        x = self.conv1(self.pad(x))
        return self.activ(x) / self.min_depth
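
# Usage sketch (illustrative only; names and shapes are assumptions). Since
# the sigmoid output lies in (0, 1), dividing by min_depth bounds the
# predicted inverse depth in (0, 1 / min_depth); with the default
# min_depth=0.5 the maximum inverse depth is 2.0.
def _example_inv_depth():
    head = InvDepth(in_channels=32)
    x = torch.rand(2, 32, 64, 64)
    inv_depth = head(x)                  # [B,1,H,W]
    assert inv_depth.shape == (2, 1, 64, 64)
    assert inv_depth.max() <= 1.0 / head.min_depth
    return inv_depth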
########################################################################################################################
def packing(x, r=2):
    """
    Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by
    concatenating neighboring spatial pixels as extra channels.
    It is the inverse of nn.PixelShuffle (if you apply both sequentially
    you should get the original tensor back).

    Parameters
    ----------
    x : torch.Tensor [B,C,H,W]
        Input tensor
    r : int
        Packing ratio

    Returns
    -------
    out : torch.Tensor [B,(r^2)C,H/r,W/r]
        Packed tensor
    """
    b, c, h, w = x.shape
    out_channel = c * (r ** 2)
    out_h, out_w = h // r, w // r
    x = x.contiguous().view(b, c, out_h, r, out_w, r)
    return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w)
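
# Usage sketch (illustrative only; shapes are assumptions, and H and W must be
# divisible by r). It checks the round-trip property stated in the docstring:
# packing followed by nn.PixelShuffle recovers the original tensor.
def _example_packing_roundtrip():
    x = torch.rand(2, 8, 16, 16)
    packed = packing(x, r=2)             # [2, 32, 8, 8]
    assert packed.shape == (2, 32, 8, 8)
    restored = nn.PixelShuffle(2)(packed)
    assert torch.allclose(x, restored)   # back to the original [2, 8, 16, 16]
    return packed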
########################################################################################################################
class PackLayerConv2d(nn.Module):
    """
    Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it
    into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r].
    """
    def __init__(self, in_channels, kernel_size, r=2):
        """
        Initializes a PackLayerConv2d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        """
        super().__init__()
        self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1)
        self.pack = partial(packing, r=r)

    def forward(self, x):
        """Runs the PackLayerConv2d layer."""
        x = self.pack(x)
        x = self.conv(x)
        return x
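
# Usage sketch (illustrative only; names and shapes are assumptions). For r=2
# the layer halves the spatial resolution while keeping the channel count: the
# r^2-fold channel increase from packing is folded back by the convolution.
# in_channels must be a multiple of 16 for the GroupNorm inside Conv2D.
def _example_pack_conv2d():
    layer = PackLayerConv2d(in_channels=16, kernel_size=3, r=2)
    x = torch.rand(2, 16, 64, 64)
    y = layer(x)
    assert y.shape == (2, 16, 32, 32)
    return y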

class UnpackLayerConv2d(nn.Module):
    """
    Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves
    it to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW].
    """
    def __init__(self, in_channels, out_channels, kernel_size, r=2):
        """
        Initializes a UnpackLayerConv2d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        """
        super().__init__()
        self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1)
        self.unpack = nn.PixelShuffle(r)

    def forward(self, x):
        """Runs the UnpackLayerConv2d layer."""
        x = self.conv(x)
        x = self.unpack(x)
        return x
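
# Usage sketch (illustrative only; names and shapes are assumptions). This is
# the opposite of PackLayerConv2d: the convolution expands channels by r^2,
# and nn.PixelShuffle trades them for an r-fold spatial upsampling.
def _example_unpack_conv2d():
    layer = UnpackLayerConv2d(in_channels=16, out_channels=16, kernel_size=3, r=2)
    x = torch.rand(2, 16, 32, 32)
    y = layer(x)
    assert y.shape == (2, 16, 64, 64)
    return y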
########################################################################################################################
class PackLayerConv3d(nn.Module):
    """
    Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it
    into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r].
    """
    def __init__(self, in_channels, kernel_size, r=2, d=8):
        """
        Initializes a PackLayerConv3d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        d : int
            Number of 3D features
        """
        super().__init__()
        self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1)
        self.pack = partial(packing, r=r)
        self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))

    def forward(self, x):
        """Runs the PackLayerConv3d layer."""
        x = self.pack(x)
        x = x.unsqueeze(1)
        x = self.conv3d(x)
        b, c, d, h, w = x.shape
        x = x.view(b, c * d, h, w)
        x = self.conv(x)
        return x
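
# Usage sketch (illustrative only; names and shapes are assumptions). The 3D
# convolution treats the packed channels as a depth dimension with a single
# input feature, producing d feature volumes that are flattened back into
# channels before the final 2D convolution. The overall shape change is the
# same as for PackLayerConv2d: [B,C,H,W] -> [B,C,H/r,W/r].
def _example_pack_conv3d():
    layer = PackLayerConv3d(in_channels=16, kernel_size=3, r=2, d=8)
    x = torch.rand(2, 16, 64, 64)
    y = layer(x)
    assert y.shape == (2, 16, 32, 32)
    return y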

class UnpackLayerConv3d(nn.Module):
    """
    Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves
    it to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW].
    """
    def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8):
        """
        Initializes a UnpackLayerConv3d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        d : int
            Number of 3D features
        """
        super().__init__()
        self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1)
        self.unpack = nn.PixelShuffle(r)
        self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))

    def forward(self, x):
        """Runs the UnpackLayerConv3d layer."""
        x = self.conv(x)
        x = x.unsqueeze(1)
        x = self.conv3d(x)
        b, c, d, h, w = x.shape
        x = x.view(b, c * d, h, w)
        x = self.unpack(x)
        return x
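
# Usage sketch (illustrative only; names and shapes are assumptions). The 2D
# convolution produces out_channels * r^2 // d channels, the 3D convolution
# multiplies them back by d, and nn.PixelShuffle converts the r^2 factor into
# an r-fold spatial upsampling; out_channels * r^2 // d must still be a
# multiple of 16 for the GroupNorm inside Conv2D.
def _example_unpack_conv3d():
    layer = UnpackLayerConv3d(in_channels=16, out_channels=32,
                              kernel_size=3, r=2, d=8)
    x = torch.rand(2, 16, 32, 32)
    y = layer(x)
    assert y.shape == (2, 32, 64, 64)
    return y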
########################################################################################################################