# Copyright 2020 Toyota Research Institute. All rights reserved.
import torch
import torch.nn as nn
from functools import partial
import torch.nn.functional as F
########################################################################################################################
class Conv2D(nn.Module):
    """
    2D convolution with GroupNorm and ELU

    Parameters
    ----------
    in_channels : int
        Number of input channels
    out_channels : int
        Number of output channels
    kernel_size : int
        Kernel size
    stride : int
        Stride
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.conv_base = nn.Conv2d(
            in_channels, out_channels, kernel_size=kernel_size, stride=stride)
        self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0)
        self.normalize = torch.nn.GroupNorm(16, out_channels)
        self.activ = nn.ELU(inplace=True)

    def forward(self, x):
        """Runs the Conv2D layer."""
        x = self.conv_base(self.pad(x))
        return self.activ(self.normalize(x))
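
# Minimal usage sketch (not part of the original module): shows the shape behavior of
# Conv2D with illustrative sizes. Note that GroupNorm(16, out_channels) requires
# out_channels to be divisible by 16.
def _example_conv2d():
    layer = Conv2D(in_channels=3, out_channels=32, kernel_size=3, stride=2)
    x = torch.randn(2, 3, 64, 64)   # [B,C,H,W]
    y = layer(x)                    # padding of kernel_size // 2, then strided convolution
    assert y.shape == (2, 32, 32, 32)
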
class ResidualConv(nn.Module):
    """2D Convolutional residual block with GroupNorm and ELU"""
    def __init__(self, in_channels, out_channels, stride, dropout=None):
        """
        Initializes a ResidualConv object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        stride : int
            Stride
        dropout : float
            Dropout value
        """
        super().__init__()
        self.conv1 = Conv2D(in_channels, out_channels, 3, stride)
        self.conv2 = Conv2D(out_channels, out_channels, 3, 1)
        self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        self.normalize = torch.nn.GroupNorm(16, out_channels)
        self.activ = nn.ELU(inplace=True)

        if dropout:
            self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout))

    def forward(self, x):
        """Runs the ResidualConv layer."""
        x_out = self.conv1(x)
        x_out = self.conv2(x_out)
        shortcut = self.conv3(x)
        return self.activ(self.normalize(x_out + shortcut))
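
# Illustrative sketch (not in the original file): a single ResidualConv halves the
# spatial resolution when stride=2 and matches channels through its 1x1 shortcut conv.
def _example_residual_conv():
    block = ResidualConv(in_channels=32, out_channels=64, stride=2)
    x = torch.randn(2, 32, 64, 64)
    y = block(x)
    assert y.shape == (2, 64, 32, 32)
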
def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None):
    """
    Returns an nn.Sequential of `num_blocks` ResidualConv layers; the first layer
    uses the given stride and the remaining layers use stride 1.

    Parameters
    ----------
    in_channels : int
        Number of input channels
    out_channels : int
        Number of output channels
    num_blocks : int
        Number of residual blocks
    stride : int
        Stride
    dropout : float
        Dropout value
    """
    layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout)]
    for i in range(1, num_blocks):
        layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout))
    return nn.Sequential(*layers)
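
# Illustrative sketch (not in the original file): stacking two ResidualConv layers via
# ResidualBlock; only the first layer downsamples.
def _example_residual_block():
    block = ResidualBlock(in_channels=32, out_channels=64, num_blocks=2, stride=2)
    x = torch.randn(2, 32, 64, 64)
    y = block(x)
    assert y.shape == (2, 64, 32, 32)
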
class InvDepth(nn.Module):
    """Inverse depth layer"""
    def __init__(self, in_channels, out_channels=1, min_depth=0.5):
        """
        Initializes an InvDepth object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        min_depth : float
            Minimum depth value to calculate
        """
        super().__init__()
        self.min_depth = min_depth
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1)
        self.pad = nn.ConstantPad2d([1] * 4, value=0)
        self.activ = nn.Sigmoid()

    def forward(self, x):
        """Runs the InvDepth layer."""
        x = self.conv1(self.pad(x))
        return self.activ(x) / self.min_depth
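
# Illustrative sketch (not in the original file): InvDepth maps features to a single-channel
# inverse depth map bounded in (0, 1 / min_depth), i.e. the implied depth is at least min_depth.
def _example_inv_depth():
    layer = InvDepth(in_channels=64, min_depth=0.5)
    x = torch.randn(2, 64, 32, 32)
    inv_depth = layer(x)
    assert inv_depth.shape == (2, 1, 32, 32)
    assert inv_depth.max().item() < 1.0 / 0.5
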
########################################################################################################################
def packing(x, r=2):
    """
    Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by concatenating
    neighbor spatial pixels as extra channels. It is the inverse of nn.PixelShuffle
    (if you apply both sequentially you should get the same tensor)

    Parameters
    ----------
    x : torch.Tensor [B,C,H,W]
        Input tensor
    r : int
        Packing ratio

    Returns
    -------
    out : torch.Tensor [B,(r^2)C,H/r,W/r]
        Packed tensor
    """
    b, c, h, w = x.shape
    out_channel = c * (r ** 2)
    out_h, out_w = h // r, w // r
    x = x.contiguous().view(b, c, out_h, r, out_w, r)
    return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w)
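
# Illustrative sketch (not in the original file): packing trades spatial resolution for
# channels, and nn.PixelShuffle(r) recovers the original tensor, as stated in the docstring.
def _example_packing():
    x = torch.randn(2, 3, 64, 64)
    packed = packing(x, r=2)
    assert packed.shape == (2, 12, 32, 32)
    assert torch.allclose(nn.PixelShuffle(2)(packed), x)
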
########################################################################################################################
class PackLayerConv2d(nn.Module):
    """
    Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it
    into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r].
    """
    def __init__(self, in_channels, kernel_size, r=2):
        """
        Initializes a PackLayerConv2d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        """
        super().__init__()
        self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1)
        self.pack = partial(packing, r=r)

    def forward(self, x):
        """Runs the PackLayerConv2d layer."""
        x = self.pack(x)
        x = self.conv(x)
        return x
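
# Illustrative sketch (not in the original file): PackLayerConv2d keeps the channel count
# while halving the spatial resolution (for the default r=2).
def _example_pack_layer_conv2d():
    layer = PackLayerConv2d(in_channels=32, kernel_size=3)
    x = torch.randn(2, 32, 64, 64)
    y = layer(x)
    assert y.shape == (2, 32, 32, 32)
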
class UnpackLayerConv2d(nn.Module):
    """
    Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves it
    to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW].
    """
    def __init__(self, in_channels, out_channels, kernel_size, r=2):
        """
        Initializes an UnpackLayerConv2d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        """
        super().__init__()
        self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1)
        self.unpack = nn.PixelShuffle(r)

    def forward(self, x):
        """Runs the UnpackLayerConv2d layer."""
        x = self.conv(x)
        x = self.unpack(x)
        return x
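
# Illustrative sketch (not in the original file): UnpackLayerConv2d doubles the spatial
# resolution (for the default r=2) while mapping in_channels to out_channels.
def _example_unpack_layer_conv2d():
    layer = UnpackLayerConv2d(in_channels=64, out_channels=32, kernel_size=3)
    x = torch.randn(2, 64, 16, 16)
    y = layer(x)
    assert y.shape == (2, 32, 32, 32)
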
########################################################################################################################
class PackLayerConv3d(nn.Module):
    """
    Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it
    into [B,(r^2)C,H/r,W/r], expands features with a 3D convolution, and then
    convolves the result to produce [B,C,H/r,W/r].
    """
    def __init__(self, in_channels, kernel_size, r=2, d=8):
        """
        Initializes a PackLayerConv3d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        d : int
            Number of 3D features
        """
        super().__init__()
        self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1)
        self.pack = partial(packing, r=r)
        self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))

    def forward(self, x):
        """Runs the PackLayerConv3d layer."""
        x = self.pack(x)
        x = x.unsqueeze(1)
        x = self.conv3d(x)
        b, c, d, h, w = x.shape
        x = x.view(b, c * d, h, w)
        x = self.conv(x)
        return x
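
# Illustrative sketch (not in the original file): PackLayerConv3d matches PackLayerConv2d
# at the interface level (channels preserved, H and W halved for r=2); the d=8 3D features
# are folded back into channels before the final 2D convolution.
def _example_pack_layer_conv3d():
    layer = PackLayerConv3d(in_channels=32, kernel_size=3)  # r=2, d=8
    x = torch.randn(2, 32, 64, 64)
    y = layer(x)
    assert y.shape == (2, 32, 32, 32)
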
class UnpackLayerConv3d(nn.Module):
    """
    Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves it
    to produce [B,(r^2)C/d,H,W], expands features with a 3D convolution back to
    [B,(r^2)C,H,W], and then unpacks it to produce [B,C,rH,rW].
    """
    def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8):
        """
        Initializes an UnpackLayerConv3d object.

        Parameters
        ----------
        in_channels : int
            Number of input channels
        out_channels : int
            Number of output channels
        kernel_size : int
            Kernel size
        r : int
            Packing ratio
        d : int
            Number of 3D features
        """
        super().__init__()
        self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1)
        self.unpack = nn.PixelShuffle(r)
        self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))

    def forward(self, x):
        """Runs the UnpackLayerConv3d layer."""
        x = self.conv(x)
        x = x.unsqueeze(1)
        x = self.conv3d(x)
        b, c, d, h, w = x.shape
        x = x.view(b, c * d, h, w)
        x = self.unpack(x)
        return x
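
# Illustrative sketch (not in the original file): UnpackLayerConv3d upsamples by r=2 while
# mapping in_channels to out_channels. Note that out_channels * r**2 must be divisible by d,
# and the intermediate channel count must remain compatible with GroupNorm(16, ...) in Conv2D.
def _example_unpack_layer_conv3d():
    layer = UnpackLayerConv3d(in_channels=64, out_channels=32, kernel_size=3)  # r=2, d=8
    x = torch.randn(2, 64, 16, 16)
    y = layer(x)
    assert y.shape == (2, 32, 32, 32)
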
########################################################################################################################