ML/Pytorch/GANs/ProGAN/model.py

"""
Implementation of ProGAN generator and discriminator with the key
attributions from the paper. We have tried to make the implementation
compact but a goal is also to keep it readable and understandable.
Specifically the key points implemented are:

1) Progressive growing (of model and layers)
2) Minibatch std on Discriminator
3) Normalization with PixelNorm
4) Equalized Learning Rate (here I cheated and only did it on Conv layers)
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from math import log2

"""
Factors is used in Discrmininator and Generator for how much
the channels should be multiplied and expanded for each layer,
so specifically the first 5 layers the channels stay the same,
whereas when we increase the img_size (towards the later layers)
we decrease the number of chanels by 1/2, 1/4, etc.
"""
factors = [1, 1, 1, 1, 1/2, 1/4, 1/4, 1/8, 1/16]


class WSConv2d(nn.Module):
    """
    Weight scaled Conv2d (Equalized Learning Rate)
    Note that input is multiplied rather than changing weights
    this will have the same result.

    Inspired by:
    https://github.com/nvnbny/progressive_growing_of_gans/blob/master/modelUtils.py
    """

    def __init__(
        self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, gain=2
    ):
        super(WSConv2d, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride, padding
        )
        self.scale = (gain / (self.conv.weight[0].numel())) ** 0.5

        # initialize conv layer
        nn.init.normal_(self.conv.weight)
        nn.init.zeros_(self.conv.bias)

    def forward(self, x):
        return self.conv(x * self.scale)


class PixelNorm(nn.Module):
    def __init__(self):
        super(PixelNorm, self).__init__()
        self.epsilon = 1e-8

    def forward(self, x):
        return x / torch.sqrt(
            torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon
        )


class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, use_pixelnorm=True):
        super(ConvBlock, self).__init__()
        self.use_pn = use_pixelnorm
        self.conv1 = WSConv2d(in_channels, out_channels)
        self.conv2 = WSConv2d(out_channels, out_channels)
        self.leaky = nn.LeakyReLU(0.2)
        self.pn = PixelNorm()

    def forward(self, x):
        x = self.leaky(self.conv1(x))
        x = self.pn(x) if self.use_pn else x
        x = self.leaky(self.conv2(x))
        x = self.pn(x) if self.use_pn else x
        return x


class Generator(nn.Module):
    def __init__(self, z_dim, in_channels, img_size, img_channels=3):
        super(Generator, self).__init__()
        self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])

        # initial takes 1x1 -> 4x4
        self.initial = nn.Sequential(
            nn.ConvTranspose2d(z_dim, in_channels, 4, 1, 0),
            nn.LeakyReLU(0.2),
            PixelNorm(),
        )

        # Create progression blocks and rgb layers
        channels = in_channels

        # we need to double img for log2(img_size/4) and
        # +1 in loop for initial 4x4
        for idx in range(int(log2(img_size/4)) + 1):
            conv_in = channels
            conv_out = int(in_channels*factors[idx])
            self.prog_blocks.append(ConvBlock(conv_in, conv_out))
            self.rgb_layers.append(WSConv2d(conv_out, img_channels, kernel_size=1, stride=1, padding=0))
            channels = conv_out

    def fade_in(self, alpha, upscaled, generated):
        #assert 0 <= alpha <= 1, "Alpha not between 0 and 1"
        #assert upscaled.shape == generated.shape
        return torch.tanh(alpha * generated + (1 - alpha) * upscaled)

    def forward(self, x, alpha, steps):
        upscaled = self.initial(x)
        out = self.prog_blocks[0](upscaled)

        if steps == 0:
            return self.rgb_layers[0](out)

        for step in range(1, steps+1):
            upscaled = F.interpolate(out, scale_factor=2, mode="nearest")
            out = self.prog_blocks[step](upscaled)

        # The number of channels in upscale will stay the same, while
        # out which has moved through prog_blocks might change. To ensure
        # we can convert both to rgb we use different rgb_layers
        # (steps-1) and steps for upscaled, out respectively
        final_upscaled = self.rgb_layers[steps - 1](upscaled)
        final_out = self.rgb_layers[steps](out)
        return self.fade_in(alpha, final_upscaled, final_out)


class Discriminator(nn.Module):
    def __init__(self, img_size, z_dim, in_channels, img_channels=3):
        super(Discriminator, self).__init__()
        self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])

        # Create progression blocks and rgb layers
        channels = in_channels
        for idx in range(int(log2(img_size/4)) + 1):
            conv_in = int(in_channels * factors[idx])
            conv_out = channels
            self.rgb_layers.append(WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0))
            self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))
            channels = conv_in

        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
        # +1 to in_channels because we concatenate from minibatch std
        self.conv = WSConv2d(in_channels + 1, z_dim, kernel_size=4, stride=1, padding=0)
        self.linear = nn.Linear(z_dim, 1)

    def fade_in(self, alpha, downscaled, out):
        """Used to fade in downscaled using avgpooling and output from CNN"""
        #assert 0 <= alpha <= 1, "Alpha needs to be between [0, 1]"
        #assert downscaled.shape == out.shape
        return alpha * out + (1 - alpha) * downscaled

    def minibatch_std(self, x):
        batch_statistics = (
            torch.std(x, dim=0)
            .mean()
            .repeat(x.shape[0], 1, x.shape[2], x.shape[3])
        )
        return torch.cat([x, batch_statistics], dim=1)

    def forward(self, x, alpha, steps):
        out = self.rgb_layers[steps](x) # convert from rgb as initial step

        if steps == 0: # i.e, image is 4x4
            out = self.minibatch_std(out)
            out = self.conv(out)
            return self.linear(out.view(-1, out.shape[1]))

        # index steps which has the "reverse" fade_in
        downscaled = self.rgb_layers[steps - 1](self.avg_pool(x))
        out = self.avg_pool(self.prog_blocks[steps](out))
        out = self.fade_in(alpha, downscaled, out)

        for step in range(steps - 1, 0, -1):
            downscaled = self.avg_pool(out)
            out = self.prog_blocks[step](downscaled)

        out = self.minibatch_std(out)
        out = self.conv(out)
        return self.linear(out.view(-1, out.shape[1]))


if __name__ == "__main__":
    import time
    Z_DIM = 100
    IN_CHANNELS = 16
    img_size = 512
    num_steps = int(log2(img_size / 4))
    x = torch.randn((5, Z_DIM, 1, 1))
    gen = Generator(Z_DIM, IN_CHANNELS, img_size=img_size)
    disc = Discriminator(img_size, Z_DIM, IN_CHANNELS)
    start = time.time()
    with torch.autograd.profiler.profile(use_cuda=True) as prof:
        z = gen(x, alpha=0.5, steps=num_steps)
    print(prof)
    gen_time = time.time()-start
    t = time.time()
    out = disc(z, 0.01, num_steps)
    disc_time = time.time()-t
    print(gen_time, disc_time)
    #print(disc(z, 0.01, num_steps).shape)
Initial commit 2021-01-30 21:49:15 +01:00			`"""`
			`Implementation of ProGAN generator and discriminator with the key`
			`attributions from the paper. We have tried to make the implementation`
			`compact but a goal is also to keep it readable and understandable.`
			`Specifically the key points implemented are:`

			`1) Progressive growing (of model and layers)`
			`2) Minibatch std on Discriminator`
			`3) Normalization with PixelNorm`
			`4) Equalized Learning Rate (here I cheated and only did it on Conv layers)`
			`"""`

			`import torch`
			`import torch.nn as nn`
			`import torch.nn.functional as F`
			`from math import log2`

			`"""`
			`Factors is used in Discrmininator and Generator for how much`
			`the channels should be multiplied and expanded for each layer,`
			`so specifically the first 5 layers the channels stay the same,`
			`whereas when we increase the img_size (towards the later layers)`
			`we decrease the number of chanels by 1/2, 1/4, etc.`
			`"""`
			`factors = [1, 1, 1, 1, 1/2, 1/4, 1/4, 1/8, 1/16]`


			`class WSConv2d(nn.Module):`
			`"""`
			`Weight scaled Conv2d (Equalized Learning Rate)`
			`Note that input is multiplied rather than changing weights`
			`this will have the same result.`

			`Inspired by:`
			`https://github.com/nvnbny/progressive_growing_of_gans/blob/master/modelUtils.py`
			`"""`

			`def __init__(`
			`self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, gain=2`
			`):`
			`super(WSConv2d, self).__init__()`
			`self.conv = nn.Conv2d(`
			`in_channels, out_channels, kernel_size, stride, padding`
			`)`
			`self.scale = (gain / (self.conv.weight[0].numel())) ** 0.5`

			`# initialize conv layer`
			`nn.init.normal_(self.conv.weight)`
			`nn.init.zeros_(self.conv.bias)`

			`def forward(self, x):`
			`return self.conv(x * self.scale)`


			`class PixelNorm(nn.Module):`
			`def __init__(self):`
			`super(PixelNorm, self).__init__()`
			`self.epsilon = 1e-8`

			`def forward(self, x):`
			`return x / torch.sqrt(`
			`torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon`
			`)`


			`class ConvBlock(nn.Module):`
			`def __init__(self, in_channels, out_channels, use_pixelnorm=True):`
			`super(ConvBlock, self).__init__()`
			`self.use_pn = use_pixelnorm`
			`self.conv1 = WSConv2d(in_channels, out_channels)`
			`self.conv2 = WSConv2d(out_channels, out_channels)`
			`self.leaky = nn.LeakyReLU(0.2)`
			`self.pn = PixelNorm()`

			`def forward(self, x):`
			`x = self.leaky(self.conv1(x))`
			`x = self.pn(x) if self.use_pn else x`
			`x = self.leaky(self.conv2(x))`
			`x = self.pn(x) if self.use_pn else x`
			`return x`


			`class Generator(nn.Module):`
			`def __init__(self, z_dim, in_channels, img_size, img_channels=3):`
			`super(Generator, self).__init__()`
			`self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])`

			`# initial takes 1x1 -> 4x4`
			`self.initial = nn.Sequential(`
			`nn.ConvTranspose2d(z_dim, in_channels, 4, 1, 0),`
			`nn.LeakyReLU(0.2),`
			`PixelNorm(),`
			`)`

			`# Create progression blocks and rgb layers`
			`channels = in_channels`

			`# we need to double img for log2(img_size/4) and`
			`# +1 in loop for initial 4x4`
			`for idx in range(int(log2(img_size/4)) + 1):`
			`conv_in = channels`
			`conv_out = int(in_channels*factors[idx])`
			`self.prog_blocks.append(ConvBlock(conv_in, conv_out))`
			`self.rgb_layers.append(WSConv2d(conv_out, img_channels, kernel_size=1, stride=1, padding=0))`
			`channels = conv_out`

			`def fade_in(self, alpha, upscaled, generated):`
			`#assert 0 <= alpha <= 1, "Alpha not between 0 and 1"`
			`#assert upscaled.shape == generated.shape`
			`return torch.tanh(alpha * generated + (1 - alpha) * upscaled)`

			`def forward(self, x, alpha, steps):`
			`upscaled = self.initial(x)`
			`out = self.prog_blocks[0](upscaled)`

			`if steps == 0:`
			`return self.rgb_layers[0](out)`

			`for step in range(1, steps+1):`
			`upscaled = F.interpolate(out, scale_factor=2, mode="nearest")`
			`out = self.prog_blocks[step](upscaled)`

			`# The number of channels in upscale will stay the same, while`
			`# out which has moved through prog_blocks might change. To ensure`
			`# we can convert both to rgb we use different rgb_layers`
			`# (steps-1) and steps for upscaled, out respectively`
			`final_upscaled = self.rgb_layers[steps - 1](upscaled)`
			`final_out = self.rgb_layers[steps](out)`
			`return self.fade_in(alpha, final_upscaled, final_out)`


			`class Discriminator(nn.Module):`
			`def __init__(self, img_size, z_dim, in_channels, img_channels=3):`
			`super(Discriminator, self).__init__()`
			`self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])`

			`# Create progression blocks and rgb layers`
			`channels = in_channels`
			`for idx in range(int(log2(img_size/4)) + 1):`
			`conv_in = int(in_channels * factors[idx])`
			`conv_out = channels`
			`self.rgb_layers.append(WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0))`
			`self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))`
			`channels = conv_in`

			`self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)`
			`# +1 to in_channels because we concatenate from minibatch std`
			`self.conv = WSConv2d(in_channels + 1, z_dim, kernel_size=4, stride=1, padding=0)`
			`self.linear = nn.Linear(z_dim, 1)`

			`def fade_in(self, alpha, downscaled, out):`
			`"""Used to fade in downscaled using avgpooling and output from CNN"""`
			`#assert 0 <= alpha <= 1, "Alpha needs to be between [0, 1]"`
			`#assert downscaled.shape == out.shape`
			`return alpha * out + (1 - alpha) * downscaled`

			`def minibatch_std(self, x):`
			`batch_statistics = (`
			`torch.std(x, dim=0)`
			`.mean()`
			`.repeat(x.shape[0], 1, x.shape[2], x.shape[3])`
			`)`
			`return torch.cat([x, batch_statistics], dim=1)`

			`def forward(self, x, alpha, steps):`
			`out = self.rgb_layers[steps](x) # convert from rgb as initial step`

			`if steps == 0: # i.e, image is 4x4`
			`out = self.minibatch_std(out)`
			`out = self.conv(out)`
			`return self.linear(out.view(-1, out.shape[1]))`

			`# index steps which has the "reverse" fade_in`
			`downscaled = self.rgb_layers[steps - 1](self.avg_pool(x))`
			`out = self.avg_pool(self.prog_blocks[steps](out))`
			`out = self.fade_in(alpha, downscaled, out)`

			`for step in range(steps - 1, 0, -1):`
			`downscaled = self.avg_pool(out)`
			`out = self.prog_blocks[step](downscaled)`

			`out = self.minibatch_std(out)`
			`out = self.conv(out)`
			`return self.linear(out.view(-1, out.shape[1]))`


			`if __name__ == "__main__":`
			`import time`
			`Z_DIM = 100`
			`IN_CHANNELS = 16`
			`img_size = 512`
			`num_steps = int(log2(img_size / 4))`
			`x = torch.randn((5, Z_DIM, 1, 1))`
			`gen = Generator(Z_DIM, IN_CHANNELS, img_size=img_size)`
			`disc = Discriminator(img_size, Z_DIM, IN_CHANNELS)`
			`start = time.time()`
			`with torch.autograd.profiler.profile(use_cuda=True) as prof:`
			`z = gen(x, alpha=0.5, steps=num_steps)`
			`print(prof)`
			`gen_time = time.time()-start`
			`t = time.time()`
			`out = disc(z, 0.01, num_steps)`
			`disc_time = time.time()-t`
			`print(gen_time, disc_time)`
			`#print(disc(z, 0.01, num_steps).shape)`