mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-02-21 19:27:58 +00:00
206 lines
7.2 KiB
Python
206 lines
7.2 KiB
Python
"""
|
|
Implementation of ProGAN generator and discriminator with the key
|
|
attributions from the paper. We have tried to make the implementation
|
|
compact but a goal is also to keep it readable and understandable.
|
|
Specifically the key points implemented are:
|
|
|
|
1) Progressive growing (of model and layers)
|
|
2) Minibatch std on Discriminator
|
|
3) Normalization with PixelNorm
|
|
4) Equalized Learning Rate (here I cheated and only did it on Conv layers)
|
|
"""
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
from math import log2
|
|
|
|
"""
|
|
Factors is used in Discrmininator and Generator for how much
|
|
the channels should be multiplied and expanded for each layer,
|
|
so specifically the first 5 layers the channels stay the same,
|
|
whereas when we increase the img_size (towards the later layers)
|
|
we decrease the number of chanels by 1/2, 1/4, etc.
|
|
"""
|
|
factors = [1, 1, 1, 1, 1/2, 1/4, 1/4, 1/8, 1/16]
|
|
|
|
|
|
class WSConv2d(nn.Module):
|
|
"""
|
|
Weight scaled Conv2d (Equalized Learning Rate)
|
|
Note that input is multiplied rather than changing weights
|
|
this will have the same result.
|
|
|
|
Inspired by:
|
|
https://github.com/nvnbny/progressive_growing_of_gans/blob/master/modelUtils.py
|
|
"""
|
|
|
|
def __init__(
|
|
self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, gain=2
|
|
):
|
|
super(WSConv2d, self).__init__()
|
|
self.conv = nn.Conv2d(
|
|
in_channels, out_channels, kernel_size, stride, padding
|
|
)
|
|
self.scale = (gain / (self.conv.weight[0].numel())) ** 0.5
|
|
|
|
# initialize conv layer
|
|
nn.init.normal_(self.conv.weight)
|
|
nn.init.zeros_(self.conv.bias)
|
|
|
|
def forward(self, x):
|
|
return self.conv(x * self.scale)
|
|
|
|
|
|
class PixelNorm(nn.Module):
|
|
def __init__(self):
|
|
super(PixelNorm, self).__init__()
|
|
self.epsilon = 1e-8
|
|
|
|
def forward(self, x):
|
|
return x / torch.sqrt(
|
|
torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon
|
|
)
|
|
|
|
|
|
class ConvBlock(nn.Module):
|
|
def __init__(self, in_channels, out_channels, use_pixelnorm=True):
|
|
super(ConvBlock, self).__init__()
|
|
self.use_pn = use_pixelnorm
|
|
self.conv1 = WSConv2d(in_channels, out_channels)
|
|
self.conv2 = WSConv2d(out_channels, out_channels)
|
|
self.leaky = nn.LeakyReLU(0.2)
|
|
self.pn = PixelNorm()
|
|
|
|
def forward(self, x):
|
|
x = self.leaky(self.conv1(x))
|
|
x = self.pn(x) if self.use_pn else x
|
|
x = self.leaky(self.conv2(x))
|
|
x = self.pn(x) if self.use_pn else x
|
|
return x
|
|
|
|
|
|
class Generator(nn.Module):
|
|
def __init__(self, z_dim, in_channels, img_size, img_channels=3):
|
|
super(Generator, self).__init__()
|
|
self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])
|
|
|
|
# initial takes 1x1 -> 4x4
|
|
self.initial = nn.Sequential(
|
|
nn.ConvTranspose2d(z_dim, in_channels, 4, 1, 0),
|
|
nn.LeakyReLU(0.2),
|
|
PixelNorm(),
|
|
)
|
|
|
|
# Create progression blocks and rgb layers
|
|
channels = in_channels
|
|
|
|
# we need to double img for log2(img_size/4) and
|
|
# +1 in loop for initial 4x4
|
|
for idx in range(int(log2(img_size/4)) + 1):
|
|
conv_in = channels
|
|
conv_out = int(in_channels*factors[idx])
|
|
self.prog_blocks.append(ConvBlock(conv_in, conv_out))
|
|
self.rgb_layers.append(WSConv2d(conv_out, img_channels, kernel_size=1, stride=1, padding=0))
|
|
channels = conv_out
|
|
|
|
def fade_in(self, alpha, upscaled, generated):
|
|
#assert 0 <= alpha <= 1, "Alpha not between 0 and 1"
|
|
#assert upscaled.shape == generated.shape
|
|
return torch.tanh(alpha * generated + (1 - alpha) * upscaled)
|
|
|
|
def forward(self, x, alpha, steps):
|
|
upscaled = self.initial(x)
|
|
out = self.prog_blocks[0](upscaled)
|
|
|
|
if steps == 0:
|
|
return self.rgb_layers[0](out)
|
|
|
|
for step in range(1, steps+1):
|
|
upscaled = F.interpolate(out, scale_factor=2, mode="nearest")
|
|
out = self.prog_blocks[step](upscaled)
|
|
|
|
# The number of channels in upscale will stay the same, while
|
|
# out which has moved through prog_blocks might change. To ensure
|
|
# we can convert both to rgb we use different rgb_layers
|
|
# (steps-1) and steps for upscaled, out respectively
|
|
final_upscaled = self.rgb_layers[steps - 1](upscaled)
|
|
final_out = self.rgb_layers[steps](out)
|
|
return self.fade_in(alpha, final_upscaled, final_out)
|
|
|
|
|
|
class Discriminator(nn.Module):
|
|
def __init__(self, img_size, z_dim, in_channels, img_channels=3):
|
|
super(Discriminator, self).__init__()
|
|
self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])
|
|
|
|
# Create progression blocks and rgb layers
|
|
channels = in_channels
|
|
for idx in range(int(log2(img_size/4)) + 1):
|
|
conv_in = int(in_channels * factors[idx])
|
|
conv_out = channels
|
|
self.rgb_layers.append(WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0))
|
|
self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))
|
|
channels = conv_in
|
|
|
|
self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
|
|
# +1 to in_channels because we concatenate from minibatch std
|
|
self.conv = WSConv2d(in_channels + 1, z_dim, kernel_size=4, stride=1, padding=0)
|
|
self.linear = nn.Linear(z_dim, 1)
|
|
|
|
def fade_in(self, alpha, downscaled, out):
|
|
"""Used to fade in downscaled using avgpooling and output from CNN"""
|
|
#assert 0 <= alpha <= 1, "Alpha needs to be between [0, 1]"
|
|
#assert downscaled.shape == out.shape
|
|
return alpha * out + (1 - alpha) * downscaled
|
|
|
|
def minibatch_std(self, x):
|
|
batch_statistics = (
|
|
torch.std(x, dim=0)
|
|
.mean()
|
|
.repeat(x.shape[0], 1, x.shape[2], x.shape[3])
|
|
)
|
|
return torch.cat([x, batch_statistics], dim=1)
|
|
|
|
def forward(self, x, alpha, steps):
|
|
out = self.rgb_layers[steps](x) # convert from rgb as initial step
|
|
|
|
if steps == 0: # i.e, image is 4x4
|
|
out = self.minibatch_std(out)
|
|
out = self.conv(out)
|
|
return self.linear(out.view(-1, out.shape[1]))
|
|
|
|
# index steps which has the "reverse" fade_in
|
|
downscaled = self.rgb_layers[steps - 1](self.avg_pool(x))
|
|
out = self.avg_pool(self.prog_blocks[steps](out))
|
|
out = self.fade_in(alpha, downscaled, out)
|
|
|
|
for step in range(steps - 1, 0, -1):
|
|
downscaled = self.avg_pool(out)
|
|
out = self.prog_blocks[step](downscaled)
|
|
|
|
out = self.minibatch_std(out)
|
|
out = self.conv(out)
|
|
return self.linear(out.view(-1, out.shape[1]))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import time
|
|
Z_DIM = 100
|
|
IN_CHANNELS = 16
|
|
img_size = 512
|
|
num_steps = int(log2(img_size / 4))
|
|
x = torch.randn((5, Z_DIM, 1, 1))
|
|
gen = Generator(Z_DIM, IN_CHANNELS, img_size=img_size)
|
|
disc = Discriminator(img_size, Z_DIM, IN_CHANNELS)
|
|
start = time.time()
|
|
with torch.autograd.profiler.profile(use_cuda=True) as prof:
|
|
z = gen(x, alpha=0.5, steps=num_steps)
|
|
print(prof)
|
|
gen_time = time.time()-start
|
|
t = time.time()
|
|
out = disc(z, 0.01, num_steps)
|
|
disc_time = time.time()-t
|
|
print(gen_time, disc_time)
|
|
#print(disc(z, 0.01, num_steps).shape)
|