cyclegan, progan
@@ -22,7 +22,7 @@ so specifically the first 5 layers the channels stay the same,
 whereas when we increase the img_size (towards the later layers)
 we decrease the number of channels by 1/2, 1/4, etc.
 """
-factors = [1, 1, 1, 1, 1/2, 1/4, 1/4, 1/8, 1/16]
+factors = [1, 1, 1, 1, 1 / 2, 1 / 4, 1 / 8, 1 / 16, 1 / 32]


 class WSConv2d(nn.Module):
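
Note on the corrected list: `factors` sets the width of every progressive block through `int(in_channels * factors[i])`, so dropping the duplicated `1/4` and appending `1/32` changes which resolution gets which width. A minimal sketch of the mapping, assuming `in_channels = 256` (the value used in the test block at the bottom of the file):

```python
in_channels = 256  # assumed; matches the __main__ test block
factors = [1, 1, 1, 1, 1 / 2, 1 / 4, 1 / 8, 1 / 16, 1 / 32]

for i, f in enumerate(factors):
    resolution = 4 * 2 ** i  # block i produces this output resolution
    print(f"{resolution:>4}px -> {int(in_channels * f):>3} channels")
# 4px through 32px stay at 256 channels, then 128, 64, 32, 16, and 8 at 1024px.
```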
@@ -31,7 +31,7 @@ class WSConv2d(nn.Module):
     Note that input is multiplied rather than changing weights
     this will have the same result.

-    Inspired by:
+    Inspired and looked at:
     https://github.com/nvnbny/progressive_growing_of_gans/blob/master/modelUtils.py
     """

@@ -39,17 +39,17 @@ class WSConv2d(nn.Module):
         self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, gain=2
     ):
         super(WSConv2d, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels, out_channels, kernel_size, stride, padding
-        )
-        self.scale = (gain / (self.conv.weight[0].numel())) ** 0.5
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
+        self.scale = (gain / (in_channels * (kernel_size ** 2))) ** 0.5
+        self.bias = self.conv.bias
+        self.conv.bias = None

         # initialize conv layer
         nn.init.normal_(self.conv.weight)
-        nn.init.zeros_(self.conv.bias)
+        nn.init.zeros_(self.bias)

     def forward(self, x):
-        return self.conv(x * self.scale)
+        return self.conv(x * self.scale) + self.bias.view(1, self.bias.shape[0], 1, 1)


 class PixelNorm(nn.Module):
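
The reworked `WSConv2d` implements what the ProGAN paper calls equalized learning rate: weights are drawn from N(0, 1) and rescaled at runtime by the He constant `sqrt(gain / fan_in)` with `fan_in = in_channels * kernel_size**2`, while the bias is pulled out of the conv so it is not scaled. Since convolution is linear in its input, scaling the input is equivalent to scaling the weights; a quick standalone check with plain `nn.Conv2d` and arbitrary sizes:

```python
import torch
import torch.nn as nn

in_c, out_c, k = 8, 16, 3
scale = (2 / (in_c * k ** 2)) ** 0.5  # gain=2 over fan-in, as in the new self.scale

conv = nn.Conv2d(in_c, out_c, k, 1, 1, bias=False)
x = torch.randn(1, in_c, 4, 4)

scaled_input = conv(x * scale)  # what WSConv2d.forward does
with torch.no_grad():
    w_scaled = nn.Conv2d(in_c, out_c, k, 1, 1, bias=False)
    w_scaled.weight.copy_(conv.weight * scale)
scaled_weights = w_scaled(x)

print(torch.allclose(scaled_input, scaled_weights, atol=1e-6))  # True
```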
@@ -58,9 +58,7 @@ class PixelNorm(nn.Module):
         self.epsilon = 1e-8

     def forward(self, x):
-        return x / torch.sqrt(
-            torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon
-        )
+        return x / torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon)


 class ConvBlock(nn.Module):
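
The one-liner above is pixelwise feature normalization: every spatial position's channel vector is divided by its root mean square. A minimal check:

```python
import torch

eps = 1e-8
x = torch.randn(2, 512, 4, 4)
out = x / torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + eps)

# After normalization each (batch, y, x) position has mean square ~1 over channels:
print(torch.mean(out ** 2, dim=1).flatten()[:3])  # tensor([1.0000, 1.0000, 1.0000])
```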
@@ -81,42 +79,48 @@ class ConvBlock(nn.Module):


 class Generator(nn.Module):
-    def __init__(self, z_dim, in_channels, img_size, img_channels=3):
+    def __init__(self, z_dim, in_channels, img_channels=3):
         super(Generator, self).__init__()
-        self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])

         # initial takes 1x1 -> 4x4
         self.initial = nn.Sequential(
             PixelNorm(),
             nn.ConvTranspose2d(z_dim, in_channels, 4, 1, 0),
             nn.LeakyReLU(0.2),
             WSConv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
             nn.LeakyReLU(0.2),
             PixelNorm(),
         )

-        # Create progression blocks and rgb layers
-        channels = in_channels
-        # we need to double img for log2(img_size/4) and
-        # +1 in loop for initial 4x4
-        for idx in range(int(log2(img_size/4)) + 1):
-            conv_in = channels
-            conv_out = int(in_channels*factors[idx])
-            self.prog_blocks.append(ConvBlock(conv_in, conv_out))
-            self.rgb_layers.append(WSConv2d(conv_out, img_channels, kernel_size=1, stride=1, padding=0))
-            channels = conv_out
+        self.initial_rgb = WSConv2d(
+            in_channels, img_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.prog_blocks, self.rgb_layers = (
+            nn.ModuleList([]),
+            nn.ModuleList([self.initial_rgb]),
+        )
+
+        for i in range(
+            len(factors) - 1
+        ):  # -1 to prevent index error because of factors[i+1]
+            conv_in_c = int(in_channels * factors[i])
+            conv_out_c = int(in_channels * factors[i + 1])
+            self.prog_blocks.append(ConvBlock(conv_in_c, conv_out_c))
+            self.rgb_layers.append(
+                WSConv2d(conv_out_c, img_channels, kernel_size=1, stride=1, padding=0)
+            )

     def fade_in(self, alpha, upscaled, generated):
-        #assert 0 <= alpha <= 1, "Alpha not between 0 and 1"
-        #assert upscaled.shape == generated.shape
+        # alpha should be a scalar within [0, 1], and upscaled.shape == generated.shape
         return torch.tanh(alpha * generated + (1 - alpha) * upscaled)
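
For context, `fade_in` blends the nearest-upsampled output of the previous stage with the new block's output: at `alpha=0` only the upscaled image passes through, at `alpha=1` only the new block does, and `tanh` keeps the generated image in [-1, 1]. A toy run with arbitrary tensors:

```python
import torch

upscaled = torch.zeros(1, 3, 8, 8)   # stands in for the nearest-upsampled previous stage
generated = torch.ones(1, 3, 8, 8)   # stands in for the new block's output

for alpha in (0.0, 0.5, 1.0):
    blended = torch.tanh(alpha * generated + (1 - alpha) * upscaled)
    print(alpha, round(blended[0, 0, 0, 0].item(), 4))
# 0.0 -> 0.0, 0.5 -> tanh(0.5) ~= 0.4621, 1.0 -> tanh(1.0) ~= 0.7616
```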

     def forward(self, x, alpha, steps):
-        upscaled = self.initial(x)
-        out = self.prog_blocks[0](upscaled)
+        out = self.initial(x)

         if steps == 0:
-            return self.rgb_layers[0](out)
+            return self.initial_rgb(out)

-        for step in range(1, steps+1):
+        for step in range(steps):
             upscaled = F.interpolate(out, scale_factor=2, mode="nearest")
             out = self.prog_blocks[step](upscaled)
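
With the corrected loop bounds, `steps` is simply the number of 2x upsamplings after the initial 4x4 stage, so the output is `4 * 2**steps` pixels per side, which is exactly what the training code inverts with `int(log2(img_size / 4))`:

```python
from math import log2

# steps <-> resolution for the 9 entries in factors
for steps in range(9):
    img_size = 4 * 2 ** steps
    assert int(log2(img_size / 4)) == steps  # round trip used by the test block
    print(f"steps={steps} -> {img_size}x{img_size}")
```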

@@ -130,76 +134,101 @@ class Generator(nn.Module):


 class Discriminator(nn.Module):
-    def __init__(self, img_size, z_dim, in_channels, img_channels=3):
+    def __init__(self, z_dim, in_channels, img_channels=3):
         super(Discriminator, self).__init__()
         self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])
         self.leaky = nn.LeakyReLU(0.2)

-        # Create progression blocks and rgb layers
-        channels = in_channels
-        for idx in range(int(log2(img_size/4)) + 1):
-            conv_in = int(in_channels * factors[idx])
-            conv_out = channels
-            self.rgb_layers.append(WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0))
-            self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))
-            channels = conv_in
+        # here we work backwards from factors because the discriminator
+        # should be mirrored from the generator. So the first prog_block and
+        # rgb layer we append will work for input size 1024x1024, then 512->256-> etc
+        for i in range(len(factors) - 1, 0, -1):
+            conv_in = int(in_channels * factors[i])
+            conv_out = int(in_channels * factors[i - 1])
+            self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))
+            self.rgb_layers.append(
+                WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0)
+            )

-        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
-        # +1 to in_channels because we concatenate from minibatch std
-        self.conv = WSConv2d(in_channels + 1, z_dim, kernel_size=4, stride=1, padding=0)
-        self.linear = nn.Linear(z_dim, 1)
+        # perhaps a confusing name: "initial_rgb" is just the RGB layer for the 4x4 input size,
+        # did this to "mirror" the generator initial_rgb
+        self.initial_rgb = WSConv2d(
+            img_channels, in_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.rgb_layers.append(self.initial_rgb)
+        self.avg_pool = nn.AvgPool2d(
+            kernel_size=2, stride=2
+        )  # down sampling using avg pool
+
+        # this is the block for 4x4 input size
+        self.final_block = nn.Sequential(
+            # +1 to in_channels because we concatenate from MiniBatch std
+            WSConv2d(in_channels + 1, in_channels, kernel_size=3, padding=1),
+            nn.LeakyReLU(0.2),
+            WSConv2d(in_channels, in_channels, kernel_size=4, padding=0, stride=1),
+            nn.LeakyReLU(0.2),
+            WSConv2d(
+                in_channels, 1, kernel_size=1, padding=0, stride=1
+            ),  # we use this instead of a linear layer
+        )
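
The new `final_block` replaces the old `self.conv` + `nn.Linear` head: the `kernel_size=4` convolution collapses the 4x4 feature map to 1x1, and the closing 1x1 convolution plays the role of the removed linear layer. A shape walk-through with hypothetical sizes and plain `nn.Conv2d` standing in for `WSConv2d`:

```python
import torch
import torch.nn as nn

in_channels = 256
x = torch.randn(1, in_channels + 1, 4, 4)  # +1 channel appended by minibatch_std

head = nn.Sequential(
    nn.Conv2d(in_channels + 1, in_channels, kernel_size=3, padding=1),  # 4x4 -> 4x4
    nn.LeakyReLU(0.2),
    nn.Conv2d(in_channels, in_channels, kernel_size=4),  # 4x4 -> 1x1
    nn.LeakyReLU(0.2),
    nn.Conv2d(in_channels, 1, kernel_size=1),  # acts as the linear layer
)
print(head(x).shape)  # torch.Size([1, 1, 1, 1]); .view(N, -1) then gives (N, 1)
```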

     def fade_in(self, alpha, downscaled, out):
-        """Used to fade in downscaled using avgpooling and output from CNN"""
-        #assert 0 <= alpha <= 1, "Alpha needs to be between [0, 1]"
-        #assert downscaled.shape == out.shape
+        """Used to fade in downscaled using avg pooling and output from CNN"""
+        # alpha should be a scalar within [0, 1], and downscaled.shape == out.shape
         return alpha * out + (1 - alpha) * downscaled

     def minibatch_std(self, x):
         batch_statistics = (
-            torch.std(x, dim=0)
-            .mean()
-            .repeat(x.shape[0], 1, x.shape[2], x.shape[3])
+            torch.std(x, dim=0).mean().repeat(x.shape[0], 1, x.shape[2], x.shape[3])
         )
+        # we take the std for each example (across all channels and pixels), then repeat it
+        # for a single channel and concatenate it with the image. In this way the discriminator
+        # gets information about the variation in the batch/image
         return torch.cat([x, batch_statistics], dim=1)
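
Concretely, the reformatted `minibatch_std` line reduces the per-feature standard deviation across the batch to one scalar, then tiles that scalar into a single extra feature map. A standalone shape check:

```python
import torch

x = torch.randn(8, 256, 16, 16)

stats = torch.std(x, dim=0).mean().repeat(x.shape[0], 1, x.shape[2], x.shape[3])
print(stats.shape)                         # torch.Size([8, 1, 16, 16]), one tiled scalar
print(torch.cat([x, stats], dim=1).shape)  # torch.Size([8, 257, 16, 16])
```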

     def forward(self, x, alpha, steps):
-        out = self.rgb_layers[steps](x) # convert from rgb as initial step
+        # where we should start in the list of prog_blocks, maybe a bit confusing but
+        # the last is for the 4x4. So, for example, say steps=1, then we should start
+        # at the second to last because input_size will be 8x8. If steps==0 we just
+        # use the final block
+        cur_step = len(self.prog_blocks) - steps
+
+        # convert from rgb as initial step, this will depend on
+        # the image size (each will have its own rgb layer)
+        out = self.leaky(self.rgb_layers[cur_step](x))

         if steps == 0:  # i.e., image is 4x4
             out = self.minibatch_std(out)
-            out = self.conv(out)
-            return self.linear(out.view(-1, out.shape[1]))
+            return self.final_block(out).view(out.shape[0], -1)

-        # index steps which has the "reverse" fade_in
-        downscaled = self.rgb_layers[steps - 1](self.avg_pool(x))
-        out = self.avg_pool(self.prog_blocks[steps](out))
+        # because prog_blocks might change the channels, for downscaling we use the rgb_layer
+        # from the previous/smaller size, which in our case corresponds to +1 in the indexing
+        downscaled = self.leaky(self.rgb_layers[cur_step + 1](self.avg_pool(x)))
+        out = self.avg_pool(self.prog_blocks[cur_step](out))

+        # the fade_in is done first between the downscaled and the input
+        # this is opposite from the generator
         out = self.fade_in(alpha, downscaled, out)

-        for step in range(steps - 1, 0, -1):
-            downscaled = self.avg_pool(out)
-            out = self.prog_blocks[step](downscaled)
+        for step in range(cur_step + 1, len(self.prog_blocks)):
+            out = self.prog_blocks[step](out)
+            out = self.avg_pool(out)

         out = self.minibatch_std(out)
-        out = self.conv(out)
-        return self.linear(out.view(-1, out.shape[1]))
+        return self.final_block(out).view(out.shape[0], -1)
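
On the index arithmetic in the rewritten `forward`: `prog_blocks` are stored largest-resolution-first, so an input of size `4 * 2**steps` enters at `cur_step = len(self.prog_blocks) - steps` and the loop then walks forward toward the 4x4 block. A dry run of the indices, assuming `len(factors) == 9` and hence 8 prog_blocks:

```python
num_blocks = 8  # len(factors) - 1, mirroring the generator

for steps in range(9):
    img_size = 4 * 2 ** steps
    cur_step = num_blocks - steps
    used = list(range(cur_step, num_blocks)) if steps > 0 else []
    print(f"{img_size:>4}px: cur_step={cur_step}, prog_blocks used={used}")
# 4px uses only final_block; 1024px enters at cur_step=0 and runs all 8 blocks.
```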


 if __name__ == "__main__":
-    import time
     Z_DIM = 100
-    IN_CHANNELS = 16
-    img_size = 512
-    num_steps = int(log2(img_size / 4))
-    x = torch.randn((5, Z_DIM, 1, 1))
-    gen = Generator(Z_DIM, IN_CHANNELS, img_size=img_size)
-    disc = Discriminator(img_size, Z_DIM, IN_CHANNELS)
-    start = time.time()
-    with torch.autograd.profiler.profile(use_cuda=True) as prof:
-        z = gen(x, alpha=0.5, steps=num_steps)
-    print(prof)
-    gen_time = time.time()-start
-    t = time.time()
-    out = disc(z, 0.01, num_steps)
-    disc_time = time.time()-t
-    print(gen_time, disc_time)
-    #print(disc(z, 0.01, num_steps).shape)
+    IN_CHANNELS = 256
+    gen = Generator(Z_DIM, IN_CHANNELS, img_channels=3)
+    critic = Discriminator(Z_DIM, IN_CHANNELS, img_channels=3)
+
+    for img_size in [4, 8, 16, 32, 64, 128, 256, 512, 1024]:
+        num_steps = int(log2(img_size / 4))
+        x = torch.randn((1, Z_DIM, 1, 1))
+        z = gen(x, 0.5, steps=num_steps)
+        assert z.shape == (1, 3, img_size, img_size)
+        out = critic(z, alpha=0.5, steps=num_steps)
+        assert out.shape == (1, 1)
+        print(f"Success! At img size: {img_size}")