Coding 6: Up-convolutional networks for image generation

In this exercise, we will train a network with up-convolutional layers to generate the following images:

[figure: grid of the ten target photos]

The model converts a one-hot vector into an image. We limit the model size (in bytes) to less than 1/4 of the total size of the images, and we evaluate the generated images by their per-pixel L1 distance to the targets.
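
Concretely, each target image is a 3 x 128 x 128 uint8 tensor, so the ten images together take 10 x 3 x 128 x 128 = 491,520 bytes; the model therefore has to fit in fewer than 491,520 / 4 = 122,880 bytes, i.e. fewer than 30,720 float32 parameters.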

In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torchvision
import torchvision.transforms as transforms
In [0]:
# Download and load images. Ignore this.
def show_images(images):
    vis = torchvision.utils.make_grid(images, nrow=5)
    plt.imshow(vis.permute(1, 2, 0))
    plt.show()
!wget http://www.philkr.net/cs342/coding/coding_06_assets/photos.zip
!unzip photos.zip
image_names = ['0_Brady.png', '1_Ishan.png', '2_Xingyi.png',
    '3_Philipp.png', '4_Don_CS_Chair.png', '5_Pual_Dean_Sci.png',
    '6_UTProvost.png', '7_UT_President.png', '8_James_UTSystem.png',
    '9_Greg_Texas_Governor.png']
image_list = [Image.open("photos/{}".format(image_name)) \
          for image_name in image_names if '.png' in image_name]
images = torch.cat(
    [transforms.ToTensor()(image)[None] for image in image_list])
show_images(images)
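Note that transforms.ToTensor() converts the PIL images to float tensors with values in [0, 1], so the generator's output should live in the same range.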
In [0]:
class UpConvNet(torch.nn.Module):
    def __init__(self, input_dim=10, output_size=128):
        """
        TODO: Implement.
        """
        super().__init__()
        
    def forward(self, z):
        """
        TODO: Implement.
        input z (batch_size x 10): one-hot representation.
        output x (batch_size x 3 x 128 x 128): images.
        """
        pass
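
As a reference point only (the stub above is left for you to fill in), here is one possible sketch of such a network: the one-hot code is treated as a 10-channel 1 x 1 feature map and repeatedly doubled in resolution with stride-2 ConvTranspose2d layers until it reaches 128 x 128. The class name UpConvNetSketch and the channel widths are illustrative assumptions, not the required solution; the widths were only picked so that the parameter count (about 28k float32 values, roughly 112 KB) stays under the 1/4 byte budget.

class UpConvNetSketch(torch.nn.Module):
    def __init__(self, input_dim=10, output_size=128):
        super().__init__()
        # Illustrative channel widths; ~28k parameters in total.
        channels = [input_dim, 24, 20, 20, 16, 12, 8]
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            # Each stride-2 transposed convolution doubles the resolution:
            # 1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 64.
            layers += [torch.nn.ConvTranspose2d(c_in, c_out, kernel_size=4,
                                                stride=2, padding=1),
                       torch.nn.ReLU()]
        # Final doubling 64 -> 128 and projection to 3 channels in [0, 1].
        layers += [torch.nn.ConvTranspose2d(channels[-1], 3, kernel_size=4,
                                            stride=2, padding=1),
                   torch.nn.Sigmoid()]
        self.net = torch.nn.Sequential(*layers)

    def forward(self, z):
        # z: (batch_size x 10) one-hot -> (batch_size x 10 x 1 x 1) feature map.
        return self.net(z[:, :, None, None])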
In [ ]:
def train(model, images, writer, device,
          lr=0.001, n_epochs=500):
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    ## TODO: define loss here
    loss_func = None
    z = torch.eye(10)
    x = images
    z, x = z.to(device), x.to(device)
    model.to(device)
    for epoch in range(n_epochs):    
        x_pred = model(z)
        loss = loss_func(x_pred, x)
        optim.zero_grad()
        loss.backward()
        optim.step()
        writer.add_scalar('loss', loss.item(), epoch)
        if epoch % 100 == 0:
            print('loss', epoch, loss.item())
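
For the loss TODO above, one natural choice, given that the notebook evaluates the generated images by their per-pixel L1 error, is to train on that same metric:

# One possible definition for the loss TODO: mean absolute error per pixel,
# which matches the L1 error reported at the end of the notebook.
loss_func = torch.nn.L1Loss()

torch.nn.MSELoss() is another common choice, but it optimizes a different objective than the reported L1 error.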
In [0]:
%reload_ext tensorboard
%tensorboard --logdir log --reload_interval 1
import time
import torch.utils.tensorboard as tb

model = UpConvNet()
writer = tb.SummaryWriter('log/{}'.format(time.strftime('%m-%d-%H-%M')))
train(model, images, writer, torch.device('cuda'))
x_pred = model(torch.eye(10).cuda()).detach().cpu()
show_images(x_pred)
num_bytes = {torch.float32: 4, torch.float64: 8, torch.float16: 2}
# Calculate the model size
model_bytes = sum(p.numel() * num_bytes[p.dtype] \
  for p in model.parameters() if p.requires_grad)
print('model bytes: ', model_bytes)
# The original images are stored as uint8 (1 byte per value)
print('data bytes: ', images.numel() * 1) 
print('model_bytes / data_bytes', model_bytes / images.numel())
print('L1 error', torch.abs(x_pred - images).mean())