In this exercise, we will train a network with up-convolutional layers to generate the following images:
The model converts a one-hot vector into an image. We will limit the model size (in bytes) to less than 1/4 of the size of the images themselves, and we will evaluate the generated images by their per-pixel L1 distance to the targets.
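To make the budget concrete (assuming, as the docstrings below indicate, ten target images of size 3 x 128 x 128 stored as uint8): the data occupies 10 * 3 * 128 * 128 = 491,520 bytes, so the model must stay under roughly 122,880 bytes, i.e. about 30,720 float32 parameters.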
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torchvision
import torchvision.transforms as transforms
# Download and load images. Ignore this.
def show_images(images):
    vis = torchvision.utils.make_grid(images, nrow=5)
    plt.imshow(vis.permute(1, 2, 0))
    plt.show()
!wget http://www.philkr.net/cs342/coding/coding_06_assets/photos.zip
!unzip photos.zip
image_names = ['0_Brady.png', '1_Ishan.png', '2_Xingyi.png',
               '3_Philipp.png', '4_Don_CS_Chair.png', '5_Pual_Dean_Sci.png',
               '6_UTProvost.png', '7_UT_President.png', '8_James_UTSystem.png',
               '9_Greg_Texas_Governor.png']
image_list = [Image.open("photos/{}".format(image_name))
              for image_name in image_names if '.png' in image_name]
images = torch.cat(
    [transforms.ToTensor()(image)[None] for image in image_list])
show_images(images)
class UpConvNet(torch.nn.Module):
    def __init__(self, input_dim=10, output_size=128):
        """
        TODO: Implement.
        """
        super().__init__()
    def forward(self, z):
        """
        TODO: Implement.
        input z (batch_size x 10): one-hot representation.
        output x (batch_size x 3 x 128 x 128): images.
        """
        pass
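As a reference point, here is a minimal sketch of one way the TODOs could be filled in (the class name, channel widths, and layer count are assumptions, not the required solution): project the one-hot code to a small 4 x 4 feature map with a linear layer, then double the resolution five times with torch.nn.ConvTranspose2d (kernel 4, stride 2, padding 1) until reaching 128 x 128, and finish with a Sigmoid so the outputs lie in [0, 1] like ToTensor images. With these widths the sketch has roughly 21k parameters (about 86 KB in float32), which fits under the ~123 KB budget.
class UpConvNetSketch(torch.nn.Module):
    def __init__(self, input_dim=10, output_size=128):
        # output_size is fixed at 128 in this sketch.
        super().__init__()
        # Map the one-hot code to a 32-channel 4x4 feature map.
        self.fc = torch.nn.Linear(input_dim, 32 * 4 * 4)
        # Five up-convolutions, each doubling the resolution: 4 -> 8 -> 16 -> 32 -> 64 -> 128.
        self.up = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(16, 16, kernel_size=4, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(16, 8, kernel_size=4, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(8, 8, kernel_size=4, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(8, 3, kernel_size=4, stride=2, padding=1),
            torch.nn.Sigmoid(),  # keep pixel values in [0, 1], matching ToTensor
        )
    def forward(self, z):
        x = self.fc(z).view(-1, 32, 4, 4)
        return self.up(x)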
def train(model, images, writer, device,
          lr=0.001, n_epochs=500):
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    ## TODO: define loss here
    loss_func = None
    z = torch.eye(10)
    x = images
    z, x = z.to(device), x.to(device)
    model.to(device)
    for epoch in range(n_epochs):
        x_pred = model(z)
        loss = loss_func(x_pred, x)
        optim.zero_grad()
        loss.backward()
        optim.step()
        writer.add_scalar('loss', loss.item(), epoch)
        if epoch % 100 == 0:
            print('loss', epoch, loss.item())
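Since the exercise scores the output by the mean absolute pixel error, one natural choice for the loss TODO above (an assumption, not the only option) is the matching L1 objective:
# One possible loss for the TODO above: mean absolute error, matching the evaluation metric.
loss_func = torch.nn.L1Loss()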
%reload_ext tensorboard
%tensorboard --logdir log --reload_interval 1
import time
import torch.utils.tensorboard as tb
model = UpConvNet()
writer = tb.SummaryWriter('log/{}'.format(time.strftime('%m-%d-%H-%M')))
train(model, images, writer, torch.device('cuda'))
x_pred = model(torch.eye(10).cuda()).detach().cpu()
show_images(x_pred)
num_bytes = {torch.float32: 4, torch.float64: 8, torch.float16: 2}
# Calculate the model size
model_bytes = sum(p.numel() * num_bytes[p.dtype]
                  for p in model.parameters() if p.requires_grad)
print('model bytes: ', model_bytes)
# The original type of the image is uint8
print('data bytes: ', images.numel() * 1)
print('model_bytes / data_bytes', model_bytes / images.numel())
print('L1 error', torch.abs(x_pred - images).mean())
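If a float32 model lands slightly over the budget, one option (a sketch under the assumption that half precision is acceptable; the exercise does not require it) is to store the trained parameters as float16, which halves model_bytes. The num_bytes table above already covers this dtype:
# Hypothetical: cast the trained model to half precision to halve its storage footprint.
model_half = model.half()  # in-place; parameters become torch.float16
model_bytes_half = sum(p.numel() * num_bytes[p.dtype]
                       for p in model_half.parameters() if p.requires_grad)
print('model bytes (float16): ', model_bytes_half)
# Re-evaluate with half-precision inputs; cast predictions back to float32 for the comparison.
x_pred_half = model_half(torch.eye(10).half().cuda()).detach().cpu().float()
print('L1 error (float16)', torch.abs(x_pred_half - images).mean())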