diff --git a/ACGAN.py b/ACGAN.py index 920ade5..e7a2eb2 100644 --- a/ACGAN.py +++ b/ACGAN.py @@ -2,25 +2,24 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32, class_num=10): super(generator, self).__init__() - if dataset == 'mnist' or 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 + 10 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size + self.class_num = class_num self.fc = nn.Sequential( - nn.Linear(self.input_dim, 1024), + nn.Linear(self.input_dim + self.class_num, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -28,14 +27,14 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input, label): x = torch.cat([input, label], 1) x = self.fc(x) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -43,14 +42,12 @@ def forward(self, input, label): class discriminator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S - def __init__(self, dataset='mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32, class_num=10): super(discriminator, self).__init__() - if dataset == 'mnist' or 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - self.class_num = 10 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size + self.class_num = class_num self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), @@ -60,7 +57,7 @@ def __init__(self, dataset='mnist'): nn.LeakyReLU(0.2), ) self.fc1 = nn.Sequential( - nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), + nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024), nn.BatchNorm1d(1024), nn.LeakyReLU(0.2), ) @@ -75,7 +72,7 @@ def __init__(self, dataset='mnist'): def forward(self, input): x = self.conv(input) - x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) + x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4)) x = self.fc1(x) d = self.dc(x) c = self.cl(x) @@ -94,10 +91,18 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type + self.input_size = args.input_size + self.z_dim = 62 + self.class_num = 10 + self.sample_num = self.class_num ** 2 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = 
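For reference, the instantiation pattern introduced here infers the image channel count by peeking one batch from the repo's dataloader helper instead of branching on the dataset name. A minimal sketch, assuming this file's generator/discriminator classes and the dataloader(dataset, input_size, batch_size) helper imported at the top of the file:

from dataloader import dataloader   # repo helper

input_size, batch_size, z_dim, class_num = 32, 64, 62, 10
data_loader = dataloader('mnist', input_size, batch_size)
data = data_loader.__iter__().__next__()[0]     # peek one batch to learn the shape
channels = data.shape[1]                        # 1 for mnist/fashion-mnist, 3 for celebA

G = generator(input_dim=z_dim, output_dim=channels, input_size=input_size, class_num=class_num)
D = discriminator(input_dim=channels, output_dim=1, input_size=input_size, class_num=class_num)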
generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -115,32 +120,24 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load mnist - self.data_X, self.data_Y = utils.load_mnist(args.dataset) - self.z_dim = 62 - self.y_dim = 10 - # fixed noise & condition self.sample_z_ = torch.zeros((self.sample_num, self.z_dim)) - for i in range(10): - self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim) - for j in range(1, self.y_dim): - self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim] + for i in range(self.class_num): + self.sample_z_[i*self.class_num] = torch.rand(1, self.z_dim) + for j in range(1, self.class_num): + self.sample_z_[i*self.class_num + j] = self.sample_z_[i*self.class_num] - temp = torch.zeros((10, 1)) - for i in range(self.y_dim): + temp = torch.zeros((self.class_num, 1)) + for i in range(self.class_num): temp[i, 0] = i temp_y = torch.zeros((self.sample_num, 1)) - for i in range(10): - temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp + for i in range(self.class_num): + temp_y[i*self.class_num: (i+1)*self.class_num] = temp - self.sample_y_ = torch.zeros((self.sample_num, self.y_dim)) - self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1) + self.sample_y_ = torch.zeros((self.sample_num, self.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) if self.gpu_mode: - self.sample_z_, self.sample_y_ = Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True) - else: - self.sample_z_, self.sample_y_ = Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True) + self.sample_z_, self.sample_y_ = self.sample_z_.cuda(), self.sample_y_.cuda() def train(self): self.train_hist = {} @@ -149,10 +146,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -160,15 +156,14 @@ def train(self): for epoch in range(self.epoch): self.G.train() epoch_start_time = time.time() - for iter in range(len(self.data_X) // self.batch_size): - x_ = self.data_X[iter*self.batch_size:(iter+1)*self.batch_size] + for iter, (x_, y_) in enumerate(self.data_loader): + if iter == self.data_loader.dataset.__len__() // self.batch_size: + break z_ = torch.rand((self.batch_size, self.z_dim)) - y_vec_ = self.data_Y[iter*self.batch_size:(iter+1)*self.batch_size] - + y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1) + y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(self.batch_size, self.class_num, self.input_size, self.input_size) if self.gpu_mode: - x_, z_, y_vec_ = Variable(x_.cuda()), Variable(z_.cuda()), 
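The label handling above replaces the precomputed one-hot tensors from utils.load_mnist with on-the-fly encoding: scatter_ turns the loader's integer labels into one-hot vectors (concatenated with the noise), and unsqueeze/expand broadcasts them into per-class feature maps (concatenated with the image channels, as CGAN does below). The same two lines in isolation:

import torch

batch_size, class_num, input_size = 4, 10, 32
y_ = torch.tensor([3, 1, 7, 0])   # integer labels as they come from the loader
y_vec_ = torch.zeros(batch_size, class_num).scatter_(1, y_.unsqueeze(1), 1)
y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(batch_size, class_num, input_size, input_size)
# y_vec_: (4, 10) one-hot rows; y_fill_: (4, 10, 32, 32) with one all-ones plane per sample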
Variable(y_vec_.cuda()) - else: - x_, z_, y_vec_ = Variable(x_), Variable(z_), Variable(y_vec_) + x_, z_, y_vec_, y_fill_ = x_.cuda(), z_.cuda(), y_vec_.cuda(), y_fill_.cuda() # update D network self.D_optimizer.zero_grad() @@ -183,7 +178,7 @@ def train(self): C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1]) D_loss = D_real_loss + C_real_loss + D_fake_loss + C_fake_loss - self.train_hist['D_loss'].append(D_loss.data[0]) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -198,17 +193,18 @@ def train(self): C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1]) G_loss += C_fake_loss - self.train_hist['G_loss'].append(G_loss.data[0]) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -233,16 +229,10 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_, self.sample_y_) else: """ random noise """ - temp = torch.LongTensor(self.batch_size, 1).random_() % 10 - sample_y_ = torch.FloatTensor(self.batch_size, 10) - sample_y_.zero_() - sample_y_.scatter_(1, temp, 1) + sample_y_ = torch.zeros(self.batch_size, self.class_num).scatter_(1, torch.randint(0, self.class_num - 1, (self.batch_size, 1)).type(torch.LongTensor), 1) + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_, sample_y_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True), \ - Variable(sample_y_.cuda(), volatile=True) - else: - sample_z_, sample_y_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True), \ - Variable(sample_y_, volatile=True) + sample_z_, sample_y_ = sample_z_.cuda(), sample_y_.cuda() samples = self.G(sample_z_, sample_y_) @@ -251,6 +241,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/BEGAN.py b/BEGAN.py index 35f5669..6883c3a 100644 --- a/BEGAN.py +++ b/BEGAN.py @@ -2,32 +2,23 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable -from torch.utils.data import DataLoader -from torchvision import datasets, transforms +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif 
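The ACGAN update above sums an adversarial BCE term from the dc head with an auxiliary-classification CE term from the cl head, for both the real and the generated batch, and the generator adds the same CE term to its own BCE loss. A self-contained sketch with random tensors standing in for the network outputs:

import torch
import torch.nn as nn

bce, ce = nn.BCELoss(), nn.CrossEntropyLoss()
batch, class_num = 8, 10
y_real_, y_fake_ = torch.ones(batch, 1), torch.zeros(batch, 1)
labels = torch.randint(0, class_num, (batch,))   # the class ids behind y_vec_, i.e. torch.max(y_vec_, 1)[1]

D_real, C_real = torch.rand(batch, 1), torch.randn(batch, class_num)   # stand-ins for self.D(x_)
D_fake, C_fake = torch.rand(batch, 1), torch.randn(batch, class_num)   # stand-ins for self.D(G_)

D_loss = bce(D_real, y_real_) + ce(C_real, labels) + bce(D_fake, y_fake_) + ce(C_fake, labels)
G_loss = bce(D_fake, y_real_) + ce(C_fake, labels)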
dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +26,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input): x = self.fc(input) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -49,42 +40,32 @@ def forward(self, input): class discriminator(nn.Module): # It must be Auto-Encoder style architecture # Architecture : (64)4c2s-FC32-FC64*14*14_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32): super(discriminator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 3 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), nn.ReLU(), ) self.fc = nn.Sequential( - nn.Linear(64 * (self.input_height // 2) * (self.input_width // 2), 32), - nn.BatchNorm1d(32), - nn.ReLU(), - nn.Linear(32, 64 * (self.input_height // 2) * (self.input_width // 2)), - nn.BatchNorm1d(64 * (self.input_height // 2) * (self.input_width // 2)), - nn.ReLU(), + nn.Linear(64 * (self.input_size // 2) * (self.input_size // 2), 32), + nn.Linear(32, 64 * (self.input_size // 2) * (self.input_size // 2)), ) self.deconv = nn.Sequential( nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), #nn.Sigmoid(), ) + utils.initialize_weights(self) def forward(self, input): x = self.conv(input) x = x.view(x.size()[0], -1) x = self.fc(x) - x = x.view(-1, 64, (self.input_height // 2), (self.input_width // 2)) + x = x.view(-1, 64, (self.input_size // 2), (self.input_size // 2)) x = self.deconv(x) return x @@ -93,7 +74,7 @@ class BEGAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 64 + self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -101,52 +82,36 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type - - # BEGAN parameters - self.gamma = 0.75 + self.input_size = args.input_size + self.z_dim = 62 + self.gamma = 1 self.lambda_ = 0.001 - self.k = 0. 
+ self.k = 0.0 + self.lr_lower_boundary = 0.00002 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) - self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) - self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) + self.G_optimizer = optim.Adam(self.G.parameters(), lr=0.0002, betas=(args.beta1, args.beta2)) + self.D_optimizer = optim.Adam(self.D.parameters(), lr=0.0002, betas=(args.beta1, args.beta2)) if self.gpu_mode: self.G.cuda() self.D.cuda() - # self.L1_loss = torch.nn.L1loss().cuda() # BEGAN does not work well when using L1loss(). - # else: - # self.L1_loss = torch.nn.L1loss() print('---------- Networks architecture -------------') utils.print_network(self.G) utils.print_network(self.D) print('-----------------------------------------------') - # load dataset - if self.dataset == 'mnist': - self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True, - transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'fashion-mnist': - self.data_loader = DataLoader( - datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'celebA': - self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose( - [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size, - shuffle=True) - self.z_dim = 62 - # fixed noise + self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + self.sample_z_ = self.sample_z_.cuda() def train(self): self.train_hist = {} @@ -154,11 +119,14 @@ def train(self): self.train_hist['G_loss'] = [] self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.M = {} + self.M['pre'] = [] + self.M['pre'].append(1) + self.M['cur'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -173,22 +141,20 @@ def train(self): z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - x_, z_ = Variable(x_.cuda()), Variable(z_.cuda()) - else: - x_, z_ = Variable(x_), Variable(z_) + x_, z_ = x_.cuda(), z_.cuda() # update D network self.D_optimizer.zero_grad() D_real = self.D(x_) - D_real_err = torch.mean(torch.abs(D_real - x_)) + D_real_loss = torch.mean(torch.abs(D_real - x_)) G_ = self.G(z_) D_fake = self.D(G_) - D_fake_err = torch.mean(torch.abs(D_fake - G_)) + D_fake_loss = 
torch.mean(torch.abs(D_fake - G_)) - D_loss = D_real_err - self.k * D_fake_err - self.train_hist['D_loss'].append(D_loss.data[0]) + D_loss = D_real_loss - self.k * D_fake_loss + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -198,31 +164,51 @@ def train(self): G_ = self.G(z_) D_fake = self.D(G_) - D_fake_err = torch.mean(torch.abs(D_fake - G_)) + D_fake_loss = torch.mean(torch.abs(D_fake - G_)) - G_loss = D_fake_err - self.train_hist['G_loss'].append(G_loss.data[0]) + G_loss = D_fake_loss + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() # convergence metric - temp_M = D_real_err + torch.abs(self.gamma * D_real_err - D_fake_err) + temp_M = D_real_loss + torch.abs(self.gamma * D_real_loss - G_loss) # operation for updating k - temp_k = self.k + self.lambda_ * (self.gamma * D_real_err - D_fake_err) - temp_k = temp_k.data[0] + temp_k = self.k + self.lambda_ * (self.gamma * D_real_loss - G_loss) + temp_k = temp_k.item() - # self.k = temp_k.data[0] self.k = min(max(temp_k, 0), 1) - self.M = temp_M.data[0] + self.M['cur'] = temp_M.item() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f" % - ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0], self.M, self.k)) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item(), self.M['cur'], self.k)) + + + # if epoch == 0: + # self.M['pre'] = self.M['cur'] + # self.M['cur'] = [] + # else: + if np.mean(self.M['pre']) < np.mean(self.M['cur']): + pre_lr = self.G_optimizer.param_groups[0]['lr'] + self.G_optimizer.param_groups[0]['lr'] = max(self.G_optimizer.param_groups[0]['lr'] / 2.0, + self.lr_lower_boundary) + self.D_optimizer.param_groups[0]['lr'] = max(self.D_optimizer.param_groups[0]['lr'] / 2.0, + self.lr_lower_boundary) + print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str( + np.mean(self.M['cur'])) + ', lr: ' + str(pre_lr) + ' --> ' + str( + self.G_optimizer.param_groups[0]['lr'])) + else: + print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str(np.mean(self.M['cur']))) + self.M['pre'] = self.M['cur'] + + self.M['cur'] = [] self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -248,10 +234,9 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_) else: """ random noise """ + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + sample_z_ = sample_z_.cuda() samples = self.G(sample_z_) @@ -260,6 +245,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/CGAN.py b/CGAN.py index f0ea1d3..bbf55ee 100644 --- a/CGAN.py +++ b/CGAN.py @@ -2,25 +2,24 @@ import 
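The BEGAN bookkeeping above maintains a scalar k that balances the real and fake reconstruction losses and a convergence measure M that drives the learning-rate halving at the end of each epoch. A condensed sketch with scalar stand-ins for the per-batch losses:

import torch

gamma, lambda_, k = 1.0, 0.001, 0.0
D_real_loss = torch.tensor(0.31)   # mean |D(x_) - x_| on a real batch (stand-in)
G_loss = torch.tensor(0.12)        # mean |D(G_) - G_| on a fake batch (stand-in)

k = min(max(k + lambda_ * (gamma * D_real_loss - G_loss).item(), 0.0), 1.0)
M_cur = (D_real_loss + torch.abs(gamma * D_real_loss - G_loss)).item()
# when the epoch mean of M stops improving, both optimizers' lr is halved,
# bounded below by lr_lower_boundary, as in the block above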
numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32, class_num=10): super(generator, self).__init__() - if dataset == 'mnist' or 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 + 10 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size + self.class_num = class_num self.fc = nn.Sequential( - nn.Linear(self.input_dim, 1024), + nn.Linear(self.input_dim + self.class_num, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -28,14 +27,14 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input, label): x = torch.cat([input, label], 1) x = self.fc(x) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -43,23 +42,22 @@ def forward(self, input, label): class discriminator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32, class_num=10): super(discriminator, self).__init__() - if dataset == 'mnist' or 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 + 10 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size + self.class_num = class_num self.conv = nn.Sequential( - nn.Conv2d(self.input_dim, 64, 4, 2, 1), + nn.Conv2d(self.input_dim + self.class_num, 64, 4, 2, 1), nn.LeakyReLU(0.2), nn.Conv2d(64, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.LeakyReLU(0.2), ) self.fc = nn.Sequential( - nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), + nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024), nn.BatchNorm1d(1024), nn.LeakyReLU(0.2), nn.Linear(1024, self.output_dim), @@ -70,7 +68,7 @@ def __init__(self, dataset = 'mnist'): def forward(self, input, label): x = torch.cat([input, label], 1) x = self.conv(x) - x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) + x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4)) x = self.fc(x) return x @@ -79,7 +77,6 @@ class CGAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -87,10 +84,18 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type + self.input_size = args.input_size + self.z_dim = 62 + self.class_num = 10 + self.sample_num = 
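In the CGAN discriminator above the first convolution now takes input_dim + class_num channels because the image is concatenated with the broadcast label maps along the channel dimension. A minimal sketch of that concatenation:

import torch

batch, channels, class_num, input_size = 4, 1, 10, 32
x_ = torch.rand(batch, channels, input_size, input_size)   # image batch
y_ = torch.tensor([3, 1, 7, 0])
y_vec_ = torch.zeros(batch, class_num).scatter_(1, y_.unsqueeze(1), 1)
y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(batch, class_num, input_size, input_size)
d_input = torch.cat([x_, y_fill_], 1)                       # shape (4, 1 + 10, 32, 32)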
self.class_num ** 2 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size, class_num=self.class_num) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size, class_num=self.class_num) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -106,32 +111,24 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load mnist - self.data_X, self.data_Y = utils.load_mnist(args.dataset) - self.z_dim = 62 - self.y_dim = 10 - # fixed noise & condition self.sample_z_ = torch.zeros((self.sample_num, self.z_dim)) - for i in range(10): - self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim) - for j in range(1, self.y_dim): - self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim] + for i in range(self.class_num): + self.sample_z_[i*self.class_num] = torch.rand(1, self.z_dim) + for j in range(1, self.class_num): + self.sample_z_[i*self.class_num + j] = self.sample_z_[i*self.class_num] - temp = torch.zeros((10, 1)) - for i in range(self.y_dim): + temp = torch.zeros((self.class_num, 1)) + for i in range(self.class_num): temp[i, 0] = i temp_y = torch.zeros((self.sample_num, 1)) - for i in range(10): - temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp + for i in range(self.class_num): + temp_y[i*self.class_num: (i+1)*self.class_num] = temp - self.sample_y_ = torch.zeros((self.sample_num, self.y_dim)) - self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1) + self.sample_y_ = torch.zeros((self.sample_num, self.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) if self.gpu_mode: - self.sample_z_, self.sample_y_ = Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True) - else: - self.sample_z_, self.sample_y_ = Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True) + self.sample_z_, self.sample_y_ = self.sample_z_.cuda(), self.sample_y_.cuda() def train(self): self.train_hist = {} @@ -140,14 +137,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) - - self.fill = torch.zeros([10, 10, self.data_X.size()[2], self.data_X.size()[3]]) - for i in range(10): - self.fill[i, i, :, :] = 1 + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -155,17 +147,15 @@ def train(self): for epoch in range(self.epoch): self.G.train() epoch_start_time = time.time() - for iter in range(len(self.data_X) // self.batch_size): - x_ = self.data_X[iter*self.batch_size:(iter+1)*self.batch_size] - z_ = torch.rand((self.batch_size, self.z_dim)) - y_vec_ = self.data_Y[iter*self.batch_size:(iter+1)*self.batch_size] - y_fill_ = self.fill[torch.max(y_vec_, 1)[1].squeeze()] + for iter, (x_, 
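The fixed sample_z_/sample_y_ block above builds a class_num x class_num preview grid in which each row shares one noise vector and the columns cycle through the classes. An equivalent, slightly condensed sketch:

import torch

class_num, z_dim = 10, 62
sample_num = class_num ** 2

sample_z_ = torch.rand(class_num, 1, z_dim).expand(class_num, class_num, z_dim).reshape(sample_num, z_dim)
labels = torch.arange(class_num).repeat(class_num).unsqueeze(1)   # 0..9 within every row block
sample_y_ = torch.zeros(sample_num, class_num).scatter_(1, labels, 1)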
y_) in enumerate(self.data_loader): + if iter == self.data_loader.dataset.__len__() // self.batch_size: + break + z_ = torch.rand((self.batch_size, self.z_dim)) + y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1) + y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(self.batch_size, self.class_num, self.input_size, self.input_size) if self.gpu_mode: - x_, z_, y_vec_, y_fill_ = Variable(x_.cuda()), Variable(z_.cuda()), \ - Variable(y_vec_.cuda()), Variable(y_fill_.cuda()) - else: - x_, z_, y_vec_, y_fill_ = Variable(x_), Variable(z_), Variable(y_vec_), Variable(y_fill_) + x_, z_, y_vec_, y_fill_ = x_.cuda(), z_.cuda(), y_vec_.cuda(), y_fill_.cuda() # update D network self.D_optimizer.zero_grad() @@ -178,7 +168,7 @@ def train(self): D_fake_loss = self.BCE_loss(D_fake, self.y_fake_) D_loss = D_real_loss + D_fake_loss - self.train_hist['D_loss'].append(D_loss.data[0]) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -189,17 +179,18 @@ def train(self): G_ = self.G(z_, y_vec_) D_fake = self.D(G_, y_fill_) G_loss = self.BCE_loss(D_fake, self.y_real_) - self.train_hist['G_loss'].append(G_loss.data[0]) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -224,16 +215,10 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_, self.sample_y_) else: """ random noise """ - temp = torch.LongTensor(self.batch_size, 1).random_() % 10 - sample_y_ = torch.FloatTensor(self.batch_size, 10) - sample_y_.zero_() - sample_y_.scatter_(1, temp, 1) + sample_y_ = torch.zeros(self.batch_size, self.class_num).scatter_(1, torch.randint(0, self.class_num - 1, (self.batch_size, 1)).type(torch.LongTensor), 1) + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_, sample_y_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True), \ - Variable(sample_y_.cuda(), volatile=True) - else: - sample_z_, sample_y_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True), \ - Variable(sample_y_, volatile=True) + sample_z_, sample_y_ = sample_z_.cuda(), sample_y_.cuda() samples = self.G(sample_z_, sample_y_) @@ -242,6 +227,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/DRAGAN.py b/DRAGAN.py index 6aea79a..1fafc14 100644 --- a/DRAGAN.py +++ b/DRAGAN.py @@ -2,32 +2,24 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable, grad -from torch.utils.data import DataLoader -from torchvision import 
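One detail in the random-label branch above: torch.randint's upper bound is exclusive, so randint(0, class_num - 1, ...) never draws the last class. Sampling over all class_num labels would look like:

import torch

batch_size, class_num = 8, 10
labels = torch.randint(0, class_num, (batch_size, 1)).type(torch.LongTensor)
sample_y_ = torch.zeros(batch_size, class_num).scatter_(1, labels, 1)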
datasets, transforms +from torch.autograd import grad +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +27,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input): x = self.fc(input) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -49,18 +41,11 @@ def forward(self, input): class discriminator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32): super(discriminator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 3 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), @@ -70,7 +55,7 @@ def __init__(self, dataset = 'mnist'): nn.LeakyReLU(0.2), ) self.fc = nn.Sequential( - nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), + nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024), nn.BatchNorm1d(1024), nn.LeakyReLU(0.2), nn.Linear(1024, self.output_dim), @@ -80,7 +65,7 @@ def __init__(self, dataset = 'mnist'): def forward(self, input): x = self.conv(input) - x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) + x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4)) x = self.fc(x) return x @@ -89,7 +74,7 @@ class DRAGAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 64 + self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -97,11 +82,17 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type + self.input_size = args.input_size + self.z_dim = 62 self.lambda_ = 0.25 + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, 
self.batch_size) + data = self.data_loader.__iter__().__next__()[0] + # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -117,28 +108,10 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load dataset - if self.dataset == 'mnist': - self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True, - transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'fashion-mnist': - self.data_loader = DataLoader( - datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'celebA': - self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose( - [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size, - shuffle=True) - self.z_dim = 62 - # fixed noise + self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + self.sample_z_ = self.sample_z_.cuda() def train(self): self.train_hist = {} @@ -147,10 +120,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -163,11 +135,8 @@ def train(self): break z_ = torch.rand((self.batch_size, self.z_dim)) - if self.gpu_mode: - x_, z_ = Variable(x_.cuda()), Variable(z_.cuda()) - else: - x_, z_ = Variable(x_), Variable(z_) + x_, z_ = x_.cuda(), z_.cuda() # update D network self.D_optimizer.zero_grad() @@ -180,27 +149,28 @@ def train(self): D_fake_loss = self.BCE_loss(D_fake, self.y_fake_) """ DRAGAN Loss (Gradient penalty) """ - # This is borrowed from https://github.com/jfsantos/dragan-pytorch/blob/master/dragan.py + # This is borrowed from https://github.com/kodalinaveen3/DRAGAN/blob/master/DRAGAN.ipynb + alpha = torch.rand(self.batch_size, 1, 1, 1).cuda() if self.gpu_mode: - alpha = torch.rand(x_.size()).cuda() - x_hat = Variable(alpha * x_.data + (1 - alpha) * (x_.data + 0.5 * x_.data.std() * torch.rand(x_.size()).cuda()), - requires_grad=True) + alpha = alpha.cuda() + x_p = x_ + 0.5 * x_.std() * torch.rand(x_.size()).cuda() else: - alpha = torch.rand(x_.size()) - x_hat = Variable(alpha * x_.data + (1 - alpha) * (x_.data + 0.5 * x_.data.std() * torch.rand(x_.size())), - requires_grad=True) - pred_hat = self.D(x_hat) + x_p = x_ + 0.5 * x_.std() * torch.rand(x_.size()) + 
differences = x_p - x_ + interpolates = x_ + (alpha * differences) + interpolates.requires_grad = True + pred_hat = self.D(interpolates) if self.gpu_mode: - gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()).cuda(), + gradients = grad(outputs=pred_hat, inputs=interpolates, grad_outputs=torch.ones(pred_hat.size()).cuda(), create_graph=True, retain_graph=True, only_inputs=True)[0] else: - gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()), - create_graph=True, retain_graph=True, only_inputs=True)[0] + gradients = grad(outputs=pred_hat, inputs=interpolates, grad_outputs=torch.ones(pred_hat.size()), + create_graph=True, retain_graph=True, only_inputs=True)[0] gradient_penalty = self.lambda_ * ((gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean() D_loss = D_real_loss + D_fake_loss + gradient_penalty - self.train_hist['D_loss'].append(D_loss.data[0]) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -211,17 +181,18 @@ def train(self): D_fake = self.D(G_) G_loss = self.BCE_loss(D_fake, self.y_real_) - self.train_hist['G_loss'].append(G_loss.data[0]) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -246,10 +217,9 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_) else: """ random noise """ + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + sample_z_ = sample_z_.cuda() samples = self.G(sample_z_) @@ -258,6 +228,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/EBGAN.py b/EBGAN.py index be44825..b2b44f8 100644 --- a/EBGAN.py +++ b/EBGAN.py @@ -5,29 +5,23 @@ from torch.autograd import Variable from torch.utils.data import DataLoader from torchvision import datasets, transforms +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 
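The DRAGAN penalty above perturbs the real batch with noise scaled by its own standard deviation, interpolates between the real and perturbed samples, and pushes the gradient norm of D at those points toward 1. Note that the hunk creates alpha with .cuda() before the gpu_mode check, so a CPU-only run would fail there. A self-contained sketch on CPU, with a toy linear discriminator standing in for the repo's D (an assumption, for illustration only):

import torch
import torch.nn as nn
from torch.autograd import grad

D = nn.Sequential(nn.Linear(32 * 32, 1), nn.Sigmoid())   # toy stand-in
x_ = torch.rand(8, 1, 32, 32)
lambda_ = 0.25

alpha = torch.rand(x_.size(0), 1, 1, 1)                   # one mixing factor per sample
x_p = x_ + 0.5 * x_.std() * torch.rand(x_.size())         # perturbed real batch
interpolates = (x_ + alpha * (x_p - x_)).requires_grad_(True)

pred_hat = D(interpolates.view(interpolates.size(0), -1))
gradients = grad(outputs=pred_hat, inputs=interpolates,
                 grad_outputs=torch.ones_like(pred_hat),
                 create_graph=True, retain_graph=True, only_inputs=True)[0]
gradient_penalty = lambda_ * ((gradients.view(gradients.size(0), -1).norm(2, 1) - 1) ** 2).mean()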
64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +29,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input): x = self.fc(input) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -49,34 +43,27 @@ def forward(self, input): class discriminator(nn.Module): # It must be Auto-Encoder style architecture # Architecture : (64)4c2s-FC32-FC64*14*14_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32): super(discriminator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 3 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), nn.ReLU(), ) self.code = nn.Sequential( - nn.Linear(64 * (self.input_height // 2) * (self.input_width // 2), 32), # bn and relu are excluded since code is used in pullaway_loss + nn.Linear(64 * (self.input_size // 2) * (self.input_size // 2), 32), # bn and relu are excluded since code is used in pullaway_loss ) self.fc = nn.Sequential( - nn.Linear(32, 64 * (self.input_height // 2) * (self.input_width // 2)), - nn.BatchNorm1d(64 * (self.input_height // 2) * (self.input_width // 2)), + nn.Linear(32, 64 * (self.input_size // 2) * (self.input_size // 2)), + nn.BatchNorm1d(64 * (self.input_size // 2) * (self.input_size // 2)), nn.ReLU(), ) self.deconv = nn.Sequential( nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - #nn.Sigmoid(), # EBGAN does not work well when using Sigmoid(). + # nn.Sigmoid(), ) utils.initialize_weights(self) @@ -85,7 +72,7 @@ def forward(self, input): x = x.view(x.size()[0], -1) code = self.code(x) x = self.fc(code) - x = x.view(-1, 64, (self.input_height // 2), (self.input_width // 2)) + x = x.view(-1, 64, (self.input_size // 2), (self.input_size // 2)) x = self.deconv(x) return x, code @@ -94,7 +81,7 @@ class EBGAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 64 + self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -102,15 +89,18 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type - - # EBGAN parameters + self.input_size = args.input_size + self.z_dim = 62 self.pt_loss_weight = 0.1 - self.margin = max(1, self.batch_size / 64.) 
# margin for loss function - # usually margin of 1 is enough, but for large batch size it must be larger than 1 + self.margin = 1 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -126,28 +116,10 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load dataset - if self.dataset == 'mnist': - self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True, - transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'fashion-mnist': - self.data_loader = DataLoader( - datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'celebA': - self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose( - [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size, - shuffle=True) - self.z_dim = 62 - # fixed noise + self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + self.sample_z_ = self.sample_z_.cuda() def train(self): self.train_hist = {} @@ -156,10 +128,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -172,26 +143,21 @@ def train(self): break z_ = torch.rand((self.batch_size, self.z_dim)) - if self.gpu_mode: - x_, z_ = Variable(x_.cuda()), Variable(z_.cuda()) - else: - x_, z_ = Variable(x_), Variable(z_) + x_, z_ = x_.cuda(), z_.cuda() # update D network self.D_optimizer.zero_grad() - D_real, D_real_code = self.D(x_) - D_real_err = self.MSE_loss(D_real, x_) + D_real, _ = self.D(x_) + D_real_loss = self.MSE_loss(D_real, x_) G_ = self.G(z_) - D_fake, D_fake_code = self.D(G_) - D_fake_err = self.MSE_loss(D_fake, G_.detach()) - if list(self.margin-D_fake_err.data)[0] > 0: - D_loss = D_real_err + (self.margin - D_fake_err) - else: - D_loss = D_real_err - self.train_hist['D_loss'].append(D_loss.data[0]) + D_fake, _ = self.D(G_) + D_fake_loss = self.MSE_loss(D_fake, G_.detach()) + + D_loss = D_real_loss + torch.clamp(self.margin - D_fake_loss, min=0) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -201,19 +167,20 
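The EBGAN discriminator loss above replaces the old if/else on the margin with a hinge via torch.clamp. In miniature, with scalar stand-ins for the two reconstruction (MSE) energies:

import torch

margin = 1.0
D_real_loss = torch.tensor(0.42)   # MSE(D(x_), x_) stand-in
D_fake_loss = torch.tensor(0.15)   # MSE(D(G_), G_.detach()) stand-in
D_loss = D_real_loss + torch.clamp(margin - D_fake_loss, min=0)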
@@ def train(self): G_ = self.G(z_) D_fake, D_fake_code = self.D(G_) - D_fake_err = self.MSE_loss(D_fake, G_.detach()) - G_loss = D_fake_err + self.pt_loss_weight * self.pullaway_loss(D_fake_code) - self.train_hist['G_loss'].append(G_loss.data[0]) + D_fake_loss = self.MSE_loss(D_fake, G_.detach()) + G_loss = D_fake_loss + self.pt_loss_weight * self.pullaway_loss(D_fake_code.view(self.batch_size, -1)) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -237,11 +204,18 @@ def pullaway_loss(self, embeddings): return pt_loss """ - norm = torch.sqrt(torch.sum(embeddings ** 2, 1, keepdim=True)) + # norm = torch.sqrt(torch.sum(embeddings ** 2, 1, keepdim=True)) + # normalized_embeddings = embeddings / norm + # similarity = torch.matmul(normalized_embeddings, normalized_embeddings.transpose(1, 0)) + # batch_size = embeddings.size()[0] + # pt_loss = (torch.sum(similarity) - batch_size) / (batch_size * (batch_size - 1)) + + norm = torch.norm(embeddings, 1) normalized_embeddings = embeddings / norm - similarity = torch.matmul(normalized_embeddings, normalized_embeddings.transpose(1, 0)) + similarity = torch.matmul(normalized_embeddings, normalized_embeddings.transpose(1, 0)) ** 2 batch_size = embeddings.size()[0] pt_loss = (torch.sum(similarity) - batch_size) / (batch_size * (batch_size - 1)) + return pt_loss @@ -259,10 +233,9 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_) else: """ random noise """ + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + sample_z_ = sample_z_.cuda() samples = self.G(sample_z_) @@ -271,6 +244,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/GAN.py b/GAN.py index 998ddd0..9b4609b 100644 --- a/GAN.py +++ b/GAN.py @@ -2,32 +2,23 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable -from torch.utils.data import DataLoader -from torchvision import datasets, transforms +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - 
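The generator side above adds the pull-away term computed on the code layer. The active version normalizes by a single global norm while squaring the similarities; the EBGAN paper's formulation (per-row L2 normalization, mean squared cosine similarity over distinct pairs) is sketched below for comparison:

import torch

def pullaway_loss(embeddings):
    n = embeddings.size(0)
    normalized = embeddings / embeddings.norm(2, dim=1, keepdim=True)   # unit-norm code vectors
    similarity = torch.matmul(normalized, normalized.t())
    return (torch.sum(similarity ** 2) - n) / (n * (n - 1))             # drop the diagonal, average the rest

codes = torch.randn(8, 32)          # stand-in for D_fake_code.view(batch_size, -1)
pt = pullaway_loss(codes)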
self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +26,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input): x = self.fc(input) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -49,18 +40,11 @@ def forward(self, input): class discriminator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32): super(discriminator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 3 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), @@ -70,7 +54,7 @@ def __init__(self, dataset = 'mnist'): nn.LeakyReLU(0.2), ) self.fc = nn.Sequential( - nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), + nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024), nn.BatchNorm1d(1024), nn.LeakyReLU(0.2), nn.Linear(1024, self.output_dim), @@ -80,7 +64,7 @@ def __init__(self, dataset = 'mnist'): def forward(self, input): x = self.conv(input) - x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) + x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4)) x = self.fc(x) return x @@ -89,7 +73,7 @@ class GAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 16 + self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -97,10 +81,16 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type + self.input_size = args.input_size + self.z_dim = 62 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = 
optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -116,28 +106,12 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load dataset - if self.dataset == 'mnist': - self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True, - transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'fashion-mnist': - self.data_loader = DataLoader( - datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'celebA': - self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose( - [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size, - shuffle=True) - self.z_dim = 62 # fixed noise + self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + self.sample_z_ = self.sample_z_.cuda() + def train(self): self.train_hist = {} @@ -146,10 +120,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -162,11 +135,8 @@ def train(self): break z_ = torch.rand((self.batch_size, self.z_dim)) - if self.gpu_mode: - x_, z_ = Variable(x_.cuda()), Variable(z_.cuda()) - else: - x_, z_ = Variable(x_), Variable(z_) + x_, z_ = x_.cuda(), z_.cuda() # update D network self.D_optimizer.zero_grad() @@ -179,7 +149,7 @@ def train(self): D_fake_loss = self.BCE_loss(D_fake, self.y_fake_) D_loss = D_real_loss + D_fake_loss - self.train_hist['D_loss'].append(D_loss.data[0]) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -190,17 +160,18 @@ def train(self): G_ = self.G(z_) D_fake = self.D(G_) G_loss = self.BCE_loss(D_fake, self.y_real_) - self.train_hist['G_loss'].append(G_loss.data[0]) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -226,10 +197,9 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_) else: """ random noise """ + sample_z_ = torch.rand((self.batch_size, 
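The same PyTorch 0.4 migration runs through every file here: Variable(..., volatile=True) becomes a plain tensor evaluated under torch.no_grad(), and loss.data[0] becomes loss.item(). In miniature:

import torch

x = torch.rand(4, 62)
with torch.no_grad():        # replaces Variable(..., volatile=True)
    y = x * 2                # no autograd graph is recorded here

loss = x.sum() - 1.0
value = loss.item()          # python float, replaces loss.data[0]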
self.z_dim)) if self.gpu_mode: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + sample_z_ = sample_z_.cuda() samples = self.G(sample_z_) @@ -238,6 +208,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/LSGAN.py b/LSGAN.py index 30a0663..95aa57b 100644 --- a/LSGAN.py +++ b/LSGAN.py @@ -2,32 +2,23 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable -from torch.utils.data import DataLoader -from torchvision import datasets, transforms +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +26,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), - nn.Sigmoid(), + nn.Tanh(), ) utils.initialize_weights(self) def forward(self, input): x = self.fc(input) - x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) + x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4)) x = self.deconv(x) return x @@ -49,18 +40,11 @@ def forward(self, input): class discriminator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=1, output_dim=1, input_size=32): super(discriminator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 1 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 3 - self.output_dim = 1 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.conv = nn.Sequential( nn.Conv2d(self.input_dim, 64, 4, 2, 1), @@ -70,17 +54,17 @@ def __init__(self, dataset = 'mnist'): nn.LeakyReLU(0.2), ) self.fc = nn.Sequential( - nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), + nn.Linear(128 * (self.input_size // 4) * (self.input_size // 
4), 1024), nn.BatchNorm1d(1024), nn.LeakyReLU(0.2), nn.Linear(1024, self.output_dim), - nn.Sigmoid(), + # nn.Sigmoid(), ) utils.initialize_weights(self) def forward(self, input): x = self.conv(input) - x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) + x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4)) x = self.fc(x) return x @@ -89,7 +73,7 @@ class LSGAN(object): def __init__(self, args): # parameters self.epoch = args.epoch - self.sample_num = 64 + self.sample_num = 100 self.batch_size = args.batch_size self.save_dir = args.save_dir self.result_dir = args.result_dir @@ -97,10 +81,16 @@ def __init__(self, args): self.log_dir = args.log_dir self.gpu_mode = args.gpu_mode self.model_name = args.gan_type + self.input_size = args.input_size + self.z_dim = 62 + + # load dataset + self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) + data = self.data_loader.__iter__().__next__()[0] # networks init - self.G = generator(self.dataset) - self.D = discriminator(self.dataset) + self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) + self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) @@ -116,28 +106,10 @@ def __init__(self, args): utils.print_network(self.D) print('-----------------------------------------------') - # load dataset - if self.dataset == 'mnist': - self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True, - transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'fashion-mnist': - self.data_loader = DataLoader( - datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor()])), - batch_size=self.batch_size, shuffle=True) - elif self.dataset == 'celebA': - self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose( - [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size, - shuffle=True) - self.z_dim = 62 - # fixed noise + self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + self.sample_z_ = self.sample_z_.cuda() def train(self): self.train_hist = {} @@ -146,10 +118,9 @@ def train(self): self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] + self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1) if self.gpu_mode: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) - else: - self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) + self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda() self.D.train() print('training start!!') @@ -162,11 +133,8 @@ def train(self): break z_ = torch.rand((self.batch_size, self.z_dim)) - if self.gpu_mode: - x_, z_ = Variable(x_.cuda()), Variable(z_.cuda()) - else: - x_, z_ = Variable(x_), Variable(z_) + x_, z_ = x_.cuda(), z_.cuda() # update D network self.D_optimizer.zero_grad() @@ 
-179,7 +147,7 @@ def train(self): D_fake_loss = self.MSE_loss(D_fake, self.y_fake_) D_loss = D_real_loss + D_fake_loss - self.train_hist['D_loss'].append(D_loss.data[0]) + self.train_hist['D_loss'].append(D_loss.item()) D_loss.backward() self.D_optimizer.step() @@ -190,17 +158,18 @@ def train(self): G_ = self.G(z_) D_fake = self.D(G_) G_loss = self.MSE_loss(D_fake, self.y_real_) - self.train_hist['G_loss'].append(G_loss.data[0]) + self.train_hist['G_loss'].append(G_loss.item()) G_loss.backward() self.G_optimizer.step() if ((iter + 1) % 100) == 0: print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" % - ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0])) + ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item())) self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) - self.visualize_results((epoch+1)) + with torch.no_grad(): + self.visualize_results((epoch+1)) self.train_hist['total_time'].append(time.time() - start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), @@ -226,10 +195,9 @@ def visualize_results(self, epoch, fix=True): samples = self.G(self.sample_z_) else: """ random noise """ + sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True) - else: - sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True) + sample_z_ = sample_z_.cuda() samples = self.G(sample_z_) @@ -238,6 +206,7 @@ def visualize_results(self, epoch, fix=True): else: samples = samples.data.numpy().transpose(0, 2, 3, 1) + samples = (samples + 1) / 2 utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') diff --git a/WGAN.py b/WGAN.py index 89263a5..08e1b0a 100644 --- a/WGAN.py +++ b/WGAN.py @@ -2,32 +2,23 @@ import numpy as np import torch.nn as nn import torch.optim as optim -from torch.autograd import Variable -from torch.utils.data import DataLoader -from torchvision import datasets, transforms +from dataloader import dataloader class generator(nn.Module): # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S - def __init__(self, dataset = 'mnist'): + def __init__(self, input_dim=100, output_dim=1, input_size=32): super(generator, self).__init__() - if dataset == 'mnist' or dataset == 'fashion-mnist': - self.input_height = 28 - self.input_width = 28 - self.input_dim = 62 - self.output_dim = 1 - elif dataset == 'celebA': - self.input_height = 64 - self.input_width = 64 - self.input_dim = 62 - self.output_dim = 3 + self.input_dim = input_dim + self.output_dim = output_dim + self.input_size = input_size self.fc = nn.Sequential( nn.Linear(self.input_dim, 1024), nn.BatchNorm1d(1024), nn.ReLU(), - nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), - nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), + nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)), + nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)), nn.ReLU(), ) self.deconv = nn.Sequential( @@ -35,13 +26,13 @@ def __init__(self, dataset = 'mnist'): nn.BatchNorm2d(64), nn.ReLU(), 
             nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1),
-            nn.Sigmoid(),
+            nn.Tanh(),
         )
         utils.initialize_weights(self)
     def forward(self, input):
         x = self.fc(input)
-        x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4))
+        x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4))
         x = self.deconv(x)
         return x
@@ -49,18 +40,11 @@ def forward(self, input):
 class discriminator(nn.Module):
     # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657)
     # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
-    def __init__(self, dataset = 'mnist'):
+    def __init__(self, input_dim=1, output_dim=1, input_size=32):
         super(discriminator, self).__init__()
-        if dataset == 'mnist' or dataset == 'fashion-mnist':
-            self.input_height = 28
-            self.input_width = 28
-            self.input_dim = 1
-            self.output_dim = 1
-        elif dataset == 'celebA':
-            self.input_height = 64
-            self.input_width = 64
-            self.input_dim = 3
-            self.output_dim = 1
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.input_size = input_size
         self.conv = nn.Sequential(
             nn.Conv2d(self.input_dim, 64, 4, 2, 1),
@@ -70,17 +54,17 @@ def __init__(self, dataset = 'mnist'):
             nn.LeakyReLU(0.2),
         )
         self.fc = nn.Sequential(
-            nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024),
+            nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024),
             nn.BatchNorm1d(1024),
             nn.LeakyReLU(0.2),
             nn.Linear(1024, self.output_dim),
-            nn.Sigmoid(),
+            # nn.Sigmoid(),
         )
         utils.initialize_weights(self)
     def forward(self, input):
         x = self.conv(input)
-        x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4))
+        x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4))
         x = self.fc(x)
         return x
@@ -89,7 +73,7 @@ class WGAN(object):
     def __init__(self, args):
         # parameters
         self.epoch = args.epoch
-        self.sample_num = 64
+        self.sample_num = 100
         self.batch_size = args.batch_size
         self.save_dir = args.save_dir
         self.result_dir = args.result_dir
@@ -97,12 +81,18 @@ def __init__(self, args):
         self.log_dir = args.log_dir
         self.gpu_mode = args.gpu_mode
         self.model_name = args.gan_type
+        self.input_size = args.input_size
+        self.z_dim = 62
         self.c = 0.01 # clipping value
         self.n_critic = 5 # the number of iterations of the critic per generator iteration
+        # load dataset
+        self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
+        data = self.data_loader.__iter__().__next__()[0]
+
         # networks init
-        self.G = generator(self.dataset)
-        self.D = discriminator(self.dataset)
+        self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size)
+        self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size)
         self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
         self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
@@ -115,28 +105,10 @@ def __init__(self, args):
         utils.print_network(self.D)
         print('-----------------------------------------------')
-        # load dataset
-        if self.dataset == 'mnist':
-            self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
-                                                          transform=transforms.Compose(
-                                                              [transforms.ToTensor()])),
-                                          batch_size=self.batch_size, shuffle=True)
-        elif self.dataset == 'fashion-mnist':
-            self.data_loader = DataLoader(
-                datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose(
-                    [transforms.ToTensor()])),
-                batch_size=self.batch_size, shuffle=True)
-        elif self.dataset == 'celebA':
-            self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose(
-                [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size,
-                shuffle=True)
-        self.z_dim = 62
-
         # fixed noise
+        self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
         if self.gpu_mode:
-            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
-        else:
-            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
+            self.sample_z_ = self.sample_z_.cuda()
     def train(self):
         self.train_hist = {}
@@ -145,10 +117,9 @@ def train(self):
         self.train_hist['per_epoch_time'] = []
         self.train_hist['total_time'] = []
+        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
         if self.gpu_mode:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
-        else:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))
+            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()
         self.D.train()
         print('training start!!')
@@ -161,11 +132,8 @@ def train(self):
                     break
                 z_ = torch.rand((self.batch_size, self.z_dim))
-                if self.gpu_mode:
-                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
-                else:
-                    x_, z_ = Variable(x_), Variable(z_)
+                x_, z_ = x_.cuda(), z_.cuda()
                 # update D network
                 self.D_optimizer.zero_grad()
@@ -193,19 +161,20 @@ def train(self):
                     G_ = self.G(z_)
                     D_fake = self.D(G_)
                     G_loss = -torch.mean(D_fake)
-                    self.train_hist['G_loss'].append(G_loss.data[0])
+                    self.train_hist['G_loss'].append(G_loss.item())
                     G_loss.backward()
                     self.G_optimizer.step()
-                    self.train_hist['D_loss'].append(D_loss.data[0])
+                    self.train_hist['D_loss'].append(D_loss.item())
                 if ((iter + 1) % 100) == 0:
                     print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
-                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))
+                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item()))
             self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
-            self.visualize_results((epoch+1))
+            with torch.no_grad():
+                self.visualize_results((epoch+1))
         self.train_hist['total_time'].append(time.time() - start_time)
         print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
@@ -231,10 +200,9 @@ def visualize_results(self, epoch, fix=True):
             samples = self.G(self.sample_z_)
         else:
             """ random noise """
+            sample_z_ = torch.rand((self.batch_size, self.z_dim))
             if self.gpu_mode:
-                sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
-            else:
-                sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
+                sample_z_ = sample_z_.cuda()
             samples = self.G(sample_z_)
@@ -243,6 +211,7 @@ def visualize_results(self, epoch, fix=True):
         else:
             samples = samples.data.numpy().transpose(0, 2, 3, 1)
+        samples = (samples + 1) / 2
         utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim],
                           self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
diff --git a/WGAN_GP.py b/WGAN_GP.py
index f1b6ed7..96b441b 100644
--- a/WGAN_GP.py
+++ b/WGAN_GP.py
@@ -2,32 +2,24 @@
 import numpy as np
 import torch.nn as nn
 import torch.optim as optim
-from torch.autograd import Variable, grad
-from torch.utils.data import DataLoader
-from torchvision import datasets, transforms
+from torch.autograd import grad
+from dataloader import dataloader
 class generator(nn.Module):
     # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657)
     # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
-    def __init__(self, dataset = 'mnist'):
+    def __init__(self, input_dim=100, output_dim=1, input_size=32):
         super(generator, self).__init__()
-        if dataset == 'mnist' or dataset == 'fashion-mnist':
-            self.input_height = 28
-            self.input_width = 28
-            self.input_dim = 62
-            self.output_dim = 1
-        elif dataset == 'celebA':
-            self.input_height = 64
-            self.input_width = 64
-            self.input_dim = 62
-            self.output_dim = 3
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.input_size = input_size
         self.fc = nn.Sequential(
             nn.Linear(self.input_dim, 1024),
             nn.BatchNorm1d(1024),
             nn.ReLU(),
-            nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)),
-            nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)),
+            nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)),
+            nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)),
             nn.ReLU(),
         )
         self.deconv = nn.Sequential(
@@ -35,13 +27,13 @@ def __init__(self, dataset = 'mnist'):
             nn.BatchNorm2d(64),
             nn.ReLU(),
             nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1),
-            nn.Sigmoid(),
+            nn.Tanh(),
         )
         utils.initialize_weights(self)
     def forward(self, input):
         x = self.fc(input)
-        x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4))
+        x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4))
         x = self.deconv(x)
         return x
@@ -49,18 +41,11 @@ def forward(self, input):
 class discriminator(nn.Module):
     # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657)
     # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
-    def __init__(self, dataset = 'mnist'):
+    def __init__(self, input_dim=1, output_dim=1, input_size=32):
         super(discriminator, self).__init__()
-        if dataset == 'mnist' or dataset == 'fashion-mnist':
-            self.input_height = 28
-            self.input_width = 28
-            self.input_dim = 1
-            self.output_dim = 1
-        elif dataset == 'celebA':
-            self.input_height = 64
-            self.input_width = 64
-            self.input_dim = 3
-            self.output_dim = 1
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.input_size = input_size
         self.conv = nn.Sequential(
             nn.Conv2d(self.input_dim, 64, 4, 2, 1),
@@ -70,17 +55,17 @@ def __init__(self, dataset = 'mnist'):
             nn.LeakyReLU(0.2),
         )
         self.fc = nn.Sequential(
-            nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024),
+            nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024),
             nn.BatchNorm1d(1024),
             nn.LeakyReLU(0.2),
             nn.Linear(1024, self.output_dim),
-            nn.Sigmoid(),
+            # nn.Sigmoid(),
        )
         utils.initialize_weights(self)
     def forward(self, input):
         x = self.conv(input)
-        x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4))
+        x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4))
         x = self.fc(x)
         return x
@@ -89,7 +74,7 @@ class WGAN_GP(object):
     def __init__(self, args):
         # parameters
         self.epoch = args.epoch
-        self.sample_num = 64
+        self.sample_num = 100
         self.batch_size = args.batch_size
         self.save_dir = args.save_dir
         self.result_dir = args.result_dir
@@ -97,12 +82,18 @@ def __init__(self, args):
         self.log_dir = args.log_dir
         self.gpu_mode = args.gpu_mode
         self.model_name = args.gan_type
-        self.lambda_ = 0.25
+        self.input_size = args.input_size
+        self.z_dim = 62
+        self.lambda_ = 10
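# --- Editor's aside (not part of the patch): the line above raises the gradient-penalty
# weight to lambda = 10, the value used in the WGAN-GP paper; the penalty itself is
# computed further down in train(). A minimal, self-contained sketch of the term this
# coefficient scales, assuming a critic D and real/fake batches (names here are
# illustrative, not this repository's API):
#
#     import torch
#     from torch.autograd import grad
#
#     def gradient_penalty(D, x_real, x_fake, lambda_=10):
#         # interpolate between real and fake samples
#         alpha = torch.rand(x_real.size(0), 1, 1, 1, device=x_real.device)
#         x_hat = (alpha * x_real + (1 - alpha) * x_fake).detach().requires_grad_(True)
#         d_hat = D(x_hat)
#         # gradient of the critic output w.r.t. the interpolates
#         grads = grad(outputs=d_hat, inputs=x_hat,
#                      grad_outputs=torch.ones_like(d_hat),
#                      create_graph=True, retain_graph=True)[0]
#         # penalize deviation of the gradient norm from 1
#         return lambda_ * ((grads.view(grads.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()
# --- end of aside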
         self.n_critic = 5 # the number of iterations of the critic per generator iteration
+        # load dataset
+        self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
+        data = self.data_loader.__iter__().__next__()[0]
+
         # networks init
-        self.G = generator(self.dataset)
-        self.D = discriminator(self.dataset)
+        self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size)
+        self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size)
         self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
         self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
@@ -115,28 +106,10 @@ def __init__(self, args):
         utils.print_network(self.D)
         print('-----------------------------------------------')
-        # load dataset
-        if self.dataset == 'mnist':
-            self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
-                                                          transform=transforms.Compose(
-                                                              [transforms.ToTensor()])),
-                                          batch_size=self.batch_size, shuffle=True)
-        elif self.dataset == 'fashion-mnist':
-            self.data_loader = DataLoader(
-                datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transforms.Compose(
-                    [transforms.ToTensor()])),
-                batch_size=self.batch_size, shuffle=True)
-        elif self.dataset == 'celebA':
-            self.data_loader = utils.load_celebA('data/celebA', transform=transforms.Compose(
-                [transforms.CenterCrop(160), transforms.Scale(64), transforms.ToTensor()]), batch_size=self.batch_size,
-                shuffle=True)
-        self.z_dim = 62
-
         # fixed noise
+        self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
         if self.gpu_mode:
-            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
-        else:
-            self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
+            self.sample_z_ = self.sample_z_.cuda()
     def train(self):
         self.train_hist = {}
@@ -145,10 +118,9 @@ def train(self):
         self.train_hist['per_epoch_time'] = []
         self.train_hist['total_time'] = []
+        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
         if self.gpu_mode:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
-        else:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))
+            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()
         self.D.train()
         print('training start!!')
@@ -161,11 +133,8 @@ def train(self):
                     break
                 z_ = torch.rand((self.batch_size, self.z_dim))
-                if self.gpu_mode:
-                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
-                else:
-                    x_, z_ = Variable(x_), Variable(z_)
+                x_, z_ = x_.cuda(), z_.cuda()
                 # update D network
                 self.D_optimizer.zero_grad()
@@ -178,12 +147,12 @@ def train(self):
                 D_fake_loss = torch.mean(D_fake)
                 # gradient penalty
+                alpha = torch.rand((self.batch_size, 1, 1, 1))
                 if self.gpu_mode:
-                    alpha = torch.rand(x_.size()).cuda()
-                else:
-                    alpha = torch.rand(x_.size())
+                    alpha = alpha.cuda()
-                x_hat = Variable(alpha * x_.data + (1 - alpha) * G_.data, requires_grad=True)
+                x_hat = alpha * x_.data + (1 - alpha) * G_.data
+                x_hat.requires_grad = True
                 pred_hat = self.D(x_hat)
                 if self.gpu_mode:
@@ -207,19 +176,20 @@ def train(self):
                     G_ = self.G(z_)
                     D_fake = self.D(G_)
                     G_loss = -torch.mean(D_fake)
-                    self.train_hist['G_loss'].append(G_loss.data[0])
+                    self.train_hist['G_loss'].append(G_loss.item())
                     G_loss.backward()
                     self.G_optimizer.step()
-                    self.train_hist['D_loss'].append(D_loss.data[0])
+                    self.train_hist['D_loss'].append(D_loss.item())
                 if ((iter + 1) % 100) == 0:
                     print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
-                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))
+                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item()))
             self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
-            self.visualize_results((epoch+1))
+            with torch.no_grad():
+                self.visualize_results((epoch+1))
         self.train_hist['total_time'].append(time.time() - start_time)
         print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
@@ -245,10 +215,9 @@ def visualize_results(self, epoch, fix=True):
             samples = self.G(self.sample_z_)
         else:
             """ random noise """
+            sample_z_ = torch.rand((self.batch_size, self.z_dim))
             if self.gpu_mode:
-                sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
-            else:
-                sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
+                sample_z_ = sample_z_.cuda()
             samples = self.G(sample_z_)
@@ -257,6 +226,7 @@ def visualize_results(self, epoch, fix=True):
         else:
             samples = samples.data.numpy().transpose(0, 2, 3, 1)
+        samples = (samples + 1) / 2
         utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim],
                           self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
diff --git a/dataloader.py b/dataloader.py
new file mode 100644
index 0000000..dd89842
--- /dev/null
+++ b/dataloader.py
@@ -0,0 +1,31 @@
+from torch.utils.data import DataLoader
+from torchvision import datasets, transforms
+
+def dataloader(dataset, input_size, batch_size, split='train'):
+    transform = transforms.Compose([transforms.Resize((input_size, input_size)), transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
+    if dataset == 'mnist':
+        data_loader = DataLoader(
+            datasets.MNIST('data/mnist', train=True, download=True, transform=transform),
+            batch_size=batch_size, shuffle=True)
+    elif dataset == 'fashion-mnist':
+        data_loader = DataLoader(
+            datasets.FashionMNIST('data/fashion-mnist', train=True, download=True, transform=transform),
+            batch_size=batch_size, shuffle=True)
+    elif dataset == 'cifar10':
+        data_loader = DataLoader(
+            datasets.CIFAR10('data/cifar10', train=True, download=True, transform=transform),
+            batch_size=batch_size, shuffle=True)
+    elif dataset == 'svhn':
+        data_loader = DataLoader(
+            datasets.SVHN('data/svhn', split=split, download=True, transform=transform),
+            batch_size=batch_size, shuffle=True)
+    elif dataset == 'stl10':
+        data_loader = DataLoader(
+            datasets.STL10('data/stl10', split=split, download=True, transform=transform),
+            batch_size=batch_size, shuffle=True)
+    elif dataset == 'lsun-bed':
+        data_loader = DataLoader(
+            datasets.LSUN('data/lsun', classes=['bedroom_train'], transform=transform),
+            batch_size=batch_size, shuffle=True)
+
+    return data_loader
\ No newline at end of file
diff --git a/infoGAN.py b/infoGAN.py
index 2ebd30d..64fb361 100644
--- a/infoGAN.py
+++ b/infoGAN.py
@@ -4,25 +4,25 @@
 import torch.nn.functional as F
 import torch.optim as optim
 import matplotlib.pyplot as plt
-from torch.autograd import Variable
+from dataloader import dataloader
 class generator(nn.Module):
     # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657)
     # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
-    def __init__(self, dataset = 'mnist'):
+    def __init__(self, input_dim=100, output_dim=1, input_size=32, len_discrete_code=10, len_continuous_code=2):
         super(generator, self).__init__()
-        if dataset == 'mnist' or 'fashion-mnist':
-            self.input_height = 28
-            self.input_width = 28
-            self.input_dim = 62 + 12
-            self.output_dim = 1
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.input_size = input_size
+        self.len_discrete_code = len_discrete_code # categorical distribution (i.e. label)
+        self.len_continuous_code = len_continuous_code # gaussian distribution (e.g. rotation, thickness)
         self.fc = nn.Sequential(
-            nn.Linear(self.input_dim, 1024),
+            nn.Linear(self.input_dim + self.len_discrete_code + self.len_continuous_code, 1024),
             nn.BatchNorm1d(1024),
             nn.ReLU(),
-            nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)),
-            nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)),
+            nn.Linear(1024, 128 * (self.input_size // 4) * (self.input_size // 4)),
+            nn.BatchNorm1d(128 * (self.input_size // 4) * (self.input_size // 4)),
             nn.ReLU(),
         )
         self.deconv = nn.Sequential(
@@ -30,14 +30,14 @@ def __init__(self, dataset = 'mnist'):
             nn.BatchNorm2d(64),
             nn.ReLU(),
             nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1),
-            nn.Sigmoid(),
+            nn.Tanh(),
         )
         utils.initialize_weights(self)
     def forward(self, input, cont_code, dist_code):
         x = torch.cat([input, cont_code, dist_code], 1)
         x = self.fc(x)
-        x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4))
+        x = x.view(-1, 128, (self.input_size // 4), (self.input_size // 4))
         x = self.deconv(x)
         return x
@@ -45,15 +45,13 @@ def forward(self, input, cont_code, dist_code):
 class discriminator(nn.Module):
     # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657)
     # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
-    def __init__(self, dataset='mnist'):
+    def __init__(self, input_dim=1, output_dim=1, input_size=32, len_discrete_code=10, len_continuous_code=2):
         super(discriminator, self).__init__()
-        if dataset == 'mnist' or 'fashion-mnist':
-            self.input_height = 28
-            self.input_width = 28
-            self.input_dim = 1
-            self.output_dim = 1
-            self.len_discrete_code = 10 # categorical distribution (i.e. label)
-            self.len_continuous_code = 2 # gaussian distribution (e.g. rotation, thickness)
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.input_size = input_size
+        self.len_discrete_code = len_discrete_code # categorical distribution (i.e. label)
+        self.len_continuous_code = len_continuous_code # gaussian distribution (e.g. rotation, thickness)
         self.conv = nn.Sequential(
             nn.Conv2d(self.input_dim, 64, 4, 2, 1),
@@ -63,17 +61,17 @@ def __init__(self, dataset='mnist'):
             nn.LeakyReLU(0.2),
         )
         self.fc = nn.Sequential(
-            nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024),
+            nn.Linear(128 * (self.input_size // 4) * (self.input_size // 4), 1024),
             nn.BatchNorm1d(1024),
             nn.LeakyReLU(0.2),
             nn.Linear(1024, self.output_dim + self.len_continuous_code + self.len_discrete_code),
-            nn.Sigmoid(),
+            # nn.Sigmoid(),
         )
         utils.initialize_weights(self)
     def forward(self, input):
         x = self.conv(input)
-        x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4))
+        x = x.view(-1, 128 * (self.input_size // 4) * (self.input_size // 4))
         x = self.fc(x)
         a = F.sigmoid(x[:, self.output_dim])
         b = x[:, self.output_dim:self.output_dim + self.len_continuous_code]
@@ -85,7 +83,6 @@ class infoGAN(object):
     def __init__(self, args, SUPERVISED=True):
         # parameters
         self.epoch = args.epoch
-        self.sample_num = 100
         self.batch_size = args.batch_size
         self.save_dir = args.save_dir
         self.result_dir = args.result_dir
@@ -93,13 +90,20 @@ def __init__(self, args, SUPERVISED=True):
         self.log_dir = args.log_dir
         self.gpu_mode = args.gpu_mode
         self.model_name = args.gan_type
+        self.input_size = args.input_size
+        self.z_dim = 62
         self.SUPERVISED = SUPERVISED # if it is true, label info is directly used for code
         self.len_discrete_code = 10 # categorical distribution (i.e. label)
         self.len_continuous_code = 2 # gaussian distribution (e.g. rotation, thickness)
+        self.sample_num = self.len_discrete_code ** 2
+
+        # load dataset
+        self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
+        data = self.data_loader.__iter__().__next__()[0]
         # networks init
-        self.G = generator(self.dataset)
-        self.D = discriminator(self.dataset)
+        self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size, len_discrete_code=self.len_discrete_code, len_continuous_code=self.len_continuous_code)
+        self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size, len_discrete_code=self.len_discrete_code, len_continuous_code=self.len_continuous_code)
         self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
         self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
         self.info_optimizer = optim.Adam(itertools.chain(self.G.parameters(), self.D.parameters()), lr=args.lrD, betas=(args.beta1, args.beta2))
@@ -120,58 +124,40 @@ def __init__(self, args, SUPERVISED=True):
         utils.print_network(self.D)
         print('-----------------------------------------------')
-        # load mnist
-        self.data_X, self.data_Y = utils.load_mnist(args.dataset)
-        self.z_dim = 62
-        self.y_dim = 10
-
         # fixed noise & condition
         self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
-        for i in range(10):
-            self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim)
-            for j in range(1, self.y_dim):
-                self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim]
+        for i in range(self.len_discrete_code):
+            self.sample_z_[i * self.len_discrete_code] = torch.rand(1, self.z_dim)
+            for j in range(1, self.len_discrete_code):
+                self.sample_z_[i * self.len_discrete_code + j] = self.sample_z_[i * self.len_discrete_code]
-        temp = torch.zeros((10, 1))
-        for i in range(self.y_dim):
+        temp = torch.zeros((self.len_discrete_code, 1))
+        for i in range(self.len_discrete_code):
            temp[i, 0] = i
         temp_y = torch.zeros((self.sample_num, 1))
-        for i in range(10):
-            temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp
+        for i in range(self.len_discrete_code):
+            temp_y[i * self.len_discrete_code: (i + 1) * self.len_discrete_code] = temp
-        self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
-        self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
+        self.sample_y_ = torch.zeros((self.sample_num, self.len_discrete_code)).scatter_(1, temp_y.type(torch.LongTensor), 1)
         self.sample_c_ = torch.zeros((self.sample_num, self.len_continuous_code))
         # manipulating two continuous code
-        temp_z_ = torch.rand((1, self.z_dim))
-        self.sample_z2_ = temp_z_
-        for i in range(self.sample_num - 1):
-            self.sample_z2_ = torch.cat([self.sample_z2_, temp_z_])
-
-        y = np.zeros(self.sample_num, dtype=np.int64)
-        y_one_hot = np.zeros((self.sample_num, self.len_discrete_code))
-        y_one_hot[np.arange(self.sample_num), y] = 1
-        self.sample_y2_ = torch.from_numpy(y_one_hot).type(torch.FloatTensor)
+        self.sample_z2_ = torch.rand((1, self.z_dim)).expand(self.sample_num, self.z_dim)
+        self.sample_y2_ = torch.zeros(self.sample_num, self.len_discrete_code)
+        self.sample_y2_[:, 0] = 1
         temp_c = torch.linspace(-1, 1, 10)
         self.sample_c2_ = torch.zeros((self.sample_num, 2))
-        for i in range(10):
-            for j in range(10):
-                self.sample_c2_[i*10+j, 0] = temp_c[i]
-                self.sample_c2_[i*10+j, 1] = temp_c[j]
+        for i in range(self.len_discrete_code):
+            for j in range(self.len_discrete_code):
+                self.sample_c2_[i*self.len_discrete_code+j, 0] = temp_c[i]
+                self.sample_c2_[i*self.len_discrete_code+j, 1] = temp_c[j]
         if self.gpu_mode:
             self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
-                Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True), \
-                Variable(self.sample_c_.cuda(), volatile=True), Variable(self.sample_z2_.cuda(), volatile=True), \
-                Variable(self.sample_y2_.cuda(), volatile=True), Variable(self.sample_c2_.cuda(), volatile=True)
-        else:
-            self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
-                Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True), \
-                Variable(self.sample_c_, volatile=True), Variable(self.sample_z2_, volatile=True), \
-                Variable(self.sample_y2_, volatile=True), Variable(self.sample_c2_, volatile=True)
+                self.sample_z_.cuda(), self.sample_y_.cuda(), self.sample_c_.cuda(), self.sample_z2_.cuda(), \
+                self.sample_y2_.cuda(), self.sample_c2_.cuda()
     def train(self):
         self.train_hist = {}
@@ -181,10 +167,9 @@ def train(self):
         self.train_hist['per_epoch_time'] = []
         self.train_hist['total_time'] = []
+        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
         if self.gpu_mode:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
-        else:
-            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))
+            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()
         self.D.train()
         print('training start!!')
@@ -192,11 +177,12 @@ def train(self):
         for epoch in range(self.epoch):
             self.G.train()
             epoch_start_time = time.time()
-            for iter in range(len(self.data_X) // self.batch_size):
-                x_ = self.data_X[iter*self.batch_size:(iter+1)*self.batch_size]
+            for iter, (x_, y_) in enumerate(self.data_loader):
+                if iter == self.data_loader.dataset.__len__() // self.batch_size:
+                    break
                 z_ = torch.rand((self.batch_size, self.z_dim))
                 if self.SUPERVISED == True:
-                    y_disc_ = self.data_Y[iter*self.batch_size:(iter+1)*self.batch_size]
+                    y_disc_ = torch.zeros((self.batch_size, self.len_discrete_code)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1)
                 else:
                     y_disc_ = torch.from_numpy(
                         np.random.multinomial(1, self.len_discrete_code * [float(1.0 / self.len_discrete_code)],
@@ -205,10 +191,7 @@ def train(self):
                 y_cont_ = torch.from_numpy(np.random.uniform(-1, 1, size=(self.batch_size, 2))).type(torch.FloatTensor)
                 if self.gpu_mode:
-                    x_, z_, y_disc_, y_cont_ = Variable(x_.cuda()), Variable(z_.cuda()), \
-                                               Variable(y_disc_.cuda()), Variable(y_cont_.cuda())
-                else:
-                    x_, z_, y_disc_, y_cont_ = Variable(x_), Variable(z_), Variable(y_disc_), Variable(y_cont_)
+                    x_, z_, y_disc_, y_cont_ = x_.cuda(), z_.cuda(), y_disc_.cuda(), y_cont_.cuda()
                 # update D network
                 self.D_optimizer.zero_grad()
@@ -221,7 +204,7 @@ def train(self):
                 D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
                 D_loss = D_real_loss + D_fake_loss
-                self.train_hist['D_loss'].append(D_loss.data[0])
+                self.train_hist['D_loss'].append(D_loss.item())
                 D_loss.backward(retain_graph=True)
                 self.D_optimizer.step()
@@ -233,7 +216,7 @@ def train(self):
                 D_fake, D_cont, D_disc = self.D(G_)
                 G_loss = self.BCE_loss(D_fake, self.y_real_)
-                self.train_hist['G_loss'].append(G_loss.data[0])
+                self.train_hist['G_loss'].append(G_loss.item())
                 G_loss.backward(retain_graph=True)
                 self.G_optimizer.step()
@@ -242,7 +225,7 @@ def train(self):
                 disc_loss = self.CE_loss(D_disc, torch.max(y_disc_, 1)[1])
                 cont_loss = self.MSE_loss(D_cont, y_cont_)
                 info_loss = disc_loss + cont_loss
-                self.train_hist['info_loss'].append(info_loss.data[0])
+                self.train_hist['info_loss'].append(info_loss.item())
                 info_loss.backward()
                 self.info_optimizer.step()
@@ -250,10 +233,11 @@ def train(self):
                 if ((iter + 1) % 100) == 0:
                     print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, info_loss: %.8f" %
-                          ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size, D_loss.data[0], G_loss.data[0], info_loss.data[0]))
+                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item(), info_loss.item()))
             self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
-            self.visualize_results((epoch+1))
+            with torch.no_grad():
+                self.visualize_results((epoch+1))
         self.train_hist['total_time'].append(time.time() - start_time)
         print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
@@ -282,6 +266,7 @@ def visualize_results(self, epoch):
         else:
             samples = samples.data.numpy().transpose(0, 2, 3, 1)
+        samples = (samples + 1) / 2
         utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim],
                           self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png')
@@ -292,6 +277,7 @@ def visualize_results(self, epoch):
         else:
             samples = samples.data.numpy().transpose(0, 2, 3, 1)
+        samples = (samples + 1) / 2
         utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim],
                           self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_cont_epoch%03d' % epoch + '.png')
diff --git a/main.py b/main.py
index 43f3971..d798e69 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import argparse, os
+import argparse, os, torch
 from GAN import GAN
 from CGAN import CGAN
 from LSGAN import LSGAN
@@ -15,24 +15,25 @@ def parse_args():
     desc = "Pytorch implementation of GAN collections"
     parser = argparse.ArgumentParser(description=desc)
-    parser.add_argument('--gan_type', type=str, default='EBGAN',
+    parser.add_argument('--gan_type', type=str, default='GAN',
                         choices=['GAN', 'CGAN', 'infoGAN', 'ACGAN', 'EBGAN', 'BEGAN', 'WGAN', 'WGAN_GP', 'DRAGAN', 'LSGAN'],
-                        help='The type of GAN')#, required=True)
-    parser.add_argument('--dataset', type=str, default='mnist', choices=['mnist', 'fashion-mnist', 'celebA'],
+                        help='The type of GAN')
+    parser.add_argument('--dataset', type=str, default='mnist', choices=['mnist', 'fashion-mnist', 'cifar10', 'cifar100', 'svhn', 'stl10', 'lsun-bed'],
                         help='The name of dataset')
-    parser.add_argument('--epoch', type=int, default=25, help='The number of epochs to run')
+    parser.add_argument('--split', type=str, default='', help='The split flag for svhn and stl10')
+    parser.add_argument('--epoch', type=int, default=50, help='The number of epochs to run')
     parser.add_argument('--batch_size', type=int, default=64, help='The size of batch')
+    parser.add_argument('--input_size', type=int, default=28, help='The size of input image')
     parser.add_argument('--save_dir', type=str, default='models', help='Directory name to save the model')
-    parser.add_argument('--result_dir', type=str, default='results',
-                        help='Directory name to save the generated images')
-    parser.add_argument('--log_dir', type=str, default='logs',
-                        help='Directory name to save training logs')
+    parser.add_argument('--result_dir', type=str, default='results', help='Directory name to save the generated images')
+    parser.add_argument('--log_dir', type=str, default='logs', help='Directory name to save training logs')
     parser.add_argument('--lrG', type=float, default=0.0002)
     parser.add_argument('--lrD', type=float, default=0.0002)
     parser.add_argument('--beta1', type=float, default=0.5)
     parser.add_argument('--beta2', type=float, default=0.999)
     parser.add_argument('--gpu_mode', type=bool, default=True)
+    parser.add_argument('--benchmark_mode', type=bool, default=True)
     return check_args(parser.parse_args())
@@ -71,6 +72,9 @@ def main():
     if args is None:
         exit()
+    if args.benchmark_mode:
+        torch.backends.cudnn.benchmark = True
+
     # declare instance for GAN
     if args.gan_type == 'GAN':
         gan = GAN(args)
@@ -79,7 +83,7 @@ def main():
     elif args.gan_type == 'ACGAN':
         gan = ACGAN(args)
     elif args.gan_type == 'infoGAN':
-        gan = infoGAN(args, SUPERVISED = True)
+        gan = infoGAN(args, SUPERVISED=False)
     elif args.gan_type == 'EBGAN':
         gan = EBGAN(args)
     elif args.gan_type == 'WGAN':