# model.py

from __future__ import division
import os
import time
import math
from glob import glob
import tensorflow as tf
import numpy as np
from six.moves import xrange

from ops import *
from utils import *

def conv_out_size_same(size, stride):
  """Return ceil(size / stride) as an int.

  Used below to derive the spatial size of each generator stage from the
  final output size (presumably mirroring 'SAME'-padded strided convs, as
  the name suggests).
  """
  ratio = float(size) / float(stride)
  return int(math.ceil(ratio))

class DCGAN(object):
  def __init__(self, sess, input_height=108, input_width=108, crop=True,
         batch_size=64, sample_num = 64, output_height=64, output_width=64,
         y_dim=None, z_dim=100, gf_dim=64, df_dim=64,
         gfc_dim=1024, dfc_dim=1024, c_dim=3, dataset_name='default',
         input_fname_pattern='*.jpg', checkpoint_dir=None, sample_dir=None,
         log_dir=None,
         blur_strategy="None"):
    """Store the hyper-parameters, locate the training data and build the graph.

    Args:
      sess: TensorFlow session
      input_height: Height passed to `get_image` when loading files; also the
        placeholder height when `crop` is False. [108]
      input_width: Width counterpart of `input_height`. [108]
      crop: If True the image placeholders use the output size, otherwise the
        input size. Also forwarded to `get_image`. [True]
      batch_size: The size of batch. Should be specified before training.
      sample_num: Number of fixed samples used for monitoring. [64]
      output_height: Height of generated images. [64]
      output_width: Width of generated images. [64]
      y_dim: (optional) Dimension of dim for y. [None]
      z_dim: (optional) Dimension of dim for Z. [100]
      gf_dim: (optional) Dimension of gen filters in first conv layer. [64]
      df_dim: (optional) Dimension of discrim filters in first conv layer. [64]
      gfc_dim: (optional) Dimension of gen units for for fully connected layer. [1024]
      dfc_dim: (optional) Dimension of discrim units for fully connected layer. [1024]
      c_dim: (optional) Dimension of image color. For grayscale input, set to 1. [3]
        NOTE: this argument is not read; `self.c_dim` is always taken from
        the last axis of the first loaded image.
      dataset_name: 'mnist' loads the idx files through `load_mnist`; 'lsun'
        reads the file list `./data/lsun/lsun_images`; anything else globs
        `./data/<dataset_name>/<input_fname_pattern>`. ['default']
      input_fname_pattern: Glob pattern for image files. ['*.jpg']
      checkpoint_dir: Base directory used by `save` / `load`. [None]
      sample_dir: Directory sample grids are written to during training. [None]
      log_dir: Base directory for the summary writers created in `train`. [None]
      blur_strategy: Forwarded to `gauss_blur` when the graph is built. ["None"]

    Raises:
      IndexError: if the (non-mnist) dataset contains no files. The exception
        type matches what the former `self.data[0]` lookup raised, but now
        carries a message naming the dataset.
    """
    self.sess = sess
    self.crop = crop

    self.batch_size = batch_size
    self.sample_num = sample_num

    self.input_height = input_height
    self.input_width = input_width
    self.output_height = output_height
    self.output_width = output_width

    self.y_dim = y_dim
    self.z_dim = z_dim

    self.gf_dim = gf_dim
    self.df_dim = df_dim

    self.gfc_dim = gfc_dim
    self.dfc_dim = dfc_dim

    self.dataset_name = dataset_name
    self.input_fname_pattern = input_fname_pattern
    self.checkpoint_dir = checkpoint_dir
    self.sample_dir = sample_dir
    self.log_dir = log_dir
    self.blur_strategy = blur_strategy

    # Resolve the data before creating any graph helpers so that a missing
    # or empty dataset fails fast with an explanatory message.
    if self.dataset_name == 'mnist':
      self.data_X, self.data_y = self.load_mnist()
      self.c_dim = self.data_X[0].shape[-1]
    else:
      data_root = os.path.join("./data", self.dataset_name)
      if self.dataset_name == 'lsun':
        # one relative image path per line
        with open(os.path.join(data_root, 'lsun_images')) as f:
          self.data = [os.path.join(data_root, line.strip()) for line in f]
      else:
        self.data = glob(os.path.join(data_root, self.input_fname_pattern))

      if not self.data:
        raise IndexError(
            "[!] No data found for dataset '{}' in '{}'".format(
                self.dataset_name, data_root))
      # number of colour channels is taken from the first image
      self.c_dim = imread(self.data[0]).shape[-1]

    self.grayscale = (self.c_dim == 1)

    # batch normalization : deals with poor initialization helps gradient flow
    self.d_bn1 = batch_norm(name='d_bn1')
    self.d_bn2 = batch_norm(name='d_bn2')

    # the unconditional networks are one layer deeper than the conditional ones
    if not self.y_dim:
      self.d_bn3 = batch_norm(name='d_bn3')

    self.g_bn0 = batch_norm(name='g_bn0')
    self.g_bn1 = batch_norm(name='g_bn1')
    self.g_bn2 = batch_norm(name='g_bn2')

    if not self.y_dim:
      self.g_bn3 = batch_norm(name='g_bn3')

    self.build_model()

  def build_model(self):
    """Create placeholders, networks, losses, summaries and the saver.

    Everything is stored on `self`. Note that `self.sampler` is rebound from
    the bound method to the tensor it returns, so `sampler` can only be
    called once per instance (this is what `train` relies on).
    """
    if self.y_dim:
      self.y= tf.placeholder(tf.float32, [self.batch_size, self.y_dim], name='y')

    if self.crop:
      image_dims = [self.output_height, self.output_width, self.c_dim]
    else:
      image_dims = [self.input_height, self.input_width, self.c_dim]

    self.inputs = tf.placeholder(
      tf.float32, [self.batch_size] + image_dims, name='real_images')
    self.sample_inputs = tf.placeholder(
      tf.float32, [self.sample_num] + image_dims, name='sample_inputs')

    # NOTE(review): the kernel is fixed to 9x9 with 3 input channels
    # regardless of self.c_dim -- confirm this is intended for grayscale data.
    self.gauss_kernel = tf.placeholder(tf.float32, [9,9,3,1], name='gauss_kernel')

    self.z = tf.placeholder(
      tf.float32, [None, self.z_dim], name='z')
    self.z_sum = histogram_summary("z", self.z)

    def blurred(images):
      # every discriminator input goes through the same blur stage
      return gauss_blur(images,
                        self.batch_size,
                        kernel=self.gauss_kernel,
                        output_height=self.output_height,
                        blur_strategy=self.blur_strategy)

    # `None` selects the unconditional code path of each network, which is
    # exactly what omitting the argument did before.
    labels = self.y if self.y_dim else None

    self.G = self.generator(self.z, labels)
    self.D, self.D_logits = self.discriminator(
        blurred(self.inputs), labels, reuse=False)

    self.sampler = self.sampler(self.z, labels)
    self.D_, self.D_logits_ = self.discriminator(
        blurred(self.G), labels, reuse=True)
    # same weights as the discriminator, batch norm called with train=False
    self.R, self.R_logits = self.discriminator_inference(
        blurred(self.G), labels, reuse=True)

    self.d_sum = histogram_summary("d", self.D)
    self.d__sum = histogram_summary("d_", self.D_)
    self.G_sum = image_summary("G", self.G)

    def sigmoid_cross_entropy_with_logits(x, y):
      try:
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
      except TypeError:
        # older TensorFlow releases name the keyword `targets`
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, targets=y)

    self.d_loss_real = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits, tf.ones_like(self.D)))
    self.d_loss_fake = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits_, tf.zeros_like(self.D_)))
    self.g_loss = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))

    # calculate probability of given image is real or not
    self.g_loss_D_raw = sigmoid_cross_entropy_with_logits(self.R_logits,
                                        tf.ones_like(self.R))
    self.g_loss_D_raw_ = sigmoid_cross_entropy_with_logits(self.R_logits,
                                        tf.zeros_like(self.R))

    # softmax over the two per-image losses; column 0 belongs to the
    # "fake" (zeros) labelling and is what the controller averages
    self.D_prob_fake_G_image = tf.nn.softmax(
                                  tf.concat(
                                    [self.g_loss_D_raw_,
                                    self.g_loss_D_raw],1))
    self.D_prob_fake_G_image_mean = tf.reduce_mean(
                                      self.D_prob_fake_G_image[:,0])

    self.actual_G_quality_sum = scalar_summary("G_quality",
                                                self.D_prob_fake_G_image_mean)

    # python-side counters / controller error are fed in for logging only
    self.GD_training_iterations = tf.placeholder(tf.int32, None, name='GD_training_iterations')
    self.GD_training_iterations_sum = scalar_summary("GD_training_iterations", self.GD_training_iterations)

    self.GD_controller_error = tf.placeholder(tf.float32, None, name='GD_controller_error')
    self.GD_controller_error_sum = scalar_summary("GD_controller_error", self.GD_controller_error)

    self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)

    self.d_loss = self.d_loss_real + self.d_loss_fake

    self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
    self.d_loss_sum = scalar_summary("d_loss", self.d_loss)

    t_vars = tf.trainable_variables()

    # variables are split between the two optimizers by name prefix
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.g_vars = [var for var in t_vars if 'g_' in var.name]

    self.saver = tf.train.Saver()

  def train(self, config):
    """Run the training loop with the G/D balancing controller.

    For every batch the "generator quality" (`D_prob_fake_G_image_mean`
    evaluated on a fixed set of latent samples) is compared with a target
    that moves linearly from `target_starting_G_quality` to
    `target_ending_G_quality` over the epochs. The integrated error
    (`self.G2D_ratio`, clipped to [0, 1]) is used, when
    `config.GpD_ratio == -1`, as the probability of updating the
    discriminator instead of the generator for that batch. Otherwise one
    discriminator step is followed by `config.GpD_ratio` generator steps.

    Args:
      config: object providing `learning_rate`, `beta1`, `dataset`, `epoch`,
        `train_size`, `batch_size`, `blur_strategy`,
        `target_starting_G_quality`, `target_ending_G_quality`,
        `control_gain`, `GpD_ratio`, `sample_every` and `with_overlay`.
    """
    d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
              .minimize(self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
              .minimize(self.g_loss, var_list=self.g_vars)
    try:
      tf.global_variables_initializer().run()
    except AttributeError:
      # TensorFlow releases that predate global_variables_initializer
      tf.initialize_all_variables().run()

    self.g_sum = merge_summary([self.z_sum, self.d__sum,
      self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])

    self.writer = SummaryWriter(self.log_dir + '/Discriminator', self.sess.graph)
    self.writer2 = SummaryWriter(self.log_dir + '/Generator')

    sample_z = np.random.uniform(-1, 1, size=(self.sample_num , self.z_dim))

    is_mnist = config.dataset == 'mnist'

    # fixed monitoring batch; labels only exist for the conditional model
    if is_mnist:
      sample_inputs = self.data_X[0:self.sample_num]
      sample_labels = self.data_y[0:self.sample_num]
    else:
      sample_inputs = self._load_image_batch(self.data[0:self.sample_num])
      sample_labels = None

    counter = 1

    D_iteration_counter = 0
    G_iteration_counter = 0

    # implementation of the controller
    # first of all we have some parameters
    self.target_starting_G_quality = config.target_starting_G_quality
    self.target_ending_G_quality = config.target_ending_G_quality
    self.control_gain = config.control_gain
    self.G2D_ratio = self.target_starting_G_quality

    start_time = time.time()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
      counter = checkpoint_counter
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    for epoch in xrange(config.epoch):
      # the file list is refreshed at the start of every epoch
      if is_mnist:
        batch_idxs = min(len(self.data_X), config.train_size) // config.batch_size
      elif self.dataset_name == 'lsun':
        with open(os.path.join("./data", self.dataset_name, 'lsun_images')) as f:
          content = f.readlines()
        self.data = [x.strip() for x in content]
        self.data = [os.path.join("./data", self.dataset_name, x) for x in self.data]
        self.c_dim = imread(self.data[0]).shape[-1]
        batch_idxs = min(len(self.data), config.train_size) // config.batch_size
      else:
        self.data = glob(os.path.join(
          "./data", config.dataset, self.input_fname_pattern))
        batch_idxs = min(len(self.data), config.train_size) // config.batch_size

      # The blur schedule only depends on the epoch, so the kernel is
      # computed once per epoch instead of once per batch.
      gauss_kernel = self._blur_kernel_for_epoch(config, epoch)

      for idx in xrange(0, batch_idxs):
        first, last = idx*config.batch_size, (idx+1)*config.batch_size
        if is_mnist:
          batch_images = self.data_X[first:last]
          batch_labels = self.data_y[first:last]
        else:
          batch_images = self._load_image_batch(self.data[first:last])
          batch_labels = None

        batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
              .astype(np.float32)

        # evaluate generator to discriminator ratio
        # here we compute some intermediate results
        self.target_G_quality = self.target_starting_G_quality + (self.target_ending_G_quality-self.target_starting_G_quality)*(epoch/config.epoch)

        # we also experimented with parameterizing the reference value using a sine curve.
        #self.target_G_quality += 0.2 * np.sin(counter/500.0)

        # Measure the current generator quality on the fixed samples. The
        # labels are fed as well: the conditional graph depends on `y`, so
        # omitting them makes this run fail for mnist.
        self.actual_G_quality, summary_str = self.sess.run(
          [self.D_prob_fake_G_image_mean, self.actual_G_quality_sum],
          feed_dict=self._make_feed(gauss_kernel, z=sample_z,
                                    images=sample_inputs,
                                    labels=sample_labels))
        self.writer.add_summary(summary_str, counter)

        # here we compute the error we want to minimize
        self.control_error = self.actual_G_quality - self.target_G_quality
        self.G2D_ratio = np.clip(self.G2D_ratio + self.control_gain*self.control_error, a_min=0, a_max=1)

        if config.GpD_ratio == -1:
          # controller mode: exactly one network is updated per batch
          if np.random.rand() < self.G2D_ratio:
            d_steps, g_steps = 1, 0
          else:
            d_steps, g_steps = 0, 1
        else:
          # fixed schedule: one D step, then GpD_ratio G steps
          d_steps, g_steps = 1, config.GpD_ratio

        d_feed = self._make_feed(gauss_kernel, z=batch_z,
                                 images=batch_images, labels=batch_labels)
        g_feed = self._make_feed(gauss_kernel, z=batch_z, labels=batch_labels)
        real_feed = self._make_feed(gauss_kernel, images=batch_images,
                                    labels=batch_labels)

        for _ in range(d_steps):
          D_iteration_counter += 1
          # Update D network
          _, summary_str = self.sess.run([d_optim, self.d_sum],
                                         feed_dict=d_feed)
          self.writer.add_summary(summary_str, counter)

        for _ in range(g_steps):
          G_iteration_counter += 1
          # Update G network
          _, summary_str = self.sess.run([g_optim, self.g_sum],
                                         feed_dict=g_feed)
          self.writer.add_summary(summary_str, counter)

        # The blur kernel is fed for both dataset kinds; the mnist branch
        # previously left it out, unlike the image-folder branch.
        errD_fake = self.d_loss_fake.eval(g_feed)
        errD_real = self.d_loss_real.eval(real_feed)
        errG = self.g_loss.eval(g_feed)

        # update the discriminator and generator counters
        summary_str = self.sess.run(self.GD_training_iterations_sum,
                          feed_dict={
                                self.GD_training_iterations: D_iteration_counter,
                                self.gauss_kernel: gauss_kernel})
        self.writer.add_summary(summary_str, counter)

        summary_str = self.sess.run(self.GD_training_iterations_sum,
                          feed_dict={
                                self.GD_training_iterations: G_iteration_counter,
                                self.gauss_kernel: gauss_kernel})
        self.writer2.add_summary(summary_str, counter)

        # update error plot of the controller
        summary_str = self.sess.run(self.GD_controller_error_sum,
                          feed_dict={
                                self.GD_controller_error: self.control_error,
                                self.gauss_kernel: gauss_kernel})
        self.writer.add_summary(summary_str, counter)

        counter += 1
        print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
          % (epoch, idx, batch_idxs,
            time.time() - start_time, errD_fake+errD_real, errG))

        if np.mod(counter, config.sample_every) == 1:
          sample_feed = self._make_feed(gauss_kernel, z=sample_z,
                                        images=sample_inputs,
                                        labels=sample_labels)
          if is_mnist:
            # for sampling generated images from z space using sampler
            samples, d_loss, g_loss, d_prob, self.actual_G_quality = self.sess.run(
              [self.sampler, self.d_loss, self.g_loss,
               self.D_prob_fake_G_image, self.D_prob_fake_G_image_mean],
              feed_dict=sample_feed)
            self._save_sample_grid(samples, d_prob, epoch, idx,
                                   config.with_overlay)
            print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))
          else:
            try:
              # for sampling generated images from z space using sampler
              samples, d_loss, g_loss, d_prob = self.sess.run(
                [self.sampler, self.d_loss, self.g_loss,
                 self.D_prob_fake_G_image],
                feed_dict=sample_feed)
              self._save_sample_grid(samples, d_prob, epoch, idx,
                                     config.with_overlay)
              print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))
            except Exception:
              # best effort: a failed sample must not stop training, but
              # KeyboardInterrupt / SystemExit are no longer swallowed
              print("one pic error!...")

        if np.mod(counter, 500) == 2:
          self.save(self.checkpoint_dir, counter)

  def _make_feed(self, gauss_kernel, z=None, images=None, labels=None):
    """Build a feed dict holding the blur kernel plus whichever inputs are given."""
    feed = {self.gauss_kernel: gauss_kernel}
    if z is not None:
      feed[self.z] = z
    if images is not None:
      feed[self.inputs] = images
    if labels is not None:
      feed[self.y] = labels
    return feed

  def _load_image_batch(self, files):
    """Load `files` through `get_image` into one float32 array."""
    images = [
        get_image(image_file,
                  input_height=self.input_height,
                  input_width=self.input_width,
                  resize_height=self.output_height,
                  resize_width=self.output_width,
                  crop=self.crop,
                  grayscale=self.grayscale) for image_file in files]
    images = np.array(images).astype(np.float32)
    if self.grayscale:
      # add the trailing channel axis for grayscale data
      images = images[:, :, :, None]
    return images

  def _blur_kernel_for_epoch(self, config, epoch):
    """Return the value fed to `self.gauss_kernel` during `epoch`."""
    if config.blur_strategy == "3x3":
      base = np.array([[1,2,1],
                       [2,4,2],
                       [1,2,1]], dtype=np.float32)/(3*16) # define our kernel
      # pad kernel with zeros to fill up for 9x9 kernel
      padded = np.pad(base, ((3,3), (3,3)), 'constant', constant_values=0)
      # Replicate along a new channel axis so the result is laid out as
      # (9, 9, 3, 1). Reshaping a (3, 9, 9) stack to that shape would
      # scatter the nine non-zero taps instead of keeping them centred.
      return np.repeat(padded[:, :, np.newaxis, np.newaxis], 3, axis=2)

    progress = (epoch+1)/config.epoch
    if config.blur_strategy == "reg_lin":
      sigma_interp = 5.0 - progress*4.5 # linearly interpolate sigma between 5.0 and 0.5
    else: # hyperbolic or unused if None
      sig_alpha = 5.0 # increase to start with stronger blur
      sig_beta = 12.0 # increase to decrease blur faster
      sigma_interp = sig_alpha/(sig_beta*progress+1)
    return gauss_blur_kernel(sigma_interp)

  def _save_sample_grid(self, samples, d_prob, epoch, idx, with_overlay):
    """Write `samples` as one image grid into `self.sample_dir`."""
    manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
    manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
    path = './{}/train_{:02d}_{:04d}.png'.format(self.sample_dir, epoch, idx)
    if with_overlay:
      save_images_ex(samples, [d_prob], [manifold_h, manifold_w], path)
    else:
      save_images(samples, [manifold_h, manifold_w], path)

  def discriminator(self, image, y=None, reuse=False):
    """Build the discriminator on `image` inside the "discriminator" scope.

    Args:
      image: batch of images to classify.
      y: label tensor; only used when `self.y_dim` is set.
      reuse: when True the variables of the scope are reused.

    Returns:
      Tuple `(sigmoid(logits), logits)`.
    """
    with tf.variable_scope("discriminator") as scope:
      if reuse:
        scope.reuse_variables()

      if self.y_dim:
        # conditional variant: the labels are concatenated at every stage
        yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
        net = conv_cond_concat(image, yb)

        net = lrelu(conv2d(net, self.c_dim + self.y_dim, name='d_h0_conv'))
        net = conv_cond_concat(net, yb)

        net = conv2d(net, self.df_dim + self.y_dim, name='d_h1_conv')
        net = lrelu(self.d_bn1(net))
        net = tf.reshape(net, [self.batch_size, -1])
        net = concat([net, y], 1)

        net = linear(net, self.dfc_dim, 'd_h2_lin')
        net = lrelu(self.d_bn2(net))
        net = concat([net, y], 1)

        logits = linear(net, 1, 'd_h3_lin')
        return tf.nn.sigmoid(logits), logits

      # unconditional variant: four conv layers followed by a linear layer
      net = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
      normed_layers = (
          (self.d_bn1, 2, 'd_h1_conv'),
          (self.d_bn2, 4, 'd_h2_conv'),
          (self.d_bn3, 8, 'd_h3_conv'),
      )
      for bn, width, layer_name in normed_layers:
        net = lrelu(bn(conv2d(net, self.df_dim*width, name=layer_name)))

      flat = tf.reshape(net, [self.batch_size, -1])
      logits = linear(flat, 1, 'd_h3_lin')
      return tf.nn.sigmoid(logits), logits

  def discriminator_inference(self, image, y=None, reuse=True):
    """Build the discriminator with batch norm called with `train=False`.

    The layers and variable names are the same as in `discriminator`.

    Args:
      image: batch of images to classify.
      y: label tensor; only used when `self.y_dim` is set.
      reuse: not read -- the scope's variables are always reused.

    Returns:
      Tuple `(sigmoid(logits), logits)`.
    """
    with tf.variable_scope("discriminator") as scope:
      scope.reuse_variables()

      if self.y_dim:
        # conditional variant: the labels are concatenated at every stage
        yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
        net = conv_cond_concat(image, yb)

        net = lrelu(conv2d(net, self.c_dim + self.y_dim, name='d_h0_conv'))
        net = conv_cond_concat(net, yb)

        net = conv2d(net, self.df_dim + self.y_dim, name='d_h1_conv')
        net = lrelu(self.d_bn1(net, train=False))
        net = tf.reshape(net, [self.batch_size, -1])
        net = concat([net, y], 1)

        net = linear(net, self.dfc_dim, 'd_h2_lin')
        net = lrelu(self.d_bn2(net, train=False))
        net = concat([net, y], 1)

        logits = linear(net, 1, 'd_h3_lin')
        return tf.nn.sigmoid(logits), logits

      # unconditional variant: four conv layers followed by a linear layer
      net = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
      normed_layers = (
          (self.d_bn1, 2, 'd_h1_conv'),
          (self.d_bn2, 4, 'd_h2_conv'),
          (self.d_bn3, 8, 'd_h3_conv'),
      )
      for bn, width, layer_name in normed_layers:
        conv = conv2d(net, self.df_dim*width, name=layer_name)
        net = lrelu(bn(conv, train=False))

      flat = tf.reshape(net, [self.batch_size, -1])
      logits = linear(flat, 1, 'd_h3_lin')
      return tf.nn.sigmoid(logits), logits

  def generator(self, z, y=None):
    """Build the generator inside the "generator" scope.

    Args:
      z: latent batch.
      y: label tensor; only used when `self.y_dim` is set.

    Returns:
      The generated image batch: tanh output for the unconditional network,
      sigmoid output for the conditional one. The unconditional path also
      stores intermediate tensors and layer weights on `self`.
    """
    with tf.variable_scope("generator"):
      out_h, out_w = self.output_height, self.output_width
      batch = self.batch_size

      if self.y_dim:
        h_half, h_quarter = int(out_h/2), int(out_h/4)
        w_half, w_quarter = int(out_w/2), int(out_w/4)

        # yb = tf.expand_dims(tf.expand_dims(y, 1),2)
        yb = tf.reshape(y, [batch, 1, 1, self.y_dim])
        net = concat([z, y], 1)

        net = linear(net, self.gfc_dim, 'g_h0_lin')
        net = tf.nn.relu(self.g_bn0(net))
        net = concat([net, y], 1)

        net = linear(net, self.gf_dim*2*h_quarter*w_quarter, 'g_h1_lin')
        net = tf.nn.relu(self.g_bn1(net))
        net = tf.reshape(net, [batch, h_quarter, w_quarter, self.gf_dim * 2])

        net = conv_cond_concat(net, yb)

        net = deconv2d(net,
            [batch, h_half, w_half, self.gf_dim * 2], name='g_h2')
        net = tf.nn.relu(self.g_bn2(net))
        net = conv_cond_concat(net, yb)

        return tf.nn.sigmoid(
            deconv2d(net, [batch, out_h, out_w, self.c_dim], name='g_h3'))

      # spatial sizes of the five stages, from the output back to the
      # projected latent (each step divides by two, rounding up)
      heights, widths = [out_h], [out_w]
      for _ in range(4):
        heights.append(conv_out_size_same(heights[-1], 2))
        widths.append(conv_out_size_same(widths[-1], 2))
      _, h2, h4, h8, h16 = heights
      _, w2, w4, w8, w16 = widths

      # project `z` and reshape
      self.z_, self.h0_w, self.h0_b = linear(
          z, self.gf_dim*8*h16*w16, 'g_h0_lin', with_w=True)

      self.h0 = tf.reshape(
          self.z_, [-1, h16, w16, self.gf_dim * 8])
      act = tf.nn.relu(self.g_bn0(self.h0))

      self.h1, self.h1_w, self.h1_b = deconv2d(
          act, [batch, h8, w8, self.gf_dim*4], name='g_h1', with_w=True)
      act = tf.nn.relu(self.g_bn1(self.h1))

      up2, self.h2_w, self.h2_b = deconv2d(
          act, [batch, h4, w4, self.gf_dim*2], name='g_h2', with_w=True)
      act = tf.nn.relu(self.g_bn2(up2))

      up3, self.h3_w, self.h3_b = deconv2d(
          act, [batch, h2, w2, self.gf_dim*1], name='g_h3', with_w=True)
      act = tf.nn.relu(self.g_bn3(up3))

      up4, self.h4_w, self.h4_b = deconv2d(
          act, [batch, out_h, out_w, self.c_dim], name='g_h4', with_w=True)

      return tf.nn.tanh(up4)

  def sampler(self, z, y=None):
    """Build a generator copy that reuses the trained generator variables.

    Layers and variable names mirror `generator`; batch norm is called with
    `train=False` and no intermediate tensors are stored on `self`.

    Args:
      z: latent batch.
      y: label tensor; only used when `self.y_dim` is set.

    Returns:
      The generated image batch (tanh output for the unconditional network,
      sigmoid output for the conditional one).
    """
    with tf.variable_scope("generator") as scope:
      scope.reuse_variables()

      out_h, out_w = self.output_height, self.output_width
      batch = self.batch_size

      if self.y_dim:
        h_half, h_quarter = int(out_h/2), int(out_h/4)
        w_half, w_quarter = int(out_w/2), int(out_w/4)

        # yb = tf.reshape(y, [-1, 1, 1, self.y_dim])
        yb = tf.reshape(y, [batch, 1, 1, self.y_dim])
        net = concat([z, y], 1)

        net = linear(net, self.gfc_dim, 'g_h0_lin')
        net = tf.nn.relu(self.g_bn0(net, train=False))
        net = concat([net, y], 1)

        net = linear(net, self.gf_dim*2*h_quarter*w_quarter, 'g_h1_lin')
        net = tf.nn.relu(self.g_bn1(net, train=False))
        net = tf.reshape(net, [batch, h_quarter, w_quarter, self.gf_dim * 2])
        net = conv_cond_concat(net, yb)

        net = deconv2d(net, [batch, h_half, w_half, self.gf_dim * 2], name='g_h2')
        net = tf.nn.relu(self.g_bn2(net, train=False))
        net = conv_cond_concat(net, yb)

        return tf.nn.sigmoid(
            deconv2d(net, [batch, out_h, out_w, self.c_dim], name='g_h3'))

      # spatial sizes of the five stages, from the output back to the
      # projected latent (each step divides by two, rounding up)
      heights, widths = [out_h], [out_w]
      for _ in range(4):
        heights.append(conv_out_size_same(heights[-1], 2))
        widths.append(conv_out_size_same(widths[-1], 2))
      _, h2, h4, h8, h16 = heights
      _, w2, w4, w8, w16 = widths

      # project `z` and reshape
      projected = linear(z, self.gf_dim*8*h16*w16, 'g_h0_lin')
      net = tf.reshape(projected, [-1, h16, w16, self.gf_dim * 8])
      net = tf.nn.relu(self.g_bn0(net, train=False))

      net = deconv2d(net, [batch, h8, w8, self.gf_dim*4], name='g_h1')
      net = tf.nn.relu(self.g_bn1(net, train=False))

      net = deconv2d(net, [batch, h4, w4, self.gf_dim*2], name='g_h2')
      net = tf.nn.relu(self.g_bn2(net, train=False))

      net = deconv2d(net, [batch, h2, w2, self.gf_dim*1], name='g_h3')
      net = tf.nn.relu(self.g_bn3(net, train=False))

      net = deconv2d(net, [batch, out_h, out_w, self.c_dim], name='g_h4')

      return tf.nn.tanh(net)

  def load_mnist(self):
    """Load the MNIST idx files from `./data/<dataset_name>`.

    The train and t10k splits are concatenated and shuffled with a fixed
    seed, re-seeding before each shuffle so images and labels receive the
    same permutation. This re-seeds NumPy's global random generator as a
    side effect.

    Returns:
      Tuple `(X, y_vec)`: `X` is a float array of shape (N, 28, 28, 1)
      scaled by 1/255, `y_vec` a float one-hot array of shape
      (N, self.y_dim).

    Raises:
      ValueError: if the image and label files disagree on the number of
        samples, or an image file is not a whole number of 28x28 images.
    """
    data_dir = os.path.join("./data", self.dataset_name)

    def read_payload(fname, header_bytes):
      # Passing the path lets NumPy open the file in binary mode and close
      # it again, so no file handle is leaked.
      raw = np.fromfile(os.path.join(data_dir, fname), dtype=np.uint8)
      return raw[header_bytes:]

    # image files carry a 16-byte header, label files an 8-byte header
    trX = read_payload('train-images-idx3-ubyte', 16).reshape((-1,28,28,1))
    trY = read_payload('train-labels-idx1-ubyte', 8)
    teX = read_payload('t10k-images-idx3-ubyte', 16).reshape((-1,28,28,1))
    teY = read_payload('t10k-labels-idx1-ubyte', 8)

    # builtin float/int replace the np.float/np.int aliases that were
    # removed in NumPy 1.24; the resulting dtypes are the same
    X = np.concatenate((trX, teX), axis=0).astype(float)
    y = np.concatenate((trY, teY), axis=0).astype(int)

    if len(X) != len(y):
      raise ValueError(
          "MNIST images and labels differ in length: {} vs {}".format(
              len(X), len(y)))

    seed = 547
    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed(seed)
    np.random.shuffle(y)

    # one-hot encode the labels
    y_vec = np.zeros((len(y), self.y_dim), dtype=float)
    y_vec[np.arange(len(y)), y] = 1.0

    return X/255.,y_vec

  @property
  def model_dir(self):
    """Sub-directory name: dataset name, batch size, output height and width joined by '_'."""
    parts = (self.dataset_name, self.batch_size,
             self.output_height, self.output_width)
    return "_".join("{}".format(part) for part in parts)
      
  def save(self, checkpoint_dir, step):
    """Save the session through `self.saver` under `<checkpoint_dir>/<model_dir>`.

    The directory is created when missing; `step` is passed on as
    `global_step`.
    """
    target_dir = os.path.join(checkpoint_dir, self.model_dir)
    if not os.path.exists(target_dir):
      os.makedirs(target_dir)

    save_path = os.path.join(target_dir, "DCGAN.model")
    self.saver.save(self.sess, save_path, global_step=step)

  def load(self, checkpoint_dir):
    """Restore the latest checkpoint found in `<checkpoint_dir>/<model_dir>`.

    Returns:
      Tuple `(success, counter)`. `counter` is the last run of digits in the
      checkpoint file name (the step it was saved at), or 0 when the name
      contains no digits or no checkpoint was found.
    """
    import re
    print(" [*] Reading checkpoints...")
    checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
      print(" [*] Failed to find a checkpoint")
      return False, 0

    ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    # The pattern matches the last digit run in the name.
    match = re.search(r"(\d+)(?!.*\d)", ckpt_name)
    counter = int(match.group(0)) if match else 0
    print(" [*] Success to read {}".format(ckpt_name))
    return True, counter