DCGAN Code Implementation (TensorFlow, MNIST Handwritten Digits)

DCGAN uses convolution and transposed convolution (deconvolution) in the generator and the discriminator, which improves on the plain GAN's results.

Improvements of DCGAN over the original GAN:

  1. Uses the LeakyReLU activation function, which experiments have shown to work better than plain ReLU (a minimal sketch follows this list).
  2. Uses batch normalization, which effectively reduces the error introduced by random initialization.
  3. The discriminator replaces pooling with strided convolutions, and the generator uses fractional-strided convolutions (transposed convolutions).
  4. Other tricks (search for the details).
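
As a minimal sketch of point 1 (assuming TensorFlow 1.x, the same API the code below uses), LeakyReLU needs no dedicated op; it can be written with tf.maximum, which is exactly the form used in the generator and discriminator later:

import tensorflow as tf

# Minimal LeakyReLU: pass positives through, scale negatives by alpha
def leaky_relu(x, alpha=0.01):
    return tf.maximum(alpha * x, x)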
Below is the full implementation. For a plain GAN baseline, see the earlier post 快速上手生成對抗生成網絡生成手寫數字集 (code with detailed comments, tested and working).
# Code with detailed comments
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Hyperparameters: batch size
batch_size = 64
# Length of the noise vector
noise_size = 100
# Number of training epochs
epochs = 5
# Learning rate
learning_rate = 0.001
# Number of samples drawn to check generator quality
n_samples = 20

# Load the MNIST dataset
mnist = input_data.read_data_sets("MNIST_DATA")

# Build the inputs for the generator and the discriminator
def get_input(noise_dim, image_height, image_width, image_depth):
    """
    :param noise_dim: length of the noise vector
    :param image_height: image height
    :param image_width: image width
    :param image_depth: image depth (number of channels)
    :return: the two network inputs, as placeholders
    """
    D_input = tf.placeholder(tf.float32, [None, image_height, image_width, image_depth], name="input_real")
    G_input = tf.placeholder(tf.float32, [None, noise_dim], name="input_noise")

    return D_input, G_input

# Define the generator
def get_generator(G_input, output_dim, is_train=True, alpha=0.01):
    """
    :param G_input: generator input, shape (batch_size, 100)
    :param output_dim: channel depth of the output images, 1 for (batch_size, 28, 28, 1)
    :param is_train: whether we are training (controls batch norm and variable reuse)
    :param alpha: LeakyReLU slope parameter
    :return: the generated images
    """
    # Variable scope "generator"; reuse the variables when sampling (is_train=False)
    with tf.variable_scope("generator", reuse=(not is_train)):
        # batch x 100 ---> batch x 4 x 4 x 512
        layer1 = tf.layers.dense(G_input, 4*4*512)
        layer1 = tf.reshape(layer1, [-1, 4, 4, 512])
        layer1 = tf.layers.batch_normalization(layer1, training=is_train)
        layer1 = tf.maximum(alpha * layer1, layer1)   # LeakyReLU
        layer1 = tf.nn.dropout(layer1, keep_prob=0.6)

        # batch x 4 x 4 x 512 ---> batch x 7 x 7 x 256
        layer2 = tf.layers.conv2d_transpose(layer1, 256, 4, strides=1, padding="valid")
        layer2 = tf.layers.batch_normalization(layer2, training=is_train)
        layer2 = tf.maximum(alpha * layer2, layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=0.6)

        # batch x 7 x 7 x 256 ---> batch x 14 x 14 x 128
        layer3 = tf.layers.conv2d_transpose(layer2, 128, 3, strides=2, padding="same")
        layer3 = tf.layers.batch_normalization(layer3, training=is_train)
        layer3 = tf.maximum(alpha * layer3, layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=0.6)

        # batch x 14 x 14 x 128 ---> batch x 28 x 28 x 1
        logits = tf.layers.conv2d_transpose(layer3, output_dim, 3, strides=2, padding="same")
        outputs = tf.tanh(logits)
        return outputs
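
# Shape sanity check for the transposed convolutions above:
#   'valid' padding: out = (in - 1) * stride + kernel -> (4 - 1) * 1 + 4 = 7
#   'same'  padding: out = in * stride                -> 7 * 2 = 14 and 14 * 2 = 28
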
# Define the discriminator
def get_discriminator(D_input, reuse=False, alpha=0.01):
    """
    :param D_input: input images
    :param reuse: whether to reuse the scope's variables
    :param alpha: LeakyReLU slope parameter
    :return: the logits and the sigmoid probability that the input is real
    """
    # Variable scope "discriminator"
    with tf.variable_scope("discriminator", reuse=reuse):
        # batch x 28 x 28 x 1 ---> batch x 14 x 14 x 128
        # (no batch norm on the first discriminator layer, as in the DCGAN paper)
        layer1 = tf.layers.conv2d(D_input, 128, 3, strides=2, padding="same")
        layer1 = tf.maximum(alpha * layer1, layer1)   # LeakyReLU
        layer1 = tf.nn.dropout(layer1, keep_prob=0.6)

        # batch x 14 x 14 x 128 ---> batch x 7 x 7 x 256
        layer2 = tf.layers.conv2d(layer1, 256, 3, strides=2, padding="same")
        layer2 = tf.layers.batch_normalization(layer2, training=True)
        layer2 = tf.maximum(alpha * layer2, layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=0.6)

        # batch x 7 x 7 x 256 ---> batch x 4 x 4 x 512
        layer3 = tf.layers.conv2d(layer2, 512, 3, strides=2, padding='same')
        layer3 = tf.layers.batch_normalization(layer3, training=True)
        layer3 = tf.maximum(alpha * layer3, layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=0.6)

        # batch x 4 x 4 x 512 ---> batch x (16*512)
        flatten = tf.reshape(layer3, (-1, 16*512))
        logits = tf.layers.dense(flatten, 1)
        outputs = tf.sigmoid(logits)

        return logits, outputs
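
# Shape sanity check for the strided convolutions above:
#   'same' padding, stride 2: out = ceil(in / 2) -> 28 -> 14 -> 7 -> 4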

# Compute the model losses
def get_loss(D_input_real, G_input, image_depth, smooth=0.1):
    """
    :param D_input_real: real images fed to the discriminator
    :param G_input: generator input (noise)
    :param image_depth: number of image channels, 3 for colour, 1 for greyscale
    :param smooth: one-sided label-smoothing factor
    :return: the losses of the two networks
    """
    g_outputs = get_generator(G_input, image_depth, is_train=True)

    # Score the real images with the discriminator
    d_logits_real, d_output_real = get_discriminator(D_input_real)

    # Score the generated images with the same discriminator (reusing its variables)
    d_logits_fake, d_output_fake = get_discriminator(g_outputs, reuse=True)

    # Generator loss: the generator tries to make the discriminator call its fakes real
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake,
                                                                    labels=tf.ones_like(d_logits_fake)*(1-smooth)))

    # The discriminator tries to separate real from generated images, so its loss
    # has two parts: real images labelled (1 - smooth), fakes labelled 0
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real,
                                                                         labels=tf.ones_like(d_logits_real)*(1-smooth)))

    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake,
                                                                         labels=tf.zeros_like(d_logits_fake)))
    # Total discriminator loss: sum of the two parts
    d_loss = tf.add(d_loss_fake, d_loss_real)
    # Return both losses
    return g_loss, d_loss
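
# Note on the smooth parameter above: one-sided label smoothing replaces the
# real label 1.0 with 1 - smooth = 0.9, which keeps the discriminator from
# becoming over-confident; the fake label stays at 0.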

# Build the optimization ops
def get_optimizer(g_loss, d_loss, learning_rate=0.001):
    """
    :param g_loss: generator loss
    :param d_loss: discriminator loss
    :param learning_rate: learning rate
    :return: the two training ops
    """
    # Split the trainable variables between the two networks via tf.trainable_variables()
    train_vars = tf.trainable_variables()
    g_vars = [var for var in train_vars if var.name.startswith("generator")]
    d_vars = [var for var in train_vars if var.name.startswith("discriminator")]

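    # tf.GraphKeys.UPDATE_OPS holds the batch-norm moving-mean/variance updates;
    # wrapping the optimizers in control_dependencies runs them on every step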
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        g_opt = tf.train.AdamOptimizer(learning_rate).minimize(g_loss, var_list=g_vars)
        d_opt = tf.train.AdamOptimizer(learning_rate).minimize(d_loss, var_list=d_vars)
    # Return the training ops
    return g_opt, d_opt

# Train the model
def train(noise_size, data_shape, batch_size, n_samples):
    """
    :param noise_size: dimensionality of the noise vector
    :param data_shape: shape of the image batches
    :param batch_size: size of each batch
    :param n_samples: number of images to sample when checking the generator
    """
    # Step counter
    steps = 0
    # Build the two network inputs (placeholders) via get_input()
    D_input, G_input = get_input(noise_size, data_shape[1], data_shape[2], data_shape[3])

    # Build the losses
    g_loss, d_loss = get_loss(D_input, G_input, data_shape[-1])
    # Build the training ops
    g_train_opt, d_train_opt = get_optimizer(g_loss, d_loss, learning_rate)

    # Build the sampling ops once, outside the training loop, so the graph
    # does not keep growing on every logging step
    sample_noise_input = tf.placeholder(tf.float32, [None, noise_size], name="sample_noise")
    sampler = get_generator(sample_noise_input, data_shape[-1], is_train=False)

    # Open a session
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            for i in range(mnist.train.num_examples // batch_size):
                steps += 1
                # Fetch a batch of real images
                batch = mnist.train.next_batch(batch_size)
                batch_images = batch[0].reshape((batch_size, data_shape[1], data_shape[2], data_shape[3]))
                # Rescale pixels from [0, 1] to [-1, 1] to match the generator's tanh range
                batch_images = batch_images * 2 - 1

                # Sample noise
                batch_noise = np.random.uniform(-1, 1, size=(batch_size, noise_size))
                # One optimization step for each network
                sess.run(g_train_opt, feed_dict={D_input: batch_images,
                                                 G_input: batch_noise})
                sess.run(d_train_opt, feed_dict={D_input: batch_images,
                                                 G_input: batch_noise})
                # Every 5 steps, print the losses and save a generated image
                if steps % 5 == 0:
                    train_loss_d = d_loss.eval({D_input: batch_images,
                                                G_input: batch_noise})
                    train_loss_g = g_loss.eval({D_input: batch_images,
                                                G_input: batch_noise})

                    # Draw samples from the generator via the sampler built above
                    sample_input_noise = np.random.uniform(-1, 1, size=(n_samples, noise_size))
                    generated_pictures = sess.run(sampler, feed_dict={
                                                  sample_noise_input: sample_input_noise})

                    # Randomly pick one generated image to save
                    single_picture = generated_pictures[np.random.randint(0, n_samples)]
                    # Map the tanh output [-1, 1] back to pixel values [0, 255]
                    single_picture = (np.reshape(single_picture, (28, 28)) + 1) * 127.5
                    # Write the image to disk
                    if not os.path.exists('DC_pictures/'):
                        os.makedirs('DC_pictures/')
                    cv2.imwrite("DC_pictures/A{}.jpg".format(str(steps)), single_picture)
                    print(
                        "Epoch {}/{}... stpes:{} ".format(epoch + 1, epochs, steps),
                        "Discriminator loss : {:.4f}...".format(train_loss_d),
                        "Generator loss: {:.4f}".format(train_loss_g)
                    )

if __name__ == '__main__':
    with tf.Graph().as_default():
        train(noise_size, [-1, 28, 28, 1], batch_size, n_samples=n_samples)
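
Note: the script targets the TensorFlow 1.x API (tf.placeholder, tf.Session, tf.layers). If only TensorFlow 2.x is installed, the usual compatibility shim is the sketch below; note, however, that tensorflow.examples.tutorials.mnist is not shipped with 2.x, so the data-loading line still needs a 1.x installation or a replacement loader.

# Run TF1-style graph code under TensorFlow 2.x compatibility mode
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()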