DCGAN Code Implementation (TensorFlow, MNIST Handwritten Digits)

DCGAN uses convolution and transposed convolution (deconvolution) in the generator and the discriminator, which improves on the plain GAN's results.

Improvements of DCGAN over the original GAN:

  1. Uses the LeakyReLU activation function, which experiments have shown to work better than plain ReLU (a minimal sketch follows this list).
  2. Uses batch normalization, which effectively reduces the error introduced by random initialization.
  3. The discriminator replaces pooling with strided convolutions, and the generator uses fractional-strided convolutions (transposed convolutions).
  4. Other tricks (search for the details).
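
As a minimal sketch of point 1 (assuming TensorFlow 1.x, the same API the code below uses), LeakyReLU needs no dedicated op; it can be written with tf.maximum, which is exactly the form used in the generator and discriminator later:

import tensorflow as tf

# Minimal LeakyReLU: pass positives through, scale negatives by alpha
def leaky_relu(x, alpha=0.01):
    return tf.maximum(alpha * x, x)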
Below is the full implementation. For a plain GAN baseline, see the earlier post 快速上手生成對抗生成網絡生成手寫數字集 (code with detailed comments, tested and working).
# Code with detailed comments
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Hyperparameters: batch size
batch_size = 64
# Length of the noise vector
noise_size = 100
# Number of training epochs
epochs = 5
# Learning rate
learning_rate = 0.001
# Number of samples drawn to check generator quality
n_samples = 20

# Load the MNIST dataset
mnist = input_data.read_data_sets("MNIST_DATA")

# Build the inputs for the generator and the discriminator
def get_input(noise_dim, image_height, image_width, image_depth):
    """
    :param noise_dim: length of the noise vector
    :param image_height: image height
    :param image_width: image width
    :param image_depth: image depth (number of channels)
    :return: the two network inputs, as placeholders
    """
    D_input = tf.placeholder(tf.float32, [None, image_height, image_width, image_depth], name="input_real")
    G_input = tf.placeholder(tf.float32, [None, noise_dim], name="input_noise")

    return D_input, G_input

# Define the generator
def get_generator(G_input, output_dim, is_train=True, alpha=0.01):
    """
    :param G_input: generator input, shape (batch_size, 100)
    :param output_dim: channel depth of the output images, 1 for (batch_size, 28, 28, 1)
    :param is_train: whether we are training (controls batch norm and variable reuse)
    :param alpha: LeakyReLU slope parameter
    :return: the generated images
    """
    # Variable scope "generator"; reuse the variables when sampling (is_train=False)
    with tf.variable_scope("generator", reuse=(not is_train)):
        # batch x 100 ---> batch x 4 x 4 x 512
        layer1 = tf.layers.dense(G_input, 4*4*512)
        layer1 = tf.reshape(layer1, [-1, 4, 4, 512])
        layer1 = tf.layers.batch_normalization(layer1, training=is_train)
        layer1 = tf.maximum(alpha * layer1, layer1)   # LeakyReLU
        layer1 = tf.nn.dropout(layer1, keep_prob=0.6)

        # batch x 4 x 4 x 512 ---> batch x 7 x 7 x 256
        layer2 = tf.layers.conv2d_transpose(layer1, 256, 4, strides=1, padding="valid")
        layer2 = tf.layers.batch_normalization(layer2, training=is_train)
        layer2 = tf.maximum(alpha * layer2, layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=0.6)

        # batch x 7 x 7 x 256 ---> batch x 14 x 14 x 128
        layer3 = tf.layers.conv2d_transpose(layer2, 128, 3, strides=2, padding="same")
        layer3 = tf.layers.batch_normalization(layer3, training=is_train)
        layer3 = tf.maximum(alpha * layer3, layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=0.6)

        # batch x 14 x 14 x 128 ---> batch x 28 x 28 x 1
        logits = tf.layers.conv2d_transpose(layer3, output_dim, 3, strides=2, padding="same")
        outputs = tf.tanh(logits)
        return outputs
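
# Shape sanity check for the transposed convolutions above:
#   'valid' padding: out = (in - 1) * stride + kernel -> (4 - 1) * 1 + 4 = 7
#   'same'  padding: out = in * stride                -> 7 * 2 = 14 and 14 * 2 = 28
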
# Define the discriminator
def get_discriminator(D_input, reuse=False, alpha=0.01):
    """
    :param D_input: input images
    :param reuse: whether to reuse the scope's variables
    :param alpha: LeakyReLU slope parameter
    :return: the logits and the sigmoid probability that the input is real
    """
    # Variable scope "discriminator"
    with tf.variable_scope("discriminator", reuse=reuse):
        # batch x 28 x 28 x 1 ---> batch x 14 x 14 x 128
        # (no batch norm on the first discriminator layer, as in the DCGAN paper)
        layer1 = tf.layers.conv2d(D_input, 128, 3, strides=2, padding="same")
        layer1 = tf.maximum(alpha * layer1, layer1)   # LeakyReLU
        layer1 = tf.nn.dropout(layer1, keep_prob=0.6)

        # batch x 14 x 14 x 128 ---> batch x 7 x 7 x 256
        layer2 = tf.layers.conv2d(layer1, 256, 3, strides=2, padding="same")
        layer2 = tf.layers.batch_normalization(layer2, training=True)
        layer2 = tf.maximum(alpha * layer2, layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=0.6)

        # batch x 7 x 7 x 256 ---> batch x 4 x 4 x 512
        layer3 = tf.layers.conv2d(layer2, 512, 3, strides=2, padding='same')
        layer3 = tf.layers.batch_normalization(layer3, training=True)
        layer3 = tf.maximum(alpha * layer3, layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=0.6)

        # batch x 4 x 4 x 512 ---> batch x (16*512)
        flatten = tf.reshape(layer3, (-1, 16*512))
        logits = tf.layers.dense(flatten, 1)
        outputs = tf.sigmoid(logits)

        return logits, outputs
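
# Shape sanity check for the strided convolutions above:
#   'same' padding, stride 2: out = ceil(in / 2) -> 28 -> 14 -> 7 -> 4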

# Compute the model losses
def get_loss(D_input_real, G_input, image_depth, smooth=0.1):
    """
    :param D_input_real: real images fed to the discriminator
    :param G_input: generator input (noise)
    :param image_depth: number of image channels, 3 for colour, 1 for greyscale
    :param smooth: one-sided label-smoothing factor
    :return: the losses of the two networks
    """
    g_outputs = get_generator(G_input, image_depth, is_train=True)

    # Score the real images with the discriminator
    d_logits_real, d_output_real = get_discriminator(D_input_real)

    # Score the generated images with the same discriminator (reusing its variables)
    d_logits_fake, d_output_fake = get_discriminator(g_outputs, reuse=True)

    # Generator loss: the generator tries to make the discriminator call its fakes real
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake,
                                                                    labels=tf.ones_like(d_logits_fake)*(1-smooth)))

    # The discriminator tries to separate real from generated images, so its loss
    # has two parts: real images labelled (1 - smooth), fakes labelled 0
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real,
                                                                         labels=tf.ones_like(d_logits_real)*(1-smooth)))

    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake,
                                                                         labels=tf.zeros_like(d_logits_fake)))
    # Total discriminator loss: sum of the two parts
    d_loss = tf.add(d_loss_fake, d_loss_real)
    # Return both losses
    return g_loss, d_loss
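
# Note on the smooth parameter above: one-sided label smoothing replaces the
# real label 1.0 with 1 - smooth = 0.9, which keeps the discriminator from
# becoming over-confident; the fake label stays at 0.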

# Build the optimization ops
def get_optimizer(g_loss, d_loss, learning_rate=0.001):
    """
    :param g_loss: generator loss
    :param d_loss: discriminator loss
    :param learning_rate: learning rate
    :return: the two training ops
    """
    # Split the trainable variables between the two networks via tf.trainable_variables()
    train_vars = tf.trainable_variables()
    g_vars = [var for var in train_vars if var.name.startswith("generator")]
    d_vars = [var for var in train_vars if var.name.startswith("discriminator")]

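    # tf.GraphKeys.UPDATE_OPS holds the batch-norm moving-mean/variance updates;
    # wrapping the optimizers in control_dependencies runs them on every step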
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        g_opt = tf.train.AdamOptimizer(learning_rate).minimize(g_loss, var_list=g_vars)
        d_opt = tf.train.AdamOptimizer(learning_rate).minimize(d_loss, var_list=d_vars)
    # Return the training ops
    return g_opt, d_opt

# Train the model
def train(noise_size, data_shape, batch_size, n_samples):
    """
    :param noise_size: dimensionality of the noise vector
    :param data_shape: shape of the image batches
    :param batch_size: size of each batch
    :param n_samples: number of images to sample when checking the generator
    """
    # Step counter
    steps = 0
    # Build the two network inputs (placeholders) via get_input()
    D_input, G_input = get_input(noise_size, data_shape[1], data_shape[2], data_shape[3])

    # Build the losses
    g_loss, d_loss = get_loss(D_input, G_input, data_shape[-1])
    # Build the training ops
    g_train_opt, d_train_opt = get_optimizer(g_loss, d_loss, learning_rate)

    # Build the sampling ops once, outside the training loop, so the graph
    # does not keep growing on every logging step
    sample_noise_input = tf.placeholder(tf.float32, [None, noise_size], name="sample_noise")
    sampler = get_generator(sample_noise_input, data_shape[-1], is_train=False)

    # Open a session
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            for i in range(mnist.train.num_examples // batch_size):
                steps += 1
                # Fetch a batch of real images
                batch = mnist.train.next_batch(batch_size)
                batch_images = batch[0].reshape((batch_size, data_shape[1], data_shape[2], data_shape[3]))
                # Rescale pixels from [0, 1] to [-1, 1] to match the generator's tanh range
                batch_images = batch_images * 2 - 1

                # Sample noise
                batch_noise = np.random.uniform(-1, 1, size=(batch_size, noise_size))
                # One optimization step for each network
                sess.run(g_train_opt, feed_dict={D_input: batch_images,
                                                 G_input: batch_noise})
                sess.run(d_train_opt, feed_dict={D_input: batch_images,
                                                 G_input: batch_noise})
                # Every 5 steps, print the losses and save a generated image
                if steps % 5 == 0:
                    train_loss_d = d_loss.eval({D_input: batch_images,
                                                G_input: batch_noise})
                    train_loss_g = g_loss.eval({D_input: batch_images,
                                                G_input: batch_noise})

                    # Draw samples from the generator via the sampler built above
                    sample_input_noise = np.random.uniform(-1, 1, size=(n_samples, noise_size))
                    generated_pictures = sess.run(sampler, feed_dict={
                                                  sample_noise_input: sample_input_noise})

                    # Randomly pick one generated image to save
                    single_picture = generated_pictures[np.random.randint(0, n_samples)]
                    # Map the tanh output [-1, 1] back to pixel values [0, 255]
                    single_picture = (np.reshape(single_picture, (28, 28)) + 1) * 127.5
                    # Write the image to disk
                    if not os.path.exists('DC_pictures/'):
                        os.makedirs('DC_pictures/')
                    cv2.imwrite("DC_pictures/A{}.jpg".format(str(steps)), single_picture)
                    print(
                        "Epoch {}/{}... stpes:{} ".format(epoch + 1, epochs, steps),
                        "Discriminator loss : {:.4f}...".format(train_loss_d),
                        "Generator loss: {:.4f}".format(train_loss_g)
                    )

if __name__ == '__main__':
    with tf.Graph().as_default():
        train(noise_size, [-1, 28, 28, 1], batch_size, n_samples=n_samples)
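
Note: the script targets the TensorFlow 1.x API (tf.placeholder, tf.Session, tf.layers). If only TensorFlow 2.x is installed, the usual compatibility shim is the sketch below; note, however, that tensorflow.examples.tutorials.mnist is not shipped with 2.x, so the data-loading line still needs a 1.x installation or a replacement loader.

# Run TF1-style graph code under TensorFlow 2.x compatibility mode
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()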