DCGAN 是在生成模型和判别模型中使用卷積和反卷積操作, 提高了模型的效果.
DCGAN相比于GAN的改進之處:
- 使用了LeakyReLU 激活函數, 經過大牛的實驗證明效果好于ReLU
- 使用batch normalization, 有效減少了随機初始化帶來的誤差
- 判别網絡中使用了strided convolutions 代替了池化操作, 生成器中使用fractional-strided convolutions (反卷積).
- others(具體的可以Google)
下面是代碼實作, 基本的GAN代碼實作可以參照《快速上手生成對抗生成網絡生成手寫數字集》(直接上代碼以及詳細注釋, 親測可用)
#代碼附詳細注釋
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Hyperparameters. Number of examples per mini-batch
batch_size = 64
# Length of the noise vector fed to the generator
noise_size = 100
# Number of training epochs (passes over the training set)
epochs = 5
# Learning rate handed to the optimizers
learning_rate = 0.001
# Number of samples drawn to inspect the generator's output.
# NOTE(review): the name is misspelled (`n_smples`); other code in this file
# refers to it by this exact spelling, so it must not be renamed in isolation.
n_smples = 20
# Load the MNIST dataset (the argument is presumably the data directory -- confirm)
mnist = input_data.read_data_sets("MNIST_DATA")
# Build the input placeholders for the discriminator and the generator
def get_input(noise_dim, image_height, image_width, image_depth):
    """
    Create the placeholders that feed the two networks.

    :param noise_dim: length of the noise vector
    :param image_height: height of an image
    :param image_width: width of an image
    :param image_depth: depth (number of channels) of an image
    :return: tuple ``(D_input, G_input)`` of float32 placeholders; the first
             holds real images, the second holds generator noise
    """
    # The leading ``None`` leaves the batch dimension unconstrained.
    image_shape = [None, image_height, image_width, image_depth]
    noise_shape = [None, noise_dim]

    real_images = tf.placeholder(tf.float32, image_shape, name="input_real")
    noise = tf.placeholder(tf.float32, noise_shape, name="input_noise")
    return real_images, noise
# Define the generator
def get_generator(G_input, output_dim, is_train=True, alpha=0.01, keep_prob=0.6):
    """
    Build the generator network inside the ``generator`` variable scope.

    :param G_input: generator input noise, a 2-D tensor (batch, noise length)
    :param output_dim: number of channels of the generated image
                       (it is used as the filter count of the last layer)
    :param is_train: ``True`` while training. ``False`` reuses the existing
                     variables, runs batch normalization in inference mode and
                     disables dropout.
    :param alpha: slope of the leaky ReLU for negative inputs
    :param keep_prob: dropout keep probability used while training
    :return: generated images, squashed into [-1, 1] by ``tanh``
    """
    # Dropout is a training-time regulariser. With a keep probability of 1.0
    # nothing is dropped, so sampling with is_train=False is not randomly
    # masked any more.
    effective_keep_prob = keep_prob if is_train else 1.0

    def _bn_lrelu_dropout(tensor):
        # Batch normalization -> leaky ReLU -> dropout, shared by all hidden layers.
        tensor = tf.layers.batch_normalization(tensor, training=is_train)
        tensor = tf.maximum(alpha * tensor, tensor)
        return tf.nn.dropout(tensor, keep_prob=effective_keep_prob)

    # All variables live in the "generator" scope; they are reused whenever
    # the network is built for inference.
    with tf.variable_scope("generator", reuse=(not is_train)):
        # batch x noise ---> batch x 4 x 4 x 512
        layer1 = tf.layers.dense(G_input, 4*4*512)
        layer1 = tf.reshape(layer1, [-1, 4, 4, 512])
        layer1 = _bn_lrelu_dropout(layer1)

        # batch x 4 x 4 x 512 ---> batch x 7 x 7 x 256
        # (kernel 4, stride 1, "valid" padding: 4 + 4 - 1 = 7)
        layer2 = tf.layers.conv2d_transpose(layer1, 256, 4, strides=1, padding="valid")
        layer2 = _bn_lrelu_dropout(layer2)

        # batch x 7 x 7 x 256 ---> batch x 14 x 14 x 128
        layer3 = tf.layers.conv2d_transpose(layer2, 128, 3, strides=2, padding="same")
        layer3 = _bn_lrelu_dropout(layer3)

        # batch x 14 x 14 x 128 ---> batch x 28 x 28 x output_dim
        logits = tf.layers.conv2d_transpose(layer3, output_dim, 3, strides=2, padding="same")
        outputs = tf.tanh(logits)
        return outputs
# Define the discriminator
def get_discriminator(D_input, reuse=False, alpha=0.01):
    """
    Build the discriminator network inside the ``discriminator`` variable scope.

    :param D_input: batch of input images (4-D tensor, channels last)
    :param reuse: whether to reuse the variables of an earlier call
    :param alpha: slope of the leaky ReLU for negative inputs
    :return: tuple ``(logits, outputs)``: the raw score and its sigmoid,
             i.e. the probability assigned to "real"
    """
    # All variables live in the "discriminator" scope.
    with tf.variable_scope("discriminator", reuse=reuse):
        # Shape comments below assume a 28 x 28 x 1 input.
        # batch x 28 x 28 x 1 ---> batch x 14 x 14 x 128
        # (no batch normalization on the first layer)
        layer1 = tf.layers.conv2d(D_input, 128, 3, strides=2, padding="same")
        layer1 = tf.maximum(alpha * layer1, layer1)
        layer1 = tf.nn.dropout(layer1, keep_prob=0.6)

        # batch x 14 x 14 x 128 ---> batch x 7 x 7 x 256
        layer2 = tf.layers.conv2d(layer1, 256, 3, strides=2, padding="same")
        layer2 = tf.layers.batch_normalization(layer2, training=True)
        layer2 = tf.maximum(alpha * layer2, layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=0.6)

        # batch x 7 x 7 x 256 ---> batch x 4 x 4 x 512
        layer3 = tf.layers.conv2d(layer2, 512, 3, strides=2, padding='same')
        layer3 = tf.layers.batch_normalization(layer3, training=True)
        layer3 = tf.maximum(alpha * layer3, layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=0.6)

        # Flatten the feature map. The size is taken from the static shape so
        # that inputs other than 28 x 28 also work; when the static shape is
        # not fully known, fall back to the 4 x 4 x 512 size of a 28 x 28 input.
        feature_dims = layer3.get_shape().as_list()[1:]
        if all(dim is not None for dim in feature_dims):
            flat_dim = int(np.prod(feature_dims))
        else:
            flat_dim = 16*512
        flatten = tf.reshape(layer3, (-1, flat_dim))

        logits = tf.layers.dense(flatten, 1)
        outputs = tf.sigmoid(logits)
        return logits, outputs
# Compute the losses of the two networks
def get_loss(D_input_real, G_input, image_depth, smooth=0.1):
    """
    Build the generator and discriminator losses.

    :param D_input_real: real images fed to the discriminator
    :param G_input: noise fed to the generator
    :param image_depth: number of image channels (3 for colour, 1 for grayscale)
    :param smooth: label-smoothing amount; "real" targets become ``1 - smooth``
    :return: tuple ``(g_loss, d_loss)``
    """
    def _mean_sigmoid_ce(logits, labels):
        # Mean sigmoid cross-entropy of `logits` against `labels`.
        per_example = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
        return tf.reduce_mean(per_example)

    fake_images = get_generator(G_input, image_depth, is_train=True)

    # Score the real images first (this call creates the discriminator
    # variables), then score the generated images with the same variables.
    real_logits, _ = get_discriminator(D_input_real)
    fake_logits, _ = get_discriminator(fake_images, reuse=True)

    # Generator: wants its images to be classified as (smoothed) real.
    g_loss = _mean_sigmoid_ce(fake_logits, tf.ones_like(fake_logits)*(1-smooth))

    # Discriminator: real images -> (smoothed) 1, generated images -> 0.
    real_part = _mean_sigmoid_ce(real_logits, tf.ones_like(real_logits)*(1-smooth))
    fake_part = _mean_sigmoid_ce(fake_logits, tf.zeros_like(fake_logits))
    d_loss = tf.add(fake_part, real_part)

    return g_loss, d_loss
# Build the optimization ops
def get_optimizer(g_loss, d_loss, learning_rate=0.001):
    """
    Create one Adam training op per network.

    :param g_loss: generator loss
    :param d_loss: discriminator loss
    :param learning_rate: learning rate used by both optimizers
    :return: tuple ``(g_opt, d_opt)`` of training ops
    """
    # Split the trainable variables by the scope prefix of their name so that
    # each optimizer only updates its own network.
    g_vars, d_vars = [], []
    for variable in tf.trainable_variables():
        if variable.name.startswith("generator"):
            g_vars.append(variable)
        elif variable.name.startswith("discriminator"):
            d_vars.append(variable)

    # Ops registered in UPDATE_OPS are forced to run before each training
    # step; both training ops are created under this dependency.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        g_optimizer = tf.train.AdamOptimizer(learning_rate)
        g_opt = g_optimizer.minimize(g_loss, var_list=g_vars)
        d_optimizer = tf.train.AdamOptimizer(learning_rate)
        d_opt = d_optimizer.minimize(d_loss, var_list=d_vars)

    return g_opt, d_opt
# Train the model
def train(noise_size, data_shape, batch_size, n_samples):
    """
    Train the DCGAN and periodically save one generated image.

    Reads the module-level ``epochs``, ``learning_rate`` and ``mnist``.
    Every 5 steps the current losses are printed and one randomly chosen
    generated image is written to ``DC_pictures/A<step>.jpg``.

    :param noise_size: length of the noise vector
    :param data_shape: image shape as ``[batch, height, width, depth]``
                       (the batch entry is not used)
    :param batch_size: number of examples per mini-batch
    :param n_samples: number of images generated at each checkpoint
    """
    # Step counter
    steps = 0
    height, width, depth = data_shape[1], data_shape[2], data_shape[3]

    # Placeholders for the two networks
    D_input, G_input = get_input(noise_size, height, width, depth)
    # Losses
    g_loss, d_loss = get_loss(D_input, G_input, data_shape[-1])
    # Training ops
    g_train_opt, d_train_opt = get_optimizer(g_loss, d_loss, learning_rate)

    # Build the sampling sub-graph ONCE. Creating a new placeholder and
    # generator copy at every checkpoint would keep adding ops to the graph
    # for the whole run.
    sample_noise_input = tf.placeholder(tf.float32, [None, noise_size])
    sample_images = get_generator(sample_noise_input, data_shape[-1], is_train=False)

    # Make sure the output directory exists before training starts.
    picture_dir = 'DC_pictures/'
    if not os.path.exists(picture_dir):
        os.makedirs(picture_dir)

    with tf.Session() as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            for _ in range(mnist.train.num_examples // batch_size):
                steps += 1
                # Fetch a batch of real images
                batch = mnist.train.next_batch(batch_size)
                batch_images = batch[0].reshape((batch_size, height, width, depth))
                # The generator ends in tanh, i.e. produces values in [-1, 1];
                # bring the real images onto the same range.
                # NOTE(review): assumes the loader yields pixels in [0, 1] -- confirm.
                batch_images = batch_images * 2 - 1
                # Sample the generator noise
                batch_noise = np.random.uniform(-1, 1, size=(batch_size, noise_size))
                feed = {D_input: batch_images, G_input: batch_noise}

                # One generator step, then one discriminator step
                sess.run(g_train_opt, feed_dict=feed)
                sess.run(d_train_opt, feed_dict=feed)

                # Every 5 steps: report the losses and save a generated image
                if steps % 5 == 0:
                    train_loss_d, train_loss_g = sess.run([d_loss, g_loss], feed_dict=feed)

                    # Generate `n_samples` images from fresh noise
                    sample_input_noise = np.random.uniform(-1, 1, size=(n_samples, noise_size))
                    generator_pictures = sess.run(
                        sample_images, feed_dict={sample_noise_input: sample_input_noise})

                    # Pick one of them at random and map [-1, 1] -> [0, 255]
                    single_picture = generator_pictures[np.random.randint(0, n_samples)]
                    single_picture = (np.reshape(single_picture, (height, width)) + 1) * 127.5
                    # Write an explicit 8-bit image instead of a float array
                    single_picture = single_picture.astype(np.uint8)
                    cv2.imwrite("DC_pictures/A{}.jpg".format(str(steps)), single_picture)

                    print(
                        "Epoch {}/{}... stpes:{} ".format(epoch + 1, epochs, steps),
                        "Discriminator loss : {:.4f}...".format(train_loss_d),
                        "Generator loss: {:.4f}".format(train_loss_g)
                    )
if __name__ == '__main__':
    # Shape of one MNIST batch: unspecified batch size, 28 x 28 pixels, 1 channel.
    mnist_shape = [-1, 28, 28, 1]
    # Build and train the model in a fresh graph of its own.
    graph = tf.Graph()
    with graph.as_default():
        train(noise_size, mnist_shape, batch_size, n_samples=n_smples)