
VGGNet and a TensorFlow Implementation

Reposted from: (original link)

VGG Network Output Size Calculation

This is the VGG network architecture:

[Figure: the VGG network configuration]

Let's compute the output size and parameter count of each layer:

Input: 224*224*3

1. conv3-64 (64 is the number of kernels): kernel size 3, stride 1, pad 1

Output: (224-3+2*1)/1+1 = 224 → 224*224*64

Parameters: (3*3*3)*64 = 1728

2. conv3-64: kernel size 3, stride 1, pad 1

Output: (224-3+2*1)/1+1 = 224 → 224*224*64

Parameters: (3*3*64)*64 = 36864

3. pool2: kernel size 2, stride 2, pad 0

Output: (224-2)/2+1 = 112 → 112*112*64

Parameters: 0

4. conv3-128: kernel size 3, stride 1, pad 1

Output: (112-3+2*1)/1+1 = 112 → 112*112*128

Parameters: (3*3*64)*128 = 73728

5. conv3-128: kernel size 3, stride 1, pad 1

Output: (112-3+2*1)/1+1 = 112 → 112*112*128

Parameters: (3*3*128)*128 = 147456

6. pool2: kernel size 2, stride 2, pad 0

Output: (112-2)/2+1 = 56 → 56*56*128

Parameters: 0

7. conv3-256: kernel size 3, stride 1, pad 1

Output: (56-3+2*1)/1+1 = 56 → 56*56*256

Parameters: (3*3*128)*256 = 294912

8. conv3-256: kernel size 3, stride 1, pad 1

Output: (56-3+2*1)/1+1 = 56 → 56*56*256

Parameters: (3*3*256)*256 = 589824

9. conv3-256: kernel size 3, stride 1, pad 1

Output: (56-3+2*1)/1+1 = 56 → 56*56*256

Parameters: (3*3*256)*256 = 589824

10. pool2: kernel size 2, stride 2, pad 0

Output: (56-2)/2+1 = 28 → 28*28*256

Parameters: 0

11. conv3-512: kernel size 3, stride 1, pad 1

Output: (28-3+2*1)/1+1 = 28 → 28*28*512

Parameters: (3*3*256)*512 = 1179648

12. conv3-512: kernel size 3, stride 1, pad 1

Output: (28-3+2*1)/1+1 = 28 → 28*28*512

Parameters: (3*3*512)*512 = 2359296

13. conv3-512: kernel size 3, stride 1, pad 1

Output: (28-3+2*1)/1+1 = 28 → 28*28*512

Parameters: (3*3*512)*512 = 2359296

14. pool2: kernel size 2, stride 2, pad 0

Output: (28-2)/2+1 = 14 → 14*14*512

Parameters: 0

15. conv3-512: kernel size 3, stride 1, pad 1

Output: (14-3+2*1)/1+1 = 14 → 14*14*512

Parameters: (3*3*512)*512 = 2359296

16. conv3-512: kernel size 3, stride 1, pad 1

Output: (14-3+2*1)/1+1 = 14 → 14*14*512

Parameters: (3*3*512)*512 = 2359296

17. conv3-512: kernel size 3, stride 1, pad 1

Output: (14-3+2*1)/1+1 = 14 → 14*14*512

Parameters: (3*3*512)*512 = 2359296

18. pool2: kernel size 2, stride 2, pad 0

Output: (14-2)/2+1 = 7 → 7*7*512

Parameters: 0

19. FC: 4096 neurons

Output: 1*1*4096

Parameters: 7*7*512*4096 = 102760448

20. FC: 4096 neurons

Output: 1*1*4096

Parameters: 4096*4096 = 16777216

21. FC: 1000 neurons

Output: 1*1*1000

Parameters: 4096*1000 = 4096000

In total, the network has roughly 138M parameters.
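To double-check the numbers above, here is a small Python sketch (my addition, not part of the original post) that applies the output-size formula (W - K + 2*P)/S + 1 and the weight-count formula K*K*C_in*C_out to the VGG-16 configuration:

# Minimal sketch: recompute the per-layer output sizes and weight counts listed above.
# 'M' stands for a 2x2, stride-2 max-pool; the integers are conv3 output channel counts.
cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
       512, 512, 512, 'M', 512, 512, 512, 'M']

size, channels, weights = 224, 3, 0
for v in cfg:
    if v == 'M':
        size = (size - 2) // 2 + 1              # pool2: kernel 2, stride 2, pad 0
    else:
        size = (size - 3 + 2 * 1) // 1 + 1      # conv3: kernel 3, stride 1, pad 1
        weights += 3 * 3 * channels * v         # K*K*C_in*C_out
        channels = v
    print(v, '->', size, 'x', size, 'x', channels)

# Fully connected layers: 7*7*512 -> 4096 -> 4096 -> 1000
weights += size * size * channels * 4096 + 4096 * 4096 + 4096 * 1000
print('total weights:', weights)                # 138344128, i.e. roughly 138M

Running it prints each layer's output size and a total of 138344128 weights, matching the hand calculation.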

The main goal of this post is to compute the output feature map size and the number of parameters for each layer of the VGG network, as an exercise in understanding CNNs. The key idea of VGGNet is to replace large convolution kernels with small ones and make the network deeper. For example, VGG replaces the 7*7 convolution kernel at the start of AlexNet with three 3*3 convolution kernels, which together have the same receptive field. For details on computing receptive fields, see my other blog post.

The receptive field of the 7*7 kernel at the start of AlexNet is 7*7.

The receptive field of VGG's first 3*3 convolution is 3*3.

The receptive field after the second convolution: (3-1)*1+3 = 5

The receptive field after the third convolution: (5-1)*1+3 = 7

So three stacked 3*3 kernels have the same receptive field as a single 7*7 kernel, but the 3*3 kernels allow the network to be made deeper. A drawback of VGGNet is that it consumes more computing resources and uses more parameters, which leads to a larger memory footprint.
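The same recurrence can be written as a short Python sketch (my addition): the receptive field after each extra 3*3, stride-1 convolution is RF_new = (RF_old - 1) * stride + kernel.

rf = 1                       # receptive field of a single pixel
for _ in range(3):           # stack three 3x3, stride-1 convolutions
    rf = (rf - 1) * 1 + 3
    print(rf)                # prints 3, 5, 7

After three layers the receptive field is 7, the same as a single 7*7 kernel.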

TensorFlow Implementation

Code reference: 《Tensorflow實踐》 by 黃文堅.

from datetime import datetime
import tensorflow as tf
import math
import time

batch_size = 32
num_batches = 100

# Creates a convolution layer and stores its parameters in the parameter list
# input_op: input tensor; name: layer name; kh/kw: kernel height/width; n_out: number of output channels; dh/dw: stride height/width; p: parameter list
def conv_op(input_op,name,kh,kw,n_out,dh,dw,p):
    # number of input channels
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w",shape=[kh,kw,n_in,n_out],dtype=tf.float32,initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1,dh,dw,1),padding='SAME')
        bias_init_val = tf.constant(0.0, shape=[n_out],dtype=tf.float32)
        biases = tf.Variable(bias_init_val , trainable=True , name='b')
        z = tf.nn.bias_add(conv,biases)
        activation = tf.nn.relu(z,name=scope)
        p += [kernel,biases]
        return activation

# Defines a fully connected layer
def fc_op(input_op,name,n_out,p):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope+'w',shape=[n_in,n_out],dtype=tf.float32,initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.Variable(tf.constant(0.1,shape=[n_out],dtype=tf.float32),name='b')
        # tf.nn.relu_layer() computes relu(matmul(input_op, kernel) + biases)
        activation = tf.nn.relu_layer(input_op,kernel,biases,name=scope)
        p += [kernel,biases]
        return activation

# Defines a max-pooling layer
def mpool_op(input_op,name,kh,kw,dh,dw):
    return tf.nn.max_pool(input_op,ksize=[1,kh,kw,1],strides=[1,dh,dw,1],padding='SAME',name=name)

# Defines the network structure
def inference_op(input_op,keep_prob):
    p = []
    conv1_1 = conv_op(input_op,name='conv1_1',kh=3,kw=3,n_out=64,dh=1,dw=1,p=p)
    conv1_2 = conv_op(conv1_1,name='conv1_2',kh=3,kw=3,n_out=64,dh=1,dw=1,p=p)
    pool1 = mpool_op(conv1_2,name='pool1',kh=2,kw=2,dw=2,dh=2)

    conv2_1 = conv_op(pool1,name='conv2_1',kh=3,kw=3,n_out=128,dh=1,dw=1,p=p)
    conv2_2 = conv_op(conv2_1,name='conv2_2',kh=3,kw=3,n_out=128,dh=1,dw=1,p=p)
    pool2 = mpool_op(conv2_2, name='pool2', kh=2, kw=2, dw=2, dh=2)

    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name='pool3', kh=2, kw=2, dw=2, dh=2)

    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name='pool4', kh=2, kw=2, dw=2, dh=2)

    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name='pool5', kh=2, kw=2, dw=2, dh=2)

    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5,[-1,flattened_shape],name="resh1")

    fc6 = fc_op(resh1,name="fc6",n_out=4096,p=p)
    fc6_drop = tf.nn.dropout(fc6,keep_prob,name='fc6_drop')
    fc7 = fc_op(fc6_drop,name="fc7",n_out=4096,p=p)
    fc7_drop = tf.nn.dropout(fc7,keep_prob,name="fc7_drop")
    fc8 = fc_op(fc7_drop,name="fc8",n_out=1000,p=p)
    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax,1)
    return predictions,softmax,fc8,p

def time_tensorflow_run(session,target,feed,info_string):
    num_steps_burn_in = 10  # number of warm-up iterations
    total_duration = 0.0  # total time
    total_duration_squared = 0.0  # sum of squared durations, used to compute the variance
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target,feed_dict=feed)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:  # only count iterations after the warm-up
            if not i % 10:
                print('%s: step %d, duration = %.3f' % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches  # average time per batch
    vr = total_duration_squared / num_batches - mn * mn  # variance
    sd = math.sqrt(vr)  # standard deviation
    print('%s: %s across %d steps, %.3f +/- %.3f sec/batch' % (datetime.now(), info_string, num_batches, mn, sd))

def run_benchmark():
    with tf.Graph().as_default():
        image_size = 224  # input image size
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype=tf.float32, stddev=1e-1))
        keep_prob = tf.placeholder(tf.float32)
        prediction,softmax,fc8,p = inference_op(images,keep_prob)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        time_tensorflow_run(sess, prediction,{keep_prob:1.0}, "Forward")
        # simulate the training process
        objective = tf.nn.l2_loss(fc8)  # use an arbitrary loss
        grad = tf.gradients(objective, p)  # gradients of the loss w.r.t. all model parameters
        time_tensorflow_run(sess, grad, {keep_prob:0.5},"Forward-backward")


run_benchmark()

This code only simulates the training process in order to measure the computation time per batch; the results are shown below:

[Figure: benchmark output]

I did not use GPU acceleration here, so it runs rather slowly.
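Finally, as a sanity check that ties the implementation back to the hand calculation at the top of the post, the parameter list p returned by inference_op can be counted directly. This helper is my own addition (count_params is a hypothetical name, not from the book):

import numpy as np

def count_params(p):
    # p holds every kernel and bias created by conv_op / fc_op
    return sum(np.prod(v.get_shape().as_list()) for v in p)

Calling it inside run_benchmark() right after inference_op should report roughly 138 million parameters (the weights computed above plus a few thousand biases).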
