
AlexNet Convolutional Neural Network Implementation

Key Features of AlexNet

  • Uses ReLU as the activation function, which avoids the vanishing-gradient problem that sigmoid suffers from in deeper networks
  • Uses Dropout to randomly ignore a fraction of neurons, which reduces overfitting
  • Uses overlapping max pooling, avoiding the blurring effect of average pooling
  • Introduces the LRN layer, which creates competition among the activities of neighboring neurons: comparatively large responses become relatively larger while weaker responses are suppressed, improving the model's generalization
  • Trained on two GTX 580 GPUs with 3 GB of memory each
  • Data augmentation: randomly crop 224*224 patches from the 256*256 source images and flip them horizontally, which increases the amount of data by a factor of roughly (256-224)^2 * 2 = 2048 and greatly reduces overfitting (see the sketch after this list)
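
The crop-and-flip augmentation from the last bullet takes only a couple of TensorFlow ops. Below is a minimal sketch (not part of the original code) that assumes a single 256*256 RGB image tensor as input and uses the TensorFlow 1.x API:

import tensorflow as tf

def augment(image_256):
	# Hypothetical helper: crop a random 224*224 patch from a 256*256 RGB image
	patch = tf.random_crop(image_256, [224, 224, 3])
	# Randomly mirror the patch horizontally
	patch = tf.image.random_flip_left_right(patch)
	return patch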

Source Code

The comments in the code explain each step, so I won't repeat them here~

from datetime import datetime
import math
import tensorflow as tf
import time

batch_size = 32 
num_batches = 100

def print_activations(t):
	# Print a tensor's name and output shape (the network structure)
	print(t.op.name,'',t.get_shape().as_list())

def inference(images):
	parameters = []
	# conv1
	with tf.name_scope('conv1') as scope:
		# Initialize the kernel weights with a truncated normal distribution: kernel size 11*11, 3 input channels, 64 kernels
		kernel = tf.Variable(tf.truncated_normal([11,11,3,64],dtype=tf.float32,stddev=1e-1),name='weights')
		# Convolve the images with stride 4*4 (the kernel moves 4 pixels at a time in each spatial dimension)
		conv = tf.nn.conv2d(images,kernel,[1,4,4,1],padding='SAME')
		# Initialize biases to 0
		biases = tf.Variable(tf.constant(0.0,shape=[64],dtype=tf.float32),trainable=True,name='biases')
		bias = tf.nn.bias_add(conv,biases)
		# Apply the ReLU activation for non-linearity
		conv1 = tf.nn.relu(bias,name=scope)
		print_activations(conv1)
		# Add the trainable parameters (kernel, biases) to the parameters list
		parameters += [kernel,biases]
		# LRN is optional; depth_radius is set to 4 here
		lrn1 = tf.nn.lrn(conv1,4,bias=1.0,alpha=0.001/9,beta=0.75,name='lrn1')
		# Max pooling with a 3*3 window (each 3*3 block is reduced to a single value); VALID padding means the window never crosses the border
		pool1 = tf.nn.max_pool(lrn1,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID',name='pool1')
		# Print the shape of pool1
		print_activations(pool1)
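		# Expected shapes for a 224*224 input: conv1 -> 56*56*64 (11*11 kernel, stride 4, SAME);
		# pool1 -> 27*27*64 (3*3 window, stride 2, VALID)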

	# conv2
	with tf.name_scope('conv2') as scope:
		# Kernel size 5*5, 64 input channels (the number of kernels in the previous layer), 192 kernels
		kernel = tf.Variable(tf.truncated_normal([5,5,64,192],dtype=tf.float32,stddev=1e-1),name='weights')
		# Stride 1*1; SAME padding pads the border (2 pixels for a 5*5 kernel) so the spatial size is preserved
		conv = tf.nn.conv2d(pool1,kernel,[1,1,1,1],padding='SAME')
		biases = tf.Variable(tf.constant(0.0,shape=[192],dtype=tf.float32),trainable=True,name='biases')
		bias = tf.nn.bias_add(conv,biases)
		conv2 = tf.nn.relu(bias,name=scope)
		print_activations(conv2)
		parameters += [kernel,biases]
		lrn2 = tf.nn.lrn(conv2,4,bias=1.0,alpha=0.001/9,beta=0.75,name='lrn2')
		pool2 = tf.nn.max_pool(lrn2,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID',name='pool2')
		print_activations(pool2)		

	# conv3
	with tf.name_scope('conv3') as scope:
		# Kernel size 3*3, 192 input channels, 384 output channels
		kernel = tf.Variable(tf.truncated_normal([3,3,192,384],dtype=tf.float32,stddev=1e-1),name='weights')
		# Stride 1*1; SAME padding (1 pixel of padding for a 3*3 kernel)
		conv = tf.nn.conv2d(pool2,kernel,[1,1,1,1],padding='SAME')
		biases = tf.Variable(tf.constant(0.0,shape=[384],dtype=tf.float32),trainable=True,name='biases')
		bias = tf.nn.bias_add(conv,biases)
		conv3 = tf.nn.relu(bias,name=scope)
		print_activations(conv3)
		parameters += [kernel,biases]

	# conv4
	with tf.name_scope('conv4') as scope:
		# Kernel size 3*3, 384 input channels, 256 output channels
		kernel = tf.Variable(tf.truncated_normal([3,3,384,256],dtype=tf.float32,stddev=1e-1),name='weights')
		# Stride 1*1
		conv = tf.nn.conv2d(conv3,kernel,[1,1,1,1],padding='SAME')
		biases = tf.Variable(tf.constant(0.0,shape=[256],dtype=tf.float32),trainable=True,name='biases')
		bias = tf.nn.bias_add(conv,biases)
		conv4 = tf.nn.relu(bias,name=scope)
		print_activations(conv4)
		parameters += [kernel,biases]

	# conv5
	with tf.name_scope('conv5') as scope:
		# Kernel size 3*3, 256 input channels, 256 output channels
		kernel = tf.Variable(tf.truncated_normal([3,3,256,256],dtype=tf.float32,stddev=1e-1),name='weights')
		# Stride 1*1
		conv = tf.nn.conv2d(conv4,kernel,[1,1,1,1],padding='SAME')
		biases = tf.Variable(tf.constant(0.0,shape=[256],dtype=tf.float32),trainable=True,name='biases')
		bias = tf.nn.bias_add(conv,biases)
		conv5 = tf.nn.relu(bias,name=scope)
		print_activations(conv5)
		parameters += [kernel,biases]
		# maxpool
		pool5 = tf.nn.max_pool(conv5,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID',name='pool5')
		print_activations(pool5)

	# fc6, fc7, fc8
	# The fully connected layers are omitted: this benchmark only times the
	# convolutional part of AlexNet (see the sketch after the code listing).

	return pool5,parameters

def time_tensorflow_run(session,target,info_string):
	# Number of warm-up iterations (excluded from the timing)
	num_steps_burn_in = 10
	# Total elapsed time
	total_duration = 0.0
	# Sum of squared durations, used to compute the variance
	total_duration_squared = 0.0

	for i in range(num_batches+num_steps_burn_in):
		start_time = time.time()
		_ = session.run(target)
		duration = time.time() - start_time
		if i >= num_steps_burn_in:
			if not i % 10:
				print('%s:step %d, duration = %.3f' % (datetime.now(),i - num_steps_burn_in,duration))
			total_duration += duration
			total_duration_squared += duration*duration
	# Mean time per batch (mn) and variance (vr)
	mn = total_duration / num_batches
	vr = total_duration_squared /num_batches - mn*mn
	# Standard deviation
	sd = math.sqrt(vr)	
	print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' % 
			(datetime.now(),info_string,num_batches,mn,sd))

def run_benchmark():
	with tf.Graph().as_default():
		image_size = 224
		images = tf.Variable(tf.random_normal([batch_size,image_size,image_size,3],
												dtype=tf.float32,
												stddev=1e-1))
		pool5,parameters = inference(images)
		init = tf.global_variables_initializer()
		sess = tf.Session()
		sess.run(init)

		time_tensorflow_run(sess,pool5,'Forward')
		objective = tf.nn.l2_loss(pool5)
		grad = tf.gradients(objective,parameters)
		time_tensorflow_run(sess,grad,"Forward-backward")

# Main
run_benchmark()
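
The fc6-fc8 placeholders in inference() are intentionally empty, since the benchmark only times the convolutional layers. For completeness, here is a minimal sketch (not part of the benchmarked code) of what those layers could look like, assuming the paper's 4096-4096-1000 layout; pool5 has shape [batch, 6, 6, 256], i.e. 9216 values per image:

import tensorflow as tf

def fully_connected(pool5, keep_prob=0.5):
	# Flatten pool5 ([batch, 6, 6, 256]) into [batch, 9216]
	flat = tf.reshape(pool5, [-1, 6*6*256])
	# fc6: 9216 -> 4096, ReLU + Dropout
	w6 = tf.Variable(tf.truncated_normal([6*6*256, 4096], dtype=tf.float32, stddev=1e-1), name='fc6_weights')
	b6 = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), name='fc6_biases')
	fc6 = tf.nn.dropout(tf.nn.relu(tf.matmul(flat, w6) + b6), keep_prob)
	# fc7: 4096 -> 4096, ReLU + Dropout
	w7 = tf.Variable(tf.truncated_normal([4096, 4096], dtype=tf.float32, stddev=1e-1), name='fc7_weights')
	b7 = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), name='fc7_biases')
	fc7 = tf.nn.dropout(tf.nn.relu(tf.matmul(fc6, w7) + b7), keep_prob)
	# fc8: 4096 -> 1000 class logits (no activation; feed into a softmax loss)
	w8 = tf.Variable(tf.truncated_normal([4096, 1000], dtype=tf.float32, stddev=1e-1), name='fc8_weights')
	b8 = tf.Variable(tf.constant(0.0, shape=[1000], dtype=tf.float32), name='fc8_biases')
	logits = tf.matmul(fc7, w8) + b8
	return logits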
           

Results

CPU: i5 2.6 GHz

[Screenshots of the benchmark output on the CPU]

GPU: Nvidia Quadro 2000 5GB

(The textbook reports 0.026 sec / batch on a GTX 1080.)

[Screenshots of the benchmark output on the GPU]
