Basic Introduction
The model used in this experiment comes from EfficientNet, one of the latest families of image classification models. Trained on ImageNet, it achieves state-of-the-art accuracy and also transfers well to other classification datasets; the comparison results are shown in the figure below. The core idea of the paper is compound scaling: by scaling the network's width, depth, and input resolution jointly, the network reaches a higher accuracy ceiling while requiring far less computation. For a Chinese translation of the paper, see the link: EfficientNet论文翻译 (EfficientNet paper translation). This experiment uses EfficientNet-B7.
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsIiclRnblN2XjlGcjAzNfRHLGZkRGZkRfJ3bs92YsYTMfVmepNHL90zZOFTU61UeFRUY0QnMMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLxgjNzQjM0kDMyETMxAjMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
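As described in the paper, compound scaling fixes constants α, β, γ by a small grid search on the baseline network, then scales depth, width, and resolution together with a single compound coefficient φ:

$$
d = \alpha^{\phi}, \qquad w = \beta^{\phi}, \qquad r = \gamma^{\phi}, \qquad \text{s.t. } \alpha \cdot \beta^{2} \cdot \gamma^{2} \approx 2, \quad \alpha, \beta, \gamma \ge 1
$$

where d, w, and r multiply the baseline depth, width, and input resolution. For EfficientNet-B0 the paper reports approximately α = 1.2, β = 1.1, γ = 1.15, so increasing φ by one roughly doubles the FLOPS.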
Network Structure
The figure below shows the structure of the baseline network EfficientNet-B0, following the network definition in Google's open-source code. EfficientNet-B0 consists of three main parts:
- stem: conv2d(3x3) + BN + activation (swish_f32).
- Block_i: 16 MBConv blocks in total; each has the structure (Conv1x1 + BN + Swish, channel expansion) + DepthWise_Conv + SE + (Conv1x1 + BN, channel projection) + Add.
- head: Conv2d(1x1) + BN + swish_f32 + global_pooling + dropout + dense (outputs the number of classes).
The figure below gives a rough diagram of the EfficientNet-B0 network to convey its overall structure. All other EfficientNet variants are derived from it by compound scaling: (width_coefficient, depth_coefficient) multiply the width (number of channels) and the number of blocks by the corresponding coefficients, respectively; a sketch of the rounding logic follows.
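The released code implements this scaling with rounding helpers; the sketch below is a simplified re-implementation (the B7 coefficients width_coefficient = 2.0 and depth_coefficient = 3.1 come from the released code, everything else here is illustrative):

```python
import math

def round_filters(filters, width_coefficient, divisor=8):
    # Scale the channel count, snapping to a multiple of `divisor`.
    filters *= width_coefficient
    new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
    # Avoid shrinking by more than 10% due to rounding.
    if new_filters < 0.9 * filters:
        new_filters += divisor
    return int(new_filters)

def round_repeats(repeats, depth_coefficient):
    # Scale the number of block repeats, rounding up.
    return int(math.ceil(depth_coefficient * repeats))

print(round_filters(32, 2.0))  # stem channels for B7: 32 -> 64
print(round_repeats(4, 3.1))   # a 4-repeat stage in B0 becomes 13 repeats in B7
```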
Code Framework
- Define the model class; the initializer is populated with the relevant training parameters:
```python
class Model:
    def __init__(self):
        self.base_architecture = cfg.efficientnet.base_architecture[0]
        self.pre_trained_weight = cfg.efficientnet.pre_trained_weight[0]
        self.num_classes = cfg.efficientnet.num_class
        self.input_shape = cfg.train.input_size
        self.batch_size = cfg.train.batch_size
        self.step_per_epoch = cfg.train.step_per_epoch
        self.warmup_epoch = cfg.train.warmup_epochs
        self.first_stage_epoch = cfg.train.first_stage_epochs
        self.second_stage_epoch = cfg.train.second_stage_epochs
        self.learn_rate_init = cfg.train.learn_rate_init
        self.learn_rate_end = cfg.train.learn_rate_end
        self.loss_function = cfg.train.loss_function[0]
        self.model_name = 'efficientnet-b7'
        self.batch_norm_decay = cfg.efficientnet.batch_norm_decay
        self.override_params = {}
```
- The network's input module, i.e. its data interface:
```python
with tf.name_scope('Input_Placeholder'):
    self.inputs = tf.placeholder(tf.float32,
                                 shape=(None, self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                                 name='input')
    self.label_c = tf.placeholder(tf.int64, shape=(self.batch_size,), name='label')
    self.trainable = tf.placeholder(dtype=tf.bool, name='training')
```
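For orientation, a minimal sketch of feeding these placeholders (`sess`, `model`, `image_batch`, and `label_batch` are assumed names, not defined in the code above):

```python
# image_batch: (batch_size, H, W, C) float array; label_batch: (batch_size,) int ids.
acc = sess.run(model.accuracy,
               feed_dict={model.inputs: image_batch,
                          model.label_c: label_batch,
                          model.trainable: False})
```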
- Build the network structure:
```python
with tf.name_scope('Build_Model'):
    self.logits = self.model(self.trainable,
                             self.pre_trained_weight,
                             self.base_architecture,
                             num_classes=self.num_classes)
    self.one_hot = tf.one_hot(self.label_c, self.num_classes)
    # all variables to be saved with the model
    self.net_variables = tf.global_variables()
```
- The network's loss function; L2 weight decay is applied to the head's weight variables only:
```python
with tf.name_scope('Loss_Function'):
    if self.loss_function == 'softmax':
        print('using softmax cross entropy loss function')
        loss_net = tf.losses.softmax_cross_entropy(self.one_hot, self.logits)
    else:
        print('using sigmoid cross entropy loss function')
        loss_net = tf.losses.sigmoid_cross_entropy(self.one_hot, self.logits, label_smoothing=0.1)
    # L2 weight decay on the head's weights only, excluding
    # batch-normalization parameters and biases.
    l2_vars = [v for v in tf.trainable_variables()
               if 'batch_normalization' not in v.name
               and 'head' in v.name
               and 'bias' not in v.name]
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in l2_vars]) if l2_vars else 0.0
    self.loss = loss_net + 0.00005 * l2_loss
```
- Build the network's evaluation metrics; these should be chosen to match the specific task:
```python
with tf.name_scope('Compute_Accuracy'):
    # accuracy
    correct_prediction = tf.equal(tf.argmax(self.logits, 1), self.label_c)
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    # confusion matrix per step, used to track sensitivity and specificity during training
    confusion_matrix = tf.confusion_matrix(self.label_c, tf.argmax(self.logits, 1), num_classes=2)
    TN = confusion_matrix[0][0]
    FP = confusion_matrix[0][1]
    FN = confusion_matrix[1][0]
    TP = confusion_matrix[1][1]
    # acc = (TP + TN) / (TP + TN + FP + FN)
    self.sensitive = TP / (TP + FN)
    self.specify = TN / (TN + FP)
```
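As a quick sanity check of the formulas: with a binary confusion matrix [[TN, FP], [FN, TP]] = [[40, 10], [5, 45]], sensitivity = TP / (TP + FN) = 45 / 50 = 0.9 and specificity = TN / (TN + FP) = 40 / 50 = 0.8. Note that num_classes=2 is hard-coded here, so these two metrics apply only to the binary task in this experiment.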
- Set the network's learning rate:
```python
with tf.name_scope('Learning_Rate'):
    self.global_step = tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')
    warmup_steps = tf.constant(self.warmup_epoch * self.step_per_epoch,
                               dtype=tf.int64,
                               name='warmup_steps')
    # total number of training steps
    train_steps = tf.constant((self.first_stage_epoch + self.second_stage_epoch) * self.step_per_epoch,
                              dtype=tf.int64,
                              name='train_steps')
    # cosine learning-rate schedule with linear warmup
    cosine_item = (1 + tf.cos((self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
    warm_learn_rate = self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) * cosine_item
    self.learn_rate = tf.cond(pred=self.global_step < warmup_steps,
                              true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                              false_fn=lambda: warm_learn_rate)
    global_step_update = tf.assign_add(self.global_step, 1)
```
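The schedule can be sanity-checked in plain NumPy before training; a standalone sketch with small made-up step counts:

```python
import numpy as np

def warmup_cosine_lr(step, warmup_steps, train_steps, lr_init, lr_end):
    # Linear warmup to lr_init, then cosine decay down to lr_end.
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    cosine = 1 + np.cos((step - warmup_steps) / (train_steps - warmup_steps) * np.pi)
    return lr_end + 0.5 * (lr_init - lr_end) * cosine

# Ramps 0 -> 1e-4 over 10 steps, then decays to 1e-6 by step 100.
for s in [0, 5, 10, 55, 100]:
    print(s, warmup_cosine_lr(s, 10, 100, 1e-4, 1e-6))
```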
- Select the parameters to be optimized. This is typical for transfer learning, where different parts of the network are trained in separate stages:
with tf.name_scope("First_Train_Stage"): # 存储第一阶段需要优化的网络参数 self.first_stage_trainable_var_list = [] for var in tf.trainable_variables(): var_name = var.op.name var_name_mess = str(var_name).split('/') # 根据名字, 选择要优化的参数 if var_name_mess[1] in ['head']: self.first_stage_trainable_var_list.append(var) optimizer = tf.train.AdamOptimizer(self.learn_rate) optimizer_variables = optimizer.minimize(self.loss, var_list=self.first_stage_trainable_var_list) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): with tf.control_dependencies([optimizer_variables, global_step_update]): # with tf.control_dependencies([moving_ave]): self.train_op_with_frozen_variables = tf.no_op() # self.train_op_with_frozen_variables = tf.group(moving_ave) with tf.name_scope("Second_Train_Stage"): second_stage_trainable_var_list = tf.trainable_variables() second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate) second_stage_variables = second_stage_optimizer.minimize(self.loss, var_list=second_stage_trainable_var_list) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): with tf.control_dependencies([second_stage_variables, global_step_update]): # with tf.control_dependencies([moving_ave]): self.train_op_with_all_variables = tf.no_op() # self.train_op_with_all_variables = tf.group(moving_ave)
- Save and load the network:
```python
with tf.name_scope('Model_Loader_Save'):
    # restore all trained variables except the final (head) layer
    variables_to_restore = []
    for v in self.net_variables:
        if v.name.split('/')[1] not in ['head']:
            variables_to_restore.append(v)
    self.loader = tf.train.Saver(variables_to_restore)
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=200)
```
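The intended restore/save flow is roughly the following sketch (checkpoint paths come from config.py; `sess` is assumed):

```python
sess.run(tf.global_variables_initializer())
# Restore the pre-trained backbone; the head stays randomly initialized.
model.loader.restore(sess, cfg.efficientnet.pre_trained_weight[0])
# ... training ...
# Save a full checkpoint; up to 200 checkpoints are kept.
model.saver.save(sess, cfg.train.save_model, global_step=model.global_step)
```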
- Record training summaries for TensorBoard:
```python
with tf.name_scope('Collect_Summary'):
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('accuracy', self.accuracy)
    tf.summary.scalar('learning_rate', self.learn_rate)
    tf.summary.scalar('sensitive', self.sensitive)
    tf.summary.scalar('specify', self.specify)
    self.merged = tf.summary.merge_all()
```
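These summaries are written out with a `tf.summary.FileWriter`; a minimal sketch, assuming an active `sess` and the batch tensors above:

```python
writer = tf.summary.FileWriter(cfg.train.log, sess.graph)
summary, step = sess.run([model.merged, model.global_step],
                         feed_dict={model.inputs: image_batch,
                                    model.label_c: label_batch,
                                    model.trainable: False})
writer.add_summary(summary, global_step=step)
writer.flush()
```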
Complete Code (Model.py)
```python
import tensorflow as tf
import numpy as np
import model_builder_factory
from config import cfg

class Model:
    def __init__(self):
        self.base_architecture = cfg.efficientnet.base_architecture[0]
        self.pre_trained_weight = cfg.efficientnet.pre_trained_weight[0]
        self.num_classes = cfg.efficientnet.num_class
        self.input_shape = cfg.train.input_size
        self.batch_size = cfg.train.batch_size
        self.step_per_epoch = cfg.train.step_per_epoch
        self.warmup_epoch = cfg.train.warmup_epochs
        self.first_stage_epoch = cfg.train.first_stage_epochs
        self.second_stage_epoch = cfg.train.second_stage_epochs
        self.learn_rate_init = cfg.train.learn_rate_init
        self.learn_rate_end = cfg.train.learn_rate_end
        self.loss_function = cfg.train.loss_function[0]
        self.model_name = 'efficientnet-b7'
        self.batch_norm_decay = cfg.efficientnet.batch_norm_decay
        self.override_params = {}
        with tf.name_scope('Input_Placeholder'):
            self.inputs = tf.placeholder(tf.float32,
                                         shape=(None, self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                                         name='input')
            self.label_c = tf.placeholder(tf.int64, shape=(self.batch_size,), name='label')
            self.trainable = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope('Build_Model'):
            self.logits = self.model(self.trainable,
                                     self.pre_trained_weight,
                                     self.base_architecture,
                                     num_classes=self.num_classes)
            self.one_hot = tf.one_hot(self.label_c, self.num_classes)
            # all variables to be saved with the model
            self.net_variables = tf.global_variables()
        with tf.name_scope('Loss_Function'):
            if self.loss_function == 'softmax':
                print('using softmax cross entropy loss function')
                loss_net = tf.losses.softmax_cross_entropy(self.one_hot, self.logits)
            else:
                print('using sigmoid cross entropy loss function')
                loss_net = tf.losses.sigmoid_cross_entropy(self.one_hot, self.logits, label_smoothing=0.1)
            # L2 weight decay on the head's weights only, excluding
            # batch-normalization parameters and biases.
            # Alternative: decay all non-BN variables, e.g.
            # self.loss = loss_net + 0.0005 * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
            #                                           if 'batch_normalization' not in v.name])
            l2_vars = [v for v in tf.trainable_variables()
                       if 'batch_normalization' not in v.name
                       and 'head' in v.name
                       and 'bias' not in v.name]
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in l2_vars]) if l2_vars else 0.0
            self.loss = loss_net + 0.00005 * l2_loss
        with tf.name_scope('Compute_Accuracy'):
            # accuracy
            correct_prediction = tf.equal(tf.argmax(self.logits, 1), self.label_c)
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            # confusion matrix per step, used to track sensitivity and specificity during training
            confusion_matrix = tf.confusion_matrix(self.label_c, tf.argmax(self.logits, 1), num_classes=2)
            TN = confusion_matrix[0][0]
            FP = confusion_matrix[0][1]
            FN = confusion_matrix[1][0]
            TP = confusion_matrix[1][1]
            # acc = (TP + TN) / (TP + TN + FP + FN)
            self.sensitive = TP / (TP + FN)
            self.specify = TN / (TN + FP)
        with tf.name_scope('Learning_Rate'):
            self.global_step = tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')
            warmup_steps = tf.constant(self.warmup_epoch * self.step_per_epoch,
                                       dtype=tf.int64,
                                       name='warmup_steps')
            # total number of training steps
            train_steps = tf.constant((self.first_stage_epoch + self.second_stage_epoch) * self.step_per_epoch,
                                      dtype=tf.int64,
                                      name='train_steps')
            # cosine learning-rate schedule with linear warmup
            cosine_item = (1 + tf.cos((self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
            warm_learn_rate = self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) * cosine_item
            self.learn_rate = tf.cond(pred=self.global_step < warmup_steps,
                                      true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                                      false_fn=lambda: warm_learn_rate)
            # self.learn_rate = tf.train.exponential_decay(cfg.Train.Learn_Rate_Init,
            #                                              self.global_step,
            #                                              decay_steps=400,
            #                                              decay_rate=0.9)
            # boundaries = [240, 1600]
            # values = [0.001, 0.0001, 0.00001]
            # self.learn_rate = tf.train.piecewise_constant(self.global_step, boundaries, values)
            global_step_update = tf.assign_add(self.global_step, 1)

        # with tf.name_scope("Moving_Weight_Decay"):
        #     moving_ave = tf.train.ExponentialMovingAverage(cfg.ResNet.Moving_Ave_Decay).apply(tf.trainable_variables())
with tf.name_scope("First_Train_Stage"):
# 存储第一阶段需要优化的网络参数
self.first_stage_trainable_var_list = []
for var in tf.trainable_variables():
var_name = var.op.name
var_name_mess = str(var_name).split('/')
# 根据名字, 选择要优化的参数
if var_name_mess[1] in ['head']:
self.first_stage_trainable_var_list.append(var)
optimizer = tf.train.AdamOptimizer(self.learn_rate)
optimizer_variables = optimizer.minimize(self.loss, var_list=self.first_stage_trainable_var_list)
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
with tf.control_dependencies([optimizer_variables, global_step_update]):
# with tf.control_dependencies([moving_ave]):
self.train_op_with_frozen_variables = tf.no_op()
# self.train_op_with_frozen_variables = tf.group(moving_ave)
with tf.name_scope("Second_Train_Stage"):
second_stage_trainable_var_list = tf.trainable_variables()
second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate)
second_stage_variables = second_stage_optimizer.minimize(self.loss, var_list=second_stage_trainable_var_list)
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
with tf.control_dependencies([second_stage_variables, global_step_update]):
# with tf.control_dependencies([moving_ave]):
self.train_op_with_all_variables = tf.no_op()
# self.train_op_with_all_variables = tf.group(moving_ave)
        with tf.name_scope('Model_Loader_Save'):
            # restore all trained variables except the final (head) layer
            variables_to_restore = []
            for v in self.net_variables:
                if v.name.split('/')[1] not in ['head']:
                    variables_to_restore.append(v)
            self.loader = tf.train.Saver(variables_to_restore)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=200)

        with tf.name_scope('Collect_Summary'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)
            tf.summary.scalar('learning_rate', self.learn_rate)
            tf.summary.scalar('sensitive', self.sensitive)
            tf.summary.scalar('specify', self.specify)
            self.merged = tf.summary.merge_all()
    def model(self, is_training, pre_trained_model, base_architecture, num_classes):
        """
        Build the network structure.
        :param is_training: training-mode flag
        :param pre_trained_model: path of the pre-trained weights (restored later via self.loader)
        :param base_architecture: network name, e.g. 'efficientnet-b7'
        :param num_classes: number of classes
        :return: the logits of the final layer
        """
        # Further override_params supported by the model builder, e.g.:
        # self.override_params['batch_norm_momentum'] = ...
        # self.override_params['batch_norm_epsilon'] = ...
        # self.override_params['dropout_rate'] = ...
        # self.override_params['survival_prob'] = ...
        # self.override_params['data_format'] = ...
        # self.override_params['depth_coefficient'] = ...
        # self.override_params['width_coefficient'] = ...
        self.override_params['num_classes'] = num_classes
        model_builder = model_builder_factory.get_model_builder(base_architecture)
        logits, _ = model_builder.build_model(self.inputs,
                                              base_architecture,
                                              is_training,
                                              override_params=self.override_params)
        return logits
```
Complete Code (config.py)
This file holds all of the network's training settings; during training you only need to edit the corresponding parameters here, which is very convenient. A short usage sketch follows the listing.
```python
from easydict import EasyDict as edict
cfg = edict()
# Consumers can get config by: from config import cfg
# network options
cfg.efficientnet = edict()
cfg.efficientnet.num_class = 2
cfg.efficientnet.moving_ave_decay = 0.9995
cfg.efficientnet.pre_trained_weight = ['./efficientnet-b0/model.ckpt',
'./efficientnet-b7/model.ckpt']
cfg.efficientnet.base_architecture = ['efficientnet-b0',
'efficientnet-b7']
cfg.efficientnet.batch_norm_decay = 0.99
# train options
cfg.train = edict()
cfg.train.root_path = '../B7Data/1025_color_new/'
cfg.train.train_set = "../B7Data/1025_color_new/train_1025_color.txt"
cfg.train.valid_set = "../B7Data/1025_color_new/valid_1025_color.txt"
cfg.train.log = './checkpoint/log/log_1111_test5/'
cfg.train.save_model = './checkpoint/model/model_1111_test5/model'
cfg.train.train_num = 1352
cfg.train.valid_num = 256
cfg.train.batch_size = 32
cfg.train.step_per_epoch = 1352//32
cfg.train.input_size = [224, 224, 3]
cfg.train.learn_rate_init = 0.0001
cfg.train.learn_rate_end = 1e-6
cfg.train.warmup_epochs = 10
cfg.train.first_stage_epochs = 60
cfg.train.second_stage_epochs = 100
cfg.train.loss_function = ['sigmoid', 'softmax']
# test options
cfg.test = edict()
cfg.test.mode = ['txt', 'image']
cfg.test.image_path = '../B7Data/1025_color_new/'
cfg.test.weight_file = "./checkpoint/model/model_1027_1/model-1"
```
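Any module can then read the settings with a single import, for example:

```python
from config import cfg

print(cfg.efficientnet.base_architecture[0])  # 'efficientnet-b0'
print(cfg.train.batch_size)                   # 32
print(cfg.train.step_per_epoch)               # 1352 // 32 = 42
```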