Computer Vision Series 3.1: Data Preprocessing in VGGNet

Original post: https://blog.csdn.net/weixin_44633882/article/details/87705734

PS: The post "Computer Vision Series 3: The Ideas Behind VGGNet" is still being polished. I believe a published post should be accountable to its readers and help clear up their confusion, rather than confuse or mislead them with an unfinished draft, so I will upload it once it is finished. Thanks for your understanding!

1. Introduction

This article walks through the source code of the VGG preprocessing in TensorFlow Slim.

  • Source code: https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py

1.1 VGG preprocessing

In TensorFlow Slim, images are preprocessed differently for training and for evaluation.

Image preprocessing during training

  • Randomly draw the shortest-side length resize_side from the range [256, 512]
  • Resize the image while preserving its aspect ratio so that the shortest side equals resize_side
  • Take a random crop of size output_height × output_width
  • Randomly flip the image horizontally
  • Subtract the ImageNet training-set RGB means

Image preprocessing during evaluation

  • A fixed shortest-side length resize_side is given
  • Resize the image while preserving its aspect ratio so that the shortest side equals resize_side
  • Take a central crop of size output_height × output_width
  • Subtract the ImageNet training-set RGB means
def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  if is_training:
    return preprocess_for_train(image, output_height, output_width,
                                resize_side_min, resize_side_max)
  else:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min)
           
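A minimal usage sketch of preprocess_image (my own addition, not part of the original source), assuming TF 1.x graph mode, the functions defined in the preprocessing file, and a hypothetical JPEG at 'cat.jpg':

raw = tf.read_file('cat.jpg')                    # hypothetical file path
image = tf.image.decode_jpeg(raw, channels=3)    # uint8 tensor of shape [H, W, 3]
train_image = preprocess_image(image, 224, 224, is_training=True)
eval_image = preprocess_image(image, 224, 224, is_training=False)

with tf.Session() as sess:
  t, e = sess.run([train_image, eval_image])
  print(t.shape, e.shape)                        # (224, 224, 3) (224, 224, 3)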

Training

def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
  # Randomly pick the length of the shortest side.
  resize_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)
  # Aspect-preserving resize so that the shortest side equals resize_side.
  image = _aspect_preserving_resize(image, resize_side)
  # Take one random crop of the image.
  image = _random_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  # Randomly flip the image horizontally.
  image = tf.image.random_flip_left_right(image)
  # Return the image with the ImageNet training-set RGB means subtracted.
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
           

Evaluation

def preprocess_for_eval(image, output_height, output_width, resize_side):
  # Aspect-preserving resize so that the shortest side equals resize_side.
  image = _aspect_preserving_resize(image, resize_side)
  image = _central_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  # Return the image with the ImageNet training-set RGB means subtracted.
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
           

2. Code analysis

import tensorflow as tf

slim = tf.contrib.slim

_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512
           
  • Crop an image
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.
  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.
  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.
  Returns:
    the cropped (and resized) image.
  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    # Stack the crop size and the original channel count into one shape tensor.
    cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])
  
  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  # Slice a patch of size cropped_shape out of image, starting at offsets.
  with tf.control_dependencies([size_assertion]):
    image = tf.slice(image, offsets, cropped_shape)
  return tf.reshape(image, cropped_shape)
           
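To make the tf.slice call concrete, here is a toy example of mine (not from the original code): the offsets give the top-left corner of the patch and cropped_shape gives its size.

img = tf.reshape(tf.range(4 * 4 * 3), [4, 4, 3])   # a toy 4x4 "image" with 3 channels
patch = tf.slice(img, [1, 2, 0], [2, 2, 3])        # 2x2x3 block whose top-left corner is at row 1, column 2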
  • Random crop, built on _crop()
def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.
  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:
    image, depths, normals = _random_crop([image, depths, normals], 120, 150)
  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.
  Returns:
    the image_list with cropped images.
  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])  # rank (number of dimensions) of the image
    # Assert that the rank is 3; tf.Assert returns an op.
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor  %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    # Collect the assertion op.
    rank_assertions.append(rank_assert)
  
  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  # Check that the image height and width are at least the crop height and width; tf.Assert returns an op.
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])
  
  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # tf.random_uniform is used here rather than numpy.random.rand, because the
  # former draws a new random number every time the graph is evaluated,
  # whereas the latter would fix the number at graph-definition time.
  with tf.control_dependencies(asserts):
    # Reshaping to shape [] turns the value into a scalar tensor.
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  # Randomly pick the crop offsets.
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
           
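Because every image in image_list is cropped with the same random offsets, the function is convenient for dense-prediction inputs. A minimal sketch of mine (the image and label map below are hypothetical):

image = tf.zeros([300, 400, 3], dtype=tf.float32)   # hypothetical image
labels = tf.zeros([300, 400, 1], dtype=tf.int32)    # hypothetical per-pixel label map
image_crop, labels_crop = _random_crop([image, labels], 224, 224)
# Both tensors are cropped at the same (offset_height, offset_width), so pixels and labels stay aligned.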
  • Central crop: the crop window is centered on the original image, built on _crop()
def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.
  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.
  Returns:
    the list of cropped images.
  """
  outputs = []
  for image in image_list:
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]

    offset_height = (image_height - crop_height) / 2
    offset_width = (image_width - crop_width) / 2

    outputs.append(_crop(image, offset_height, offset_width,
                         crop_height, crop_width))
  return outputs
           
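For example, center-cropping a 256×256 resize to 224×224 uses an offset of (256 - 224) / 2 = 16 rows and 16 columns; _crop then casts the offsets back to int32 when assembling the offsets tensor.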
  • Subtract the ImageNet training-set RGB means from the image
def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.
  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)
  Note that the rank of `image` must be known.
  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.
  Returns:
    the centered image.
  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')
  # A list of tensors, one per channel.
  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  for i in range(num_channels):
    channels[i] -= means[i]
  return tf.concat(axis=2, values=channels)
           
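Note that the same result can also be obtained with a broadcast subtraction instead of the split/concat round trip; a minimal equivalent sketch of mine:

centered = tf.to_float(image) - tf.constant([_R_MEAN, _G_MEAN, _B_MEAN])   # [H, W, 3] - [3] broadcasts per channel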
  • Given smallest_side, scale the shorter of height and width to smallest_side, and scale the other side by the same factor.

    Returns new_height, new_width

def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.
  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.
  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.
  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: and int32 scalar tensor indicating the new width.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height = tf.to_float(height)
  width = tf.to_float(width)
  smallest_side = tf.to_float(smallest_side)

  scale = tf.cond(tf.greater(height, width),
                  lambda: smallest_side / width,
                  lambda: smallest_side / height)
  new_height = tf.to_int32(tf.rint(height * scale))
  new_width = tf.to_int32(tf.rint(width * scale))
  return new_height, new_width
           
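For example, with height = 600, width = 800 and smallest_side = 256, height is not greater than width, so scale = 256 / 600 ≈ 0.4267, giving new_height = 256 and new_width = rint(800 × 0.4267) = 341.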
  • Resize the image while preserving its aspect ratio, built on _smallest_size_at_least()
def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.
  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.
  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  shape = tf.shape(image)
  height = shape[0]
  width = shape[1]
  # Compute new_height and new_width so that the shortest side equals smallest_side.
  new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
  image = tf.expand_dims(image, 0)  # add a batch dimension
  # Bilinear interpolation.
  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                           align_corners=False)
  # Remove the dimensions of size 1 (the batch dimension).
  resized_image = tf.squeeze(resized_image)
  resized_image.set_shape([None, None, 3])
  return resized_image
           
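A quick sanity check of mine (assuming TF 1.x graph mode and a random test image), matching the worked example above:

img = tf.random_uniform([600, 800, 3])   # hypothetical 600x800 test image
resized = _aspect_preserving_resize(img, 256)
with tf.Session() as sess:
  print(sess.run(tf.shape(resized)))     # [256, 341, 3]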
