This post analyzes `input_preprocess.py`, which prepares input data for DeepLab training and evaluation.
It relies heavily on helper functions from `core/preprocess_utils.py`.
First, import the required libraries:
import tensorflow as tf
from deeplab.core import feature_extractor
from deeplab.core import preprocess_utils
# 訓練時左右反轉的機率
_PROB_OF_FLIP =
The function `preprocess_image_and_label` returns three values:
the original image,
the preprocessed image of shape `[crop_height, crop_width, 3]`,
and the label of shape `[crop_height, crop_width, 1]`.
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label for DeepLab training or evaluation.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  # A label is mandatory when training; fail fast otherwise.
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  # Without a model_variant we cannot pick a model-specific mean, so warn
  # that the generic default mean-subtraction will be used.
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')

  # Keep a reference to the (possibly resized) original image for returning.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    # Delegates to core/preprocess_utils.resize_to_range.
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The returned original_image becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs, using two helpers
  # from core/preprocess_utils. With the default factors (1., 1., 0) the
  # scale is always 1.0, i.e. a no-op at evaluation time.
  scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label so both dimensions are at least
  # [crop_height, crop_width].
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]
  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad the image with the model's mean pixel value (core/preprocess_utils)
  # so padding does not perturb mean-subtracted inputs.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)
  if label is not None:
    # Pad the label with ignore_label so padded pixels do not affect the loss.
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop image and label together (preprocess_utils.random_crop)
  # so both receive the same crop window.
  if is_training and label is not None:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])
  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label (dim=1 is the width axis).
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label