Facial emotion recognition (1)
Facial emotion recognition (2)
Facial emotion recognition: age & gender (3)
Predicting age, gender, and emotion from a face: code implementation (C++ + Caffe) (4)
Background
Face recognition is already in wide use and runs smoothly even on video streams, which matters both for security and for consumer-facing products. After working through face recognition, I moved on to age estimation, gender classification, and recognizing expressions from facial movements; later I may also look into attractiveness ("face score") prediction and detecting whether someone is wearing glasses or a hat. Facial expression recognition is mainly applied in human-computer interaction, intelligent control, security, medical care, and communications. Attractiveness prediction could be used for future virtual makeup: customers could preview themselves with the makeup applied and only have it done for real once they are satisfied, which is the result most likely to please them.
Implementation
- Where does it run?
First, in real time on a video stream: the face-alignment step of the face-recognition pipeline is reused, and once a face is detected the result is passed on to the prediction models.
Second, on a single image: detect the face first, then run the predictions (a minimal sketch of this path follows below).
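For the second mode, the sketch below shows the single-image path. It is only an illustration: it assumes `demo.py` (listed further down) and its model files are in the working directory so that its `predict_emotion` helper can be imported, and the file name `test.jpg` is hypothetical.

```python
# Single-image sketch: detect the face first, then predict emotion on the crop.
# Assumes demo.py (below) and its model.json / model.h5 are available locally.
import cv2
import dlib

from demo import predict_emotion  # loads the emotion model on import

detector = dlib.get_frontal_face_detector()

img = cv2.imread("test.jpg")                      # hypothetical input image
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

for d in detector(img_gray, 1):                   # detect faces
    x1, y1, x2, y2 = d.left(), d.top(), d.right(), d.bottom()
    face_gray = img_gray[y1:y2, x1:x2]            # crop the face, then predict
    print(predict_emotion(face_gray))             # [angry, fear, happy, sad, surprise, neutral]
```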
Code implementation
demo.py
```python
import os
import cv2
import numpy as np
import argparse
import dlib
from contextlib import contextmanager
from wide_resnet import WideResNet
from keras.utils.data_utils import get_file
from keras.models import model_from_json

pretrained_model = "https://github.com/yu4u/age-gender-estimation/releases/download/v0.5/weights.18-4.06.hdf5"
modhash = '89f56a39a78454e96379348bddd78c0d'

emotion_labels = ['angry', 'fear', 'happy', 'sad', 'surprise', 'neutral']

# load json and create model arch
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
# rebuild the emotion model from its JSON description
model = model_from_json(loaded_model_json)
# load weights into the new model
model.load_weights('model.h5')


def predict_emotion(face_image_gray):  # a single cropped face
    resized_img = cv2.resize(face_image_gray, (48, 48), interpolation=cv2.INTER_AREA)
    # reshape for the emotion model (use (1, 1, 48, 48) instead if the model is channels-first)
    image = resized_img.reshape(1, 48, 48, 1)
    im = cv2.resize(resized_img, (48, 48))
    cv2.imwrite('face.bmp', im)  # debug snapshot of the face crop
    list_of_list = model.predict(image, batch_size=1, verbose=1)
    angry, fear, happy, sad, surprise, neutral = [prob for lst in list_of_list for prob in lst]
    return [angry, fear, happy, sad, surprise, neutral]


def get_args():
    parser = argparse.ArgumentParser(description="This script detects faces from web cam input, "
                                                 "and estimates age and gender for the detected faces.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # change this to your own path
    parser.add_argument("--weight_file", type=str, default="./pretrained_models/weights.18-4.06.hdf5",
                        help="path to weight file (e.g. weights.18-4.06.hdf5)")
    parser.add_argument("--depth", type=int, default=16,
                        help="depth of network")
    parser.add_argument("--width", type=int, default=8,
                        help="width of network")
    args = parser.parse_args()
    return args


def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=1, thickness=2):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]), (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale, (255, 255, 255), thickness)


@contextmanager
def video_capture(*args, **kwargs):
    cap = cv2.VideoCapture(*args, **kwargs)
    try:
        yield cap
    finally:
        cap.release()


def yield_images():
    # capture video from the default camera
    with video_capture(0) as cap:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        while True:
            # get video frame
            ret, img = cap.read()

            if not ret:
                raise RuntimeError("Failed to capture image")

            yield img


def main():
    biaoqing = ""
    args = get_args()
    depth = args.depth
    k = args.width
    weight_file = args.weight_file
    print(weight_file)

    # on the first run, the weights.18-4.06.hdf5 model (about 190 MB) is downloaded automatically from the URL above
    if not weight_file:
        weight_file = get_file("weights.18-4.06.hdf5", pretrained_model, cache_subdir="pretrained_models",
                               file_hash=modhash, cache_dir=os.path.dirname(os.path.abspath(__file__)))

    # for face detection
    detector = dlib.get_frontal_face_detector()

    # load the age/gender model and weights (this local `model` shadows the emotion model only inside main)
    img_size = 64
    model = WideResNet(img_size, depth=depth, k=k)()
    model.load_weights(weight_file)

    for img in yield_images():
        # img = cv2.imread("1.jpg")
        input_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_h, img_w, _ = np.shape(input_img)
        emotions = []

        # detect faces using the dlib detector
        detected = detector(img_gray, 1)
        faces = np.empty((len(detected), img_size, img_size, 3))

        if len(detected) > 0:
            for i, d in enumerate(detected):
                x1, y1, x2, y2, w, h = d.left(), d.top(), d.right() + 1, d.bottom() + 1, d.width(), d.height()
                # crop with a 0.4 margin around the detection for the age/gender model
                xw1 = max(int(x1 - 0.4 * w), 0)
                yw1 = max(int(y1 - 0.4 * h), 0)
                xw2 = min(int(x2 + 0.4 * w), img_w - 1)
                yw2 = min(int(y2 + 0.4 * h), img_h - 1)
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
                # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
                faces[i, :, :, :] = cv2.resize(img[yw1:yw2 + 1, xw1:xw2 + 1, :], (img_size, img_size))

                # grayscale crop of the detected face for emotion prediction
                face_image_gray = img_gray[y1:y2, x1:x2]
                angry, fear, happy, sad, surprise, neutral = predict_emotion(face_image_gray)
                emotions = [angry, fear, happy, sad, surprise, neutral]
                m = emotions.index(max(emotions))
                for index, val in enumerate(emotion_labels):
                    if m == index:
                        biaoqing = val

            # predict ages and genders of the detected faces
            results = model.predict(faces)
            predicted_genders = results[0]
            ages = np.arange(0, 101).reshape(101, 1)
            predicted_ages = results[1].dot(ages).flatten()

            # draw results
            for i, d in enumerate(detected):
                label = "{}, {},{}".format(int(predicted_ages[i]),
                                           "F" if predicted_genders[i][0] > 0.5 else "M",
                                           biaoqing)
                draw_label(img, (d.left(), d.top()), label)

        cv2.imshow("result", img)
        # wait 3 ms for a key press; ESC quits
        key = cv2.waitKey(3)

        if key == 27:
            break


if __name__ == '__main__':
    main()
```
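A note on how `demo.py` turns the network output into an age: the age head is a softmax over 101 bins (ages 0 to 100), and `results[1].dot(ages)` computes the expected value of that distribution. A small, self-contained illustration with made-up probabilities:

```python
import numpy as np

# Toy softmax output over 101 age bins (all mass on ages 29-31 for illustration).
probs = np.zeros((1, 101))
probs[0, 29:32] = [0.2, 0.5, 0.3]

ages = np.arange(0, 101).reshape(101, 1)
predicted_age = probs.dot(ages).flatten()   # expected value of the age distribution
print(predicted_age)                        # [30.1]
```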
wide_resnet.py

```python
# This code is imported from the following project: https://github.com/asmith26/wide_resnets_keras

import logging
import sys
import numpy as np
from keras.models import Model
from keras.layers import Input, Activation, add, Dense, Flatten, Dropout
from keras.layers.convolutional import Conv2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

sys.setrecursionlimit(2 ** 20)
np.random.seed(2 ** 10)


class WideResNet:
    def __init__(self, image_size, depth=16, k=8):
        self._depth = depth
        self._k = k
        self._dropout_probability = 0
        self._weight_decay = 0.0005
        self._use_bias = False
        self._weight_init = "he_normal"

        if K.image_dim_ordering() == "th":
            logging.debug("image_dim_ordering = 'th'")
            self._channel_axis = 1
            self._input_shape = (3, image_size, image_size)
        else:
            logging.debug("image_dim_ordering = 'tf'")
            self._channel_axis = -1
            self._input_shape = (image_size, image_size, 3)

    # Wide residual network http://arxiv.org/abs/1605.07146
    def _wide_basic(self, n_input_plane, n_output_plane, stride):
        def f(net):
            # format of conv_params:
            #   [ [kernel_size=("kernel width", "kernel height"),
            #      strides="(stride_vertical,stride_horizontal)",
            #      padding="same" or "valid"] ]
            # B(3,3): original <<basic>> block
            conv_params = [[3, 3, stride, "same"],
                           [3, 3, (1, 1), "same"]]

            n_bottleneck_plane = n_output_plane

            # Residual block
            for i, v in enumerate(conv_params):
                if i == 0:
                    if n_input_plane != n_output_plane:
                        net = BatchNormalization(axis=self._channel_axis)(net)
                        net = Activation("relu")(net)
                        convs = net
                    else:
                        convs = BatchNormalization(axis=self._channel_axis)(net)
                        convs = Activation("relu")(convs)

                    convs = Conv2D(n_bottleneck_plane,
                                   kernel_size=(v[0], v[1]),
                                   strides=v[2],
                                   padding=v[3],
                                   kernel_initializer=self._weight_init,
                                   kernel_regularizer=l2(self._weight_decay),
                                   use_bias=self._use_bias)(convs)
                else:
                    convs = BatchNormalization(axis=self._channel_axis)(convs)
                    convs = Activation("relu")(convs)
                    if self._dropout_probability > 0:
                        convs = Dropout(self._dropout_probability)(convs)
                    convs = Conv2D(n_bottleneck_plane,
                                   kernel_size=(v[0], v[1]),
                                   strides=v[2],
                                   padding=v[3],
                                   kernel_initializer=self._weight_init,
                                   kernel_regularizer=l2(self._weight_decay),
                                   use_bias=self._use_bias)(convs)

            # Shortcut Connection: identity function or 1x1 convolutional
            #  (depends on difference between input & output shape - this
            #   corresponds to whether we are using the first block in each
            #   group; see _layer() ).
            if n_input_plane != n_output_plane:
                shortcut = Conv2D(n_output_plane,
                                  kernel_size=(1, 1),
                                  strides=stride,
                                  padding="same",
                                  kernel_initializer=self._weight_init,
                                  kernel_regularizer=l2(self._weight_decay),
                                  use_bias=self._use_bias)(net)
            else:
                shortcut = net

            return add([convs, shortcut])

        return f

    # "Stacking Residual Units on the same stage"
    def _layer(self, block, n_input_plane, n_output_plane, count, stride):
        def f(net):
            net = block(n_input_plane, n_output_plane, stride)(net)
            for i in range(2, int(count + 1)):
                net = block(n_output_plane, n_output_plane, stride=(1, 1))(net)
            return net

        return f

    # def create_model(self):
    def __call__(self):
        logging.debug("Creating model...")

        assert ((self._depth - 4) % 6 == 0)
        n = (self._depth - 4) / 6

        inputs = Input(shape=self._input_shape)

        n_stages = [16, 16 * self._k, 32 * self._k, 64 * self._k]

        # "One conv at the beginning (spatial size: 32x32)"
        conv1 = Conv2D(filters=n_stages[0], kernel_size=(3, 3),
                       strides=(1, 1),
                       padding="same",
                       kernel_initializer=self._weight_init,
                       kernel_regularizer=l2(self._weight_decay),
                       use_bias=self._use_bias)(inputs)

        # Add wide residual blocks
        block_fn = self._wide_basic
        conv2 = self._layer(block_fn, n_input_plane=n_stages[0], n_output_plane=n_stages[1], count=n, stride=(1, 1))(conv1)
        conv3 = self._layer(block_fn, n_input_plane=n_stages[1], n_output_plane=n_stages[2], count=n, stride=(2, 2))(conv2)
        conv4 = self._layer(block_fn, n_input_plane=n_stages[2], n_output_plane=n_stages[3], count=n, stride=(2, 2))(conv3)
        batch_norm = BatchNormalization(axis=self._channel_axis)(conv4)
        relu = Activation("relu")(batch_norm)

        # Classifier block
        pool = AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding="same")(relu)
        flatten = Flatten()(pool)
        predictions_g = Dense(units=2, kernel_initializer=self._weight_init, use_bias=self._use_bias,
                              kernel_regularizer=l2(self._weight_decay), activation="softmax",
                              name="pred_gender")(flatten)
        predictions_a = Dense(units=101, kernel_initializer=self._weight_init, use_bias=self._use_bias,
                              kernel_regularizer=l2(self._weight_decay), activation="softmax",
                              name="pred_age")(flatten)
        model = Model(inputs=inputs, outputs=[predictions_g, predictions_a])

        return model


def main():
    model = WideResNet(64)()
    model.summary()


if __name__ == '__main__':
    main()
```
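For reference, `demo.py` builds this network as a WRN-16-8 on 64x64 inputs: with depth 16 the assertion (depth - 4) % 6 == 0 holds, and each of the three stages stacks (16 - 4) / 6 = 2 residual blocks. A quick way to inspect the resulting model (a usage sketch, not part of the original code):

```python
from wide_resnet import WideResNet

# Same configuration demo.py uses: 64x64 input, depth=16, widening factor k=8 (WRN-16-8).
model = WideResNet(64, depth=16, k=8)()
model.summary()  # ends with two heads: pred_gender (2 units) and pred_age (101 units)
```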
Setup
Environment: Python 3, tensorflow-gpu, numpy, Keras, dlib
Models: model.h5 (emotion model weights), model.json (emotion model architecture as JSON), weights.18-4.06.hdf5 (age/gender model)
[Model download](https://download.csdn.net/download/hpymiss/10490349)
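A minimal way to check that the environment above is in place (versions are whatever you have installed; the post does not pin them):

```python
# Print the versions of the packages the demo relies on.
import cv2
import dlib
import keras
import numpy as np
import tensorflow as tf

print("tensorflow:", tf.__version__)
print("keras:", keras.__version__)
print("dlib:", dlib.__version__)
print("opencv:", cv2.__version__)
print("numpy:", np.__version__)
```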
Run
python demo.py
Results
Each frame is processed in under a second, and recognition runs smoothly on a live video stream.
Limitations: the models' accuracy is still not good enough and they need fine-tuning; how best to improve them is still under investigation.
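One direction for the fine-tuning mentioned above is to continue training the emotion model from model.h5 on additional labelled faces. The sketch below only illustrates the general Keras recipe; the data files `new_faces.npy` / `new_labels.npy` and all hyperparameters are hypothetical, and the input arrays must match whatever shape model.json expects (48x48 grayscale crops).

```python
from keras.models import model_from_json
from keras.optimizers import Adam
import numpy as np

# Rebuild the emotion model and continue training from the released weights.
with open("model.json") as f:
    model = model_from_json(f.read())
model.load_weights("model.h5")

# Hypothetical extra data: face crops and one-hot labels for the 6 emotion classes.
X_new = np.load("new_faces.npy")    # placeholder path
y_new = np.load("new_labels.npy")   # placeholder path

# Low learning rate so the pretrained weights are only nudged, not overwritten.
model.compile(optimizer=Adam(lr=1e-4), loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X_new, y_new, batch_size=32, epochs=5, validation_split=0.1)
model.save_weights("model_finetuned.h5")
```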
Hardware
- GPU: GeForce GTX 960M (compute capability 5.0), memoryClockRate(GHz): 1.176, pciBusID: 0000:02:00.0, totalMemory: 4.00GiB, freeMemory: 3.34GiB
- CPU: i7
References
Keras official documentation
Reference code and the model.h5 download
Age and gender estimation
A thorough explanation of `yield` in Python
Age and gender detection implemented in Keras (merged into the face-score service)
Keras series: facial expression classification and recognition: OpenCV face detection + Keras emotion classification (4)