#!/usr/bin/env python3
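"""Detect human heads in an image, estimate each head's pose (yaw, pitch,
roll), and locate 35 facial landmarks, using OpenCV's DNN module with a
Caffe SSD face detector and two Open Model Zoo networks."""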
import cv2
import numpy as np
from scipy.spatial.transform import Rotation
import deep_models_shared_python3 as dm


class HeadPoseEstimator:
    def __init__(self, models_directory, use_neural_compute_stick=False):
        # Load the models
        models_dir = models_directory
        print('Using the following directory to load object detector models:', models_dir)

        # file with network architecture and other information
        head_detection_model_prototxt_filename = models_dir + '/head_detection/deploy.prototxt'
        # file with network weights
        head_detection_model_caffemodel_filename = models_dir + '/head_detection/res10_300x300_ssd_iter_140000.caffemodel'
        self.face_confidence_threshold = 0.2
        print('attempting to load neural network from files')
        print('prototxt file =', head_detection_model_prototxt_filename)
        print('caffemodel file =', head_detection_model_caffemodel_filename)
        self.head_detection_model = cv2.dnn.readNetFromCaffe(head_detection_model_prototxt_filename,
                                                             head_detection_model_caffemodel_filename)
        dm.print_model_info(self.head_detection_model, 'head_detection_model')

        # attempt to use a Neural Compute Stick 2
        if use_neural_compute_stick:
            print('HeadPoseEstimator.__init__: Attempting to use an Intel Neural Compute Stick 2 using the following command: self.head_detection_model.setPreferableTarget(cv2.dnn.DNN_TARGET_MYRIAD)')
            self.head_detection_model.setPreferableTarget(cv2.dnn.DNN_TARGET_MYRIAD)

        head_pose_model_dir = models_dir + '/open_model_zoo/head-pose-estimation-adas-0001/FP32/'
        head_pose_weights_filename = head_pose_model_dir + 'head-pose-estimation-adas-0001.bin'
        head_pose_config_filename = head_pose_model_dir + 'head-pose-estimation-adas-0001.xml'
        self.head_pose_model = cv2.dnn.readNet(head_pose_weights_filename, head_pose_config_filename)
        if use_neural_compute_stick:
            print('Not attempting to use an Intel Neural Compute Stick 2 for head pose estimation due to potential errors.')
        dm.print_model_info(self.head_pose_model, 'head_pose_model')

        landmarks_model_dir = models_dir + '/open_model_zoo/facial-landmarks-35-adas-0002/FP32/'
        landmarks_weights_filename = landmarks_model_dir + 'facial-landmarks-35-adas-0002.bin'
        landmarks_config_filename = landmarks_model_dir + 'facial-landmarks-35-adas-0002.xml'
        self.landmarks_model = cv2.dnn.readNet(landmarks_weights_filename, landmarks_config_filename)
        if use_neural_compute_stick:
            print('Not attempting to use an Intel Neural Compute Stick 2 for facial landmarks due to potential errors.')
        dm.print_model_info(self.landmarks_model, 'landmarks_model')

        # Names for the 35 landmarks, in the output order of
        # facial-landmarks-35-adas-0002.
        self.landmark_names = ['right_eye_left', 'right_eye_right',
                               'left_eye_right', 'left_eye_left', 'nose_tip',
                               'nose_bottom', 'nose_right', 'nose_left', 'mouth_right',
                               'mouth_left', 'mouth_top', 'mouth_bottom',
                               'right_eyebrow_right', 'right_eyebrow_middle', 'right_eyebrow_left',
                               'left_eyebrow_right', 'left_eyebrow_middle', 'left_eyebrow_left',
                               'right_cheek_18', 'right_cheek_19', 'right_cheek_20', 'right_cheek_21',
                               'right_cheek_22', 'right_cheek_23', 'right_cheek_24',
                               'chin_right', 'chin_middle', 'chin_left',
                               'left_cheek_28', 'left_cheek_29', 'left_cheek_30', 'left_cheek_31',
                               'left_cheek_32', 'left_cheek_33', 'left_cheek_34']

    def get_landmark_names(self):
        return self.landmark_names

    def get_landmark_colors(self):
        return None

    def get_landmark_color_dict(self):
        return None

    def detect_faces(self, rgb_image):
        face_image = rgb_image
        image_h, image_w, c = face_image.shape
        # Assumes that the width is smaller than the height, and crops
        # a width x width square image from the top.
        square_face_image = face_image[:image_w, :, :]
        sqr_h, sqr_w, c = square_face_image.shape
        network_image = cv2.resize(square_face_image, (300, 300))
        # The mean subtraction values come from
        # https://www.pyimagesearch.com/2018/02/26/face-detection-with-opencv-and-deep-learning/
        face_image_blob = cv2.dnn.blobFromImage(network_image, 1.0, (300, 300), (104.0, 177.0, 123.0))
        self.head_detection_model.setInput(face_image_blob)
        # Each detection row holds [image_id, label, confidence, x0, y0, x1, y1],
        # with box coordinates normalized to [0, 1].
        face_detections = self.head_detection_model.forward()[0, 0, :, :]
        confidence_mask = face_detections[:, 2] > self.face_confidence_threshold
        face_detections = face_detections[confidence_mask]
        coordinates = face_detections[:, 3:7]
        # Scale the normalized coordinates back to the square crop, whose
        # pixels share the original image's coordinate frame.
        coordinates = coordinates * np.array([sqr_w, sqr_h, sqr_w, sqr_h])
        boxes = [[x0, y0, x1, y1] for x0, y0, x1, y1 in coordinates]
        return boxes

    def get_sub_image(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
        if enlarge_box:
            # Scale the box up around its center, then clip it to the image.
            scale = enlarge_scale
            orig_h, orig_w, c = rgb_image.shape
            x0, y0, x1, y1 = bounding_box
            m_x = (x1 + x0) / 2.0
            m_y = (y1 + y0) / 2.0
            b_w = scale * (x1 - x0)
            b_h = scale * (y1 - y0)
            x0 = int(round(m_x - (b_w / 2.0)))
            x1 = int(round(m_x + (b_w / 2.0)))
            y0 = int(round(m_y - (b_h / 2.0)))
            y1 = int(round(m_y + (b_h / 2.0)))
            x0 = max(0, x0)
            x1 = min(orig_w, x1)
            y0 = max(0, y0)
            y1 = min(orig_h, y1)
        else:
            x0 = int(round(bounding_box[0]))
            y0 = int(round(bounding_box[1]))
            x1 = int(round(bounding_box[2]))
            y1 = int(round(bounding_box[3]))
        actual_bounding_box = [x0, y0, x1, y1]
        sub_image = rgb_image[y0:y1, x0:x1, :]
        return sub_image, actual_bounding_box

    def estimate_head_pose(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
        face_crop_image, actual_bounding_box = self.get_sub_image(rgb_image, bounding_box,
                                                                  enlarge_box=enlarge_box,
                                                                  enlarge_scale=enlarge_scale)
        crop_h, crop_w, c = face_crop_image.shape
        if (crop_h > 0) and (crop_w > 0):
            # The head pose network takes a 60x60 face crop and outputs
            # roll, pitch, and yaw angles in degrees.
            head_pose_image_blob = cv2.dnn.blobFromImage(face_crop_image,
                                                         size=(60, 60),
                                                         swapRB=False,
                                                         crop=False,
                                                         ddepth=cv2.CV_32F)
            self.head_pose_model.setInput(head_pose_image_blob)
            rpy = self.head_pose_model.forward(['angle_r_fc', 'angle_p_fc', 'angle_y_fc'])
            roll = rpy[0][0][0]
            pitch = rpy[1][0][0]
            yaw = rpy[2][0][0]
            # convert from degrees to radians
            pitch = pitch * np.pi / 180.0
            roll = roll * np.pi / 180.0
            yaw = yaw * np.pi / 180.0
            return yaw, pitch, roll
        return None, None, None

    def detect_facial_landmarks(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
        face_crop_image, actual_bounding_box = self.get_sub_image(rgb_image, bounding_box,
                                                                  enlarge_box=enlarge_box,
                                                                  enlarge_scale=enlarge_scale)
        crop_h, crop_w, c = face_crop_image.shape
        if (crop_h > 0) and (crop_w > 0):
            landmarks_image_blob = cv2.dnn.blobFromImage(face_crop_image,
                                                         size=(60, 60),
                                                         swapRB=False,
                                                         crop=False,
                                                         ddepth=cv2.CV_32F)
            self.landmarks_model.setInput(landmarks_image_blob)
            landmarks_out = self.landmarks_model.forward()
            # The output is a flat vector of (x, y) pairs normalized to the
            # crop, so reshape it to one row per landmark.
            s = landmarks_out.shape
            out = np.reshape(landmarks_out[0], (s[1] // 2, 2))
            x0, y0, x1, y1 = actual_bounding_box
            landmarks = {}
            for n, v in enumerate(out):
                # Convert from crop-relative coordinates to original image pixels.
                x = int(round((v[0] * crop_w) + x0))
                y = int(round((v[1] * crop_h) + y0))
                name = self.landmark_names[n]
                landmarks[name] = (x, y)
            return landmarks, self.landmark_names.copy()
        return None, None

    def draw_bounding_box(self, image, bounding_box):
        x0 = int(round(bounding_box[0]))
        y0 = int(round(bounding_box[1]))
        x1 = int(round(bounding_box[2]))
        y1 = int(round(bounding_box[3]))
        color = (0, 0, 255)
        thickness = 2
        cv2.rectangle(image, (x0, y0), (x1, y1), color, thickness)

    def draw_head_pose(self, image, yaw, pitch, roll, bounding_box):
        x0, y0, x1, y1 = bounding_box
        face_x = (x1 + x0) / 2.0
        face_y = (y1 + y0) / 2.0
        #
        # OpenCV uses a right-handed coordinate system:
        # x points to the right of the image
        # y points to the bottom of the image
        # z points into the image
        #
        h, w, c = image.shape
        camera_center = (w / 2.0, h / 2.0)
        # for rendering with an unknown camera
        focal_length = 50.0
        camera_matrix = np.array([[focal_length, 0.0, camera_center[0]],
                                  [0.0, focal_length, camera_center[1]],
                                  [0.0, 0.0, 1.0]])
        face_translation = np.array([0.0, 0.0, 3000.0])
        distortion_coefficients = np.array([0.0, 0.0, 0.0, 0.0])
        # negate the directions of the y and z axes
        axes = np.array([[2000.0, 0.0, 0.0],
                         [0.0, -2000.0, 0.0],
                         [0.0, 0.0, -2000.0],
                         [0.0, 0.0, 0.0]])
        head_ypr = np.array([-yaw, pitch, roll])
        # as_dcm() was removed from recent SciPy releases; as_matrix()
        # returns the same rotation matrix.
        rotation_mat = Rotation.from_euler('yxz', head_ypr).as_matrix()
        rotation_vec, jacobian = cv2.Rodrigues(rotation_mat)
        image_points, jacobian = cv2.projectPoints(axes, rotation_vec, face_translation,
                                                   camera_matrix, distortion_coefficients)
        # Translate the projected axes so their origin sits at the face
        # center, then draw x in red, y in green, and z in blue.
        face_pix = np.array([face_x, face_y])
        origin = image_points[3].ravel()
        x_axis = (image_points[0].ravel() - origin) + face_pix
        y_axis = (image_points[1].ravel() - origin) + face_pix
        z_axis = (image_points[2].ravel() - origin) + face_pix
        p0 = tuple(np.int32(np.round(face_pix)))
        p1 = tuple(np.int32(np.round(x_axis)))
        cv2.line(image, p0, p1, (0, 0, 255), 2)
        p1 = tuple(np.int32(np.round(y_axis)))
        cv2.line(image, p0, p1, (0, 255, 0), 2)
        p1 = tuple(np.int32(np.round(z_axis)))
        cv2.line(image, p0, p1, (255, 0, 0), 2)

    def draw_landmarks(self, image, landmarks):
        # Color-code the landmark dots by facial feature (BGR).
        for name, xy in landmarks.items():
            x = xy[0]
            y = xy[1]
            if 'mouth' in name:
                color = (255, 0, 0)
            elif 'nose' in name:
                color = (0, 255, 0)
            elif 'eyebrow' in name:
                color = (0, 0, 0)
            elif 'right_eye' in name:
                color = (255, 255, 0)
            elif 'left_eye' in name:
                color = (0, 255, 255)
            elif 'chin' in name:
                color = (255, 0, 255)
            else:
                color = (0, 0, 255)
            cv2.circle(image, (x, y), 2, color, 1)

    def apply_to_image(self, rgb_image, draw_output=False):
        if draw_output:
            output_image = rgb_image.copy()
        else:
            output_image = None
        heads = []
        boxes = self.detect_faces(rgb_image)
        for bounding_box in boxes:
            if draw_output:
                self.draw_bounding_box(output_image, bounding_box)
            yaw, pitch, roll = self.estimate_head_pose(rgb_image, bounding_box,
                                                       enlarge_box=True, enlarge_scale=1.15)
            if yaw is not None:
                ypr = (yaw, pitch, roll)
                if draw_output:
                    self.draw_head_pose(output_image, yaw, pitch, roll, bounding_box)
            else:
                ypr = None
            landmarks, landmark_names = self.detect_facial_landmarks(rgb_image, bounding_box,
                                                                     enlarge_box=True, enlarge_scale=1.15)
            if (landmarks is not None) and draw_output:
                self.draw_landmarks(output_image, landmarks)
            heads.append({'box': bounding_box, 'ypr': ypr, 'landmarks': landmarks})
        return heads, output_image
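

# Example usage: a minimal sketch, assuming a './models' directory laid out
# as __init__ expects and a test image named 'face.jpg'; both paths are
# placeholders rather than files shipped with this module.
if __name__ == '__main__':
    estimator = HeadPoseEstimator('./models', use_neural_compute_stick=False)
    image = cv2.imread('face.jpg')
    if image is None:
        raise RuntimeError("could not read 'face.jpg'")
    heads, annotated_image = estimator.apply_to_image(image, draw_output=True)
    for head in heads:
        print('box =', head['box'])
        print('ypr =', head['ypr'])
    if annotated_image is not None:
        cv2.imwrite('face_annotated.jpg', annotated_image)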