From ad673ada4ad1947cf7db99a1a94c0e5a885fda84 Mon Sep 17 00:00:00 2001
From: "pagidi.k"
Date: Thu, 14 Apr 2022 10:10:14 -0400
Subject: [PATCH] NCS Guide

---
 .../NCS2_guide/NCS2_guide_detect_faces.py     |  86 +++++
 .../NCS2_guide/NCS2_guide_detect_objects.py   |   0
 .../nodes/NCS2_guide/NCS2_guide_do.py         |   0
 .../NCS2_guide/NCS2_guide_head_estimator.py   | 322 ++++++++++++++++++
 4 files changed, 408 insertions(+)
 create mode 100755 stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_faces.py
 create mode 100644 stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_objects.py
 create mode 100644 stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_do.py
 create mode 100644 stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_head_estimator.py

diff --git a/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_faces.py b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_faces.py
new file mode 100755
index 0000000..0e308a9
--- /dev/null
+++ b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_faces.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import cv2
+import sys
+import rospy
+import head_estimator as he
+import detection_node as dn
+import deep_learning_model_options as do
+
+
+if __name__ == '__main__':
+    print('cv2.__version__ =', cv2.__version__)
+    print('Python version (must be > 3.0):', sys.version)
+    assert(int(sys.version[0]) >= 3)
+
+    ##############################################
+    # Perform coarse filtering of 3D points using anthropometry.
+    #
+    # 30cm should be significantly over the maximum dimensions of a human head.
+    # 10cm should be significantly smaller than the dimensions of an adult head.
+    # https://en.wikipedia.org/wiki/Human_head
+    #
+    # Children "attain 30% of adult head width by the middle of
+    # prenatal life, 60% by birth, 80% by 6 postnatal months, 90%
+    # by 3 years and 95% by 9 years" - GROWTH IN HEAD WIDTH DURING
+    # THE FIRST TWELVE YEARS OF LIFE, HOWARD V. MEREDITH, copyright
+    # 1953 by the American Academy of Pediatrics
+    # https://pediatrics.aappublications.org/content/12/4/411
+    #
+    # Filtering for depths corresponding with heads with heights
+    # or widths from 8cm to 40cm should be conservative.
+    min_head_m = 0.08
+    max_head_m = 0.4
+    ##############################################
+
+    #### Load the directory where all the deep perception models are stored.
+    models_directory = do.get_directory()
+    print('Using the following directory for deep learning models:', models_directory)
+
+    ######################################################
+    # Check whether the Intel Neural Compute Stick 2 (NCS2) can run the
+    # deep learning model. Not all models are compatible with the NCS2.
+    # Currently this works for only two models: object detection and face
+    # detection. It does not work with head pose estimation, facial
+    # landmarks, or body landmarks. The reason is that the NCS2 only
+    # supports models run through OpenVINO. OpenVINO-format versions of
+    # the models mentioned above are available in the Open Model Zoo.
+    # If you want to run a model that is not available in the model zoo,
+    # the OpenVINO Model Optimizer can convert models from other formats
+    # into the OpenVINO format:
+    # https://docs.openvino.ai/2021.2/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe.html
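+    #
+    # As a rough sketch of that conversion workflow (run from a shell, not
+    # from this node; the model file names below are hypothetical
+    # placeholders):
+    #
+    #   python3 mo.py --input_model my_model.caffemodel \
+    #                 --input_proto deploy.prototxt \
+    #                 --output_dir my_model_openvino/
+    #
+    # This produces the .xml (architecture) and .bin (weights) pair that
+    # cv2.dnn.readNet() loads in NCS2_guide_head_estimator.py below.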
+    #
+    # The neural compute stick is enabled in deep_learning_model_options.
+    # Refer to that file for further instructions.
+    #####################################################
+
+    use_neural_compute_stick = do.use_neural_compute_stick()
+    if use_neural_compute_stick:
+        print('Attempting to use an Intel Neural Compute Stick 2.')
+    else:
+        print('Not attempting to use an Intel Neural Compute Stick 2.')
+
+    # Each model has to be available in two formats: one to run on a normal
+    # CPU, which can be the original Caffe/PyTorch/TensorFlow/etc. format,
+    # and a second in the OpenVINO format. If the model is compatible with
+    # the NCS2, turn the feature on by passing use_neural_compute_stick.
+    detector = he.HeadPoseEstimator(models_directory,
+                                    use_neural_compute_stick=use_neural_compute_stick)
+
+    ### The rest of the code is a standard detection node setup.
+    default_marker_name = 'face'
+    node_name = 'DetectFacesNode'
+    topic_base_name = 'faces'
+    fit_plane = False
+    node = dn.DetectionNode(detector,
+                            default_marker_name,
+                            node_name,
+                            topic_base_name,
+                            fit_plane,
+                            min_box_side_m=min_head_m,
+                            max_box_side_m=max_head_m)
+    node.main()
+    try:
+        rospy.spin()
+    except KeyboardInterrupt:
+        print('interrupt received, so shutting down')

diff --git a/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_objects.py b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_detect_objects.py
new file mode 100644
index 0000000..e69de29
diff --git a/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_do.py b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_do.py
new file mode 100644
index 0000000..e69de29
diff --git a/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_head_estimator.py b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_head_estimator.py
new file mode 100644
index 0000000..cc93a9c
--- /dev/null
+++ b/stretch_deep_perception/nodes/NCS2_guide/NCS2_guide_head_estimator.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python3
+
+import cv2
+import numpy as np
+from scipy.spatial.transform import Rotation
+import deep_models_shared as dm
+
+
+class HeadPoseEstimator:
+    def __init__(self, models_directory, use_neural_compute_stick=False):
+        #########################################################################
+        # Load the models that run on the CPU.
+        #
+        # Specify the model directory and load the Caffe prototxt and
+        # caffemodel files (network configuration and weights) from it.
+        # Note: these models will not run on the compute stick.
+        #########################################################################
+        models_dir = models_directory
+        print('Using the following directory to load object detector models:', models_dir)
+
+        # file with the network architecture and other information
+        head_detection_model_prototxt_filename = models_dir + '/head_detection/deploy.prototxt'
+        # file with the network weights
+        head_detection_model_caffemodel_filename = models_dir + '/head_detection/res10_300x300_ssd_iter_140000.caffemodel'
+        self.face_confidence_threshold = 0.2
+
+        print('attempting to load neural network from files')
+        print('prototxt file =', head_detection_model_prototxt_filename)
+        print('caffemodel file =', head_detection_model_caffemodel_filename)
+        self.head_detection_model = cv2.dnn.readNetFromCaffe(head_detection_model_prototxt_filename, head_detection_model_caffemodel_filename)
+        dm.print_model_info(self.head_detection_model, 'head_detection_model')
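+
+        # For reference, the two-format layout this guide assumes looks like
+        # the following (the head_detection entries are the CPU/Caffe format
+        # and the open_model_zoo entries are the OpenVINO format used below):
+        #
+        #   <models_dir>/head_detection/deploy.prototxt
+        #   <models_dir>/head_detection/res10_300x300_ssd_iter_140000.caffemodel
+        #   <models_dir>/open_model_zoo/head-pose-estimation-adas-0001/FP32/
+        #       head-pose-estimation-adas-0001.xml
+        #       head-pose-estimation-adas-0001.bin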
+
+        #########################################################################
+        # Load the models that run on the VPU (NCS2).
+        #
+        # If a neural compute stick is available, run a model on the Myriad
+        # VPU by calling setPreferableTarget. Load the model directory from
+        # the OpenVINO Open Model Zoo, and load the weights (.bin) and
+        # configuration (.xml) files from it.
+        #########################################################################
+        if use_neural_compute_stick:
+            print('HeadPoseEstimator.__init__: Attempting to use an Intel Neural Compute Stick 2 using the following command: self.head_detection_model.setPreferableTarget(cv2.dnn.DNN_TARGET_MYRIAD)')
+            self.head_detection_model.setPreferableTarget(cv2.dnn.DNN_TARGET_MYRIAD)
+
+        head_pose_model_dir = models_dir + '/open_model_zoo/head-pose-estimation-adas-0001/FP32/'
+        head_pose_weights_filename = head_pose_model_dir + 'head-pose-estimation-adas-0001.bin'
+        head_pose_config_filename = head_pose_model_dir + 'head-pose-estimation-adas-0001.xml'
+        self.head_pose_model = cv2.dnn.readNet(head_pose_weights_filename, head_pose_config_filename)
+
+        if use_neural_compute_stick:
+            print('Not attempting to use an Intel Neural Compute Stick 2 for head pose estimation due to potential errors.')
+
+        dm.print_model_info(self.head_pose_model, 'head_pose_model')
+
+        landmarks_model_dir = models_dir + '/open_model_zoo/facial-landmarks-35-adas-0002/FP32/'
+        landmarks_weights_filename = landmarks_model_dir + 'facial-landmarks-35-adas-0002.bin'
+        landmarks_config_filename = landmarks_model_dir + 'facial-landmarks-35-adas-0002.xml'
+        self.landmarks_model = cv2.dnn.readNet(landmarks_weights_filename, landmarks_config_filename)
+
+        if use_neural_compute_stick:
+            print('Not attempting to use an Intel Neural Compute Stick 2 for facial landmarks due to potential errors.')
+
+        ### The rest of the code is unchanged from the original head_estimator.py.
+        dm.print_model_info(self.landmarks_model, 'landmarks_model')
+
+        self.landmark_names = ['right_eye_left', 'right_eye_right',
+                               'left_eye_right', 'left_eye_left', 'nose_tip',
+                               'nose_bottom', 'nose_right', 'nose_left', 'mouth_right',
+                               'mouth_left', 'mouth_top', 'mouth_bottom',
+                               'right_eyebrow_right', 'right_eyebrow_middle', 'right_eyebrow_left',
+                               'left_eyebrow_right', 'left_eyebrow_middle', 'left_eyebrow_left',
+                               'right_cheek_18', 'right_cheek_19', 'right_cheek_20', 'right_cheek_21',
+                               'right_cheek_22', 'right_cheek_23', 'right_cheek_24',
+                               'chin_right', 'chin_middle', 'chin_left',
+                               'left_cheek_28', 'left_cheek_29', 'left_cheek_30', 'left_cheek_31',
+                               'left_cheek_32', 'left_cheek_33', 'left_cheek_34']
+
+    def get_landmark_names(self):
+        return self.landmark_names
+
+    def get_landmark_colors(self):
+        return None
+
+    def get_landmark_color_dict(self):
+        return None
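+
+    # Hypothetical helper, not called by the rest of this guide: attempt to
+    # retarget a loaded cv2.dnn model to the NCS2 and verify it with a dummy
+    # inference, falling back to the CPU if the stick rejects the model.
+    # input_size is the (width, height) the network expects.
+    def try_neural_compute_stick(self, net, input_size=(300, 300)):
+        net.setPreferableTarget(cv2.dnn.DNN_TARGET_MYRIAD)
+        try:
+            dummy_blob = np.zeros((1, 3, input_size[1], input_size[0]), dtype=np.float32)
+            net.setInput(dummy_blob)
+            net.forward()  # raises cv2.error if the NCS2 cannot run the model
+            return True
+        except cv2.error:
+            net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
+            return False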
+    def detect_faces(self, rgb_image):
+        orig_h, orig_w, c = rgb_image.shape
+        face_image = rgb_image
+        rot_h, rot_w, c = face_image.shape
+        # Assumes that the width is smaller than the height, and crops a
+        # width x width square image from the top.
+        square_face_image = face_image[:rot_w, :, :]
+        sqr_h, sqr_w, c = square_face_image.shape
+        network_image = cv2.resize(square_face_image, (300, 300))
+        # Some magic numbers came from
+        # https://www.pyimagesearch.com/2018/02/26/face-detection-with-opencv-and-deep-learning/
+        face_image_blob = cv2.dnn.blobFromImage(network_image, 1.0, (300, 300), (104.0, 177.0, 123.0))
+        self.head_detection_model.setInput(face_image_blob)
+        face_detections = self.head_detection_model.forward()[0, 0, :, :]
+        confidence_mask = face_detections[:, 2] > self.face_confidence_threshold
+        face_detections = face_detections[confidence_mask]
+        coordinates = face_detections[:, 3:7]
+        # Scale the normalized coordinates back to the original image.
+        coordinates = coordinates * np.array([sqr_w, sqr_h, sqr_w, sqr_h])
+
+        face_id = 0
+        boxes = []
+
+        for x0, y0, x1, y1 in coordinates:
+            orig_y0 = y0
+            orig_y1 = y1
+            orig_x0 = x0
+            orig_x1 = x1
+            face_id += 1
+            bounding_box = [orig_x0, orig_y0, orig_x1, orig_y1]
+            boxes.append(bounding_box)
+
+        return boxes
+
+    def get_sub_image(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
+        if enlarge_box:
+            scale = enlarge_scale
+            orig_h, orig_w, c = rgb_image.shape
+
+            x0 = bounding_box[0]
+            y0 = bounding_box[1]
+            x1 = bounding_box[2]
+            y1 = bounding_box[3]
+
+            m_x = (x1 + x0) / 2.0
+            m_y = (y1 + y0) / 2.0
+
+            b_w = x1 - x0
+            b_h = y1 - y0
+
+            b_w = scale * b_w
+            b_h = scale * b_h
+
+            x0 = int(round(m_x - (b_w/2.0)))
+            x1 = int(round(m_x + (b_w/2.0)))
+            y0 = int(round(m_y - (b_h/2.0)))
+            y1 = int(round(m_y + (b_h/2.0)))
+
+            x0 = max(0, x0)
+            x1 = min(orig_w, x1)
+            y0 = max(0, y0)
+            y1 = min(orig_h, y1)
+        else:
+            x0 = int(round(bounding_box[0]))
+            y0 = int(round(bounding_box[1]))
+            x1 = int(round(bounding_box[2]))
+            y1 = int(round(bounding_box[3]))
+
+        actual_bounding_box = [x0, y0, x1, y1]
+        image_to_crop = rgb_image
+        sub_image = image_to_crop[y0:y1, x0:x1, :]
+        return sub_image, actual_bounding_box
+
+    def estimate_head_pose(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
+        face_crop_image, actual_bounding_box = self.get_sub_image(rgb_image, bounding_box, enlarge_box=enlarge_box, enlarge_scale=enlarge_scale)
+        sqr_h, sqr_w, c = face_crop_image.shape
+
+        if (sqr_h > 0) and (sqr_w > 0):
+            head_pose_image_blob = cv2.dnn.blobFromImage(face_crop_image,
+                                                         size=(60, 60),
+                                                         swapRB=False,
+                                                         crop=False,
+                                                         ddepth=cv2.CV_32F)
+            self.head_pose_model.setInput(head_pose_image_blob)
+            head_pose_out = self.head_pose_model.forward(['angle_r_fc', 'angle_p_fc', 'angle_y_fc'])
+            rpy = head_pose_out
+            roll = rpy[0][0][0]
+            pitch = rpy[1][0][0]
+            yaw = rpy[2][0][0]
+            # The model outputs degrees; convert to radians.
+            pitch = pitch * np.pi/180.0
+            roll = roll * np.pi/180.0
+            yaw = yaw * np.pi/180.0
+
+            return yaw, pitch, roll
+
+        return None, None, None
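+
+    # Illustrative usage of the two methods above (the models path is a
+    # hypothetical placeholder):
+    #
+    #   estimator = HeadPoseEstimator('/path/to/models')
+    #   boxes = estimator.detect_faces(rgb_image)
+    #   for box in boxes:
+    #       yaw, pitch, roll = estimator.estimate_head_pose(rgb_image, box)
+    #
+    # The angles come back in radians, e.g. a returned yaw of ~0.52
+    # corresponds to a 30 degree output from the network, since
+    # radians = degrees * pi / 180.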
+    def detect_facial_landmarks(self, rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15):
+        face_crop_image, actual_bounding_box = self.get_sub_image(rgb_image, bounding_box, enlarge_box=enlarge_box, enlarge_scale=enlarge_scale)
+        sqr_h, sqr_w, c = face_crop_image.shape
+
+        if (sqr_h > 0) and (sqr_w > 0):
+            landmarks_image_blob = cv2.dnn.blobFromImage(face_crop_image,
+                                                         size=(60, 60),
+                                                         swapRB=False,
+                                                         crop=False,
+                                                         ddepth=cv2.CV_32F)
+            self.landmarks_model.setInput(landmarks_image_blob)
+            landmarks_out = self.landmarks_model.forward()
+
+            s = landmarks_out.shape
+            out = np.reshape(landmarks_out[0], (s[1]//2, 2))
+            x0, y0, x1, y1 = actual_bounding_box
+
+            landmarks = {}
+            for n, v in enumerate(out):
+                x = int(round((v[0] * sqr_w) + x0))
+                y = int(round((v[1] * sqr_h) + y0))
+                name = self.landmark_names[n]
+                landmarks[name] = (x, y)
+
+            return landmarks, self.landmark_names.copy()
+
+        return None, None
+
+    def draw_bounding_box(self, image, bounding_box):
+        x0 = int(round(bounding_box[0]))
+        y0 = int(round(bounding_box[1]))
+        x1 = int(round(bounding_box[2]))
+        y1 = int(round(bounding_box[3]))
+        color = (0, 0, 255)
+        thickness = 2
+        cv2.rectangle(image, (x0, y0), (x1, y1), color, thickness)
+
+    def draw_head_pose(self, image, yaw, pitch, roll, bounding_box):
+        x0, y0, x1, y1 = bounding_box
+        face_x = (x1 + x0) / 2.0
+        face_y = (y1 + y0) / 2.0
+        #
+        # OpenCV uses a right-handed coordinate system:
+        # x points to the right of the image
+        # y points to the bottom of the image
+        # z points into the image
+        #
+        h, w, c = image.shape
+        camera_center = (w/2.0, h/2.0)
+        # For rendering with an unknown camera.
+        focal_length = 50.0
+        camera_matrix = np.array([[focal_length, 0.0, camera_center[0]],
+                                  [0.0, focal_length, camera_center[1]],
+                                  [0.0, 0.0, 1.0]])
+        face_translation = np.array([0.0, 0.0, 3000.0])
+        distortion_coefficients = np.array([0.0, 0.0, 0.0, 0.0])
+        # Negate the directions of the y and z axes.
+        axes = np.array([[2000.0, 0.0, 0.0],
+                         [0.0, -2000.0, 0.0],
+                         [0.0, 0.0, -2000.0],
+                         [0.0, 0.0, 0.0]])
+        head_ypr = np.array([-yaw, pitch, roll])
+        rotation_mat = Rotation.from_euler('yxz', head_ypr).as_matrix()
+        rotation_vec, jacobian = cv2.Rodrigues(rotation_mat)
+        image_points, jacobian = cv2.projectPoints(axes, rotation_vec, face_translation, camera_matrix, distortion_coefficients)
+        face_pix = np.array([face_x, face_y])
+
+        origin = image_points[3].ravel()
+        x_axis = (image_points[0].ravel() - origin) + face_pix
+        y_axis = (image_points[1].ravel() - origin) + face_pix
+        z_axis = (image_points[2].ravel() - origin) + face_pix
+
+        p0 = tuple(np.int32(np.round(face_pix)))
+        p1 = tuple(np.int32(np.round(x_axis)))
+        cv2.line(image, p0, p1, (0, 0, 255), 2)
+        p1 = tuple(np.int32(np.round(y_axis)))
+        cv2.line(image, p0, p1, (0, 255, 0), 2)
+        p1 = tuple(np.int32(np.round(z_axis)))
+        cv2.line(image, p0, p1, (255, 0, 0), 2)
+
+    def draw_landmarks(self, image, landmarks):
+        for name, xy in landmarks.items():
+            x = xy[0]
+            y = xy[1]
+            if 'mouth' in name:
+                color = (255, 0, 0)
+            elif 'nose' in name:
+                color = (0, 255, 0)
+            elif 'eyebrow' in name:
+                color = (0, 0, 0)
+            elif 'right_eye' in name:
+                color = (255, 255, 0)
+            elif 'left_eye' in name:
+                color = (0, 255, 255)
+            elif 'chin' in name:
+                color = (255, 0, 255)
+            else:
+                color = (0, 0, 255)
+            cv2.circle(image, (x, y), 2, color, 1)
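+
+    # A note on the rendering math above: cv2.projectPoints implements the
+    # standard pinhole projection x ~ K [R | t] X, where K is camera_matrix,
+    # R is the rotation recovered by cv2.Rodrigues(rotation_mat), and t is
+    # face_translation. Since the true camera intrinsics are unknown here,
+    # focal_length = 50.0 and the 3000.0 translation are arbitrary values
+    # chosen only to render plausible-looking axes.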
+    def apply_to_image(self, rgb_image, draw_output=False):
+        if draw_output:
+            output_image = rgb_image.copy()
+        else:
+            output_image = None
+
+        heads = []
+        boxes = self.detect_faces(rgb_image)
+        facial_landmark_names = self.landmark_names.copy()
+        for bounding_box in boxes:
+            if draw_output:
+                self.draw_bounding_box(output_image, bounding_box)
+            yaw, pitch, roll = self.estimate_head_pose(rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15)
+            if yaw is not None:
+                ypr = (yaw, pitch, roll)
+                if draw_output:
+                    self.draw_head_pose(output_image, yaw, pitch, roll, bounding_box)
+            else:
+                ypr = None
+            landmarks, landmark_names = self.detect_facial_landmarks(rgb_image, bounding_box, enlarge_box=True, enlarge_scale=1.15)
+            if (landmarks is not None) and draw_output:
+                self.draw_landmarks(output_image, landmarks)
+            heads.append({'box': bounding_box, 'ypr': ypr, 'landmarks': landmarks})
+
+        return heads, output_image
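+
+# A minimal end-to-end sketch of this module, assuming an OpenCV image in
+# `frame` and a models directory at a hypothetical path:
+#
+#   estimator = HeadPoseEstimator('/path/to/models', use_neural_compute_stick=False)
+#   heads, annotated = estimator.apply_to_image(frame, draw_output=True)
+#   for head in heads:
+#       print(head['box'], head['ypr'])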