From 3e731749953520fb3cb306108b8333c923ed3d2a Mon Sep 17 00:00:00 2001
From: Hongyu Li <li.hongyu1@northeastern.edu>
Date: Mon, 14 Mar 2022 17:55:28 -0400
Subject: [PATCH] Lisa conquering audio one wav at a time

---
 vz_ros_wrappers/CMakeLists.txt                |  42 +++-
 .../audio_common_msgs/CMakeLists.txt          |  12 +
 .../audio_common_msgs/msg/AudioData.msg       |   1 +
 .../audio_common_msgs/msg/AudioInfo.msg       |  12 +
 vz_ros_wrappers/audio_common_msgs/package.xml |  20 ++
 vz_ros_wrappers/launch/audio.launch           |  13 +
 vz_ros_wrappers/package.xml                   |   8 +
 vz_ros_wrappers/scripts/audio_capture.cpp     | 236 ++++++++++++++++++
 vz_ros_wrappers/scripts/unit8_to_wav.py       |  37 +++
 9 files changed, 369 insertions(+), 12 deletions(-)
 create mode 100644 vz_ros_wrappers/audio_common_msgs/CMakeLists.txt
 create mode 100644 vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg
 create mode 100644 vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg
 create mode 100644 vz_ros_wrappers/audio_common_msgs/package.xml
 create mode 100644 vz_ros_wrappers/launch/audio.launch
 create mode 100644 vz_ros_wrappers/scripts/audio_capture.cpp
 create mode 100644 vz_ros_wrappers/scripts/unit8_to_wav.py

diff --git a/vz_ros_wrappers/CMakeLists.txt b/vz_ros_wrappers/CMakeLists.txt
index fc1766e..980fad1 100644
--- a/vz_ros_wrappers/CMakeLists.txt
+++ b/vz_ros_wrappers/CMakeLists.txt
@@ -12,12 +12,16 @@ find_package(catkin REQUIRED COMPONENTS
   # opencv2 # ananya this does not compile
   roscpp
   rospy
+  audio_common_msgs
   sensor_msgs
   std_msgs
 )
 
+find_package(PkgConfig)
+pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED)
+
 ## System dependencies are found with CMake's conventions
-# find_package(Boost REQUIRED COMPONENTS system)
+find_package(Boost REQUIRED COMPONENTS thread)
 
 
 ## Uncomment this if the package has a setup.py. This macro ensures
@@ -121,6 +125,8 @@ catkin_package(
 include_directories(
 # include
   ${catkin_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIRS}
+  ${GST1.0_INCLUDE_DIRS}
 )
 
 ## Declare a C++ library
@@ -136,7 +142,8 @@ include_directories(
 ## Declare a C++ executable
 ## With catkin_make all packages are built within a single CMake context
 ## The recommended prefix ensures that target names across packages don't collide
-# add_executable(${PROJECT_NAME}_node src/vz_ros_wrappers_node.cpp)
+add_executable(audio_capture scripts/audio_capture.cpp)
+add_executable(audio_play scripts/audio_play.cpp)
 
 ## Rename C++ executable without prefix
 ## The above recommended prefix causes long target names, the following renames the
@@ -146,12 +153,20 @@ include_directories(
 
 ## Add cmake target dependencies of the executable
 ## same as for the library above
-# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+add_dependencies(audio_capture ${catkin_EXPORTED_TARGETS})
+add_dependencies(audio_play ${catkin_EXPORTED_TARGETS})
 
 ## Specify libraries to link a library or executable target against
-# target_link_libraries(${PROJECT_NAME}_node
-#   ${catkin_LIBRARIES}
-# )
+target_link_libraries(audio_capture
+   ${catkin_LIBRARIES}
+   ${GST1.0_LIBRARIES}
+   ${Boost_LIBRARIES}
+)
+target_link_libraries(audio_play
+   ${catkin_LIBRARIES}
+   ${GST1.0_LIBRARIES}
+   ${Boost_LIBRARIES}
+)
 
 #############
 ## Install ##
@@ -169,9 +184,12 @@ include_directories(
 
 ## Mark executables for installation
 ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
-# install(TARGETS ${PROJECT_NAME}_node
-#   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
-# )
+install(TARGETS audio_capture
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
+install(TARGETS audio_play
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
 
 ## Mark libraries for installation
 ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
@@ -182,11 +200,11 @@ include_directories(
 # )
 
 ## Mark cpp header files for installation
-# install(DIRECTORY include/${PROJECT_NAME}/
-#   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
+install(DIRECTORY launch
+   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
 #   FILES_MATCHING PATTERN "*.h"
 #   PATTERN ".svn" EXCLUDE
-# )
+)
 
 ## Mark other files for installation (e.g. launch and bag files, etc.)
 # install(FILES
diff --git a/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt b/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt
new file mode 100644
index 0000000..eb37935
--- /dev/null
+++ b/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 2.8.3)
+
+project(audio_common_msgs)
+
+find_package(catkin REQUIRED COMPONENTS message_generation)
+add_message_files(DIRECTORY msg FILES
+  AudioData.msg
+  AudioInfo.msg
+  )
+generate_messages()
+
+catkin_package(CATKIN_DEPENDS message_runtime)
\ No newline at end of file
diff --git a/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg b/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg
new file mode 100644
index 0000000..1735052
--- /dev/null
+++ b/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg
@@ -0,0 +1 @@
+uint8[] data
\ No newline at end of file
diff --git a/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg b/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg
new file mode 100644
index 0000000..1e0a539
--- /dev/null
+++ b/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg
@@ -0,0 +1,12 @@
+# This message contains the audio meta data
+
+# Number of channels
+uint8 channels
+# Sampling rate [Hz]
+uint32 sample_rate
+# Audio format (e.g. S16LE)
+string sample_format
+# Amount of audio data per second [bits/s]
+uint32 bitrate
+# Audio coding format (e.g. WAVE, MP3)
+string coding_format
\ No newline at end of file
diff --git a/vz_ros_wrappers/audio_common_msgs/package.xml b/vz_ros_wrappers/audio_common_msgs/package.xml
new file mode 100644
index 0000000..f20f24b
--- /dev/null
+++ b/vz_ros_wrappers/audio_common_msgs/package.xml
@@ -0,0 +1,20 @@
+<package>
+    <name>audio_common_msgs</name>
+    <version>0.3.12</version>
+    <description>
+       Messages for transmitting audio via ROS
+    </description>
+    <maintainer email="namniart@gmail.com">Austin Hendrix</maintainer>
+    <maintainer email="shingogo.5511@gmail.com">Shingo Kitagawa</maintainer>
+    <author>Nate Koenig</author>
+    <license>BSD</license>
+    <url type="website">http://ros.org/wiki/audio_common_msgs</url>
+    <url type="repository">https://github.com/ros-drivers/audio_common</url>
+    <url type="bugtracker">https://github.com/ros-drivers/audio_common/issues</url>
+ 
+    <buildtool_depend>catkin</buildtool_depend>
+ 
+    <build_depend>message_generation</build_depend>
+ 
+    <run_depend>message_runtime</run_depend>
+ </package>
\ No newline at end of file
diff --git a/vz_ros_wrappers/launch/audio.launch b/vz_ros_wrappers/launch/audio.launch
new file mode 100644
index 0000000..3bf24f7
--- /dev/null
+++ b/vz_ros_wrappers/launch/audio.launch
@@ -0,0 +1,13 @@
+<launch>
+    <arg name="device" default="" />
+  
+    <!-- publish audio data as wav format -->
+    <node name="audio_capture" pkg="vz_ros_wrappers" type="audio_capture" output="screen">
+      <param name="device" value="" />
+      <param name="format" value="mp3" />
+      <param name="channels" value="1" />
+      <param name="depth" value="16" />
+      <param name="sample_rate" value="16000" />
+      <param name="device" value="$(arg device)" />
+    </node>
+  </launch>
\ No newline at end of file
diff --git a/vz_ros_wrappers/package.xml b/vz_ros_wrappers/package.xml
index 450ab2e..795a7b5 100644
--- a/vz_ros_wrappers/package.xml
+++ b/vz_ros_wrappers/package.xml
@@ -52,8 +52,11 @@
   <build_depend>cv_bridge</build_depend>
   <!-- <build_depend>opencv2</build_depend> -->
   <build_depend>roscpp</build_depend>
+  <build_depend>audio_common_msgs</build_depend>
   <build_depend>rospy</build_depend>
   <build_depend>sensor_msgs</build_depend>
+  <build_depend>libgstreamer1.0-dev</build_depend>
+  <build_depend>libgstreamer-plugins-base1.0-dev</build_depend>
   <build_depend>std_msgs</build_depend>
   <build_export_depend>cv_bridge</build_export_depend>
   <!-- <build_export_depend>opencv2</build_export_depend> -->
@@ -68,6 +71,11 @@
   <exec_depend>sensor_msgs</exec_depend>
   <exec_depend>std_msgs</exec_depend>
 
+  <exec_depend>audio_common_msgs</exec_depend>
+  <exec_depend>gstreamer1.0</exec_depend>
+  <exec_depend>gstreamer1.0-plugins-base</exec_depend>
+  <exec_depend>gstreamer1.0-plugins-good</exec_depend>
+  <exec_depend>gstreamer1.0-plugins-ugly</exec_depend>
 
   <!-- The export tag contains other, unspecified, tags -->
   <export>
diff --git a/vz_ros_wrappers/scripts/audio_capture.cpp b/vz_ros_wrappers/scripts/audio_capture.cpp
new file mode 100644
index 0000000..354dca3
--- /dev/null
+++ b/vz_ros_wrappers/scripts/audio_capture.cpp
@@ -0,0 +1,236 @@
+#include <stdio.h>
+#include <gst/gst.h>
+#include <gst/app/gstappsink.h>
+#include <boost/thread.hpp>
+
+#include <ros/ros.h>
+
+#include "audio_common_msgs/AudioData.h"
+#include "audio_common_msgs/AudioInfo.h"
+
+namespace audio_transport
+{
+  class RosGstCapture
+  {
+    public:
+      RosGstCapture()
+      {
+        _bitrate = 192;
+
+        std::string dst_type;
+
+        // Need to encoding or publish raw wave data
+        ros::param::param<std::string>("~format", _format, "mp3");
+        ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");
+
+        // The bitrate at which to encode the audio
+        ros::param::param<int>("~bitrate", _bitrate, 192);
+
+        // only available for raw data
+        ros::param::param<int>("~channels", _channels, 1);
+        ros::param::param<int>("~depth", _depth, 16);
+        ros::param::param<int>("~sample_rate", _sample_rate, 16000);
+
+        // The destination of the audio
+        ros::param::param<std::string>("~dst", dst_type, "appsink");
+
+        // The source of the audio
+        //ros::param::param<std::string>("~src", source_type, "alsasrc");
+        std::string device;
+        ros::param::param<std::string>("~device", device, "");
+
+        _pub = _nh.advertise<audio_common_msgs::AudioData>("audio", 10, true);
+        _pub_info = _nh.advertise<audio_common_msgs::AudioInfo>("audio_info", 1, true);
+
+        _loop = g_main_loop_new(NULL, false);
+        _pipeline = gst_pipeline_new("ros_pipeline");
+        _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
+        gst_bus_add_signal_watch(_bus);
+        g_signal_connect(_bus, "message::error",
+                         G_CALLBACK(onMessage), this);
+        g_object_unref(_bus);
+
+        // We create the sink first, just for convenience
+        if (dst_type == "appsink")
+        {
+          _sink = gst_element_factory_make("appsink", "sink");
+          g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
+          g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
+          g_signal_connect( G_OBJECT(_sink), "new-sample",
+                            G_CALLBACK(onNewBuffer), this);
+        }
+        else
+        {
+          ROS_INFO("file sink to %s", dst_type.c_str());
+          _sink = gst_element_factory_make("filesink", "sink");
+          g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
+        }
+
+        _source = gst_element_factory_make("alsasrc", "source");
+        // if device isn't specified, it will use the default which is
+        // the alsa default source.
+        // A valid device will be of the foram hw:0,0 with other numbers
+        // than 0 and 0 as are available.
+        if (device != "")
+        {
+          // ghcar *gst_device = device.c_str();
+          g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
+        }
+
+        GstCaps *caps;
+        caps = gst_caps_new_simple("audio/x-raw",
+                                   "format", G_TYPE_STRING, _sample_format.c_str(),
+                                   "channels", G_TYPE_INT, _channels,
+                                   "width",    G_TYPE_INT, _depth,
+                                   "depth",    G_TYPE_INT, _depth,
+                                   "rate",     G_TYPE_INT, _sample_rate,
+                                   "signed",   G_TYPE_BOOLEAN, TRUE,
+                                   NULL);
+
+        gboolean link_ok;
+        if (_format == "mp3"){
+          _filter = gst_element_factory_make("capsfilter", "filter");
+          g_object_set( G_OBJECT(_filter), "caps", caps, NULL);
+          gst_caps_unref(caps);
+
+          _convert = gst_element_factory_make("audioconvert", "convert");
+          if (!_convert) {
+            ROS_ERROR_STREAM("Failed to create audioconvert element");
+            exitOnMainThread(1);
+          }
+
+          _encode = gst_element_factory_make("lamemp3enc", "encoder");
+          if (!_encode) {
+            ROS_ERROR_STREAM("Failed to create encoder element");
+            exitOnMainThread(1);
+          }
+          g_object_set( G_OBJECT(_encode), "target", 1, NULL);
+          g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL);
+
+          gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
+          link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
+        } else if (_format == "wave") {
+          if (dst_type == "appsink") {
+            g_object_set( G_OBJECT(_sink), "caps", caps, NULL);
+            gst_caps_unref(caps);
+            gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
+            link_ok = gst_element_link_many( _source, _sink, NULL);
+          } else {
+            _filter = gst_element_factory_make("wavenc", "filter");
+            gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL);
+            link_ok = gst_element_link_many( _source, _filter, _sink, NULL);
+          }
+        } else {
+          ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
+          exitOnMainThread(1);
+        }
+        /*}
+        else
+        {
+          _sleep_time = 10000;
+          _source = gst_element_factory_make("filesrc", "source");
+          g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
+
+          gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
+          gst_element_link_many(_source, _sink, NULL);
+        }
+        */
+
+        if (!link_ok) {
+          ROS_ERROR_STREAM("Unsupported media type.");
+          exitOnMainThread(1);
+        }
+
+        gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);
+
+        _gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) );
+
+        audio_common_msgs::AudioInfo info_msg;
+        info_msg.channels = _channels;
+        info_msg.sample_rate = _sample_rate;
+        info_msg.sample_format = _sample_format;
+        info_msg.bitrate = _bitrate;
+        info_msg.coding_format = _format;
+        _pub_info.publish(info_msg);
+      }
+
+      ~RosGstCapture()
+      {
+        g_main_loop_quit(_loop);
+        gst_element_set_state(_pipeline, GST_STATE_NULL);
+        gst_object_unref(_pipeline);
+        g_main_loop_unref(_loop);
+      }
+
+      void exitOnMainThread(int code)
+      {
+        exit(code);
+      }
+
+      void publish( const audio_common_msgs::AudioData &msg )
+      {
+        _pub.publish(msg);
+      }
+
+      static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData)
+      {
+        RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
+        GstMapInfo map;
+
+        GstSample *sample;
+        g_signal_emit_by_name(appsink, "pull-sample", &sample);
+
+        GstBuffer *buffer = gst_sample_get_buffer(sample);
+
+        audio_common_msgs::AudioData msg;
+        gst_buffer_map(buffer, &map, GST_MAP_READ);
+        msg.data.resize( map.size );
+
+        memcpy( &msg.data[0], map.data, map.size );
+
+        gst_buffer_unmap(buffer, &map);
+        gst_sample_unref(sample);
+
+        server->publish(msg);
+
+        return GST_FLOW_OK;
+      }
+
+      static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData)
+      {
+        RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
+        GError *err;
+        gchar *debug;
+
+        gst_message_parse_error(message, &err, &debug);
+        ROS_ERROR_STREAM("gstreamer: " << err->message);
+        g_error_free(err);
+        g_free(debug);
+        g_main_loop_quit(server->_loop);
+        server->exitOnMainThread(1);
+        return FALSE;
+      }
+
+    private:
+      ros::NodeHandle _nh;
+      ros::Publisher _pub;
+      ros::Publisher _pub_info;
+
+      boost::thread _gst_thread;
+
+      GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
+      GstBus *_bus;
+      int _bitrate, _channels, _depth, _sample_rate;
+      GMainLoop *_loop;
+      std::string _format, _sample_format;
+  };
+}
+
+int main (int argc, char **argv)
+{
+  ros::init(argc, argv, "audio_capture");
+  gst_init(&argc, &argv);
+
+  audio_transport::RosGstCapture server;
+  ros::spin();
+}
\ No newline at end of file
diff --git a/vz_ros_wrappers/scripts/unit8_to_wav.py b/vz_ros_wrappers/scripts/unit8_to_wav.py
new file mode 100644
index 0000000..3584abb
--- /dev/null
+++ b/vz_ros_wrappers/scripts/unit8_to_wav.py
@@ -0,0 +1,37 @@
+#! /usr/bin/python3
+
+import rospy
+import numpy as np
+from audio_common_msgs.msg import AudioData
+from std_msgs.msg import String
+from scipy.io.wavfile import wavf
+
+class D435i:
+    def __init__(self):
+        # 1) Write subscriber to /audio topic and populate a numpy data structure (array) with the uint8[] data
+        self.raw_audio_sub = rospy.Subscriber('/audio', AudioData, self.raw_callback)
+
+        self.byteArray = np.array(1)
+        self.msg_count = 0
+
+    def raw_callback(self, bytes):
+        if (self.msg_count > 0):
+            self.byteArray.append(bytes)
+        else :
+            self.byteArray()
+        self.msg_count += 1 
+
+# 2) Check you're "Decoding" the audio ROS message correctly by saving to a .wav file
+# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding on the audio_capture.cpp, seeing if that changes anything e.g. encoding with mp3 instead.
+# 4) If you've made it this far, well done. Try find Utku's function to pass the np array.
+
+    def run(self):
+        while not rospy.is_shutdown():
+            pass
+
+if __name__ == '__main__':
+    try:
+        rospy.init_node('audio_converter', anonymous=True)
+    except rospy.ROSInterruptException:
+        print('Audio converter node failed!')
+        pass
\ No newline at end of file