
Lisa conquering audio one wav at a time

pull/65/head
Hongyu Li, 2 years ago
commit 3e73174995
9 changed files with 369 additions and 12 deletions:

  1. vz_ros_wrappers/CMakeLists.txt (+30, -12)
  2. vz_ros_wrappers/audio_common_msgs/CMakeLists.txt (+12, -0)
  3. vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg (+1, -0)
  4. vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg (+12, -0)
  5. vz_ros_wrappers/audio_common_msgs/package.xml (+20, -0)
  6. vz_ros_wrappers/launch/audio.launch (+13, -0)
  7. vz_ros_wrappers/package.xml (+8, -0)
  8. vz_ros_wrappers/scripts/audio_capture.cpp (+236, -0)
  9. vz_ros_wrappers/scripts/unit8_to_wav.py (+37, -0)

vz_ros_wrappers/CMakeLists.txt (+30, -12)

@@ -12,12 +12,16 @@ find_package(catkin REQUIRED COMPONENTS
# opencv2 # ananya this does not compile
roscpp
rospy
audio_common_msgs
sensor_msgs
std_msgs
)
find_package(PkgConfig)
pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED)
## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
find_package(Boost REQUIRED COMPONENTS thread)
## Uncomment this if the package has a setup.py. This macro ensures
@@ -121,6 +125,8 @@ catkin_package(
include_directories(
# include
${catkin_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
${GST1.0_INCLUDE_DIRS}
)
## Declare a C++ library
@@ -136,7 +142,8 @@ include_directories(
## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/vz_ros_wrappers_node.cpp)
add_executable(audio_capture scripts/audio_capture.cpp)
add_executable(audio_play scripts/audio_play.cpp)
## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
@@ -146,12 +153,20 @@ include_directories(
## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
add_dependencies(audio_capture ${catkin_EXPORTED_TARGETS})
add_dependencies(audio_play ${catkin_EXPORTED_TARGETS})
## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
# ${catkin_LIBRARIES}
# )
target_link_libraries(audio_capture
${catkin_LIBRARIES}
${GST1.0_LIBRARIES}
${Boost_LIBRARIES}
)
target_link_libraries(audio_play
${catkin_LIBRARIES}
${GST1.0_LIBRARIES}
${Boost_LIBRARIES}
)
#############
## Install ##
@@ -169,9 +184,12 @@ include_directories(
## Mark executables for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
# install(TARGETS ${PROJECT_NAME}_node
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
install(TARGETS audio_capture
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
install(TARGETS audio_play
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
## Mark libraries for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
@@ -182,11 +200,11 @@ include_directories(
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
install(DIRECTORY launch
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
)
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
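
A note on the CMake changes above: pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED) resolves the GStreamer 1.0 development files through pkg-config and, because the chosen prefix is GST1.0, defines the GST1.0_INCLUDE_DIRS and GST1.0_LIBRARIES variables that the new include_directories and target_link_libraries entries consume. Boost is required with the thread component because the capture node runs the GStreamer main loop on a boost::thread.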

vz_ros_wrappers/audio_common_msgs/CMakeLists.txt (+12, -0)

@@ -0,0 +1,12 @@
cmake_minimum_required(VERSION 2.8.3)
project(audio_common_msgs)
find_package(catkin REQUIRED COMPONENTS message_generation)
add_message_files(DIRECTORY msg FILES
AudioData.msg
AudioInfo.msg
)
generate_messages()
catkin_package(CATKIN_DEPENDS message_runtime)

vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg (+1, -0)

@@ -0,0 +1 @@
uint8[] data

vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg (+12, -0)

@@ -0,0 +1,12 @@
# This message contains the audio meta data
# Number of channels
uint8 channels
# Sampling rate [Hz]
uint32 sample_rate
# Audio format (e.g. S16LE)
string sample_format
# Amount of audio data per second [bits/s]
uint32 bitrate
# Audio coding format (e.g. WAVE, MP3)
string coding_format
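
These two messages are meant to be read together: AudioData carries an opaque byte payload and AudioInfo describes how to interpret it. Below is a minimal subscriber sketch, assuming the capture node is configured for raw "wave" output in S16LE format; the /audio and /audio_info topic names come from audio_capture.cpp further down, while the node name and everything else here is illustrative only.

#!/usr/bin/env python3
# Sketch: interpret AudioData bytes using the metadata advertised on /audio_info.
import numpy as np
import rospy
from audio_common_msgs.msg import AudioData, AudioInfo

def main():
    rospy.init_node('audio_probe')
    # Grab one metadata message so we know how to decode the byte stream.
    info = rospy.wait_for_message('/audio_info', AudioInfo)

    def on_audio(msg):
        if info.coding_format != 'wave' or info.sample_format != 'S16LE':
            rospy.logwarn_once('payload is %s/%s, not raw S16LE; skipping',
                               info.coding_format, info.sample_format)
            return
        # Signed 16-bit little-endian samples, interleaved by channel.
        samples = np.frombuffer(bytes(msg.data), dtype='<i2')
        if info.channels > 1:
            samples = samples.reshape(-1, info.channels)
        rospy.loginfo('%d samples at %d Hz', len(samples), info.sample_rate)

    rospy.Subscriber('/audio', AudioData, on_audio)
    rospy.spin()

if __name__ == '__main__':
    main()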

vz_ros_wrappers/audio_common_msgs/package.xml (+20, -0)

@@ -0,0 +1,20 @@
<package>
<name>audio_common_msgs</name>
<version>0.3.12</version>
<description>
Messages for transmitting audio via ROS
</description>
<maintainer email="namniart@gmail.com">Austin Hendrix</maintainer>
<maintainer email="shingogo.5511@gmail.com">Shingo Kitagawa</maintainer>
<author>Nate Koenig</author>
<license>BSD</license>
<url type="website">http://ros.org/wiki/audio_common_msgs</url>
<url type="repository">https://github.com/ros-drivers/audio_common</url>
<url type="bugtracker">https://github.com/ros-drivers/audio_common/issues</url>
<buildtool_depend>catkin</buildtool_depend>
<build_depend>message_generation</build_depend>
<run_depend>message_runtime</run_depend>
</package>

vz_ros_wrappers/launch/audio.launch (+13, -0)

@@ -0,0 +1,13 @@
<launch>
  <arg name="device" default="" />
  <!-- Capture audio from the ALSA device and publish it on /audio -->
  <node name="audio_capture" pkg="vz_ros_wrappers" type="audio_capture" output="screen">
    <param name="format" value="mp3" />
    <param name="channels" value="1" />
    <param name="depth" value="16" />
    <param name="sample_rate" value="16000" />
    <param name="device" value="$(arg device)" />
  </node>
</launch>
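
As configured above, the capture node encodes to mp3, so the bytes published on /audio are mp3 frames rather than raw PCM; for the uint8-to-wav conversion script added later in this commit, the format parameter would need to be set to "wave" instead. The device argument is passed straight through to alsasrc, so a launch such as roslaunch vz_ros_wrappers audio.launch device:=hw:1,0 (the hw:1,0 value is only an example) selects a specific capture card.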

vz_ros_wrappers/package.xml (+8, -0)

@@ -52,8 +52,11 @@
<build_depend>cv_bridge</build_depend>
<!-- <build_depend>opencv2</build_depend> -->
<build_depend>roscpp</build_depend>
<build_depend>audio_common_msgs</build_depend>
<build_depend>rospy</build_depend>
<build_depend>sensor_msgs</build_depend>
<build_depend>libgstreamer1.0-dev</build_depend>
<build_depend>libgstreamer-plugins-base1.0-dev</build_depend>
<build_depend>std_msgs</build_depend>
<build_export_depend>cv_bridge</build_export_depend>
<!-- <build_export_depend>opencv2</build_export_depend> -->
@@ -68,6 +71,11 @@
<exec_depend>sensor_msgs</exec_depend>
<exec_depend>std_msgs</exec_depend>
<exec_depend>audio_common_msgs</exec_depend>
<exec_depend>gstreamer1.0</exec_depend>
<exec_depend>gstreamer1.0-plugins-base</exec_depend>
<exec_depend>gstreamer1.0-plugins-good</exec_depend>
<exec_depend>gstreamer1.0-plugins-ugly</exec_depend>
<!-- The export tag contains other, unspecified, tags -->
<export>

vz_ros_wrappers/scripts/audio_capture.cpp (+236, -0)

@@ -0,0 +1,236 @@
#include <stdio.h>
#include <gst/gst.h>
#include <gst/app/gstappsink.h>
#include <boost/thread.hpp>
#include <ros/ros.h>
#include "audio_common_msgs/AudioData.h"
#include "audio_common_msgs/AudioInfo.h"
namespace audio_transport
{
class RosGstCapture
{
public:
RosGstCapture()
{
_bitrate = 192;
std::string dst_type;
// Whether to encode the audio as mp3 or publish raw wave data
ros::param::param<std::string>("~format", _format, "mp3");
ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");
// The bitrate at which to encode the audio
ros::param::param<int>("~bitrate", _bitrate, 192);
// only available for raw data
ros::param::param<int>("~channels", _channels, 1);
ros::param::param<int>("~depth", _depth, 16);
ros::param::param<int>("~sample_rate", _sample_rate, 16000);
// The destination of the audio
ros::param::param<std::string>("~dst", dst_type, "appsink");
// The source of the audio
//ros::param::param<std::string>("~src", source_type, "alsasrc");
std::string device;
ros::param::param<std::string>("~device", device, "");
_pub = _nh.advertise<audio_common_msgs::AudioData>("audio", 10, true);
_pub_info = _nh.advertise<audio_common_msgs::AudioInfo>("audio_info", 1, true);
_loop = g_main_loop_new(NULL, false);
_pipeline = gst_pipeline_new("ros_pipeline");
_bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
gst_bus_add_signal_watch(_bus);
g_signal_connect(_bus, "message::error",
G_CALLBACK(onMessage), this);
g_object_unref(_bus);
// We create the sink first, just for convenience
if (dst_type == "appsink")
{
_sink = gst_element_factory_make("appsink", "sink");
g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
g_signal_connect( G_OBJECT(_sink), "new-sample",
G_CALLBACK(onNewBuffer), this);
}
else
{
ROS_INFO("file sink to %s", dst_type.c_str());
_sink = gst_element_factory_make("filesink", "sink");
g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
}
_source = gst_element_factory_make("alsasrc", "source");
// If no device is specified, the ALSA default source is used.
// A valid device is of the form hw:0,0, with other card and device
// numbers as available.
if (device != "")
{
// gchar *gst_device = device.c_str();
g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
}
GstCaps *caps;
caps = gst_caps_new_simple("audio/x-raw",
"format", G_TYPE_STRING, _sample_format.c_str(),
"channels", G_TYPE_INT, _channels,
"width", G_TYPE_INT, _depth,
"depth", G_TYPE_INT, _depth,
"rate", G_TYPE_INT, _sample_rate,
"signed", G_TYPE_BOOLEAN, TRUE,
NULL);
gboolean link_ok;
if (_format == "mp3"){
_filter = gst_element_factory_make("capsfilter", "filter");
g_object_set( G_OBJECT(_filter), "caps", caps, NULL);
gst_caps_unref(caps);
_convert = gst_element_factory_make("audioconvert", "convert");
if (!_convert) {
ROS_ERROR_STREAM("Failed to create audioconvert element");
exitOnMainThread(1);
}
_encode = gst_element_factory_make("lamemp3enc", "encoder");
if (!_encode) {
ROS_ERROR_STREAM("Failed to create encoder element");
exitOnMainThread(1);
}
g_object_set( G_OBJECT(_encode), "target", 1, NULL);
g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL);
gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
} else if (_format == "wave") {
if (dst_type == "appsink") {
g_object_set( G_OBJECT(_sink), "caps", caps, NULL);
gst_caps_unref(caps);
gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
link_ok = gst_element_link_many( _source, _sink, NULL);
} else {
_filter = gst_element_factory_make("wavenc", "filter");
gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL);
link_ok = gst_element_link_many( _source, _filter, _sink, NULL);
}
} else {
ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
exitOnMainThread(1);
}
/*}
else
{
_sleep_time = 10000;
_source = gst_element_factory_make("filesrc", "source");
g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
gst_element_link_many(_source, _sink, NULL);
}
*/
if (!link_ok) {
ROS_ERROR_STREAM("Unsupported media type.");
exitOnMainThread(1);
}
gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);
_gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) );
audio_common_msgs::AudioInfo info_msg;
info_msg.channels = _channels;
info_msg.sample_rate = _sample_rate;
info_msg.sample_format = _sample_format;
info_msg.bitrate = _bitrate;
info_msg.coding_format = _format;
_pub_info.publish(info_msg);
}
~RosGstCapture()
{
g_main_loop_quit(_loop);
gst_element_set_state(_pipeline, GST_STATE_NULL);
gst_object_unref(_pipeline);
g_main_loop_unref(_loop);
}
void exitOnMainThread(int code)
{
exit(code);
}
void publish( const audio_common_msgs::AudioData &msg )
{
_pub.publish(msg);
}
static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData)
{
RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
GstMapInfo map;
GstSample *sample;
g_signal_emit_by_name(appsink, "pull-sample", &sample);
GstBuffer *buffer = gst_sample_get_buffer(sample);
audio_common_msgs::AudioData msg;
gst_buffer_map(buffer, &map, GST_MAP_READ);
msg.data.resize( map.size );
memcpy( &msg.data[0], map.data, map.size );
gst_buffer_unmap(buffer, &map);
gst_sample_unref(sample);
server->publish(msg);
return GST_FLOW_OK;
}
static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData)
{
RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
GError *err;
gchar *debug;
gst_message_parse_error(message, &err, &debug);
ROS_ERROR_STREAM("gstreamer: " << err->message);
g_error_free(err);
g_free(debug);
g_main_loop_quit(server->_loop);
server->exitOnMainThread(1);
return FALSE;
}
private:
ros::NodeHandle _nh;
ros::Publisher _pub;
ros::Publisher _pub_info;
boost::thread _gst_thread;
GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
GstBus *_bus;
int _bitrate, _channels, _depth, _sample_rate;
GMainLoop *_loop;
std::string _format, _sample_format;
};
}
int main (int argc, char **argv)
{
ros::init(argc, argv, "audio_capture");
gst_init(&argc, &argv);
audio_transport::RosGstCapture server;
ros::spin();
}
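
For reference, the constructor above assembles one of two GStreamer pipelines: with format "mp3" it is alsasrc ! capsfilter ! audioconvert ! lamemp3enc ! appsink, and with format "wave" plus the default appsink destination it is alsasrc ! appsink with the raw caps applied directly to the sink. Every appsink buffer is copied into one AudioData message and published on /audio. Because an mp3 stream is just a sequence of frames, a subscriber can archive the mp3 output by simply appending each payload to a file; a rough sketch (the capture.mp3 filename is arbitrary):

#!/usr/bin/env python3
# Sketch: append mp3-encoded AudioData payloads from /audio to a file.
# Assumes audio_capture is running with format "mp3".
import rospy
from audio_common_msgs.msg import AudioData

def main():
    rospy.init_node('audio_dump')
    out = open('capture.mp3', 'wb')

    def on_audio(msg):
        out.write(bytes(msg.data))  # each message carries one appsink buffer

    rospy.Subscriber('/audio', AudioData, on_audio)
    rospy.on_shutdown(out.close)
    rospy.spin()

if __name__ == '__main__':
    main()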

vz_ros_wrappers/scripts/unit8_to_wav.py (+37, -0)

@@ -0,0 +1,37 @@
#! /usr/bin/python3
# Buffers the uint8[] payloads from /audio and writes them out as a .wav file on
# shutdown. Assumes the capture node publishes raw S16LE wave data (not mp3).
import rospy
import numpy as np
from scipy.io import wavfile
from audio_common_msgs.msg import AudioData


class D435i:
    def __init__(self):
        # 1) Subscribe to the /audio topic and accumulate the uint8[] data.
        self.raw_audio_sub = rospy.Subscriber('/audio', AudioData, self.raw_callback)
        self.byte_buffer = bytearray()
        self.msg_count = 0
        # Assumed to match the capture node's settings (16000 Hz in audio.launch).
        self.sample_rate = rospy.get_param('~sample_rate', 16000)

    def raw_callback(self, msg):
        self.byte_buffer.extend(msg.data)
        self.msg_count += 1

    def save_wav(self, path='audio_dump.wav'):
        # 2) Check you're "decoding" the audio ROS message correctly by saving it to
        #    a .wav file: interpret the bytes as signed 16-bit little-endian samples.
        samples = np.frombuffer(bytes(self.byte_buffer), dtype=np.int16)
        wavfile.write(path, self.sample_rate, samples)
        rospy.loginfo('Wrote %d samples from %d messages to %s',
                      samples.size, self.msg_count, path)

    # 3) Be curious. Listen to the .wav file and see if it sounds right, then maybe
    #    twiddle about with the encoding in audio_capture.cpp (e.g. mp3 instead) and
    #    see if that changes anything.
    # 4) If you've made it this far, well done. Try to find Utku's function to pass
    #    the np array to.
    def run(self):
        rospy.spin()


if __name__ == '__main__':
    try:
        rospy.init_node('audio_converter', anonymous=True)
        converter = D435i()
        rospy.on_shutdown(converter.save_wav)
        converter.run()
    except rospy.ROSInterruptException:
        print('Audio converter node failed!')
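
A minimal way to exercise the script above, assuming it has been marked executable and the capture node is publishing raw wave data (see the note under audio.launch): rosrun vz_ros_wrappers unit8_to_wav.py, then stop it with Ctrl-C so the buffered audio is written out on shutdown.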
