From 3e731749953520fb3cb306108b8333c923ed3d2a Mon Sep 17 00:00:00 2001 From: Hongyu Li Date: Mon, 14 Mar 2022 17:55:28 -0400 Subject: [PATCH] Lisa conquering audio one wav at a time --- vz_ros_wrappers/CMakeLists.txt | 42 +++- .../audio_common_msgs/CMakeLists.txt | 12 + .../audio_common_msgs/msg/AudioData.msg | 1 + .../audio_common_msgs/msg/AudioInfo.msg | 12 + vz_ros_wrappers/audio_common_msgs/package.xml | 20 ++ vz_ros_wrappers/launch/audio.launch | 13 + vz_ros_wrappers/package.xml | 8 + vz_ros_wrappers/scripts/audio_capture.cpp | 236 ++++++++++++++++++ vz_ros_wrappers/scripts/unit8_to_wav.py | 37 +++ 9 files changed, 369 insertions(+), 12 deletions(-) create mode 100644 vz_ros_wrappers/audio_common_msgs/CMakeLists.txt create mode 100644 vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg create mode 100644 vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg create mode 100644 vz_ros_wrappers/audio_common_msgs/package.xml create mode 100644 vz_ros_wrappers/launch/audio.launch create mode 100644 vz_ros_wrappers/scripts/audio_capture.cpp create mode 100644 vz_ros_wrappers/scripts/unit8_to_wav.py diff --git a/vz_ros_wrappers/CMakeLists.txt b/vz_ros_wrappers/CMakeLists.txt index fc1766e..980fad1 100644 --- a/vz_ros_wrappers/CMakeLists.txt +++ b/vz_ros_wrappers/CMakeLists.txt @@ -12,12 +12,16 @@ find_package(catkin REQUIRED COMPONENTS # opencv2 # ananya this does not compile roscpp rospy + audio_common_msgs sensor_msgs std_msgs ) +find_package(PkgConfig) +pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED) + ## System dependencies are found with CMake's conventions -# find_package(Boost REQUIRED COMPONENTS system) +find_package(Boost REQUIRED COMPONENTS thread) ## Uncomment this if the package has a setup.py. This macro ensures @@ -121,6 +125,8 @@ catkin_package( include_directories( # include ${catkin_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} + ${GST1.0_INCLUDE_DIRS} ) ## Declare a C++ library @@ -136,7 +142,8 @@ include_directories( ## Declare a C++ executable ## With catkin_make all packages are built within a single CMake context ## The recommended prefix ensures that target names across packages don't collide -# add_executable(${PROJECT_NAME}_node src/vz_ros_wrappers_node.cpp) +add_executable(audio_capture scripts/audio_capture.cpp) +add_executable(audio_play scripts/audio_play.cpp) ## Rename C++ executable without prefix ## The above recommended prefix causes long target names, the following renames the @@ -146,12 +153,20 @@ include_directories( ## Add cmake target dependencies of the executable ## same as for the library above -# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) +add_dependencies(audio_capture ${catkin_EXPORTED_TARGETS}) +add_dependencies(audio_play ${catkin_EXPORTED_TARGETS}) ## Specify libraries to link a library or executable target against -# target_link_libraries(${PROJECT_NAME}_node -# ${catkin_LIBRARIES} -# ) +target_link_libraries(audio_capture + ${catkin_LIBRARIES} + ${GST1.0_LIBRARIES} + ${Boost_LIBRARIES} +) +target_link_libraries(audio_play + ${catkin_LIBRARIES} + ${GST1.0_LIBRARIES} + ${Boost_LIBRARIES} +) ############# ## Install ## @@ -169,9 +184,12 @@ include_directories( ## Mark executables for installation ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html -# install(TARGETS ${PROJECT_NAME}_node -# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -# ) +install(TARGETS audio_capture + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) +install(TARGETS audio_play + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) ## Mark libraries for installation ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html @@ -182,11 +200,11 @@ include_directories( # ) ## Mark cpp header files for installation -# install(DIRECTORY include/${PROJECT_NAME}/ -# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +install(DIRECTORY launch + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} # FILES_MATCHING PATTERN "*.h" # PATTERN ".svn" EXCLUDE -# ) +) ## Mark other files for installation (e.g. launch and bag files, etc.) # install(FILES diff --git a/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt b/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt new file mode 100644 index 0000000..eb37935 --- /dev/null +++ b/vz_ros_wrappers/audio_common_msgs/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 2.8.3) + +project(audio_common_msgs) + +find_package(catkin REQUIRED COMPONENTS message_generation) +add_message_files(DIRECTORY msg FILES + AudioData.msg + AudioInfo.msg + ) +generate_messages() + +catkin_package(CATKIN_DEPENDS message_runtime) \ No newline at end of file diff --git a/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg b/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg new file mode 100644 index 0000000..1735052 --- /dev/null +++ b/vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg @@ -0,0 +1 @@ +uint8[] data \ No newline at end of file diff --git a/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg b/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg new file mode 100644 index 0000000..1e0a539 --- /dev/null +++ b/vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg @@ -0,0 +1,12 @@ +# This message contains the audio meta data + +# Number of channels +uint8 channels +# Sampling rate [Hz] +uint32 sample_rate +# Audio format (e.g. S16LE) +string sample_format +# Amount of audio data per second [bits/s] +uint32 bitrate +# Audio coding format (e.g. WAVE, MP3) +string coding_format \ No newline at end of file diff --git a/vz_ros_wrappers/audio_common_msgs/package.xml b/vz_ros_wrappers/audio_common_msgs/package.xml new file mode 100644 index 0000000..f20f24b --- /dev/null +++ b/vz_ros_wrappers/audio_common_msgs/package.xml @@ -0,0 +1,20 @@ + + audio_common_msgs + 0.3.12 + + Messages for transmitting audio via ROS + + Austin Hendrix + Shingo Kitagawa + Nate Koenig + BSD + http://ros.org/wiki/audio_common_msgs + https://github.com/ros-drivers/audio_common + https://github.com/ros-drivers/audio_common/issues + + catkin + + message_generation + + message_runtime + \ No newline at end of file diff --git a/vz_ros_wrappers/launch/audio.launch b/vz_ros_wrappers/launch/audio.launch new file mode 100644 index 0000000..3bf24f7 --- /dev/null +++ b/vz_ros_wrappers/launch/audio.launch @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/vz_ros_wrappers/package.xml b/vz_ros_wrappers/package.xml index 450ab2e..795a7b5 100644 --- a/vz_ros_wrappers/package.xml +++ b/vz_ros_wrappers/package.xml @@ -52,8 +52,11 @@ cv_bridge roscpp + audio_common_msgs rospy sensor_msgs + libgstreamer1.0-dev + libgstreamer-plugins-base1.0-dev std_msgs cv_bridge @@ -68,6 +71,11 @@ sensor_msgs std_msgs + audio_common_msgs + gstreamer1.0 + gstreamer1.0-plugins-base + gstreamer1.0-plugins-good + gstreamer1.0-plugins-ugly diff --git a/vz_ros_wrappers/scripts/audio_capture.cpp b/vz_ros_wrappers/scripts/audio_capture.cpp new file mode 100644 index 0000000..354dca3 --- /dev/null +++ b/vz_ros_wrappers/scripts/audio_capture.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include + +#include + +#include "audio_common_msgs/AudioData.h" +#include "audio_common_msgs/AudioInfo.h" + +namespace audio_transport +{ + class RosGstCapture + { + public: + RosGstCapture() + { + _bitrate = 192; + + std::string dst_type; + + // Need to encoding or publish raw wave data + ros::param::param("~format", _format, "mp3"); + ros::param::param("~sample_format", _sample_format, "S16LE"); + + // The bitrate at which to encode the audio + ros::param::param("~bitrate", _bitrate, 192); + + // only available for raw data + ros::param::param("~channels", _channels, 1); + ros::param::param("~depth", _depth, 16); + ros::param::param("~sample_rate", _sample_rate, 16000); + + // The destination of the audio + ros::param::param("~dst", dst_type, "appsink"); + + // The source of the audio + //ros::param::param("~src", source_type, "alsasrc"); + std::string device; + ros::param::param("~device", device, ""); + + _pub = _nh.advertise("audio", 10, true); + _pub_info = _nh.advertise("audio_info", 1, true); + + _loop = g_main_loop_new(NULL, false); + _pipeline = gst_pipeline_new("ros_pipeline"); + _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline)); + gst_bus_add_signal_watch(_bus); + g_signal_connect(_bus, "message::error", + G_CALLBACK(onMessage), this); + g_object_unref(_bus); + + // We create the sink first, just for convenience + if (dst_type == "appsink") + { + _sink = gst_element_factory_make("appsink", "sink"); + g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL); + g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL); + g_signal_connect( G_OBJECT(_sink), "new-sample", + G_CALLBACK(onNewBuffer), this); + } + else + { + ROS_INFO("file sink to %s", dst_type.c_str()); + _sink = gst_element_factory_make("filesink", "sink"); + g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL); + } + + _source = gst_element_factory_make("alsasrc", "source"); + // if device isn't specified, it will use the default which is + // the alsa default source. + // A valid device will be of the foram hw:0,0 with other numbers + // than 0 and 0 as are available. + if (device != "") + { + // ghcar *gst_device = device.c_str(); + g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL); + } + + GstCaps *caps; + caps = gst_caps_new_simple("audio/x-raw", + "format", G_TYPE_STRING, _sample_format.c_str(), + "channels", G_TYPE_INT, _channels, + "width", G_TYPE_INT, _depth, + "depth", G_TYPE_INT, _depth, + "rate", G_TYPE_INT, _sample_rate, + "signed", G_TYPE_BOOLEAN, TRUE, + NULL); + + gboolean link_ok; + if (_format == "mp3"){ + _filter = gst_element_factory_make("capsfilter", "filter"); + g_object_set( G_OBJECT(_filter), "caps", caps, NULL); + gst_caps_unref(caps); + + _convert = gst_element_factory_make("audioconvert", "convert"); + if (!_convert) { + ROS_ERROR_STREAM("Failed to create audioconvert element"); + exitOnMainThread(1); + } + + _encode = gst_element_factory_make("lamemp3enc", "encoder"); + if (!_encode) { + ROS_ERROR_STREAM("Failed to create encoder element"); + exitOnMainThread(1); + } + g_object_set( G_OBJECT(_encode), "target", 1, NULL); + g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL); + + gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL); + link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL); + } else if (_format == "wave") { + if (dst_type == "appsink") { + g_object_set( G_OBJECT(_sink), "caps", caps, NULL); + gst_caps_unref(caps); + gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL); + link_ok = gst_element_link_many( _source, _sink, NULL); + } else { + _filter = gst_element_factory_make("wavenc", "filter"); + gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL); + link_ok = gst_element_link_many( _source, _filter, _sink, NULL); + } + } else { + ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\""); + exitOnMainThread(1); + } + /*} + else + { + _sleep_time = 10000; + _source = gst_element_factory_make("filesrc", "source"); + g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL); + + gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL); + gst_element_link_many(_source, _sink, NULL); + } + */ + + if (!link_ok) { + ROS_ERROR_STREAM("Unsupported media type."); + exitOnMainThread(1); + } + + gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING); + + _gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) ); + + audio_common_msgs::AudioInfo info_msg; + info_msg.channels = _channels; + info_msg.sample_rate = _sample_rate; + info_msg.sample_format = _sample_format; + info_msg.bitrate = _bitrate; + info_msg.coding_format = _format; + _pub_info.publish(info_msg); + } + + ~RosGstCapture() + { + g_main_loop_quit(_loop); + gst_element_set_state(_pipeline, GST_STATE_NULL); + gst_object_unref(_pipeline); + g_main_loop_unref(_loop); + } + + void exitOnMainThread(int code) + { + exit(code); + } + + void publish( const audio_common_msgs::AudioData &msg ) + { + _pub.publish(msg); + } + + static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData) + { + RosGstCapture *server = reinterpret_cast(userData); + GstMapInfo map; + + GstSample *sample; + g_signal_emit_by_name(appsink, "pull-sample", &sample); + + GstBuffer *buffer = gst_sample_get_buffer(sample); + + audio_common_msgs::AudioData msg; + gst_buffer_map(buffer, &map, GST_MAP_READ); + msg.data.resize( map.size ); + + memcpy( &msg.data[0], map.data, map.size ); + + gst_buffer_unmap(buffer, &map); + gst_sample_unref(sample); + + server->publish(msg); + + return GST_FLOW_OK; + } + + static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData) + { + RosGstCapture *server = reinterpret_cast(userData); + GError *err; + gchar *debug; + + gst_message_parse_error(message, &err, &debug); + ROS_ERROR_STREAM("gstreamer: " << err->message); + g_error_free(err); + g_free(debug); + g_main_loop_quit(server->_loop); + server->exitOnMainThread(1); + return FALSE; + } + + private: + ros::NodeHandle _nh; + ros::Publisher _pub; + ros::Publisher _pub_info; + + boost::thread _gst_thread; + + GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode; + GstBus *_bus; + int _bitrate, _channels, _depth, _sample_rate; + GMainLoop *_loop; + std::string _format, _sample_format; + }; +} + +int main (int argc, char **argv) +{ + ros::init(argc, argv, "audio_capture"); + gst_init(&argc, &argv); + + audio_transport::RosGstCapture server; + ros::spin(); +} \ No newline at end of file diff --git a/vz_ros_wrappers/scripts/unit8_to_wav.py b/vz_ros_wrappers/scripts/unit8_to_wav.py new file mode 100644 index 0000000..3584abb --- /dev/null +++ b/vz_ros_wrappers/scripts/unit8_to_wav.py @@ -0,0 +1,37 @@ +#! /usr/bin/python3 + +import rospy +import numpy as np +from audio_common_msgs.msg import AudioData +from std_msgs.msg import String +from scipy.io.wavfile import wavf + +class D435i: + def __init__(self): + # 1) Write subscriber to /audio topic and populate a numpy data structure (array) with the uint8[] data + self.raw_audio_sub = rospy.Subscriber('/audio', AudioData, self.raw_callback) + + self.byteArray = np.array(1) + self.msg_count = 0 + + def raw_callback(self, bytes): + if (self.msg_count > 0): + self.byteArray.append(bytes) + else : + self.byteArray() + self.msg_count += 1 + +# 2) Check you're "Decoding" the audio ROS message correctly by saving to a .wav file +# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding on the audio_capture.cpp, seeing if that changes anything e.g. encoding with mp3 instead. +# 4) If you've made it this far, well done. Try find Utku's function to pass the np array. + + def run(self): + while not rospy.is_shutdown(): + pass + +if __name__ == '__main__': + try: + rospy.init_node('audio_converter', anonymous=True) + except rospy.ROSInterruptException: + print('Audio converter node failed!') + pass \ No newline at end of file