
Lisa conquering audio one wav at a time

pull/65/head
Hongyu Li, 2 years ago
commit 3e73174995
9 changed files with 369 additions and 12 deletions:

  1. vz_ros_wrappers/CMakeLists.txt (+30, -12)
  2. vz_ros_wrappers/audio_common_msgs/CMakeLists.txt (+12, -0)
  3. vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg (+1, -0)
  4. vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg (+12, -0)
  5. vz_ros_wrappers/audio_common_msgs/package.xml (+20, -0)
  6. vz_ros_wrappers/launch/audio.launch (+13, -0)
  7. vz_ros_wrappers/package.xml (+8, -0)
  8. vz_ros_wrappers/scripts/audio_capture.cpp (+236, -0)
  9. vz_ros_wrappers/scripts/unit8_to_wav.py (+37, -0)

vz_ros_wrappers/CMakeLists.txt (+30, -12)

@@ -12,12 +12,16 @@ find_package(catkin REQUIRED COMPONENTS
# opencv2 # ananya this does not compile
roscpp
rospy
audio_common_msgs
sensor_msgs
std_msgs
)
find_package(PkgConfig)
pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED)
## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
find_package(Boost REQUIRED COMPONENTS thread)
## Uncomment this if the package has a setup.py. This macro ensures
@@ -121,6 +125,8 @@ catkin_package(
include_directories(
# include
${catkin_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
${GST1.0_INCLUDE_DIRS}
)
## Declare a C++ library
@@ -136,7 +142,8 @@ include_directories(
## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/vz_ros_wrappers_node.cpp)
add_executable(audio_capture scripts/audio_capture.cpp)
add_executable(audio_play scripts/audio_play.cpp)
## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
@@ -146,12 +153,20 @@ include_directories(
## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
add_dependencies(audio_capture ${catkin_EXPORTED_TARGETS})
add_dependencies(audio_play ${catkin_EXPORTED_TARGETS})
## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
# ${catkin_LIBRARIES}
# )
target_link_libraries(audio_capture
${catkin_LIBRARIES}
${GST1.0_LIBRARIES}
${Boost_LIBRARIES}
)
target_link_libraries(audio_play
${catkin_LIBRARIES}
${GST1.0_LIBRARIES}
${Boost_LIBRARIES}
)
#############
## Install ##
@@ -169,9 +184,12 @@ include_directories(
## Mark executables for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
# install(TARGETS ${PROJECT_NAME}_node
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
install(TARGETS audio_capture
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
install(TARGETS audio_play
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
## Mark libraries for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
@@ -182,11 +200,11 @@ include_directories(
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
install(DIRECTORY launch
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
)
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
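
A note on the CMake changes above: pkg_check_modules(GST1.0 gstreamer-1.0 REQUIRED) resolves the GStreamer 1.0 development files through pkg-config and, because the chosen prefix is GST1.0, defines the GST1.0_INCLUDE_DIRS and GST1.0_LIBRARIES variables that the new include_directories and target_link_libraries entries consume. Boost is required with the thread component because the capture node runs the GStreamer main loop on a boost::thread.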

vz_ros_wrappers/audio_common_msgs/CMakeLists.txt (+12, -0)

@@ -0,0 +1,12 @@
cmake_minimum_required(VERSION 2.8.3)
project(audio_common_msgs)
find_package(catkin REQUIRED COMPONENTS message_generation)
add_message_files(DIRECTORY msg FILES
AudioData.msg
AudioInfo.msg
)
generate_messages()
catkin_package(CATKIN_DEPENDS message_runtime)

vz_ros_wrappers/audio_common_msgs/msg/AudioData.msg (+1, -0)

@@ -0,0 +1 @@
uint8[] data

vz_ros_wrappers/audio_common_msgs/msg/AudioInfo.msg (+12, -0)

@@ -0,0 +1,12 @@
# This message contains the audio meta data
# Number of channels
uint8 channels
# Sampling rate [Hz]
uint32 sample_rate
# Audio format (e.g. S16LE)
string sample_format
# Amount of audio data per second [bits/s]
uint32 bitrate
# Audio coding format (e.g. WAVE, MP3)
string coding_format
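
These two messages are meant to be read together: AudioData carries an opaque byte payload and AudioInfo describes how to interpret it. Below is a minimal subscriber sketch, assuming the capture node is configured for raw "wave" output in S16LE format; the /audio and /audio_info topic names come from audio_capture.cpp further down, while the node name and everything else here is illustrative only.

#!/usr/bin/env python3
# Sketch: interpret AudioData bytes using the metadata advertised on /audio_info.
import numpy as np
import rospy
from audio_common_msgs.msg import AudioData, AudioInfo

def main():
    rospy.init_node('audio_probe')
    # Grab one metadata message so we know how to decode the byte stream.
    info = rospy.wait_for_message('/audio_info', AudioInfo)

    def on_audio(msg):
        if info.coding_format != 'wave' or info.sample_format != 'S16LE':
            rospy.logwarn_once('payload is %s/%s, not raw S16LE; skipping',
                               info.coding_format, info.sample_format)
            return
        # Signed 16-bit little-endian samples, interleaved by channel.
        samples = np.frombuffer(bytes(msg.data), dtype='<i2')
        if info.channels > 1:
            samples = samples.reshape(-1, info.channels)
        rospy.loginfo('%d samples at %d Hz', len(samples), info.sample_rate)

    rospy.Subscriber('/audio', AudioData, on_audio)
    rospy.spin()

if __name__ == '__main__':
    main()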

vz_ros_wrappers/audio_common_msgs/package.xml (+20, -0)

@@ -0,0 +1,20 @@
<package>
<name>audio_common_msgs</name>
<version>0.3.12</version>
<description>
Messages for transmitting audio via ROS
</description>
<maintainer email="namniart@gmail.com">Austin Hendrix</maintainer>
<maintainer email="shingogo.5511@gmail.com">Shingo Kitagawa</maintainer>
<author>Nate Koenig</author>
<license>BSD</license>
<url type="website">http://ros.org/wiki/audio_common_msgs</url>
<url type="repository">https://github.com/ros-drivers/audio_common</url>
<url type="bugtracker">https://github.com/ros-drivers/audio_common/issues</url>
<buildtool_depend>catkin</buildtool_depend>
<build_depend>message_generation</build_depend>
<run_depend>message_runtime</run_depend>
</package>

vz_ros_wrappers/launch/audio.launch (+13, -0)

@@ -0,0 +1,13 @@
<launch>
  <arg name="device" default="" />
  <!-- Capture audio from the ALSA device and publish it on /audio -->
  <node name="audio_capture" pkg="vz_ros_wrappers" type="audio_capture" output="screen">
    <param name="format" value="mp3" />
    <param name="channels" value="1" />
    <param name="depth" value="16" />
    <param name="sample_rate" value="16000" />
    <param name="device" value="$(arg device)" />
  </node>
</launch>
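
As configured above, the capture node encodes to mp3, so the bytes published on /audio are mp3 frames rather than raw PCM; for the uint8-to-wav conversion script added later in this commit, the format parameter would need to be set to "wave" instead. The device argument is passed straight through to alsasrc, so a launch such as roslaunch vz_ros_wrappers audio.launch device:=hw:1,0 (the hw:1,0 value is only an example) selects a specific capture card.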

vz_ros_wrappers/package.xml (+8, -0)

@@ -52,8 +52,11 @@
<build_depend>cv_bridge</build_depend>
<!-- <build_depend>opencv2</build_depend> -->
<build_depend>roscpp</build_depend>
<build_depend>audio_common_msgs</build_depend>
<build_depend>rospy</build_depend>
<build_depend>sensor_msgs</build_depend>
<build_depend>libgstreamer1.0-dev</build_depend>
<build_depend>libgstreamer-plugins-base1.0-dev</build_depend>
<build_depend>std_msgs</build_depend>
<build_export_depend>cv_bridge</build_export_depend>
<!-- <build_export_depend>opencv2</build_export_depend> -->
@@ -68,6 +71,11 @@
<exec_depend>sensor_msgs</exec_depend>
<exec_depend>std_msgs</exec_depend>
<exec_depend>audio_common_msgs</exec_depend>
<exec_depend>gstreamer1.0</exec_depend>
<exec_depend>gstreamer1.0-plugins-base</exec_depend>
<exec_depend>gstreamer1.0-plugins-good</exec_depend>
<exec_depend>gstreamer1.0-plugins-ugly</exec_depend>
<!-- The export tag contains other, unspecified, tags -->
<export>

vz_ros_wrappers/scripts/audio_capture.cpp (+236, -0)

@@ -0,0 +1,236 @@
#include <stdio.h>
#include <gst/gst.h>
#include <gst/app/gstappsink.h>
#include <boost/thread.hpp>
#include <ros/ros.h>
#include "audio_common_msgs/AudioData.h"
#include "audio_common_msgs/AudioInfo.h"
namespace audio_transport
{
class RosGstCapture
{
public:
RosGstCapture()
{
_bitrate = 192;
std::string dst_type;
// Whether to encode the audio as mp3 or publish raw wave data
ros::param::param<std::string>("~format", _format, "mp3");
ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");
// The bitrate at which to encode the audio
ros::param::param<int>("~bitrate", _bitrate, 192);
// only available for raw data
ros::param::param<int>("~channels", _channels, 1);
ros::param::param<int>("~depth", _depth, 16);
ros::param::param<int>("~sample_rate", _sample_rate, 16000);
// The destination of the audio
ros::param::param<std::string>("~dst", dst_type, "appsink");
// The source of the audio
//ros::param::param<std::string>("~src", source_type, "alsasrc");
std::string device;
ros::param::param<std::string>("~device", device, "");
_pub = _nh.advertise<audio_common_msgs::AudioData>("audio", 10, true);
_pub_info = _nh.advertise<audio_common_msgs::AudioInfo>("audio_info", 1, true);
_loop = g_main_loop_new(NULL, false);
_pipeline = gst_pipeline_new("ros_pipeline");
_bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
gst_bus_add_signal_watch(_bus);
g_signal_connect(_bus, "message::error",
G_CALLBACK(onMessage), this);
g_object_unref(_bus);
// We create the sink first, just for convenience
if (dst_type == "appsink")
{
_sink = gst_element_factory_make("appsink", "sink");
g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
g_signal_connect( G_OBJECT(_sink), "new-sample",
G_CALLBACK(onNewBuffer), this);
}
else
{
ROS_INFO("file sink to %s", dst_type.c_str());
_sink = gst_element_factory_make("filesink", "sink");
g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
}
_source = gst_element_factory_make("alsasrc", "source");
// If no device is specified, the ALSA default source is used.
// A valid device is of the form hw:0,0, with other card and device
// numbers as available.
if (device != "")
{
// gchar *gst_device = device.c_str();
g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
}
GstCaps *caps;
caps = gst_caps_new_simple("audio/x-raw",
"format", G_TYPE_STRING, _sample_format.c_str(),
"channels", G_TYPE_INT, _channels,
"width", G_TYPE_INT, _depth,
"depth", G_TYPE_INT, _depth,
"rate", G_TYPE_INT, _sample_rate,
"signed", G_TYPE_BOOLEAN, TRUE,
NULL);
gboolean link_ok;
if (_format == "mp3"){
_filter = gst_element_factory_make("capsfilter", "filter");
g_object_set( G_OBJECT(_filter), "caps", caps, NULL);
gst_caps_unref(caps);
_convert = gst_element_factory_make("audioconvert", "convert");
if (!_convert) {
ROS_ERROR_STREAM("Failed to create audioconvert element");
exitOnMainThread(1);
}
_encode = gst_element_factory_make("lamemp3enc", "encoder");
if (!_encode) {
ROS_ERROR_STREAM("Failed to create encoder element");
exitOnMainThread(1);
}
g_object_set( G_OBJECT(_encode), "target", 1, NULL);
g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL);
gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
} else if (_format == "wave") {
if (dst_type == "appsink") {
g_object_set( G_OBJECT(_sink), "caps", caps, NULL);
gst_caps_unref(caps);
gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
link_ok = gst_element_link_many( _source, _sink, NULL);
} else {
_filter = gst_element_factory_make("wavenc", "filter");
gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL);
link_ok = gst_element_link_many( _source, _filter, _sink, NULL);
}
} else {
ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
exitOnMainThread(1);
}
/*}
else
{
_sleep_time = 10000;
_source = gst_element_factory_make("filesrc", "source");
g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
gst_element_link_many(_source, _sink, NULL);
}
*/
if (!link_ok) {
ROS_ERROR_STREAM("Unsupported media type.");
exitOnMainThread(1);
}
gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);
_gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) );
audio_common_msgs::AudioInfo info_msg;
info_msg.channels = _channels;
info_msg.sample_rate = _sample_rate;
info_msg.sample_format = _sample_format;
info_msg.bitrate = _bitrate;
info_msg.coding_format = _format;
_pub_info.publish(info_msg);
}
~RosGstCapture()
{
g_main_loop_quit(_loop);
gst_element_set_state(_pipeline, GST_STATE_NULL);
gst_object_unref(_pipeline);
g_main_loop_unref(_loop);
}
void exitOnMainThread(int code)
{
exit(code);
}
void publish( const audio_common_msgs::AudioData &msg )
{
_pub.publish(msg);
}
static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData)
{
RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
GstMapInfo map;
GstSample *sample;
g_signal_emit_by_name(appsink, "pull-sample", &sample);
GstBuffer *buffer = gst_sample_get_buffer(sample);
audio_common_msgs::AudioData msg;
gst_buffer_map(buffer, &map, GST_MAP_READ);
msg.data.resize( map.size );
memcpy( &msg.data[0], map.data, map.size );
gst_buffer_unmap(buffer, &map);
gst_sample_unref(sample);
server->publish(msg);
return GST_FLOW_OK;
}
static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData)
{
RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
GError *err;
gchar *debug;
gst_message_parse_error(message, &err, &debug);
ROS_ERROR_STREAM("gstreamer: " << err->message);
g_error_free(err);
g_free(debug);
g_main_loop_quit(server->_loop);
server->exitOnMainThread(1);
return FALSE;
}
private:
ros::NodeHandle _nh;
ros::Publisher _pub;
ros::Publisher _pub_info;
boost::thread _gst_thread;
GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
GstBus *_bus;
int _bitrate, _channels, _depth, _sample_rate;
GMainLoop *_loop;
std::string _format, _sample_format;
};
}
int main (int argc, char **argv)
{
ros::init(argc, argv, "audio_capture");
gst_init(&argc, &argv);
audio_transport::RosGstCapture server;
ros::spin();
}
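
For reference, the constructor above assembles one of two GStreamer pipelines: with format "mp3" it is alsasrc ! capsfilter ! audioconvert ! lamemp3enc ! appsink, and with format "wave" plus the default appsink destination it is alsasrc ! appsink with the raw caps applied directly to the sink. Every appsink buffer is copied into one AudioData message and published on /audio. Because an mp3 stream is just a sequence of frames, a subscriber can archive the mp3 output by simply appending each payload to a file; a rough sketch (the capture.mp3 filename is arbitrary):

#!/usr/bin/env python3
# Sketch: append mp3-encoded AudioData payloads from /audio to a file.
# Assumes audio_capture is running with format "mp3".
import rospy
from audio_common_msgs.msg import AudioData

def main():
    rospy.init_node('audio_dump')
    out = open('capture.mp3', 'wb')

    def on_audio(msg):
        out.write(bytes(msg.data))  # each message carries one appsink buffer

    rospy.Subscriber('/audio', AudioData, on_audio)
    rospy.on_shutdown(out.close)
    rospy.spin()

if __name__ == '__main__':
    main()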

vz_ros_wrappers/scripts/unit8_to_wav.py (+37, -0)

@@ -0,0 +1,37 @@
#! /usr/bin/python3
# Buffers the uint8[] payloads from /audio and writes them out as a .wav file on
# shutdown. Assumes the capture node publishes raw S16LE wave data (not mp3).
import rospy
import numpy as np
from scipy.io import wavfile
from audio_common_msgs.msg import AudioData


class D435i:
    def __init__(self):
        # 1) Subscribe to the /audio topic and accumulate the uint8[] data.
        self.raw_audio_sub = rospy.Subscriber('/audio', AudioData, self.raw_callback)
        self.byte_buffer = bytearray()
        self.msg_count = 0
        # Assumed to match the capture node's settings (16000 Hz in audio.launch).
        self.sample_rate = rospy.get_param('~sample_rate', 16000)

    def raw_callback(self, msg):
        self.byte_buffer.extend(msg.data)
        self.msg_count += 1

    def save_wav(self, path='audio_dump.wav'):
        # 2) Check you're "decoding" the audio ROS message correctly by saving it to
        #    a .wav file: interpret the bytes as signed 16-bit little-endian samples.
        samples = np.frombuffer(bytes(self.byte_buffer), dtype=np.int16)
        wavfile.write(path, self.sample_rate, samples)
        rospy.loginfo('Wrote %d samples from %d messages to %s',
                      samples.size, self.msg_count, path)

    # 3) Be curious. Listen to the .wav file and see if it sounds right, then maybe
    #    twiddle about with the encoding in audio_capture.cpp (e.g. mp3 instead) and
    #    see if that changes anything.
    # 4) If you've made it this far, well done. Try to find Utku's function to pass
    #    the np array to.
    def run(self):
        rospy.spin()


if __name__ == '__main__':
    try:
        rospy.init_node('audio_converter', anonymous=True)
        converter = D435i()
        rospy.on_shutdown(converter.save_wav)
        converter.run()
    except rospy.ROSInterruptException:
        print('Audio converter node failed!')
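
A minimal way to exercise the script above, assuming it has been marked executable and the capture node is publishing raw wave data (see the note under audio.launch): rosrun vz_ros_wrappers unit8_to_wav.py, then stop it with Ctrl-C so the buffered audio is written out on shutdown.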
