From fb487bb8a5ba0b8b1d58ad375b1b3153e4911f51 Mon Sep 17 00:00:00 2001 From: Hongyu Li Date: Tue, 22 Mar 2022 13:05:08 -0400 Subject: [PATCH] some audio changes --- .../launch/audio.launch | 13 - .../launch/audio_test.launch | 4 - .../launch/wav_audio.launch | 4 + .../scripts/audio_capture.cpp | 236 ------------------ .../scripts/ros_interface.py | 89 ------- ...etch_respeak_test2.py => stretch_audio.py} | 73 ++---- 6 files changed, 24 insertions(+), 395 deletions(-) delete mode 100644 vz_acoustic_scene_analysis/launch/audio.launch delete mode 100644 vz_acoustic_scene_analysis/launch/audio_test.launch create mode 100644 vz_acoustic_scene_analysis/launch/wav_audio.launch delete mode 100644 vz_acoustic_scene_analysis/scripts/audio_capture.cpp delete mode 100755 vz_acoustic_scene_analysis/scripts/ros_interface.py rename vz_acoustic_scene_analysis/scripts/{stretch_respeak_test2.py => stretch_audio.py} (85%) diff --git a/vz_acoustic_scene_analysis/launch/audio.launch b/vz_acoustic_scene_analysis/launch/audio.launch deleted file mode 100644 index 2912b46..0000000 --- a/vz_acoustic_scene_analysis/launch/audio.launch +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/vz_acoustic_scene_analysis/launch/audio_test.launch b/vz_acoustic_scene_analysis/launch/audio_test.launch deleted file mode 100644 index eab5a03..0000000 --- a/vz_acoustic_scene_analysis/launch/audio_test.launch +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/vz_acoustic_scene_analysis/launch/wav_audio.launch b/vz_acoustic_scene_analysis/launch/wav_audio.launch new file mode 100644 index 0000000..663bb49 --- /dev/null +++ b/vz_acoustic_scene_analysis/launch/wav_audio.launch @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp b/vz_acoustic_scene_analysis/scripts/audio_capture.cpp deleted file mode 100644 index 9e56263..0000000 --- a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include -#include -#include -#include - -#include - -#include "vz_acoustic_scene_analysis/MyAudioData.h" -#include "vz_acoustic_scene_analysis/MyAudioInfo.h" - -namespace audio_transport -{ - class RosGstCapture - { - public: - RosGstCapture() - { - _bitrate = 192; - - std::string dst_type; - - // Need to encoding or publish raw wave data - ros::param::param("~format", _format, "mp3"); - ros::param::param("~sample_format", _sample_format, "S16LE"); - - // The bitrate at which to encode the audio - ros::param::param("~bitrate", _bitrate, 192); - - // only available for raw data - ros::param::param("~channels", _channels, 1); - ros::param::param("~depth", _depth, 16); - ros::param::param("~sample_rate", _sample_rate, 16000); - - // The destination of the audio - ros::param::param("~dst", dst_type, "appsink"); - - // The source of the audio - //ros::param::param("~src", source_type, "alsasrc"); - std::string device; - ros::param::param("~device", device, ""); - - _pub = _nh.advertise("audio", 10, true); - _pub_info = _nh.advertise("audio_info", 1, true); - - _loop = g_main_loop_new(NULL, false); - _pipeline = gst_pipeline_new("ros_pipeline"); - _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline)); - gst_bus_add_signal_watch(_bus); - g_signal_connect(_bus, "message::error", - G_CALLBACK(onMessage), this); - g_object_unref(_bus); - - // We create the sink first, just for convenience - if (dst_type == "appsink") - { - _sink = gst_element_factory_make("appsink", "sink"); - g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL); - g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL); - g_signal_connect( G_OBJECT(_sink), "new-sample", - G_CALLBACK(onNewBuffer), this); - } - else - { - ROS_INFO("file sink to %s", dst_type.c_str()); - _sink = gst_element_factory_make("filesink", "sink"); - g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL); - } - - _source = gst_element_factory_make("alsasrc", "source"); - // if device isn't specified, it will use the default which is - // the alsa default source. - // A valid device will be of the foram hw:0,0 with other numbers - // than 0 and 0 as are available. - if (device != "") - { - // ghcar *gst_device = device.c_str(); - g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL); - } - - GstCaps *caps; - caps = gst_caps_new_simple("audio/x-raw", - "format", G_TYPE_STRING, _sample_format.c_str(), - "channels", G_TYPE_INT, _channels, - "width", G_TYPE_INT, _depth, - "depth", G_TYPE_INT, _depth, - "rate", G_TYPE_INT, _sample_rate, - "signed", G_TYPE_BOOLEAN, TRUE, - NULL); - - gboolean link_ok; - if (_format == "mp3"){ - _filter = gst_element_factory_make("capsfilter", "filter"); - g_object_set( G_OBJECT(_filter), "caps", caps, NULL); - gst_caps_unref(caps); - - _convert = gst_element_factory_make("audioconvert", "convert"); - if (!_convert) { - ROS_ERROR_STREAM("Failed to create audioconvert element"); - exitOnMainThread(1); - } - - _encode = gst_element_factory_make("lamemp3enc", "encoder"); - if (!_encode) { - ROS_ERROR_STREAM("Failed to create encoder element"); - exitOnMainThread(1); - } - g_object_set( G_OBJECT(_encode), "target", 1, NULL); - g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL); - - gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL); - link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL); - } else if (_format == "wave") { - if (dst_type == "appsink") { - g_object_set( G_OBJECT(_sink), "caps", caps, NULL); - gst_caps_unref(caps); - gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL); - link_ok = gst_element_link_many( _source, _sink, NULL); - } else { - _filter = gst_element_factory_make("wavenc", "filter"); - gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL); - link_ok = gst_element_link_many( _source, _filter, _sink, NULL); - } - } else { - ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\""); - exitOnMainThread(1); - } - /*} - else - { - _sleep_time = 10000; - _source = gst_element_factory_make("filesrc", "source"); - g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL); - - gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL); - gst_element_link_many(_source, _sink, NULL); - } - */ - - if (!link_ok) { - ROS_ERROR_STREAM("Unsupported media type."); - exitOnMainThread(1); - } - - gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING); - - _gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) ); - - vz_acoustic_scene_analysis::MyAudioInfo info_msg; - info_msg.channels = _channels; - info_msg.sample_rate = _sample_rate; - info_msg.sample_format = _sample_format; - info_msg.bitrate = _bitrate; - info_msg.coding_format = _format; - _pub_info.publish(info_msg); - } - - ~RosGstCapture() - { - g_main_loop_quit(_loop); - gst_element_set_state(_pipeline, GST_STATE_NULL); - gst_object_unref(_pipeline); - g_main_loop_unref(_loop); - } - - void exitOnMainThread(int code) - { - exit(code); - } - - void publish( const vz_acoustic_scene_analysis::MyAudioData &msg ) - { - _pub.publish(msg); - } - - static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData) - { - RosGstCapture *server = reinterpret_cast(userData); - GstMapInfo map; - - GstSample *sample; - g_signal_emit_by_name(appsink, "pull-sample", &sample); - - GstBuffer *buffer = gst_sample_get_buffer(sample); - - vz_acoustic_scene_analysis::MyAudioData msg; - gst_buffer_map(buffer, &map, GST_MAP_READ); - msg.data.resize( map.size ); - - memcpy( &msg.data[0], map.data, map.size ); - - gst_buffer_unmap(buffer, &map); - gst_sample_unref(sample); - - server->publish(msg); - - return GST_FLOW_OK; - } - - static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData) - { - RosGstCapture *server = reinterpret_cast(userData); - GError *err; - gchar *debug; - - gst_message_parse_error(message, &err, &debug); - ROS_ERROR_STREAM("gstreamer: " << err->message); - g_error_free(err); - g_free(debug); - g_main_loop_quit(server->_loop); - server->exitOnMainThread(1); - return FALSE; - } - - private: - ros::NodeHandle _nh; - ros::Publisher _pub; - ros::Publisher _pub_info; - - boost::thread _gst_thread; - - GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode; - GstBus *_bus; - int _bitrate, _channels, _depth, _sample_rate; - GMainLoop *_loop; - std::string _format, _sample_format; - }; -} - -int main (int argc, char **argv) -{ - ros::init(argc, argv, "audio_capture"); - gst_init(&argc, &argv); - - audio_transport::RosGstCapture server; - ros::spin(); -} \ No newline at end of file diff --git a/vz_acoustic_scene_analysis/scripts/ros_interface.py b/vz_acoustic_scene_analysis/scripts/ros_interface.py deleted file mode 100755 index b18612e..0000000 --- a/vz_acoustic_scene_analysis/scripts/ros_interface.py +++ /dev/null @@ -1,89 +0,0 @@ -#! /usr/bin/python3 - -from sklearn.cluster import k_means -import rospy -import numpy as np -from vz_acoustic_scene_analysis.msg import MyAudioData -from std_msgs.msg import String -from scipy.io.wavfile import write -from collections import deque -import time -from pathlib import Path - - - -# Non ROS import -# import acoustic_scene_analysis as asa - -class RosInterface: - def __init__(self): - home_dir = str(Path.home()) - self.save_dir = home_dir + "/Music/" - # 1) Write subscriber to /audio topic and populate a numpy data structure (array) with the uint8[] data - # self.maxSize = 7 - # self.queue = [None] * 7 - # self.head = self.tail = -1 - self.wav_data = [] - self.arraylength = 0 - self.msg_count = 0 - - rospy.Subscriber("/audio", MyAudioData, self.raw_callback) - - def enqueue(self,data): - # if queue is full - if ((self.tail+1) % self.k == self.head): - # convert to mp3 - # publish mp3 - # remove the first element (call dequeue) - pass - elif (self.head == -1): - self.head = 0 - self.tail = 0 - else: - self.tail = (self.tail +1) % self.maxSize - self.queue[self.tail] = data - - def dequeue(self): - # if empty queue - if (self.head == -1): - pass - # if the self - else: - temp = self.queue[self.head] - - def raw_callback(self, msg): - # print("Length of uint8[]:", len(msg.data)) - self.wav_data.append(msg.data) - - # if (self.msg_count < 10000): - # self.arraylength += len(msg.data) - # print(self.nparray) - # print(len(bytes)) - # else : - # self.byteArray[self.msg_count] = bytes - # print(len(bytes)) - self.msg_count += 1 - - def on_shutdown(self): - wav_arr = np.array(self.wav_data) - print(wav_arr) - print(wav_arr.shape) - write(self.save_dir +'test.mp3', 44100, wav_arr) - print("check music") - pass - -# 2) Check you're "Decoding" the audio ROS message correctly by saving to a .wav file -# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding on the audio_capture.cpp, seeing if that changes anything e.g. encoding with mp3 instead. -# 4) If you've made it this far, well done. Try find Utku's function to pass the np array. - - -if __name__ == '__main__': - try: - rospy.init_node('ros_interface', anonymous=True) - ros_int = RosInterface() - - rospy.on_shutdown(ros_int.on_shutdown) - rospy.spin() - except rospy.ROSInterruptException: - print('Audio converter node failed!') - pass \ No newline at end of file diff --git a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py b/vz_acoustic_scene_analysis/scripts/stretch_audio.py similarity index 85% rename from vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py rename to vz_acoustic_scene_analysis/scripts/stretch_audio.py index 27157f8..14768db 100755 --- a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py +++ b/vz_acoustic_scene_analysis/scripts/stretch_audio.py @@ -185,53 +185,12 @@ def get_respeaker_device_id(): return device_id - RESPEAKER_RATE = 16000 RESPEAKER_CHANNELS = 6 # must flash 6_channels_firmware.bin first RESPEAKER_WIDTH = 2 RESPEAKER_INDEX = get_respeaker_device_id() CHUNK = 1024 -# def record_audio(seconds=5): -# p = pyaudio.PyAudio() -# stream = p.open(rate=RESPEAKER_RATE, -# format=p.get_format_from_width(RESPEAKER_WIDTH), -# channels=RESPEAKER_CHANNELS, -# input=True, -# input_device_index=RESPEAKER_INDEX, -# output= False) - -# frames = [] -# for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)): -# data = stream.read(CHUNK) -# a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0 -# frames.append(a.tobytes()) - -# stream.stop_stream() -# stream.close() -# p.terminate() - -# return frames - - -def save_wav(frames, fname): - p = pyaudio.PyAudio() - wf = wave.open(fname, 'wb') - wf.setnchannels(1) - wf.setsampwidth(p.get_sample_size(p.get_format_from_width(RESPEAKER_WIDTH))) - wf.setframerate(RESPEAKER_RATE) - for val in frames: - wf.writeframes(b''.join(val)) - wf.close() - -# def rec_and_save(): -# print("* recording 5 seconds") -# frames = record_audio() # rospy.Timer(rospy.Duration(0.2), record_audio) -# print("* done") -# file_name = "/home/hello-robot/Desktop/output_audio.wav" -# save_wav(frames, file_name) -# # send test.wav files -# print("* done") class Audio: def __init__(self): @@ -239,24 +198,25 @@ class Audio: self.wav_list = [] self.record_count = 0 # Count how many times we've recorded f seconds of audio self.file_name = "/home/hello-robot/Desktop/output_audio.wav" + # Publisher for Audio Data + # self.audio_data_pub = rospy.Publisher("/wav_data", ) - def write_audio(self): - recorded_frames = self.record_audio(.5) - print("i haz frames: ", self.record_count) + def get_audio(self): + recorded_frames = self.record_audio(.2) # set param here chunk size self.wav_list.append(recorded_frames) self.record_count += 1 # Every 5 seconds for - if ((self.record_count % 5) == 0): + if ((self.record_count % 2) == 0): # set param sequence size + return_list = self.wav_list + # Remove the first object (0.2 seconds of audio data) + self.wav_list.pop(0) # send the frames at the beginning of the list (save as wav for now) - save_wav(self.wav_list,self.file_name) - # Empty list - self.wav_list = [] - print("5 seconds have passed, very nice") + return return_list + def record_audio(self, seconds=5): p = pyaudio.PyAudio() - print ("i NO haz stream") stream = p.open(rate=RESPEAKER_RATE, format=p.get_format_from_width(RESPEAKER_WIDTH), @@ -264,12 +224,10 @@ class Audio: input=True, input_device_index=RESPEAKER_INDEX, output= False) - print ("i haz stream") frames = [] for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)): data = stream.read(CHUNK) - print("I haz data from stream: ", i) a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0 frames.append(a.tobytes()) @@ -282,6 +240,7 @@ class Audio: def process_audio_loop(self): rospy.init_node("audio_capture") + rospy.param audio_count = 0 dev = usb.core.find(idVendor=0x2886, idProduct=0x0018) try: @@ -289,7 +248,15 @@ class Audio: respeaker = Tuning() while True: if respeaker.is_voice() == 1: - self.write_audio() + # wav_data = list of lists of bytes + wav_data = self.get_audio() + if (str(type(wav_data)) == ""): + # Maybe publish wav_data as ROS message? + pass + # publish_wav(wav_data) # lol + # Call of Utku's function + # asa_out = process_wav(wav_data, asa_params) + # Convert asa_out to ROS message audio_count += 1 print(audio_count) except usb.core.USBError: