From fb487bb8a5ba0b8b1d58ad375b1b3153e4911f51 Mon Sep 17 00:00:00 2001
From: Hongyu Li
Date: Tue, 22 Mar 2022 13:05:08 -0400
Subject: [PATCH] Replace GStreamer audio capture with PyAudio wav-chunk pipeline
---
.../launch/audio.launch | 13 -
.../launch/audio_test.launch | 4 -
.../launch/wav_audio.launch | 4 +
.../scripts/audio_capture.cpp | 236 ------------------
.../scripts/ros_interface.py | 89 -------
...etch_respeak_test2.py => stretch_audio.py} | 73 ++----
6 files changed, 24 insertions(+), 395 deletions(-)
delete mode 100644 vz_acoustic_scene_analysis/launch/audio.launch
delete mode 100644 vz_acoustic_scene_analysis/launch/audio_test.launch
create mode 100644 vz_acoustic_scene_analysis/launch/wav_audio.launch
delete mode 100644 vz_acoustic_scene_analysis/scripts/audio_capture.cpp
delete mode 100755 vz_acoustic_scene_analysis/scripts/ros_interface.py
rename vz_acoustic_scene_analysis/scripts/{stretch_respeak_test2.py => stretch_audio.py} (85%)
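
Net effect: the GStreamer-based C++ capture node and the queue-based
ros_interface.py subscriber are removed; the renamed stretch_audio.py now
records 0.2 s ReSpeaker chunks with PyAudio and returns accumulated windows
from get_audio() instead of writing wav files to disk. A minimal smoke test
of the new class might look like this sketch (illustrative only: the /tmp
path is made up, and a flashed 6-channel ReSpeaker must be attached for the
module-level device lookup to succeed):

    import wave
    from stretch_audio import Audio, RESPEAKER_RATE, RESPEAKER_WIDTH

    audio = Audio()
    window = None
    while window is None:      # get_audio() returns None until a window is ready
        window = audio.get_audio()

    # window is a list of chunks; each chunk is a list of channel-0 byte frames
    with wave.open("/tmp/check_audio.wav", "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(RESPEAKER_WIDTH)
        wf.setframerate(RESPEAKER_RATE)
        for chunk in window:
            wf.writeframes(b"".join(chunk))
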
diff --git a/vz_acoustic_scene_analysis/launch/audio.launch b/vz_acoustic_scene_analysis/launch/audio.launch
deleted file mode 100644
index 2912b46..0000000
--- a/vz_acoustic_scene_analysis/launch/audio.launch
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/launch/audio_test.launch b/vz_acoustic_scene_analysis/launch/audio_test.launch
deleted file mode 100644
index eab5a03..0000000
--- a/vz_acoustic_scene_analysis/launch/audio_test.launch
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/launch/wav_audio.launch b/vz_acoustic_scene_analysis/launch/wav_audio.launch
new file mode 100644
index 0000000..663bb49
--- /dev/null
+++ b/vz_acoustic_scene_analysis/launch/wav_audio.launch
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp b/vz_acoustic_scene_analysis/scripts/audio_capture.cpp
deleted file mode 100644
index 9e56263..0000000
--- a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-#include <stdio.h>
-#include <gst/gst.h>
-#include <gst/app/gstappsink.h>
-#include <boost/thread.hpp>
-
-#include <ros/ros.h>
-
-#include "vz_acoustic_scene_analysis/MyAudioData.h"
-#include "vz_acoustic_scene_analysis/MyAudioInfo.h"
-
-namespace audio_transport
-{
- class RosGstCapture
- {
- public:
- RosGstCapture()
- {
- _bitrate = 192;
-
- std::string dst_type;
-
- // Encode the audio, or publish raw wave data
- ros::param::param<std::string>("~format", _format, "mp3");
- ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");
-
- // The bitrate at which to encode the audio
- ros::param::param("~bitrate", _bitrate, 192);
-
- // only available for raw data
- ros::param::param("~channels", _channels, 1);
- ros::param::param("~depth", _depth, 16);
- ros::param::param("~sample_rate", _sample_rate, 16000);
-
- // The destination of the audio
- ros::param::param("~dst", dst_type, "appsink");
-
- // The source of the audio
- //ros::param::param("~src", source_type, "alsasrc");
- std::string device;
- ros::param::param("~device", device, "");
-
- _pub = _nh.advertise<vz_acoustic_scene_analysis::MyAudioData>("audio", 10, true);
- _pub_info = _nh.advertise<vz_acoustic_scene_analysis::MyAudioInfo>("audio_info", 1, true);
-
- _loop = g_main_loop_new(NULL, false);
- _pipeline = gst_pipeline_new("ros_pipeline");
- _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
- gst_bus_add_signal_watch(_bus);
- g_signal_connect(_bus, "message::error",
- G_CALLBACK(onMessage), this);
- g_object_unref(_bus);
-
- // We create the sink first, just for convenience
- if (dst_type == "appsink")
- {
- _sink = gst_element_factory_make("appsink", "sink");
- g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
- g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
- g_signal_connect( G_OBJECT(_sink), "new-sample",
- G_CALLBACK(onNewBuffer), this);
- }
- else
- {
- ROS_INFO("file sink to %s", dst_type.c_str());
- _sink = gst_element_factory_make("filesink", "sink");
- g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
- }
-
- _source = gst_element_factory_make("alsasrc", "source");
- // if device isn't specified, it will use the default which is
- // the alsa default source.
- // A valid device will be of the form hw:0,0, with other card and
- // device numbers available depending on the hardware.
- if (device != "")
- {
- // gchar *gst_device = device.c_str();
- g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
- }
-
- GstCaps *caps;
- caps = gst_caps_new_simple("audio/x-raw",
- "format", G_TYPE_STRING, _sample_format.c_str(),
- "channels", G_TYPE_INT, _channels,
- "width", G_TYPE_INT, _depth,
- "depth", G_TYPE_INT, _depth,
- "rate", G_TYPE_INT, _sample_rate,
- "signed", G_TYPE_BOOLEAN, TRUE,
- NULL);
-
- gboolean link_ok;
- if (_format == "mp3"){
- _filter = gst_element_factory_make("capsfilter", "filter");
- g_object_set( G_OBJECT(_filter), "caps", caps, NULL);
- gst_caps_unref(caps);
-
- _convert = gst_element_factory_make("audioconvert", "convert");
- if (!_convert) {
- ROS_ERROR_STREAM("Failed to create audioconvert element");
- exitOnMainThread(1);
- }
-
- _encode = gst_element_factory_make("lamemp3enc", "encoder");
- if (!_encode) {
- ROS_ERROR_STREAM("Failed to create encoder element");
- exitOnMainThread(1);
- }
- g_object_set( G_OBJECT(_encode), "target", 1, NULL);
- g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL);
-
- gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
- link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
- } else if (_format == "wave") {
- if (dst_type == "appsink") {
- g_object_set( G_OBJECT(_sink), "caps", caps, NULL);
- gst_caps_unref(caps);
- gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
- link_ok = gst_element_link_many( _source, _sink, NULL);
- } else {
- _filter = gst_element_factory_make("wavenc", "filter");
- gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL);
- link_ok = gst_element_link_many( _source, _filter, _sink, NULL);
- }
- } else {
- ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
- exitOnMainThread(1);
- }
- /*}
- else
- {
- _sleep_time = 10000;
- _source = gst_element_factory_make("filesrc", "source");
- g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
-
- gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
- gst_element_link_many(_source, _sink, NULL);
- }
- */
-
- if (!link_ok) {
- ROS_ERROR_STREAM("Unsupported media type.");
- exitOnMainThread(1);
- }
-
- gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);
-
- _gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) );
-
- vz_acoustic_scene_analysis::MyAudioInfo info_msg;
- info_msg.channels = _channels;
- info_msg.sample_rate = _sample_rate;
- info_msg.sample_format = _sample_format;
- info_msg.bitrate = _bitrate;
- info_msg.coding_format = _format;
- _pub_info.publish(info_msg);
- }
-
- ~RosGstCapture()
- {
- g_main_loop_quit(_loop);
- gst_element_set_state(_pipeline, GST_STATE_NULL);
- gst_object_unref(_pipeline);
- g_main_loop_unref(_loop);
- }
-
- void exitOnMainThread(int code)
- {
- exit(code);
- }
-
- void publish( const vz_acoustic_scene_analysis::MyAudioData &msg )
- {
- _pub.publish(msg);
- }
-
- static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData)
- {
- RosGstCapture *server = reinterpret_cast<RosGstCapture *>(userData);
- GstMapInfo map;
-
- GstSample *sample;
- g_signal_emit_by_name(appsink, "pull-sample", &sample);
-
- GstBuffer *buffer = gst_sample_get_buffer(sample);
-
- vz_acoustic_scene_analysis::MyAudioData msg;
- gst_buffer_map(buffer, &map, GST_MAP_READ);
- msg.data.resize( map.size );
-
- memcpy( &msg.data[0], map.data, map.size );
-
- gst_buffer_unmap(buffer, &map);
- gst_sample_unref(sample);
-
- server->publish(msg);
-
- return GST_FLOW_OK;
- }
-
- static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData)
- {
- RosGstCapture *server = reinterpret_cast<RosGstCapture *>(userData);
- GError *err;
- gchar *debug;
-
- gst_message_parse_error(message, &err, &debug);
- ROS_ERROR_STREAM("gstreamer: " << err->message);
- g_error_free(err);
- g_free(debug);
- g_main_loop_quit(server->_loop);
- server->exitOnMainThread(1);
- return FALSE;
- }
-
- private:
- ros::NodeHandle _nh;
- ros::Publisher _pub;
- ros::Publisher _pub_info;
-
- boost::thread _gst_thread;
-
- GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
- GstBus *_bus;
- int _bitrate, _channels, _depth, _sample_rate;
- GMainLoop *_loop;
- std::string _format, _sample_format;
- };
-}
-
-int main (int argc, char **argv)
-{
- ros::init(argc, argv, "audio_capture");
- gst_init(&argc, &argv);
-
- audio_transport::RosGstCapture server;
- ros::spin();
-}
\ No newline at end of file
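
For reference, the deleted node's default raw caps were S16LE, 1 channel,
16 kHz; the PyAudio capture that replaces it (below) grabs the same format.
A rough single-chunk equivalent, under those assumptions:

    import pyaudio

    p = pyaudio.PyAudio()
    stream = p.open(rate=16000,              # ~sample_rate default
                    format=pyaudio.paInt16,  # S16LE, depth 16
                    channels=1,              # ~channels default
                    input=True)
    data = stream.read(1024)                 # one CHUNK of raw PCM bytes
    stream.stop_stream()
    stream.close()
    p.terminate()
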
diff --git a/vz_acoustic_scene_analysis/scripts/ros_interface.py b/vz_acoustic_scene_analysis/scripts/ros_interface.py
deleted file mode 100755
index b18612e..0000000
--- a/vz_acoustic_scene_analysis/scripts/ros_interface.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#! /usr/bin/python3
-
-from sklearn.cluster import k_means
-import rospy
-import numpy as np
-from vz_acoustic_scene_analysis.msg import MyAudioData
-from std_msgs.msg import String
-from scipy.io.wavfile import write
-from collections import deque
-import time
-from pathlib import Path
-
-
-
-# Non ROS import
-# import acoustic_scene_analysis as asa
-
-class RosInterface:
- def __init__(self):
- home_dir = str(Path.home())
- self.save_dir = home_dir + "/Music/"
- # 1) Write subscriber to /audio topic and populate a numpy data structure (array) with the uint8[] data
- # self.maxSize = 7
- # self.queue = [None] * 7
- # self.head = self.tail = -1
- self.wav_data = []
- self.arraylength = 0
- self.msg_count = 0
-
- rospy.Subscriber("/audio", MyAudioData, self.raw_callback)
-
- def enqueue(self,data):
- # if queue is full
- if ((self.tail+1) % self.k == self.head):
- # convert to mp3
- # publish mp3
- # remove the first element (call dequeue)
- pass
- elif (self.head == -1):
- self.head = 0
- self.tail = 0
- else:
- self.tail = (self.tail +1) % self.maxSize
- self.queue[self.tail] = data
-
- def dequeue(self):
- # if empty queue
- if (self.head == -1):
- pass
- # if the self
- else:
- temp = self.queue[self.head]
-
- def raw_callback(self, msg):
- # print("Length of uint8[]:", len(msg.data))
- self.wav_data.append(msg.data)
-
- # if (self.msg_count < 10000):
- # self.arraylength += len(msg.data)
- # print(self.nparray)
- # print(len(bytes))
- # else :
- # self.byteArray[self.msg_count] = bytes
- # print(len(bytes))
- self.msg_count += 1
-
- def on_shutdown(self):
- wav_arr = np.array(self.wav_data)
- print(wav_arr)
- print(wav_arr.shape)
- write(self.save_dir +'test.mp3', 44100, wav_arr)
- print("check music")
- pass
-
-# 2) Check you're "Decoding" the audio ROS message correctly by saving to a .wav file
-# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding on the audio_capture.cpp, seeing if that changes anything e.g. encoding with mp3 instead.
-# 4) If you've made it this far, well done. Try to find Utku's function to pass the np array to.
-
-
-if __name__ == '__main__':
- try:
- rospy.init_node('ros_interface', anonymous=True)
- ros_int = RosInterface()
-
- rospy.on_shutdown(ros_int.on_shutdown)
- rospy.spin()
- except rospy.ROSInterruptException:
- print('Audio converter node failed!')
- pass
\ No newline at end of file
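
Note that the deleted on_shutdown handed a list of uint8[] tuples, under a
.mp3 name, to scipy's WAV writer. A corrected dump would reinterpret the
payloads as 16-bit PCM and use a .wav name; a sketch (the 16 kHz rate
matches the capture node's default and is an assumption here):

    import numpy as np
    from scipy.io.wavfile import write

    def dump_wav(frames, path="/tmp/test.wav", rate=16000):
        # frames: list of MyAudioData uint8[] payloads, reinterpreted as
        # little-endian 16-bit PCM samples
        pcm = np.frombuffer(b"".join(bytes(f) for f in frames), dtype=np.int16)
        write(path, rate, pcm)  # scipy always emits WAV, whatever the extension
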
diff --git a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py b/vz_acoustic_scene_analysis/scripts/stretch_audio.py
similarity index 85%
rename from vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
rename to vz_acoustic_scene_analysis/scripts/stretch_audio.py
index 27157f8..14768db 100755
--- a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
+++ b/vz_acoustic_scene_analysis/scripts/stretch_audio.py
@@ -185,53 +185,12 @@ def get_respeaker_device_id():
return device_id
-
RESPEAKER_RATE = 16000
RESPEAKER_CHANNELS = 6 # must flash 6_channels_firmware.bin first
RESPEAKER_WIDTH = 2
RESPEAKER_INDEX = get_respeaker_device_id()
CHUNK = 1024
-# def record_audio(seconds=5):
-# p = pyaudio.PyAudio()
-# stream = p.open(rate=RESPEAKER_RATE,
-# format=p.get_format_from_width(RESPEAKER_WIDTH),
-# channels=RESPEAKER_CHANNELS,
-# input=True,
-# input_device_index=RESPEAKER_INDEX,
-# output= False)
-
-# frames = []
-# for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
-# data = stream.read(CHUNK)
-# a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0
-# frames.append(a.tobytes())
-
-# stream.stop_stream()
-# stream.close()
-# p.terminate()
-
-# return frames
-
-
-def save_wav(frames, fname):
- p = pyaudio.PyAudio()
- wf = wave.open(fname, 'wb')
- wf.setnchannels(1)
- wf.setsampwidth(p.get_sample_size(p.get_format_from_width(RESPEAKER_WIDTH)))
- wf.setframerate(RESPEAKER_RATE)
- for val in frames:
- wf.writeframes(b''.join(val))
- wf.close()
-
-# def rec_and_save():
-# print("* recording 5 seconds")
-# frames = record_audio() # rospy.Timer(rospy.Duration(0.2), record_audio)
-# print("* done")
-# file_name = "/home/hello-robot/Desktop/output_audio.wav"
-# save_wav(frames, file_name)
-# # send test.wav files
-# print("* done")
class Audio:
def __init__(self):
@@ -239,24 +198,25 @@ class Audio:
self.wav_list = []
self.record_count = 0 # Count how many times we've recorded f seconds of audio
self.file_name = "/home/hello-robot/Desktop/output_audio.wav"
+ # Publisher for Audio Data
+ # self.audio_data_pub = rospy.Publisher("/wav_data", )
- def write_audio(self):
- recorded_frames = self.record_audio(.5)
- print("i haz frames: ", self.record_count)
+ def get_audio(self):
+ recorded_frames = self.record_audio(.2) # TODO: make the chunk duration (seconds) a ROS param
self.wav_list.append(recorded_frames)
self.record_count += 1
# Once a full window of chunks has accumulated, hand it back
- if ((self.record_count % 5) == 0):
+ if ((self.record_count % 2) == 0): # TODO: make the window (sequence) size a ROS param
+ return_list = list(self.wav_list) # copy, so the pop() below doesn't shorten the returned window
+ # Drop the oldest chunk (0.2 seconds of audio data)
+ self.wav_list.pop(0)
# return the accumulated frames (previously saved straight to a wav file)
- save_wav(self.wav_list,self.file_name)
- # Empty list
- self.wav_list = []
- print("5 seconds have passed, very nice")
+ return return_list
+
def record_audio(self, seconds=5):
p = pyaudio.PyAudio()
- print ("i NO haz stream")
stream = p.open(rate=RESPEAKER_RATE,
format=p.get_format_from_width(RESPEAKER_WIDTH),
@@ -264,12 +224,10 @@ class Audio:
input=True,
input_device_index=RESPEAKER_INDEX,
output= False)
- print ("i haz stream")
frames = []
for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
data = stream.read(CHUNK)
- print("I haz data from stream: ", i)
a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0
frames.append(a.tobytes())
@@ -282,6 +240,7 @@ class Audio:
def process_audio_loop(self):
rospy.init_node("audio_capture")
+ # TODO: read the chunk and window sizes via rospy.get_param()
audio_count = 0
dev = usb.core.find(idVendor=0x2886, idProduct=0x0018)
try:
@@ -289,7 +248,15 @@ class Audio:
respeaker = Tuning()
while True:
if respeaker.is_voice() == 1:
- self.write_audio()
+ # wav_data = list of lists of bytes
+ wav_data = self.get_audio()
+ if (str(type(wav_data)) == "<class 'list'>"):
+ # Maybe publish wav_data as ROS message?
+ pass
+ # publish_wav(wav_data)
+ # Call Utku's processing function:
+ # asa_out = process_wav(wav_data, asa_params)
+ # Convert asa_out to ROS message
audio_count += 1
print(audio_count)
except usb.core.USBError:
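
One possible shape for the "/wav_data" publisher stubbed out above; the
message type and helper name are assumptions, not part of this patch:

    import rospy
    from std_msgs.msg import UInt8MultiArray

    def make_wav_publisher():
        # call after rospy.init_node(...)
        return rospy.Publisher("/wav_data", UInt8MultiArray, queue_size=10)

    def publish_wav(wav_data, pub):
        # wav_data: list of chunks, each a list of channel-0 byte frames
        msg = UInt8MultiArray()
        msg.data = b"".join(b"".join(chunk) for chunk in wav_data)
        pub.publish(msg)
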