
some audio changes

pull/65/head
Hongyu Li committed 2 years ago · commit fb487bb8a5
6 changed files with 24 additions and 395 deletions:

  1. vz_acoustic_scene_analysis/launch/audio.launch (+0, -13)
  2. vz_acoustic_scene_analysis/launch/audio_test.launch (+0, -4)
  3. vz_acoustic_scene_analysis/launch/wav_audio.launch (+4, -0)
  4. vz_acoustic_scene_analysis/scripts/audio_capture.cpp (+0, -236)
  5. vz_acoustic_scene_analysis/scripts/ros_interface.py (+0, -89)
  6. vz_acoustic_scene_analysis/scripts/stretch_audio.py (+20, -53)

vz_acoustic_scene_analysis/launch/audio.launch (+0, -13)

@@ -1,13 +0,0 @@
<launch>
  <arg name="device" default="plughw:1,0" />
  <!-- publish audio data in wav format -->
  <node name="audio_capture" pkg="vz_acoustic_scene_analysis" type="audio_capture" output="screen">
    <param name="format" value="wave" />
    <param name="channels" value="1" />
    <param name="depth" value="16" />
    <param name="sample_rate" value="44100" />
    <param name="device" value="$(arg device)" />
  </node>
  <node pkg="vz_acoustic_scene_analysis" type="ros_interface.py" name="ros_interface" output="screen" />
</launch>

vz_acoustic_scene_analysis/launch/audio_test.launch (+0, -4)

@@ -1,4 +0,0 @@
<launch>
  <node name="respeak_test" pkg="respeaker_ros" type="stretch_respeak_test2.py" />
  <!-- <rosparam file="$(find respeaker_ros)/config/audio_params.yaml" /> -->
</launch>

vz_acoustic_scene_analysis/launch/wav_audio.launch (+4, -0)

@@ -0,0 +1,4 @@
<launch>
  <node name="stretch_audio" pkg="respeaker_ros" type="stretch_audio.py" />
  <!-- <rosparam file="$(find respeaker_ros)/config/audio_params.yaml" /> -->
</launch>
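
Once the workspace is built and sourced, this new launch file can presumably be brought up with: roslaunch vz_acoustic_scene_analysis wav_audio.launch. Note that the node it starts is declared under the respeaker_ros package, so that package must be on the ROS package path as well.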

vz_acoustic_scene_analysis/scripts/audio_capture.cpp (+0, -236)

@@ -1,236 +0,0 @@
#include <stdio.h>
#include <gst/gst.h>
#include <gst/app/gstappsink.h>
#include <boost/thread.hpp>
#include <ros/ros.h>

#include "vz_acoustic_scene_analysis/MyAudioData.h"
#include "vz_acoustic_scene_analysis/MyAudioInfo.h"

namespace audio_transport
{
  class RosGstCapture
  {
    public:
      RosGstCapture()
      {
        _bitrate = 192;

        std::string dst_type;

        // Whether to encode the audio or publish raw wave data
        ros::param::param<std::string>("~format", _format, "mp3");
        ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");

        // The bitrate at which to encode the audio
        ros::param::param<int>("~bitrate", _bitrate, 192);

        // Only available for raw data
        ros::param::param<int>("~channels", _channels, 1);
        ros::param::param<int>("~depth", _depth, 16);
        ros::param::param<int>("~sample_rate", _sample_rate, 16000);

        // The destination of the audio
        ros::param::param<std::string>("~dst", dst_type, "appsink");

        // The source of the audio
        //ros::param::param<std::string>("~src", source_type, "alsasrc");
        std::string device;
        ros::param::param<std::string>("~device", device, "");

        _pub = _nh.advertise<vz_acoustic_scene_analysis::MyAudioData>("audio", 10, true);
        _pub_info = _nh.advertise<vz_acoustic_scene_analysis::MyAudioInfo>("audio_info", 1, true);

        _loop = g_main_loop_new(NULL, false);
        _pipeline = gst_pipeline_new("ros_pipeline");
        _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
        gst_bus_add_signal_watch(_bus);
        g_signal_connect(_bus, "message::error",
                         G_CALLBACK(onMessage), this);
        g_object_unref(_bus);

        // We create the sink first, just for convenience
        if (dst_type == "appsink")
        {
          _sink = gst_element_factory_make("appsink", "sink");
          g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
          g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
          g_signal_connect(G_OBJECT(_sink), "new-sample",
                           G_CALLBACK(onNewBuffer), this);
        }
        else
        {
          ROS_INFO("file sink to %s", dst_type.c_str());
          _sink = gst_element_factory_make("filesink", "sink");
          g_object_set(G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
        }

        _source = gst_element_factory_make("alsasrc", "source");
        // If a device isn't specified, the default ALSA source is used.
        // A valid device is of the form hw:0,0, with other numbers
        // available depending on the hardware.
        if (device != "")
        {
          // gchar *gst_device = device.c_str();
          g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
        }

        GstCaps *caps;
        caps = gst_caps_new_simple("audio/x-raw",
                                   "format", G_TYPE_STRING, _sample_format.c_str(),
                                   "channels", G_TYPE_INT, _channels,
                                   "width", G_TYPE_INT, _depth,
                                   "depth", G_TYPE_INT, _depth,
                                   "rate", G_TYPE_INT, _sample_rate,
                                   "signed", G_TYPE_BOOLEAN, TRUE,
                                   NULL);

        gboolean link_ok;
        if (_format == "mp3") {
          _filter = gst_element_factory_make("capsfilter", "filter");
          g_object_set(G_OBJECT(_filter), "caps", caps, NULL);
          gst_caps_unref(caps);

          _convert = gst_element_factory_make("audioconvert", "convert");
          if (!_convert) {
            ROS_ERROR_STREAM("Failed to create audioconvert element");
            exitOnMainThread(1);
          }

          _encode = gst_element_factory_make("lamemp3enc", "encoder");
          if (!_encode) {
            ROS_ERROR_STREAM("Failed to create encoder element");
            exitOnMainThread(1);
          }
          g_object_set(G_OBJECT(_encode), "target", 1, NULL);
          g_object_set(G_OBJECT(_encode), "bitrate", _bitrate, NULL);

          gst_bin_add_many(GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
          link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
        } else if (_format == "wave") {
          if (dst_type == "appsink") {
            g_object_set(G_OBJECT(_sink), "caps", caps, NULL);
            gst_caps_unref(caps);
            gst_bin_add_many(GST_BIN(_pipeline), _source, _sink, NULL);
            link_ok = gst_element_link_many(_source, _sink, NULL);
          } else {
            _filter = gst_element_factory_make("wavenc", "filter");
            gst_bin_add_many(GST_BIN(_pipeline), _source, _filter, _sink, NULL);
            link_ok = gst_element_link_many(_source, _filter, _sink, NULL);
          }
        } else {
          ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
          exitOnMainThread(1);
        }

        /*}
        else
        {
          _sleep_time = 10000;
          _source = gst_element_factory_make("filesrc", "source");
          g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
          gst_bin_add_many(GST_BIN(_pipeline), _source, _sink, NULL);
          gst_element_link_many(_source, _sink, NULL);
        }
        */

        if (!link_ok) {
          ROS_ERROR_STREAM("Unsupported media type.");
          exitOnMainThread(1);
        }

        gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);

        _gst_thread = boost::thread(boost::bind(g_main_loop_run, _loop));

        vz_acoustic_scene_analysis::MyAudioInfo info_msg;
        info_msg.channels = _channels;
        info_msg.sample_rate = _sample_rate;
        info_msg.sample_format = _sample_format;
        info_msg.bitrate = _bitrate;
        info_msg.coding_format = _format;
        _pub_info.publish(info_msg);
      }

      ~RosGstCapture()
      {
        g_main_loop_quit(_loop);
        gst_element_set_state(_pipeline, GST_STATE_NULL);
        gst_object_unref(_pipeline);
        g_main_loop_unref(_loop);
      }

      void exitOnMainThread(int code)
      {
        exit(code);
      }

      void publish(const vz_acoustic_scene_analysis::MyAudioData &msg)
      {
        _pub.publish(msg);
      }

      static GstFlowReturn onNewBuffer(GstAppSink *appsink, gpointer userData)
      {
        RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
        GstMapInfo map;

        GstSample *sample;
        g_signal_emit_by_name(appsink, "pull-sample", &sample);

        GstBuffer *buffer = gst_sample_get_buffer(sample);

        vz_acoustic_scene_analysis::MyAudioData msg;
        gst_buffer_map(buffer, &map, GST_MAP_READ);
        msg.data.resize(map.size);

        memcpy(&msg.data[0], map.data, map.size);

        gst_buffer_unmap(buffer, &map);
        gst_sample_unref(sample);

        server->publish(msg);

        return GST_FLOW_OK;
      }

      static gboolean onMessage(GstBus *bus, GstMessage *message, gpointer userData)
      {
        RosGstCapture *server = reinterpret_cast<RosGstCapture*>(userData);
        GError *err;
        gchar *debug;

        gst_message_parse_error(message, &err, &debug);
        ROS_ERROR_STREAM("gstreamer: " << err->message);
        g_error_free(err);
        g_free(debug);

        g_main_loop_quit(server->_loop);
        server->exitOnMainThread(1);
        return FALSE;
      }

    private:
      ros::NodeHandle _nh;
      ros::Publisher _pub;
      ros::Publisher _pub_info;

      boost::thread _gst_thread;

      GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
      GstBus *_bus;
      int _bitrate, _channels, _depth, _sample_rate;
      GMainLoop *_loop;
      std::string _format, _sample_format;
  };
}

int main(int argc, char **argv)
{
  ros::init(argc, argv, "audio_capture");
  gst_init(&argc, &argv);

  audio_transport::RosGstCapture server;
  ros::spin();
}

vz_acoustic_scene_analysis/scripts/ros_interface.py (+0, -89)

@@ -1,89 +0,0 @@
#! /usr/bin/python3
import rospy
import numpy as np
from vz_acoustic_scene_analysis.msg import MyAudioData
from std_msgs.msg import String
from scipy.io.wavfile import write
from collections import deque
import time
from pathlib import Path

# Non-ROS import
# import acoustic_scene_analysis as asa

class RosInterface:
    def __init__(self):
        home_dir = str(Path.home())
        self.save_dir = home_dir + "/Music/"
        # 1) Write a subscriber to the /audio topic and populate a numpy array with the uint8[] data
        # self.maxSize = 7
        # self.queue = [None] * 7
        # self.head = self.tail = -1
        self.wav_data = []
        self.arraylength = 0
        self.msg_count = 0
        rospy.Subscriber("/audio", MyAudioData, self.raw_callback)

    def enqueue(self, data):
        # if the queue is full
        if ((self.tail + 1) % self.maxSize == self.head):
            # convert to mp3
            # publish mp3
            # remove the first element (call dequeue)
            pass
        elif (self.head == -1):
            self.head = 0
            self.tail = 0
        else:
            self.tail = (self.tail + 1) % self.maxSize
        self.queue[self.tail] = data

    def dequeue(self):
        # if the queue is empty
        if (self.head == -1):
            pass
        # otherwise take the element at the head
        else:
            temp = self.queue[self.head]

    def raw_callback(self, msg):
        # print("Length of uint8[]:", len(msg.data))
        self.wav_data.append(msg.data)
        # if (self.msg_count < 10000):
        #     self.arraylength += len(msg.data)
        #     print(self.nparray)
        #     print(len(bytes))
        # else:
        #     self.byteArray[self.msg_count] = bytes
        #     print(len(bytes))
        self.msg_count += 1

    def on_shutdown(self):
        wav_arr = np.array(self.wav_data)
        print(wav_arr)
        print(wav_arr.shape)
        write(self.save_dir + 'test.wav', 44100, wav_arr)
        print("check music")

# 2) Check you're decoding the audio ROS message correctly by saving it to a .wav file
# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding in audio_capture.cpp, seeing if that changes anything, e.g. encoding with mp3 instead.
# 4) If you've made it this far, well done. Try to find Utku's function to pass the np array to.

if __name__ == '__main__':
    try:
        rospy.init_node('ros_interface', anonymous=True)
        ros_int = RosInterface()
        rospy.on_shutdown(ros_int.on_shutdown)
        rospy.spin()
    except rospy.ROSInterruptException:
        print('Audio converter node failed!')
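
For reference, the deleted node buffers raw uint8[] chunks from /audio and only writes them out on shutdown. For the saved file to be playable, the byte stream has to be reinterpreted as 16-bit PCM samples rather than written as an array of uint8 values. A minimal sketch of that decode step (the helper name, mono layout, and 44100 Hz rate are assumptions, not part of the original file):

import numpy as np
from scipy.io.wavfile import write

def save_buffered_audio(wav_data, path, rate=44100):
    # wav_data: list of uint8 tuples, one per MyAudioData message (assumed layout)
    raw = b"".join(bytes(chunk) for chunk in wav_data)
    # Reinterpret the byte stream as signed 16-bit PCM (assumes an even byte count)
    samples = np.frombuffer(raw, dtype=np.int16)
    write(path, rate, samples)  # scipy always writes WAV data, whatever the extension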

vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py → vz_acoustic_scene_analysis/scripts/stretch_audio.py (+20, -53)

@@ -185,53 +185,12 @@ def get_respeaker_device_id():
    return device_id

RESPEAKER_RATE = 16000
RESPEAKER_CHANNELS = 6  # must flash 6_channels_firmware.bin first
RESPEAKER_WIDTH = 2
RESPEAKER_INDEX = get_respeaker_device_id()
CHUNK = 1024

# def record_audio(seconds=5):
#     p = pyaudio.PyAudio()
#     stream = p.open(rate=RESPEAKER_RATE,
#                     format=p.get_format_from_width(RESPEAKER_WIDTH),
#                     channels=RESPEAKER_CHANNELS,
#                     input=True,
#                     input_device_index=RESPEAKER_INDEX,
#                     output=False)
#     frames = []
#     for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
#         data = stream.read(CHUNK)
#         a = np.frombuffer(data, dtype=np.int16)[0::6]  # extracts fused channel 0
#         frames.append(a.tobytes())
#     stream.stop_stream()
#     stream.close()
#     p.terminate()
#     return frames

def save_wav(frames, fname):
    p = pyaudio.PyAudio()
    wf = wave.open(fname, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(p.get_format_from_width(RESPEAKER_WIDTH)))
    wf.setframerate(RESPEAKER_RATE)
    for val in frames:
        wf.writeframes(b''.join(val))
    wf.close()

# def rec_and_save():
#     print("* recording 5 seconds")
#     frames = record_audio()  # rospy.Timer(rospy.Duration(0.2), record_audio)
#     print("* done")
#     file_name = "/home/hello-robot/Desktop/output_audio.wav"
#     save_wav(frames, file_name)
#     # send test.wav files
#     print("* done")

class Audio:
    def __init__(self):
@@ -239,24 +198,25 @@ class Audio:
        self.wav_list = []
        self.record_count = 0  # Count how many times we've recorded f seconds of audio
        self.file_name = "/home/hello-robot/Desktop/output_audio.wav"
        # Publisher for audio data
        # self.audio_data_pub = rospy.Publisher("/wav_data", )

    def write_audio(self):
        recorded_frames = self.record_audio(.5)
        print("i haz frames: ", self.record_count)

    def get_audio(self):
        recorded_frames = self.record_audio(.2)  # set param here: chunk size
        self.wav_list.append(recorded_frames)
        self.record_count += 1
        # Every 5 seconds for
        if ((self.record_count % 5) == 0):
        if ((self.record_count % 2) == 0):  # set param: sequence size
            return_list = self.wav_list
            # Remove the first object (0.2 seconds of audio data)
            self.wav_list.pop(0)
            # send the frames at the beginning of the list (save as wav for now)
            save_wav(self.wav_list, self.file_name)
            # Empty the list
            self.wav_list = []
            print("5 seconds have passed, very nice")
            return return_list

    def record_audio(self, seconds=5):
        p = pyaudio.PyAudio()
        print ("i NO haz stream")
        stream = p.open(rate=RESPEAKER_RATE,
                        format=p.get_format_from_width(RESPEAKER_WIDTH),
@@ -264,12 +224,10 @@ class Audio:
                        input=True,
                        input_device_index=RESPEAKER_INDEX,
                        output=False)
        print ("i haz stream")
        frames = []
        for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
            data = stream.read(CHUNK)
            print("I haz data from stream: ", i)
            a = np.frombuffer(data, dtype=np.int16)[0::6]  # extracts fused channel 0
            frames.append(a.tobytes())
@@ -282,6 +240,7 @@ class Audio:
    def process_audio_loop(self):
        rospy.init_node("audio_capture")
        # rospy.param  (stray line; presumably meant to read a parameter via rospy.get_param)
        audio_count = 0
        dev = usb.core.find(idVendor=0x2886, idProduct=0x0018)
        try:
@@ -289,7 +248,15 @@ class Audio:
            respeaker = Tuning()
            while True:
                if respeaker.is_voice() == 1:
                    self.write_audio()
                    # wav_data = list of lists of bytes
                    wav_data = self.get_audio()
                    if isinstance(wav_data, list):
                        # Maybe publish wav_data as a ROS message?
                        pass
                        # publish_wav(wav_data)  # lol
                        # Call Utku's function:
                        # asa_out = process_wav(wav_data, asa_params)
                        # Convert asa_out to a ROS message
                    audio_count += 1
                    print(audio_count)
        except usb.core.USBError:
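
The reworked loop replaces the one-shot write_audio() with get_audio(), which accumulates 0.2-second recordings and periodically hands the buffered window back to the caller. A standalone sketch of that buffering idea (class and parameter names are placeholders; the committed code returns the buffer every second call and then both pops the first chunk and clears the list, so this is a cleaned-up sliding-window interpretation of the intent rather than its exact behavior):

from collections import deque

class ChunkWindow:
    """Collect fixed-length audio chunks; emit an overlapping window once full."""
    def __init__(self, window_size=2):
        self.window_size = window_size
        self.chunks = deque()

    def add(self, frames):
        # frames: one recorded chunk (e.g. a list of byte strings)
        self.chunks.append(frames)
        if len(self.chunks) == self.window_size:
            window = list(self.chunks)
            self.chunks.popleft()  # drop the oldest chunk so the window slides
            return window
        return None  # not enough chunks buffered yet

With window_size=2 and 0.2-second recordings, each emitted window covers 0.4 seconds of audio and overlaps the previous window by 0.2 seconds.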
