From fb487bb8a5ba0b8b1d58ad375b1b3153e4911f51 Mon Sep 17 00:00:00 2001
From: Hongyu Li
Date: Tue, 22 Mar 2022 13:05:08 -0400
Subject: [PATCH] Replace GStreamer audio capture with PyAudio wav-chunk pipeline
---
.../launch/audio.launch | 13 -
.../launch/audio_test.launch | 4 -
.../launch/wav_audio.launch | 4 +
.../scripts/audio_capture.cpp | 236 ------------------
.../scripts/ros_interface.py | 89 -------
...etch_respeak_test2.py => stretch_audio.py} | 73 ++----
6 files changed, 24 insertions(+), 395 deletions(-)
delete mode 100644 vz_acoustic_scene_analysis/launch/audio.launch
delete mode 100644 vz_acoustic_scene_analysis/launch/audio_test.launch
create mode 100644 vz_acoustic_scene_analysis/launch/wav_audio.launch
delete mode 100644 vz_acoustic_scene_analysis/scripts/audio_capture.cpp
delete mode 100755 vz_acoustic_scene_analysis/scripts/ros_interface.py
rename vz_acoustic_scene_analysis/scripts/{stretch_respeak_test2.py => stretch_audio.py} (85%)
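
Net effect: the GStreamer-based C++ capture node and the queue-based
ros_interface.py subscriber are removed; the renamed stretch_audio.py now
records 0.2 s ReSpeaker chunks with PyAudio and returns accumulated windows
from get_audio() instead of writing wav files to disk. A minimal smoke test
of the new class might look like this sketch (illustrative only: the /tmp
path is made up, and a flashed 6-channel ReSpeaker must be attached for the
module-level device lookup to succeed):

    import wave
    from stretch_audio import Audio, RESPEAKER_RATE, RESPEAKER_WIDTH

    audio = Audio()
    window = None
    while window is None:      # get_audio() returns None until a window is ready
        window = audio.get_audio()

    # window is a list of chunks; each chunk is a list of channel-0 byte frames
    with wave.open("/tmp/check_audio.wav", "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(RESPEAKER_WIDTH)
        wf.setframerate(RESPEAKER_RATE)
        for chunk in window:
            wf.writeframes(b"".join(chunk))
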
diff --git a/vz_acoustic_scene_analysis/launch/audio.launch b/vz_acoustic_scene_analysis/launch/audio.launch
deleted file mode 100644
index 2912b46..0000000
--- a/vz_acoustic_scene_analysis/launch/audio.launch
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/launch/audio_test.launch b/vz_acoustic_scene_analysis/launch/audio_test.launch
deleted file mode 100644
index eab5a03..0000000
--- a/vz_acoustic_scene_analysis/launch/audio_test.launch
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/launch/wav_audio.launch b/vz_acoustic_scene_analysis/launch/wav_audio.launch
new file mode 100644
index 0000000..663bb49
--- /dev/null
+++ b/vz_acoustic_scene_analysis/launch/wav_audio.launch
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp b/vz_acoustic_scene_analysis/scripts/audio_capture.cpp
deleted file mode 100644
index 9e56263..0000000
--- a/vz_acoustic_scene_analysis/scripts/audio_capture.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-#include <stdio.h>
-#include <gst/gst.h>
-#include <gst/app/gstappsink.h>
-#include <boost/thread.hpp>
-
-#include <ros/ros.h>
-
-#include "vz_acoustic_scene_analysis/MyAudioData.h"
-#include "vz_acoustic_scene_analysis/MyAudioInfo.h"
-
-namespace audio_transport
-{
- class RosGstCapture
- {
- public:
- RosGstCapture()
- {
- _bitrate = 192;
-
- std::string dst_type;
-
- // Encode the audio, or publish raw wave data
- ros::param::param<std::string>("~format", _format, "mp3");
- ros::param::param<std::string>("~sample_format", _sample_format, "S16LE");
-
- // The bitrate at which to encode the audio
- ros::param::param("~bitrate", _bitrate, 192);
-
- // only available for raw data
- ros::param::param("~channels", _channels, 1);
- ros::param::param("~depth", _depth, 16);
- ros::param::param("~sample_rate", _sample_rate, 16000);
-
- // The destination of the audio
- ros::param::param("~dst", dst_type, "appsink");
-
- // The source of the audio
- //ros::param::param("~src", source_type, "alsasrc");
- std::string device;
- ros::param::param("~device", device, "");
-
- _pub = _nh.advertise<vz_acoustic_scene_analysis::MyAudioData>("audio", 10, true);
- _pub_info = _nh.advertise<vz_acoustic_scene_analysis::MyAudioInfo>("audio_info", 1, true);
-
- _loop = g_main_loop_new(NULL, false);
- _pipeline = gst_pipeline_new("ros_pipeline");
- _bus = gst_pipeline_get_bus(GST_PIPELINE(_pipeline));
- gst_bus_add_signal_watch(_bus);
- g_signal_connect(_bus, "message::error",
- G_CALLBACK(onMessage), this);
- g_object_unref(_bus);
-
- // We create the sink first, just for convenience
- if (dst_type == "appsink")
- {
- _sink = gst_element_factory_make("appsink", "sink");
- g_object_set(G_OBJECT(_sink), "emit-signals", true, NULL);
- g_object_set(G_OBJECT(_sink), "max-buffers", 100, NULL);
- g_signal_connect( G_OBJECT(_sink), "new-sample",
- G_CALLBACK(onNewBuffer), this);
- }
- else
- {
- ROS_INFO("file sink to %s", dst_type.c_str());
- _sink = gst_element_factory_make("filesink", "sink");
- g_object_set( G_OBJECT(_sink), "location", dst_type.c_str(), NULL);
- }
-
- _source = gst_element_factory_make("alsasrc", "source");
- // if device isn't specified, it will use the default which is
- // the alsa default source.
- // A valid device will be of the form hw:0,0, with other card and
- // device numbers available depending on the hardware.
- if (device != "")
- {
- // gchar *gst_device = device.c_str();
- g_object_set(G_OBJECT(_source), "device", device.c_str(), NULL);
- }
-
- GstCaps *caps;
- caps = gst_caps_new_simple("audio/x-raw",
- "format", G_TYPE_STRING, _sample_format.c_str(),
- "channels", G_TYPE_INT, _channels,
- "width", G_TYPE_INT, _depth,
- "depth", G_TYPE_INT, _depth,
- "rate", G_TYPE_INT, _sample_rate,
- "signed", G_TYPE_BOOLEAN, TRUE,
- NULL);
-
- gboolean link_ok;
- if (_format == "mp3"){
- _filter = gst_element_factory_make("capsfilter", "filter");
- g_object_set( G_OBJECT(_filter), "caps", caps, NULL);
- gst_caps_unref(caps);
-
- _convert = gst_element_factory_make("audioconvert", "convert");
- if (!_convert) {
- ROS_ERROR_STREAM("Failed to create audioconvert element");
- exitOnMainThread(1);
- }
-
- _encode = gst_element_factory_make("lamemp3enc", "encoder");
- if (!_encode) {
- ROS_ERROR_STREAM("Failed to create encoder element");
- exitOnMainThread(1);
- }
- g_object_set( G_OBJECT(_encode), "target", 1, NULL);
- g_object_set( G_OBJECT(_encode), "bitrate", _bitrate, NULL);
-
- gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _convert, _encode, _sink, NULL);
- link_ok = gst_element_link_many(_source, _filter, _convert, _encode, _sink, NULL);
- } else if (_format == "wave") {
- if (dst_type == "appsink") {
- g_object_set( G_OBJECT(_sink), "caps", caps, NULL);
- gst_caps_unref(caps);
- gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
- link_ok = gst_element_link_many( _source, _sink, NULL);
- } else {
- _filter = gst_element_factory_make("wavenc", "filter");
- gst_bin_add_many( GST_BIN(_pipeline), _source, _filter, _sink, NULL);
- link_ok = gst_element_link_many( _source, _filter, _sink, NULL);
- }
- } else {
- ROS_ERROR_STREAM("format must be \"wave\" or \"mp3\"");
- exitOnMainThread(1);
- }
- /*}
- else
- {
- _sleep_time = 10000;
- _source = gst_element_factory_make("filesrc", "source");
- g_object_set(G_OBJECT(_source), "location", source_type.c_str(), NULL);
-
- gst_bin_add_many( GST_BIN(_pipeline), _source, _sink, NULL);
- gst_element_link_many(_source, _sink, NULL);
- }
- */
-
- if (!link_ok) {
- ROS_ERROR_STREAM("Unsupported media type.");
- exitOnMainThread(1);
- }
-
- gst_element_set_state(GST_ELEMENT(_pipeline), GST_STATE_PLAYING);
-
- _gst_thread = boost::thread( boost::bind(g_main_loop_run, _loop) );
-
- vz_acoustic_scene_analysis::MyAudioInfo info_msg;
- info_msg.channels = _channels;
- info_msg.sample_rate = _sample_rate;
- info_msg.sample_format = _sample_format;
- info_msg.bitrate = _bitrate;
- info_msg.coding_format = _format;
- _pub_info.publish(info_msg);
- }
-
- ~RosGstCapture()
- {
- g_main_loop_quit(_loop);
- gst_element_set_state(_pipeline, GST_STATE_NULL);
- gst_object_unref(_pipeline);
- g_main_loop_unref(_loop);
- }
-
- void exitOnMainThread(int code)
- {
- exit(code);
- }
-
- void publish( const vz_acoustic_scene_analysis::MyAudioData &msg )
- {
- _pub.publish(msg);
- }
-
- static GstFlowReturn onNewBuffer (GstAppSink *appsink, gpointer userData)
- {
- RosGstCapture *server = reinterpret_cast<RosGstCapture *>(userData);
- GstMapInfo map;
-
- GstSample *sample;
- g_signal_emit_by_name(appsink, "pull-sample", &sample);
-
- GstBuffer *buffer = gst_sample_get_buffer(sample);
-
- vz_acoustic_scene_analysis::MyAudioData msg;
- gst_buffer_map(buffer, &map, GST_MAP_READ);
- msg.data.resize( map.size );
-
- memcpy( &msg.data[0], map.data, map.size );
-
- gst_buffer_unmap(buffer, &map);
- gst_sample_unref(sample);
-
- server->publish(msg);
-
- return GST_FLOW_OK;
- }
-
- static gboolean onMessage (GstBus *bus, GstMessage *message, gpointer userData)
- {
- RosGstCapture *server = reinterpret_cast<RosGstCapture *>(userData);
- GError *err;
- gchar *debug;
-
- gst_message_parse_error(message, &err, &debug);
- ROS_ERROR_STREAM("gstreamer: " << err->message);
- g_error_free(err);
- g_free(debug);
- g_main_loop_quit(server->_loop);
- server->exitOnMainThread(1);
- return FALSE;
- }
-
- private:
- ros::NodeHandle _nh;
- ros::Publisher _pub;
- ros::Publisher _pub_info;
-
- boost::thread _gst_thread;
-
- GstElement *_pipeline, *_source, *_filter, *_sink, *_convert, *_encode;
- GstBus *_bus;
- int _bitrate, _channels, _depth, _sample_rate;
- GMainLoop *_loop;
- std::string _format, _sample_format;
- };
-}
-
-int main (int argc, char **argv)
-{
- ros::init(argc, argv, "audio_capture");
- gst_init(&argc, &argv);
-
- audio_transport::RosGstCapture server;
- ros::spin();
-}
\ No newline at end of file
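
For reference, the deleted node's default raw caps were S16LE, 1 channel,
16 kHz; the PyAudio capture that replaces it (below) grabs the same format.
A rough single-chunk equivalent, under those assumptions:

    import pyaudio

    p = pyaudio.PyAudio()
    stream = p.open(rate=16000,              # ~sample_rate default
                    format=pyaudio.paInt16,  # S16LE, depth 16
                    channels=1,              # ~channels default
                    input=True)
    data = stream.read(1024)                 # one CHUNK of raw PCM bytes
    stream.stop_stream()
    stream.close()
    p.terminate()
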
diff --git a/vz_acoustic_scene_analysis/scripts/ros_interface.py b/vz_acoustic_scene_analysis/scripts/ros_interface.py
deleted file mode 100755
index b18612e..0000000
--- a/vz_acoustic_scene_analysis/scripts/ros_interface.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#! /usr/bin/python3
-
-from sklearn.cluster import k_means
-import rospy
-import numpy as np
-from vz_acoustic_scene_analysis.msg import MyAudioData
-from std_msgs.msg import String
-from scipy.io.wavfile import write
-from collections import deque
-import time
-from pathlib import Path
-
-
-
-# Non ROS import
-# import acoustic_scene_analysis as asa
-
-class RosInterface:
- def __init__(self):
- home_dir = str(Path.home())
- self.save_dir = home_dir + "/Music/"
- # 1) Write subscriber to /audio topic and populate a numpy data structure (array) with the uint8[] data
- # self.maxSize = 7
- # self.queue = [None] * 7
- # self.head = self.tail = -1
- self.wav_data = []
- self.arraylength = 0
- self.msg_count = 0
-
- rospy.Subscriber("/audio", MyAudioData, self.raw_callback)
-
- def enqueue(self,data):
- # if queue is full
- if ((self.tail+1) % self.k == self.head):
- # convert to mp3
- # publish mp3
- # remove the first element (call dequeue)
- pass
- elif (self.head == -1):
- self.head = 0
- self.tail = 0
- else:
- self.tail = (self.tail +1) % self.maxSize
- self.queue[self.tail] = data
-
- def dequeue(self):
- # if empty queue
- if (self.head == -1):
- pass
- # if the self
- else:
- temp = self.queue[self.head]
-
- def raw_callback(self, msg):
- # print("Length of uint8[]:", len(msg.data))
- self.wav_data.append(msg.data)
-
- # if (self.msg_count < 10000):
- # self.arraylength += len(msg.data)
- # print(self.nparray)
- # print(len(bytes))
- # else :
- # self.byteArray[self.msg_count] = bytes
- # print(len(bytes))
- self.msg_count += 1
-
- def on_shutdown(self):
- wav_arr = np.array(self.wav_data)
- print(wav_arr)
- print(wav_arr.shape)
- write(self.save_dir +'test.mp3', 44100, wav_arr)
- print("check music")
- pass
-
-# 2) Check you're "Decoding" the audio ROS message correctly by saving to a .wav file
-# 3) Be curious. Listen to the .wav file and see if it sounds gucci, and then maybe twiddle about with the encoding on the audio_capture.cpp, seeing if that changes anything e.g. encoding with mp3 instead.
-# 4) If you've made it this far, well done. Try to find Utku's function to pass the np array to.
-
-
-if __name__ == '__main__':
- try:
- rospy.init_node('ros_interface', anonymous=True)
- ros_int = RosInterface()
-
- rospy.on_shutdown(ros_int.on_shutdown)
- rospy.spin()
- except rospy.ROSInterruptException:
- print('Audio converter node failed!')
- pass
\ No newline at end of file
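
Note that the deleted on_shutdown handed a list of uint8[] tuples, under a
.mp3 name, to scipy's WAV writer. A corrected dump would reinterpret the
payloads as 16-bit PCM and use a .wav name; a sketch (the 16 kHz rate
matches the capture node's default and is an assumption here):

    import numpy as np
    from scipy.io.wavfile import write

    def dump_wav(frames, path="/tmp/test.wav", rate=16000):
        # frames: list of MyAudioData uint8[] payloads, reinterpreted as
        # little-endian 16-bit PCM samples
        pcm = np.frombuffer(b"".join(bytes(f) for f in frames), dtype=np.int16)
        write(path, rate, pcm)  # scipy always emits WAV, whatever the extension
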
diff --git a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py b/vz_acoustic_scene_analysis/scripts/stretch_audio.py
similarity index 85%
rename from vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
rename to vz_acoustic_scene_analysis/scripts/stretch_audio.py
index 27157f8..14768db 100755
--- a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
+++ b/vz_acoustic_scene_analysis/scripts/stretch_audio.py
@@ -185,53 +185,12 @@ def get_respeaker_device_id():
return device_id
-
RESPEAKER_RATE = 16000
RESPEAKER_CHANNELS = 6 # must flash 6_channels_firmware.bin first
RESPEAKER_WIDTH = 2
RESPEAKER_INDEX = get_respeaker_device_id()
CHUNK = 1024
-# def record_audio(seconds=5):
-# p = pyaudio.PyAudio()
-# stream = p.open(rate=RESPEAKER_RATE,
-# format=p.get_format_from_width(RESPEAKER_WIDTH),
-# channels=RESPEAKER_CHANNELS,
-# input=True,
-# input_device_index=RESPEAKER_INDEX,
-# output= False)
-
-# frames = []
-# for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
-# data = stream.read(CHUNK)
-# a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0
-# frames.append(a.tobytes())
-
-# stream.stop_stream()
-# stream.close()
-# p.terminate()
-
-# return frames
-
-
-def save_wav(frames, fname):
- p = pyaudio.PyAudio()
- wf = wave.open(fname, 'wb')
- wf.setnchannels(1)
- wf.setsampwidth(p.get_sample_size(p.get_format_from_width(RESPEAKER_WIDTH)))
- wf.setframerate(RESPEAKER_RATE)
- for val in frames:
- wf.writeframes(b''.join(val))
- wf.close()
-
-# def rec_and_save():
-# print("* recording 5 seconds")
-# frames = record_audio() # rospy.Timer(rospy.Duration(0.2), record_audio)
-# print("* done")
-# file_name = "/home/hello-robot/Desktop/output_audio.wav"
-# save_wav(frames, file_name)
-# # send test.wav files
-# print("* done")
class Audio:
def __init__(self):
@@ -239,24 +198,25 @@ class Audio:
self.wav_list = []
self.record_count = 0 # Count how many times we've recorded f seconds of audio
self.file_name = "/home/hello-robot/Desktop/output_audio.wav"
+ # Publisher for Audio Data
+ # self.audio_data_pub = rospy.Publisher("/wav_data", )
- def write_audio(self):
- recorded_frames = self.record_audio(.5)
- print("i haz frames: ", self.record_count)
+ def get_audio(self):
+ recorded_frames = self.record_audio(.2) # TODO: make the chunk duration (seconds) a ROS param
self.wav_list.append(recorded_frames)
self.record_count += 1
# Once a full window of chunks has accumulated, hand it back
- if ((self.record_count % 5) == 0):
+ if ((self.record_count % 2) == 0): # TODO: make the window (sequence) size a ROS param
+ return_list = list(self.wav_list) # copy, so the pop() below doesn't shorten the returned window
+ # Drop the oldest chunk (0.2 seconds of audio data)
+ self.wav_list.pop(0)
# return the accumulated frames (previously saved straight to a wav file)
- save_wav(self.wav_list,self.file_name)
- # Empty list
- self.wav_list = []
- print("5 seconds have passed, very nice")
+ return return_list
+
def record_audio(self, seconds=5):
p = pyaudio.PyAudio()
- print ("i NO haz stream")
stream = p.open(rate=RESPEAKER_RATE,
format=p.get_format_from_width(RESPEAKER_WIDTH),
@@ -264,12 +224,10 @@ class Audio:
input=True,
input_device_index=RESPEAKER_INDEX,
output= False)
- print ("i haz stream")
frames = []
for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
data = stream.read(CHUNK)
- print("I haz data from stream: ", i)
a = np.frombuffer(data,dtype=np.int16)[0::6] # extracts fused channel 0
frames.append(a.tobytes())
@@ -282,6 +240,7 @@ class Audio:
def process_audio_loop(self):
rospy.init_node("audio_capture")
+ # TODO: read the chunk and window sizes via rospy.get_param()
audio_count = 0
dev = usb.core.find(idVendor=0x2886, idProduct=0x0018)
try:
@@ -289,7 +248,15 @@ class Audio:
respeaker = Tuning()
while True:
if respeaker.is_voice() == 1:
- self.write_audio()
+ # wav_data = list of lists of bytes
+ wav_data = self.get_audio()
+ if (str(type(wav_data)) == "<class 'list'>"):
+ # Maybe publish wav_data as ROS message?
+ pass
+ # publish_wav(wav_data)
+ # Call Utku's processing function:
+ # asa_out = process_wav(wav_data, asa_params)
+ # Convert asa_out to ROS message
audio_count += 1
print(audio_count)
except usb.core.USBError:
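
One possible shape for the "/wav_data" publisher stubbed out above; the
message type and helper name are assumptions, not part of this patch:

    import rospy
    from std_msgs.msg import UInt8MultiArray

    def make_wav_publisher():
        # call after rospy.init_node(...)
        return rospy.Publisher("/wav_data", UInt8MultiArray, queue_size=10)

    def publish_wav(wav_data, pub):
        # wav_data: list of chunks, each a list of channel-0 byte frames
        msg = UInt8MultiArray()
        msg.data = b"".join(b"".join(chunk) for chunk in wav_data)
        pub.publish(msg)
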