From 9f5e91ade24ff4e4e7ac77d60a33f05445201323 Mon Sep 17 00:00:00 2001
From: Hongyu Li
Date: Fri, 18 Mar 2022 17:05:40 -0400
Subject: [PATCH] moved respeak test node here

---
 .../launch/audio.launch              |   5 +-
 .../launch/audio_test.launch         |   4 +
 .../msg/MyAudioData.msg              |   2 +-
 .../scripts/ros_interface.py         |  16 +-
 .../scripts/stretch_respeak_test2.py | 263 ++++++++++++++++++
 5 files changed, 280 insertions(+), 10 deletions(-)
 create mode 100644 vz_acoustic_scene_analysis/launch/audio_test.launch
 create mode 100644 vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py

diff --git a/vz_acoustic_scene_analysis/launch/audio.launch b/vz_acoustic_scene_analysis/launch/audio.launch
index e954e2c..2912b46 100644
--- a/vz_acoustic_scene_analysis/launch/audio.launch
+++ b/vz_acoustic_scene_analysis/launch/audio.launch
@@ -1,13 +1,12 @@
-
+

-
-
+

diff --git a/vz_acoustic_scene_analysis/launch/audio_test.launch b/vz_acoustic_scene_analysis/launch/audio_test.launch
new file mode 100644
index 0000000..eab5a03
--- /dev/null
+++ b/vz_acoustic_scene_analysis/launch/audio_test.launch
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/msg/MyAudioData.msg b/vz_acoustic_scene_analysis/msg/MyAudioData.msg
index 38c1e43..1735052 100644
--- a/vz_acoustic_scene_analysis/msg/MyAudioData.msg
+++ b/vz_acoustic_scene_analysis/msg/MyAudioData.msg
@@ -1 +1 @@
-uint16[] data
\ No newline at end of file
+uint8[] data
\ No newline at end of file
diff --git a/vz_acoustic_scene_analysis/scripts/ros_interface.py b/vz_acoustic_scene_analysis/scripts/ros_interface.py
index 959d36f..b18612e 100755
--- a/vz_acoustic_scene_analysis/scripts/ros_interface.py
+++ b/vz_acoustic_scene_analysis/scripts/ros_interface.py
@@ -23,7 +23,7 @@ class RosInterface:
         # self.maxSize = 7
         # self.queue = [None] * 7
         # self.head = self.tail = -1
-        self.nparray = np.empty(1)
+        self.wav_data = []
         self.arraylength = 0
         self.msg_count = 0
@@ -53,9 +53,11 @@ class RosInterface:
     def raw_callback(self, msg):
         # print("Length of uint8[]:", len(msg.data))
-        if (self.msg_count < 50):
-            self.arraylength += len(msg.data)
-            np.append(self.nparray,bytes)
+        self.wav_data.append(msg.data)
+
+        # if (self.msg_count < 10000):
+        #     self.arraylength += len(msg.data)
+        # print(self.nparray)
         # print(len(bytes))
         # else :
         #     self.byteArray[self.msg_count] = bytes
         self.msg_count += 1
@@ -63,8 +65,10 @@ class RosInterface:
     def on_shutdown(self):
-        print(str(self.arraylength))
-        write(self.save_dir +'test.wav', self.arraylength, self.nparray)
+        wav_arr = np.array(self.wav_data)
+        print(wav_arr)
+        print(wav_arr.shape)
+        write(self.save_dir +'test.mp3', 44100, wav_arr)
         print("check music")
         pass
diff --git a/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py b/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
new file mode 100644
index 0000000..b971055
--- /dev/null
+++ b/vz_acoustic_scene_analysis/scripts/stretch_respeak_test2.py
@@ -0,0 +1,263 @@
#!/usr/bin/env python3

from __future__ import print_function
import pyaudio
import wave
import numpy as np
import usb.core
import struct
import time
import os
import sys
import rospy
from contextlib import contextmanager
import stretch_body.hello_utils as hu
hu.print_stretch_re_use()


# Temporarily redirect stderr (fd 2) to /dev/null so the ALSA/JACK warnings
# that PyAudio emits while probing audio devices do not flood the console.
@contextmanager
def ignore_stderr():
    devnull = None
    try:
        devnull = os.open(os.devnull, os.O_WRONLY)
        stderr = os.dup(2)
        sys.stderr.flush()
        os.dup2(devnull, 2)
        try:
            yield
        finally:
            os.dup2(stderr, 2)
            os.close(stderr)
    finally:
        if devnull is not None:
            os.close(devnull)


# parameter list
# name: (id, offset, type, max, min, r/w, info)
PARAMETERS = {
    'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
    'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
    'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
    'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
    'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
    'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
    'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
    'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status.', '0 = false (signal detected)', '1 = true (silence detected)'),
    'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control.', '0 = OFF', '1 = ON'),
    'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor.', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
    'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal.', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
    'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor.', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
    'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
    'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
    'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
    'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
    'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
    'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
    'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
    'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise. min .. max attenuation'),
    'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
    'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
    'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
    'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
    'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
    'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
    'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
    'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
    'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
    'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
    'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
    'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
    'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
    'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
    'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR.', '[0.0 .. 3.0] (default: 1.0)'),
    'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR.', '[0.0 .. 3.0] (default: 1.1)'),
    'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
    'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
    'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
    # 'KEYWORDDETECT': (20, 0, 'int', 1, 0, 'ro', 'Keyword detected. Current value so needs polling.'),
    'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
}
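
# Example (illustrative only; hypothetical usage, not taken from the original
# file): each PARAMETERS value is laid out as (register id, offset, type, max,
# min, access, description...). Reading 'DOAANGLE' therefore issues a vendor
# control transfer against register 21, offset 0, and returns an int in
# [0, 359]. With the Tuning class defined below, typical calls would look like:
#
#     respeaker = Tuning()
#     print("speech detected:", respeaker.read('SPEECHDETECTED'))
#     print("speaker direction (deg):", respeaker.direction)
#     respeaker.write('AGCONOFF', 0)   # only 'rw' parameters can be written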


class Tuning:
    TIMEOUT = 100000

    # self.dev is the pyusb handle for the ReSpeaker Mic Array v2.0
    # (idVendor 0x2886, idProduct 0x0018); read()/write() below talk to it
    # through vendor-specific USB control transfers.
    def __init__(self):
        self.TIMEOUT = 100000
        self.dev = usb.core.find(idVendor=0x2886, idProduct=0x0018)

        if not self.dev:
            raise RuntimeError("Failed to find Respeaker device")
        rospy.loginfo("Initializing Respeaker device")
        # Initialize inputs and outputs to other module code
        self.audio_signal = None
        self.prediction = None
        # self.param = rospy.get_param('')

    def write(self, name, value):
        try:
            data = PARAMETERS[name]
        except KeyError:
            return

        if data[5] == 'ro':
            raise ValueError('{} is read-only'.format(name))

        id = data[0]

        # 4 bytes offset, 4 bytes value, 4 bytes type
        if data[2] == 'int':
            payload = struct.pack(b'iii', data[1], int(value), 1)
        else:
            payload = struct.pack(b'ifi', data[1], float(value), 0)

        self.dev.ctrl_transfer(
            usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
            0, 0, id, payload, self.TIMEOUT)

    def read(self, name):
        try:
            data = PARAMETERS[name]
        except KeyError:
            return

        id = data[0]

        # Build the command word: bit 7 (0x80) requests the current value,
        # bit 6 (0x40) marks the parameter as an int, and the low bits carry
        # the parameter offset within the register.
        cmd = 0x80 | data[1]
        if data[2] == 'int':
            cmd |= 0x40

        length = 8

        response = self.dev.ctrl_transfer(
            usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
            0, cmd, id, length, self.TIMEOUT)

        response = struct.unpack(b'ii', response.tobytes())

        if data[2] == 'int':
            result = response[0]
        else:
            result = response[0] * (2.**response[1])

        return result

    def set_vad_threshold(self, db):
        self.write('GAMMAVAD_SR', db)

    def is_voice(self):
        return self.read('VOICEACTIVITY')

    @property
    def direction(self):
        return self.read('DOAANGLE')

    @property
    def version(self):
        return self.dev.ctrl_transfer(
            usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
            0, 0x80, 0, 1, self.TIMEOUT)[0]

    def close(self):
        """
        close the interface
        """
        usb.util.dispose_resources(self.dev)


def get_respeaker_device_id():

    with ignore_stderr():
        p = pyaudio.PyAudio()
    info = p.get_host_api_info_by_index(0)
    num_devices = info.get('deviceCount')

    device_id = -1
    for i in range(num_devices):
        if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
            if "ReSpeaker" in p.get_device_info_by_host_api_device_index(0, i).get('name'):
                device_id = i

    return device_id


RESPEAKER_RATE = 16000
RESPEAKER_CHANNELS = 6  # must flash 6_channels_firmware.bin first
RESPEAKER_WIDTH = 2
RESPEAKER_INDEX = get_respeaker_device_id()
CHUNK = 1024


def record_audio(seconds=5):
    p = pyaudio.PyAudio()
    stream = p.open(rate=RESPEAKER_RATE,
                    format=p.get_format_from_width(RESPEAKER_WIDTH),
                    channels=RESPEAKER_CHANNELS,
                    input=True,
                    input_device_index=RESPEAKER_INDEX,
                    output=False)

    frames = []
    for i in range(0, int(RESPEAKER_RATE / CHUNK * seconds)):
        data = stream.read(CHUNK)
        a = np.frombuffer(data, dtype=np.int16)[0::6]  # extract channel 0, the processed (beamformed) channel
        frames.append(a.tobytes())

    stream.stop_stream()
    stream.close()
    p.terminate()

    return frames


def save_wav(frames, fname):
    p = pyaudio.PyAudio()
    wf = wave.open(fname, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(p.get_format_from_width(RESPEAKER_WIDTH)))
    wf.setframerate(RESPEAKER_RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
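
# Example (illustrative sketch; the file path is hypothetical): record_audio()
# returns a list of raw int16 byte chunks from the ReSpeaker's processed
# channel 0, and save_wav() writes them out as 16 kHz mono WAV:
#
#     frames = record_audio(seconds=5)
#     save_wav(frames, "/tmp/respeaker_test.wav")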


def run_mic(respeaker, num, printed_wait_statement):
    if not printed_wait_statement and respeaker.is_voice() == 0:
        print("\n* waiting for audio...")
        printed_wait_statement = True
    elif respeaker.is_voice() == 1:
        print("* recording 5 seconds")
        frames = record_audio()
        print("* done")
        # save_wav(frames, "/home/hello-robot/Desktop/output_audio_" + str(num) + ".wav")
        num += 1
        # send test.wav files
        print("* done")
        printed_wait_statement = False
    return num, printed_wait_statement


if __name__ == "__main__":
    try:
        rospy.init_node("audio_capture")
        num_files = 0
        dev = usb.core.find(idVendor=0x2886, idProduct=0x0018)
        try:
            printed_wait_statement = False
            if dev:
                respeaker = Tuning()
                while not rospy.is_shutdown():
                    try:
                        num_files, printed_wait_statement = run_mic(respeaker, num_files, printed_wait_statement)
                        rospy.sleep(0.2)
                    except KeyboardInterrupt:
                        break
        except usb.core.USBError:
            print('Respeaker not on USB bus')
    except rospy.ROSInterruptException:
        print('Audio processing node failed!')
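
A minimal sketch of the subscriber side that the ros_interface.py hunk above is working toward: it buffers the uint8[] chunks from MyAudioData and writes them out as a single WAV file on shutdown. The topic name (/audio), the assumption that the payload is 16-bit mono PCM at the ReSpeaker's 16 kHz rate, and the output path are assumptions for illustration, not taken from the patch.

#!/usr/bin/env python3
import wave
import rospy
from vz_acoustic_scene_analysis.msg import MyAudioData

class WavDump:
    def __init__(self, path='/tmp/test.wav', rate=16000):
        self.chunks = []
        self.path = path
        self.rate = rate
        rospy.Subscriber('/audio', MyAudioData, self.cb)  # assumed topic name
        rospy.on_shutdown(self.save)

    def cb(self, msg):
        # msg.data is a uint8[] block of raw PCM bytes; keep it as bytes
        self.chunks.append(bytes(bytearray(msg.data)))

    def save(self):
        wf = wave.open(self.path, 'wb')
        wf.setnchannels(1)          # assumed mono (processed channel 0)
        wf.setsampwidth(2)          # assumed 16-bit samples
        wf.setframerate(self.rate)  # assumed ReSpeaker rate (16 kHz)
        wf.writeframes(b''.join(self.chunks))
        wf.close()

if __name__ == '__main__':
    rospy.init_node('wav_dump')
    WavDump()
    rospy.spin()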