agent_gsm/src/utils_audio.py

458 lines
17 KiB
Python

import pyaudio
import wave
import argparse
import os
import sys
import signal
import time
import utils
import typing
import subprocess
import sox
import re
from typing import Tuple
# Sample format for recording: signed 16-bit PCM
FORMAT = pyaudio.paInt16
# Record mono audio
CHANNELS = 1
# Default sample rate in Hz (used as the default `rate` of Recorder).
# NOTE(review): the original comment said "Target rate is 16K" although the value is 48000 -
# presumably the capture is resampled to 16 kHz elsewhere; confirm.
RATE = 48000
# Buffer size in frames (not referenced in the visible part of this file)
CHUNK = 1024
# Time limit: 300 seconds
TIME_LIMIT = 300
# Shared PyAudio instance used by the helpers in this module
PY_AUDIO = pyaudio.PyAudio()
# Restart PyAudio
def restart_audio():
    """Replace the shared PY_AUDIO instance with a freshly created one.

    An existing instance is terminated first; when there is none, a new
    instance is simply created.
    """
    global PY_AUDIO
    if not PY_AUDIO:
        # Nothing to shut down - just create the instance
        PY_AUDIO = pyaudio.PyAudio()
        return
    PY_AUDIO.terminate()
    PY_AUDIO = None
    PY_AUDIO = pyaudio.PyAudio()
# Get list of input devices
def get_input_devices():
    """Return the devices of host API 0 that report at least one input channel.

    Each entry is a dict with the keys 'name', 'index', 'num_channels' and
    'default_samplerate', taken from the PyAudio device info.
    """
    host_api = PY_AUDIO.get_host_api_info_by_index(0)
    found = []
    for idx in range(0, host_api.get('deviceCount')):
        dev = PY_AUDIO.get_device_info_by_host_api_device_index(0, idx)
        inputs = dev.get('maxInputChannels')
        if not inputs > 0:
            # Output-only device
            continue
        found.append(dict(
            name=dev.get('name'),
            index=idx,
            num_channels=inputs,
            default_samplerate=dev['defaultSampleRate'],
        ))
    return found
class Recorder:
    """Holds recording settings and opens RecordingFile sessions with them.

    Defaults to one channel (mono) at the module-level RATE.
    """

    def __init__(self, device_index=0, channels=1, rate=RATE, frames_per_buffer=1024):
        self.device_index = device_index
        self.channels, self.rate = channels, rate
        self.frames_per_buffer = frames_per_buffer

    def open(self, fname, mode='wb'):
        """Create a RecordingFile writing to `fname` with this recorder's settings."""
        session = RecordingFile(
            fname,
            mode,
            self.device_index,
            self.channels,
            self.rate,
            self.frames_per_buffer,
        )
        return session
class RecordingFile:
    """One recording session: a WAV file plus the input stream that feeds it.

    The WAV file is opened on construction; the audio stream is opened by
    start_recording(). When used as a context manager, close() runs on exit.
    """

    def __init__(self, fname, mode, device_index, channels,
                 rate, frames_per_buffer):
        self.fname, self.mode = fname, mode
        self.channels, self.rate = channels, rate
        self.frames_per_buffer = frames_per_buffer
        # _prepare_file reads self.channels / self.rate, so they are stored first
        self.wavefile = self._prepare_file(fname, mode)
        self._stream = None
        self.device_index = device_index

    def __enter__(self):
        return self

    def __exit__(self, exception, value, traceback):
        self.close()

    def start_recording(self):
        """Open the input stream in callback (non-blocking) mode and start it."""
        utils.log(f'Start recording with device index {self.device_index}, channels {self.channels}, samplerate {self.rate} to {self.fname}')
        stream_options = dict(
            format=pyaudio.paInt16,
            channels=self.channels,
            rate=int(self.rate),
            input=True,
            input_device_index=self.device_index,
            frames_per_buffer=self.frames_per_buffer,
            stream_callback=self.get_callback(),
        )
        self._stream = PY_AUDIO.open(**stream_options)
        self._stream.start_stream()
        return self

    def stop_recording(self):
        """Stop the stream; the stream and the file stay open until close()."""
        self._stream.stop_stream()
        utils.log('Recording stopped.')
        return self

    def get_callback(self):
        """Return the stream callback that appends every captured buffer to the WAV file."""
        def on_buffer(in_data, frame_count, time_info, status):
            self.wavefile.writeframes(in_data)
            return (in_data, pyaudio.paContinue)
        return on_buffer

    def close(self):
        """Close the stream first, then the WAV file, and clear both references."""
        for attr in ('_stream', 'wavefile'):
            handle = getattr(self, attr)
            if handle:
                handle.close()
                setattr(self, attr, None)
        utils.log('Recorder device & file are closed.')

    def _prepare_file(self, fname, mode='wb'):
        """Open `fname` as a WAV file configured for 16-bit samples with this session's channels and rate."""
        out = wave.open(fname, mode)
        out.setnchannels(self.channels)
        out.setsampwidth(PY_AUDIO.get_sample_size(pyaudio.paInt16))
        out.setframerate(self.rate)
        return out
# Show available input devices
def show_input_devices():
    """Print one line per input device: index, name, channel count and default sample rate."""
    template = 'Idx: {index} name: {name} channels: {num_channels} default samplerate: {default_samplerate}'
    for dev in get_input_devices():
        print(template.format(**dev))
# Returns tuple with device index and device rate
def get_input_device_index(device_name: str) -> Tuple[int, int]:
    """Resolve an input (capture) device from its name or numeric index.

    Args:
        device_name: either a device index written as a number (e.g. '2') or
            a case-insensitive substring of the device name.

    Returns:
        (device_index, default_samplerate) of the first matching input device
        as listed by get_input_devices(), or (-1, 0) when nothing matches.
        The failure is logged via utils.log_error.
    """
    devices = get_input_devices()

    if device_name.isnumeric():
        wanted_index = int(device_name)
        matches = [dev for dev in devices if int(dev['index']) == wanted_index]
        if not matches:
            utils.log_error(f'Failed to find record audio device with index {wanted_index}, exiting')
            return -1, 0
    else:
        needle = device_name.lower()
        matches = [dev for dev in devices if needle in dev['name'].lower()]
        if not matches:
            utils.log_error(f'Failed to find record audio device {device_name}, exiting')
            # Must be a pair: callers unpack the result into (index, rate).
            # A bare -1 here made that unpacking raise TypeError.
            return -1, 0

    first = matches[0]
    return first['index'], first['default_samplerate']
# Capture on device with name device_name (or it can be index in string representation)
def capture(device_name: str, samplerate: int, limit: int, output_path: str) -> bool:
    """Record mono audio from an input device into a WAV file for `limit` seconds.

    Args:
        device_name: device name substring, or device index as a string.
        samplerate: accepted for interface compatibility; the recording is
            made at the device's default sample rate, not at this value.
        limit: recording duration in seconds (the function sleeps this long).
        output_path: WAV file to write; an existing file is overwritten.

    Returns:
        True when the recording was made, False when the device was not found.
    """
    if os.path.exists(output_path):
        utils.log("Warning - output file exists, it will be rewritten.")

    device_index, rate = get_input_device_index(device_name)
    if device_index == -1:
        return False

    # f-string prefix was missing, so the placeholders were logged literally.
    # The rate actually used for recording is reported.
    utils.log_verbose(f'Starting record with device {device_name}, samplerate {rate}, output file {output_path}')

    rec = Recorder(device_index=device_index, channels=CHANNELS, rate=rate)
    with rec.open(output_path) as recfile:
        recfile.start_recording()
        time.sleep(limit)
        recfile.stop_recording()
    # Report success explicitly; previously the function fell through and returned None
    return True
# Playing support
def get_output_devices():
    """Return the devices of host API 0 that report at least one output channel.

    Each entry is a dict with the keys 'name', 'index', 'num_channels' and
    'default_samplerate', taken from the PyAudio device info.
    """
    host_api = PY_AUDIO.get_host_api_info_by_index(0)
    found = []
    for idx in range(0, host_api.get('deviceCount')):
        dev = PY_AUDIO.get_device_info_by_host_api_device_index(0, idx)
        outputs = dev.get('maxOutputChannels')
        if not outputs > 0:
            # Input-only device
            continue
        found.append(dict(
            name=dev.get('name'),
            index=idx,
            num_channels=outputs,
            default_samplerate=dev['defaultSampleRate'],
        ))
    return found
def get_output_device_index(device_name: str) -> Tuple[int, int]:
    """Resolve an output (playback) device from its name or numeric index.

    Returns (device_index, default_samplerate) of the first match, or (-1, 0)
    after logging an error when no output device matches.
    """
    candidates = get_output_devices()
    by_index = device_name.isnumeric()

    if by_index:
        # The index must belong to a device that can actually play audio
        device_index = int(device_name)
        hits = [dev for dev in candidates if int(dev['index']) == device_index]
    else:
        hits = [dev for dev in candidates if device_name.lower() in dev['name'].lower()]

    if len(hits) == 0:
        if by_index:
            utils.log_error(f'Failed to find play audio device with index {device_index}, exiting')
        else:
            utils.log_error(f'Failed to find play audio device {device_name}, exiting')
        return -1, 0

    chosen = hits[0]
    if not by_index:
        device_index = chosen['index']
    return device_index, chosen['default_samplerate']
class Player:
    """Holds playback settings and opens PlayingFile sessions with them."""

    def __init__(self, device_index=0, frames_per_buffer=1024):
        self.frames_per_buffer = frames_per_buffer
        self.device_index = device_index

    def open(self, fname, mode='rb', silence_prefix: int = 0, silence_suffix: int = 0):
        """Create a PlayingFile for `fname` with optional leading/trailing silence."""
        session = PlayingFile(
            fname,
            mode,
            self.device_index,
            self.frames_per_buffer,
            silence_prefix,
            silence_suffix,
        )
        return session
class PlayingFile(object):
    """One playback session: a WAV file played through a PyAudio output stream.

    The whole file is read into memory on construction. The stream callback
    then hands out, in order: optional leading silence, the file's audio, and
    optional trailing silence. `frames_counter` counts the frames handed to
    the stream so far. When used as a context manager, close() runs on exit.
    """

    def __init__(self, fname, mode, device_index, frames_per_buffer, silence_prefix: int = 0, silence_suffix: int = 0):
        """Open `fname` and prepare the playback state.

        Args:
            fname: path of the WAV file to play.
            mode: mode passed to wave.open (normally 'rb').
            device_index: PyAudio output device index.
            frames_per_buffer: stream buffer size in frames.
            silence_prefix: seconds of silence before the audio (None means 0).
            silence_suffix: seconds of silence after the audio (None means 0).
        """
        self.fname = fname
        self.mode = mode
        self.frames_per_buffer = frames_per_buffer
        self.wavefile = self._prepare_file(self.fname, self.mode)
        self._stream = None
        self.device_index = device_index
        self.frames_counter = 0

        rate = self.wavefile.getframerate()
        sample_width = self.wavefile.getsampwidth()
        # Size of one frame in bytes; the old code assumed 2 (mono, 16-bit)
        self.bytes_per_frame: int = sample_width * self.wavefile.getnchannels()
        # 8-bit WAV samples are unsigned, so their zero level is 0x80
        self._silence_frame: bytes = (b'\x80' if sample_width == 1 else b'\x00') * self.bytes_per_frame

        # Silence lengths in frames; None and negative values count as no silence
        self.silence_prefix_total_frames: int = max(0, int(silence_prefix or 0)) * rate
        self.silence_suffix_total_frames: int = max(0, int(silence_suffix or 0)) * rate
        self.silence_prefix_frame_counter: int = 0
        self.silence_suffix_frame_counter: int = 0
        # A zero-length section is finished from the start. Previously the
        # prefix flag stayed False forever when no prefix was configured,
        # which prevented the suffix from ever being played.
        self.silence_prefix_finished: bool = self.silence_prefix_total_frames == 0
        self.silence_suffix_finished: bool = self.silence_suffix_total_frames == 0

        # Read all samples from wave file before playing to minimize possible delays
        self.wavefile.rewind()
        self.wavefile_frames = self.wavefile.readframes(self.wavefile.getnframes())
        self.wavefile_read = 0  # Current offset, in frames
        # Count the frames actually read, so a header that overstates the
        # length cannot make the callback slice past the data
        self.wavefile_length = len(self.wavefile_frames) // self.bytes_per_frame
        self.wavefile_finished = False
        utils.log(f'Available {self.wavefile_length} frames in wave file {self.fname}')

    def __enter__(self):
        return self

    def __exit__(self, exception, value, traceback):
        self.close()

    def start_playing(self):
        """Open the output stream with the file's format and start it in callback mode."""
        rate = self.wavefile.getframerate()
        channels = self.wavefile.getnchannels()
        total_frames = self.wavefile.getnframes()
        utils.log(f'Start playing with device #{self.device_index}, samplerate {rate}, channels {channels}, total frames {total_frames}')
        utils.log(f'Silence prefix length: {self.silence_prefix_total_frames} frames, silence suffix length: {self.silence_suffix_total_frames} frames')
        # Use a stream with a callback in non-blocking mode.
        # The sample format follows the file (paInt16 for 16-bit files).
        self._stream = PY_AUDIO.open(format=PY_AUDIO.get_format_from_width(self.wavefile.getsampwidth()),
                                     channels=channels,
                                     rate=rate,
                                     output=True,
                                     output_device_index=self.device_index,
                                     frames_per_buffer=self.frames_per_buffer,
                                     stream_callback=self.get_callback())
        self._stream.start_stream()
        return self

    def stop_playing(self):
        """Stop the stream if it was opened; the file stays open until close()."""
        if self._stream:
            self._stream.stop_stream()
        utils.log('Playing stopped.')
        return self

    def _fill_buffer(self, frame_count: int) -> Tuple[bytes, int]:
        """Produce up to `frame_count` frames of output and advance the playback state.

        Frames come from the prefix silence, then the file, then the suffix
        silence. `frames_counter` grows by the number of frames produced.

        Returns:
            (data, missing): the produced bytes, and how many of the requested
            frames could not be supplied (greater than 0 only when everything
            has been played).
        """
        requested = frame_count
        parts = []

        # Leading silence
        if not self.silence_prefix_finished:
            take = min(self.silence_prefix_total_frames - self.silence_prefix_frame_counter, frame_count)
            if take > 0:
                parts.append(self._silence_frame * take)
                self.silence_prefix_frame_counter += take
                frame_count -= take
            self.silence_prefix_finished = self.silence_prefix_frame_counter >= self.silence_prefix_total_frames

        # Audio from the file
        if frame_count > 0 and not self.wavefile_finished:
            take = min(self.wavefile_length - self.wavefile_read, frame_count)
            if take > 0:
                start = self.wavefile_read * self.bytes_per_frame
                parts.append(self.wavefile_frames[start:start + take * self.bytes_per_frame])
                self.wavefile_read += take
                # Integer arithmetic only: the old "/ 2" turned the counters into floats
                frame_count -= take
            self.wavefile_finished = self.wavefile_read >= self.wavefile_length

        # Trailing silence
        if frame_count > 0 and self.silence_prefix_finished and self.wavefile_finished and not self.silence_suffix_finished:
            take = min(self.silence_suffix_total_frames - self.silence_suffix_frame_counter, frame_count)
            if take > 0:
                parts.append(self._silence_frame * take)
                # This counter was never advanced before, so the suffix never ended
                self.silence_suffix_frame_counter += take
                frame_count -= take
            self.silence_suffix_finished = self.silence_suffix_frame_counter >= self.silence_suffix_total_frames

        self.frames_counter += requested - frame_count
        return b''.join(parts), frame_count

    def get_callback(self):
        """Return the PyAudio stream callback that feeds the stream from _fill_buffer()."""
        def callback(in_data, frame_count, time_info, status):
            prefix_done_before = self.silence_prefix_finished
            wave_done_before = self.wavefile_finished
            data, missing = self._fill_buffer(frame_count)
            # Log each section once, at the moment it completes
            if self.silence_prefix_finished and not prefix_done_before:
                utils.log('Silence prefix is played.')
            if self.wavefile_finished and not wave_done_before:
                utils.log('Wave file content is played.')
            # A short buffer means there is nothing left to play
            code = pyaudio.paComplete if missing > 0 else pyaudio.paContinue
            return (data, code)
        return callback

    def close(self):
        """Close the stream first, then the WAV file, and clear both references."""
        if self._stream:
            self._stream.close()
            self._stream = None
        if self.wavefile:
            self.wavefile.close()
            self.wavefile = None
        utils.log('Player device & file are closed.')

    def _prepare_file(self, fname, mode='rb') -> wave.Wave_read:
        """Open `fname` with the wave module."""
        wavefile = wave.open(fname, mode)
        return wavefile
def show_output_devices():
    """Print one line per output device: index, name, channel count and default sample rate."""
    template = 'Idx: {index} name: {name} channels: {num_channels} default samplerate: {default_samplerate}'
    for dev in get_output_devices():
        print(template.format(**dev))
def play(device_name: str, input_path: str, silence_prefix: int, silence_suffix: int) -> bool:
    """Play a WAV file on an output device, optionally padded with silence.

    The audio device is opened with the sample rate of the input file.

    Args:
        device_name: device name substring, or device index as a string.
        input_path: path of the WAV file to play.
        silence_prefix: seconds of silence before the audio.
        silence_suffix: seconds of silence after the audio.

    Returns:
        True when playback ran, False when the output device was not found.
    """
    device_index, _ = get_output_device_index(device_name)
    if device_index == -1:
        # The lookup has already logged the error; do not open an invalid device
        return False

    player = Player(device_index=device_index)
    with player.open(input_path, 'rb', silence_prefix, silence_suffix) as pf:
        pf.start_playing()
        # Use the frame counts PlayingFile computed, so this total matches what
        # the callback delivers (raw arguments may be None or fractional)
        total_frames = (pf.wavefile.getnframes()
                        + pf.silence_prefix_total_frames
                        + pf.silence_suffix_total_frames)
        # Also stop waiting once the stream is no longer active: if the
        # callback completes early the counter never reaches the total
        while pf.frames_counter < total_frames and pf._stream.is_active():
            time.sleep(0.1)
        pf.stop_playing()
    return True
def start_PA() -> bool:
    """Start PulseAudio and make sure module-bluetooth-discover is loaded.

    Returns:
        True when PulseAudio started and the module is loaded (already or by
        this call); False when PulseAudio failed to start or the module could
        not be loaded.
    """
    # Ensure pulseaudio is available
    retcode = os.system('pulseaudio --start')
    if retcode != 0:
        utils.log(f'pulseaudio failed to start, exit code: {retcode}')
        return False

    # Check if module-bluetooth-discover is available.
    # getstatusoutput() returns the (exitcode, output) pair; getoutput() returns
    # only a string, so unpacking it failed. The command already runs through
    # the shell, so the '/bin/bash' prefix (which made bash treat pacmd as a
    # script file) is dropped.
    retcode, output = subprocess.getstatusoutput('pacmd list-modules | grep module-bluetooth-discover')
    if retcode == 0 and 'module-bluetooth-discover' in output:
        utils.log('PA module-bluetooth-discover is loaded already.')
        return True

    utils.log('Attempt to load module-bluetooth-discover...')
    retcode = os.system('pacmd load-module module-bluetooth-discover')
    if retcode != 0:
        utils.log(f'Failed to load module-bluetooth-discover, exit code: {retcode}')
        return False

    print('...success.')
    return True