diff --git a/src/agent_config.py b/src/agent_config.py
index 506e676..b3512e1 100644
--- a/src/agent_config.py
+++ b/src/agent_config.py
@@ -36,8 +36,8 @@ class AgentConfig:
     # Should the first task run immediately ?
     ForceRun = False
 
-    # Use silence eraser or not (speech detector is used in this case)
-    UseSilenceEraser = True
+    # Use external speech detector if needed
+    UseSpeechDetector = False
 
     # Path to log file
     LogPath : Path = None
@@ -90,7 +90,7 @@ class AgentConfig:
 
         if 'speech_detector' in config:
             if config['speech_detector']:
-                self.UseSilenceEraser = False
+                self.UseSpeechDetector = True
 
         if 'audio' in config:
             audio = config['audio']
diff --git a/src/agent_gsm.py b/src/agent_gsm.py
index db24856..bd8a11b 100644
--- a/src/agent_gsm.py
+++ b/src/agent_gsm.py
@@ -68,7 +68,10 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound
         # Seems some problem with recording, return zero boundaries
         return SignalBoundaries()
 
-    r = bt_signal.find_reference_signal(file_test)
+    if CONFIG.UseSpeechDetector:
+        r = bt_signal.find_reference_signal_via_speechdetector(file_test)
+    else:
+        r = bt_signal.find_reference_signal(file_test)
 
     if r.offset_start == 0.0 and is_caller:
         r.offset_start = 5.0 # Skip ringing tones
@@ -78,7 +81,10 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound
 
 def detect_reference_signal(file_reference: Path) -> SignalBoundaries:
     # Run silence eraser on reference file as well
-    result = bt_signal.find_reference_signal(file_reference)
+    if CONFIG.UseSpeechDetector:
+        result = bt_signal.find_reference_signal_via_speechdetector(file_reference)
+    else:
+        result = bt_signal.find_reference_signal(file_reference)
     return result
 
 
diff --git a/src/bt_signal.py b/src/bt_signal.py
index d8fb0ac..bcb7f5c 100644
--- a/src/bt_signal.py
+++ b/src/bt_signal.py
@@ -3,33 +3,22 @@
 import sys
 import os
 import pathlib
+from utils_types import SignalBoundaries
+from utils_sevana import speech_detector
 from pydub import silence, AudioSegment
 
-class SignalBoundaries:
-    # Offset from start (in seconds)
-    offset_start: float
-
-    # Offset from finish (in seconds)
-    offset_finish: float
-
-    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
-        self.offset_start = offset_start
-        self.offset_finish = offset_finish
-
-    def __repr__(self) -> str:
-        return f'[offset_start: {round(self.offset_start, 3)}, offset_finish : {round(self.offset_finish, 3)}]'
-
+SILENCE_DELTA = 16
 
 
 def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path = None, use_end_offset: bool = True) -> SignalBoundaries:
     myaudio = AudioSegment.from_wav(str(input_file))
     dBFS = myaudio.dBFS
 
     # Find silence intervals
-    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-17, seek_step=50)
+    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-SILENCE_DELTA, seek_step=50)
 
     # Translate to seconds
-    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] #in sec
+    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] # in sec
 
     # print(intervals)
 
@@ -48,6 +37,13 @@ def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path = 
     return SignalBoundaries()
 
 
+
+def find_reference_signal_via_speechdetector(input_file: pathlib.Path) -> SignalBoundaries:
+    """Find signal boundaries in input_file using speech_detector instead of pydub silence detection."""
+    bounds = speech_detector(str(input_file))
+    r = SignalBoundaries(bounds[0], bounds[1])
+    return r  # Return the wrapped boundaries, not the raw detector output
+
 if __name__ == '__main__':
     if len(sys.argv) < 2:
         print(f'Please specify input filename.')
diff --git a/src/utils_types.py b/src/utils_types.py
index 7ef1fc8..8c9969c 100644
--- a/src/utils_types.py
+++ b/src/utils_types.py
@@ -6,12 +6,25 @@ import utils
 import json
 from crontab import CronTab
 
-
-
 # Exit codes
 EXIT_OK = 0
 EXIT_ERROR = 1
 
+class SignalBoundaries:
+    # Offset from start (in seconds)
+    offset_start: float
+
+    # Offset from finish (in seconds)
+    offset_finish: float
+
+    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
+        self.offset_start = offset_start
+        self.offset_finish = offset_finish
+
+    def __repr__(self) -> str:
+        return f'[offset_start: {round(self.offset_start, 3)}, offset_finish : {round(self.offset_finish, 3)}]'
+
+
 class Phone:
     identifier: int = 0
     name: str = ""