- use native speech detector

2023-09-14 15:44:55 +03:00 · 2023-09-14 15:44:55 +03:00 · 0bf8134feb
parent 48743574ad
commit 0bf8134feb
4 changed files with 37 additions and 23 deletions
--- a/src/agent_config.py
+++ b/src/agent_config.py
@ -36,8 +36,8 @@ class AgentConfig:
    # Should the first task run immediately ?
    ForceRun = False
-    # Use silence eraser or not (speech detector is used in this case)
+    # Use external speech detector if needed
-    UseSilenceEraser = True
+    UseSpeechDetector = False
    # Path to log file
    LogPath : Path = None
@ -90,7 +90,7 @@ class AgentConfig:
            if 'speech_detector' in config:
                if config['speech_detector']:
-                    self.UseSilenceEraser = False
+                    self.UseSpeechDetector = True
            if 'audio' in config:
                audio = config['audio']
--- a/src/agent_gsm.py
+++ b/src/agent_gsm.py
@ -68,7 +68,10 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound
        # Seems some problem with recording, return zero boundaries
        return SignalBoundaries()
-    r = bt_signal.find_reference_signal(file_test)
+    if CONFIG.UseSpeechDetector:
        r = bt_signal.find_reference_signal_via_speechdetector(file_test)
    else:
        r = bt_signal.find_reference_signal(file_test)
    if r.offset_start == 0.0 and is_caller:
        r.offset_start = 5.0 # Skip ringing tones
@ -78,7 +81,10 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound
 def detect_reference_signal(file_reference: Path) -> SignalBoundaries:
    # Find signal boundaries in the reference file as well (speech detector or silence detection, depending on CONFIG.UseSpeechDetector)
-    result = bt_signal.find_reference_signal(file_reference)
+    if CONFIG.UseSpeechDetector:
        result = bt_signal.find_reference_signal_via_speechdetector(file_reference)
    else:
        result = bt_signal.find_reference_signal(file_reference)
    return result
--- a/src/bt_signal.py
+++ b/src/bt_signal.py
@ -3,33 +3,22 @@
 import sys
 import os
 import pathlib
 from utils_types import SignalBoundaries
 from utils_sevana import speech_detector
 from pydub import silence, AudioSegment
-class SignalBoundaries:
+SILENCE_DELTA = 16
    # Offset from start (in seconds)
    offset_start: float
    # Offset from finish (in seconds)
    offset_finish: float
    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
        self.offset_start = offset_start
        self.offset_finish = offset_finish
    def __repr__(self) -> str:
        return f'[offset_start: {round(self.offset_start, 3)}, offset_finish : {round(self.offset_finish, 3)}]'
 def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path = None, use_end_offset: bool = True) -> SignalBoundaries:
    myaudio = AudioSegment.from_wav(str(input_file))
    dBFS = myaudio.dBFS
    # Find silence intervals
-    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-17, seek_step=50)
+    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-SILENCE_DELTA, seek_step=50)
    # Translate to seconds
-    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] #in sec
+    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] # in sec
    # print(intervals)
@ -48,6 +37,12 @@ def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path =
    return SignalBoundaries()
 def find_reference_signal_via_speechdetector(input_file: pathlib.Path) -> SignalBoundaries:
    """Find signal boundaries in *input_file* using the speech detector.

    The file path is passed to ``speech_detector`` as a string, and the
    first two items of its result become ``offset_start`` and
    ``offset_finish`` of the returned object.

    NOTE(review): assumes ``speech_detector`` returns an indexable pair of
    offsets in seconds - confirm against utils_sevana.
    """
    bounds = speech_detector(str(input_file))
    # Return the wrapped object, not the raw detector output: callers read
    # and assign attributes such as ``offset_start`` on the result.
    return SignalBoundaries(bounds[0], bounds[1])
 if __name__ == '__main__':
    if len(sys.argv) < 2:
        print(f'Please specify input filename.')
--- a/src/utils_types.py
+++ b/src/utils_types.py
@ -6,12 +6,25 @@ import utils
 import json
 from crontab import CronTab
 # Exit codes
 EXIT_OK = 0
 EXIT_ERROR = 1
 class SignalBoundaries:
    """Pair of offsets, in seconds, measured from the start and from the finish."""
    # Offset from start (in seconds)
    offset_start: float
    # Offset from finish (in seconds)
    offset_finish: float
    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
        # Both offsets default to zero
        self.offset_start, self.offset_finish = offset_start, offset_finish
    def __repr__(self) -> str:
        # Offsets are shown rounded to three decimal places
        start = round(self.offset_start, 3)
        finish = round(self.offset_finish, 3)
        return f'[offset_start: {start}, offset_finish : {finish}]'
 class Phone:
    identifier: int = 0
    name: str = ""