- use native speech detector

2023-09-14 15:44:55 +03:00 · 2023-09-14 15:44:55 +03:00 · 0bf8134feb
parent 48743574ad
commit 0bf8134feb
4 changed files with 37 additions and 23 deletions
--- a/src/agent_config.py
+++ b/src/agent_config.py
@ -36,8 +36,8 @@ class AgentConfig:
    # Should the first task run immediately ?
    ForceRun = False

-    # Use silence eraser or not (speech detector is used in this case)
-    UseSilenceEraser = True
+    # Use external speech detector if needed
+    UseSpeechDetector = False

    # Path to log file
    LogPath : Path = None
@ -90,7 +90,7 @@ class AgentConfig:

            if 'speech_detector' in config:
                if config['speech_detector']:
-                    self.UseSilenceEraser = False
+                    self.UseSpeechDetector = True

            if 'audio' in config:
                audio = config['audio']
--- a/src/agent_gsm.py
+++ b/src/agent_gsm.py
@ -68,6 +68,9 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound
        # Seems some problem with recording, return zero boundaries
        return SignalBoundaries()
    
+    if CONFIG.UseSpeechDetector:
+        r = bt_signal.find_reference_signal_via_speechdetector(file_test)
+    else:
        r = bt_signal.find_reference_signal(file_test)
    
    if r.offset_start == 0.0 and is_caller:
@ -78,6 +81,9 @@ def detect_degraded_signal(file_test: Path, file_reference: Path) -> SignalBound

 def detect_reference_signal(file_reference: Path) -> SignalBoundaries:
    # Run silence eraser on reference file as well
+    if CONFIG.UseSpeechDetector:
+        result = bt_signal.find_reference_signal_via_speechdetector(file_reference)
+    else:
        result = bt_signal.find_reference_signal(file_reference)
    return result

--- a/src/bt_signal.py
+++ b/src/bt_signal.py
@ -3,33 +3,22 @@
 import sys
 import os
 import pathlib
+from utils_types import SignalBoundaries
+from utils_sevana import speech_detector

 from pydub import silence, AudioSegment

-class SignalBoundaries:
-    # Offset from start (in seconds)
-    offset_start: float
-
-    # Offset from finish (in seconds)
-    offset_finish: float
-
-    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
-        self.offset_start = offset_start
-        self.offset_finish = offset_finish
-
-    def __repr__(self) -> str:
-        return f'[offset_start: {round(self.offset_start, 3)}, offset_finish : {round(self.offset_finish, 3)}]'
-
+SILENCE_DELTA = 16

 def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path = None, use_end_offset: bool = True) -> SignalBoundaries:
    myaudio = AudioSegment.from_wav(str(input_file))
    dBFS = myaudio.dBFS
    
    # Find silence intervals
-    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-17, seek_step=50)
+    intervals = silence.detect_nonsilent(myaudio, min_silence_len=1000, silence_thresh=dBFS-SILENCE_DELTA, seek_step=50)
    
    # Translate to seconds
-    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] #in sec
+    intervals = [((start/1000),(stop/1000)) for start,stop in intervals] # in sec

    # print(intervals)

@ -48,6 +37,12 @@ def find_reference_signal(input_file: pathlib.Path, output_file: pathlib.Path =

    return SignalBoundaries()

+
+def find_reference_signal_via_speechdetector(input_file: pathlib.Path) -> SignalBoundaries:
+    """Run speech_detector on input_file and return its first two values as SignalBoundaries."""
+    bounds = speech_detector(str(input_file))
+    return SignalBoundaries(bounds[0], bounds[1])  # callers read .offset_start, so return the wrapper, not raw bounds
+
 if __name__ == '__main__':
    if len(sys.argv) < 2:
        print(f'Please specify input filename.')
--- a/src/utils_types.py
+++ b/src/utils_types.py
@ -6,12 +6,25 @@ import utils
 import json
 from crontab import CronTab

-
-
 # Exit codes
 EXIT_OK = 0
 EXIT_ERROR = 1

+class SignalBoundaries:
+    # Offset from start (in seconds)
+    offset_start: float
+
+    # Offset from finish (in seconds)
+    offset_finish: float
+
+    def __init__(self, offset_start = 0.0, offset_finish = 0.0) -> None:
+        self.offset_start = offset_start
+        self.offset_finish = offset_finish
+
+    def __repr__(self) -> str:
+        return f'[offset_start: {round(self.offset_start, 3)}, offset_finish : {round(self.offset_finish, 3)}]'
+
+
 class Phone:
    identifier: int = 0
    name: str = ""