From 94f30b25e992f0a00e3a3181841e1aa80e523ed2 Mon Sep 17 00:00:00 2001 From: Dmytro Bogovych Date: Thu, 19 Feb 2026 15:27:32 +0300 Subject: [PATCH] - improving the decoder --- src/engine/agent/Agent_Impl.cpp | 2 + src/engine/endpoint/EP_Session.cpp | 1 + src/engine/endpoint/EP_Session.h | 1 + src/engine/media/MT_AmrCodec.cpp | 30 +++++--- src/engine/media/MT_AmrCodec.h | 30 ++++---- src/engine/media/MT_AudioReceiver.cpp | 83 +++++++++++++++-------- src/engine/media/MT_AudioReceiver.h | 32 +++++---- src/engine/media/MT_SingleAudioStream.cpp | 3 +- src/engine/media/MT_Statistics.h | 1 + 9 files changed, 119 insertions(+), 64 deletions(-) diff --git a/src/engine/agent/Agent_Impl.cpp b/src/engine/agent/Agent_Impl.cpp index 411058d7..ddec55bb 100644 --- a/src/engine/agent/Agent_Impl.cpp +++ b/src/engine/agent/Agent_Impl.cpp @@ -526,6 +526,8 @@ void AgentImpl::processGetMediaStats(JsonCpp::Value& request, JsonCpp::Value& an answer["rtt"] = result[SessionInfo_Rtt].asFloat(); if (result.exists(SessionInfo_BitrateSwitchCounter)) answer["bitrate_switch_counter"] = result[SessionInfo_BitrateSwitchCounter].asInt(); + if (result.exists(SessionInfo_CngCounter)) + answer["cng_counter"] = result[SessionInfo_CngCounter].asInt(); if (result.exists(SessionInfo_SSRC)) answer["rtp_ssrc"] = result[SessionInfo_SSRC].asInt(); if (result.exists(SessionInfo_RemotePeer)) diff --git a/src/engine/endpoint/EP_Session.cpp b/src/engine/endpoint/EP_Session.cpp index 41b453ea..95dd70f9 100644 --- a/src/engine/endpoint/EP_Session.cpp +++ b/src/engine/endpoint/EP_Session.cpp @@ -483,6 +483,7 @@ void Session::getSessionInfo(Session::InfoOptions options, VariantMap& info) info[SessionInfo_Rtt] = static_cast(stat.mRttDelay * 1000); #if defined(USE_AMR_CODEC) info[SessionInfo_BitrateSwitchCounter] = stat.mBitrateSwitchCounter; + info[SessionInfo_CngCounter] = stat.mCng; #endif info[SessionInfo_SSRC] = stat.mSsrc; info[SessionInfo_RemotePeer] = stat.mRemotePeer.toStdString(); diff --git a/src/engine/endpoint/EP_Session.h b/src/engine/endpoint/EP_Session.h index 625a3d6e..ac8ac894 100644 --- a/src/engine/endpoint/EP_Session.h +++ b/src/engine/endpoint/EP_Session.h @@ -72,6 +72,7 @@ enum SessionInfo SessionInfo_BitrateSwitchCounter, // It is for AMR codecs only SessionInfo_RemotePeer, SessionInfo_SSRC, + SessionInfo_CngCounter // For AMR codecs only }; diff --git a/src/engine/media/MT_AmrCodec.cpp b/src/engine/media/MT_AmrCodec.cpp index bafe051e..f7136b6a 100644 --- a/src/engine/media/MT_AmrCodec.cpp +++ b/src/engine/media/MT_AmrCodec.cpp @@ -61,7 +61,7 @@ struct AmrPayload // Header // Table of Contents // Frames -static AmrPayload parseAmrPayload(AmrPayloadInfo& input) +static AmrPayload parseAmrPayload(AmrPayloadInfo& input, size_t& cngCounter) { AmrPayload result; @@ -128,6 +128,8 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input) frame.mTimestamp = input.mCurrentTimestamp; result.mFrames.push_back(frame); input.mCurrentTimestamp += input.mWideband ? 320 : 160; + if (FT == SID_FT) + cngCounter++; } while (F != 0); @@ -140,13 +142,17 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input) // avoid the loss of data synchronization in the depacketization // process, which can result in a huge degradation in speech quality. bool discard = input.mWideband ? (f.mFrameType >= 10 && f.mFrameType <= 13) : (f.mFrameType >= 9 && f.mFrameType <= 14); - // discard |= input.mWideband ? f.mFrameType >= 14 : f.mFrameType >= 15; if (discard) { result.mDiscardPacket = true; continue; } + if (input.mWideband && f.mMode == 0xFF /* CNG */) + { + int a = 1; + } + if (input.mWideband && f.mFrameType == 15) { // DTX, no sense to decode the data @@ -165,8 +171,8 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input) continue; } - size_t bitsLength = input.mWideband ? amrwb_framelenbits[f.mFrameType] : amrnb_framelenbits[f.mFrameType]; - size_t byteLength = input.mWideband ? amrwb_framelen[f.mFrameType] : amrnb_framelen[f.mFrameType]; + size_t bitsLength = input.mWideband ? amrwb_framelenbits[f.mFrameType] : amrnb_framelenbits[f.mFrameType]; + size_t byteLength = input.mWideband ? amrwb_framelen[f.mFrameType] : amrnb_framelen[f.mFrameType]; if (bitsLength > 0) { @@ -260,8 +266,7 @@ PCodec AmrNbCodec::CodecFactory::create() AmrNbCodec::AmrNbCodec(const AmrCodecConfig& config) - :mEncoderCtx(nullptr), mDecoderCtx(nullptr), mConfig(config), mCurrentDecoderTimestamp(0), - mSwitchCounter(0), mPreviousPacketLength(0) + :mConfig(config) { mEncoderCtx = Encoder_Interface_init(1); mDecoderCtx = Decoder_Interface_init(); @@ -397,7 +402,7 @@ int AmrNbCodec::decode(const void* input, int inputBytes, void* output, int outp AmrPayload ap; try { - ap = parseAmrPayload(info); + ap = parseAmrPayload(info, mCngCounter); } catch(...) { @@ -459,6 +464,11 @@ int AmrNbCodec::getSwitchCounter() const return mSwitchCounter; } +int AmrNbCodec::getCngCounter() const +{ + return mCngCounter; +} + // -------- AMR WB codec AmrWbCodec::CodecFactory::CodecFactory(const AmrCodecConfig& config) :mConfig(config) @@ -600,7 +610,7 @@ int AmrWbCodec::decodePlain(std::span input, std::span o AmrPayload ap; try { - ap = parseAmrPayload(info); + ap = parseAmrPayload(info, mCngCounter); } catch(...) { @@ -674,6 +684,10 @@ int AmrWbCodec::getSwitchCounter() const return mSwitchCounter; } +int AmrWbCodec::getCngCounter() const +{ + return mCngCounter; +} // ------------- GSM EFR ----------------- diff --git a/src/engine/media/MT_AmrCodec.h b/src/engine/media/MT_AmrCodec.h index 00919fd0..dd35192e 100644 --- a/src/engine/media/MT_AmrCodec.h +++ b/src/engine/media/MT_AmrCodec.h @@ -26,13 +26,13 @@ struct AmrCodecConfig class AmrNbCodec : public Codec { protected: - void* mEncoderCtx; - void* mDecoderCtx; + void* mEncoderCtx = nullptr; + void* mDecoderCtx = nullptr; AmrCodecConfig mConfig; - unsigned mCurrentDecoderTimestamp; - int mSwitchCounter; - int mPreviousPacketLength; - + unsigned mCurrentDecoderTimestamp = 0; + int mPreviousPacketLength = 0; + size_t mCngCounter = 0; + size_t mSwitchCounter = 0; public: class CodecFactory: public Factory { @@ -65,6 +65,7 @@ public: int decode(const void* input, int inputBytes, void* output, int outputCapacity) override; int plc(int lostFrames, void* output, int outputCapacity) override; int getSwitchCounter() const; + int getCngCounter() const; }; struct AmrWbStatistics @@ -77,11 +78,13 @@ extern AmrWbStatistics GAmrWbStatistics; class AmrWbCodec : public Codec { protected: - void* mEncoderCtx; - void* mDecoderCtx; + void* mEncoderCtx = nullptr; + void* mDecoderCtx = nullptr; AmrCodecConfig mConfig; - uint64_t mCurrentDecoderTimestamp; - int mSwitchCounter; + uint64_t mCurrentDecoderTimestamp = 0; + size_t mSwitchCounter = 0; + size_t mCngCounter = 0; + int mPreviousPacketLength; int decodeIuup(std::span input, std::span output); @@ -119,14 +122,15 @@ public: int decode(const void* input, int inputBytes, void* output, int outputCapacity) override; int plc(int lostFrames, void* output, int outputCapacity) override; int getSwitchCounter() const; + int getCngCounter() const; }; class GsmEfrCodec : public Codec { protected: - void* mEncoderCtx; - void* mDecoderCtx; - bool mIuUP; + void* mEncoderCtx = nullptr; + void* mDecoderCtx = nullptr; + bool mIuUP = false; public: class GsmEfrFactory: public Factory diff --git a/src/engine/media/MT_AudioReceiver.cpp b/src/engine/media/MT_AudioReceiver.cpp index f88b65aa..6021687f 100644 --- a/src/engine/media/MT_AudioReceiver.cpp +++ b/src/engine/media/MT_AudioReceiver.cpp @@ -553,7 +553,7 @@ void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWind } } -AudioReceiver::DecodeResult AudioReceiver::decodeGap(Audio::DataWindow& output, DecodeOptions options) +AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output, DecodeOptions options) { ICELogDebug(<< "Gap detected."); @@ -588,20 +588,21 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGap(Audio::DataWindow& output, if (mDecodedLength) { processDecoded(output, options); - return DecodeResult_Ok; + return {.mStatus = DecodeResult::Status::Ok,.mChannels = mCodec->channels(), .mSamplerate = mCodec->samplerate()}; } else - return DecodeResult_Skip; + return {.mStatus = DecodeResult::Status::Skip}; } -AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultList& rl, Audio::DataWindow& output, DecodeOptions options, int* rate) +AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl) { - DecodeResult result = DecodeResult_Skip; + DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; mFailedCount = 0; for (const std::shared_ptr& p: rl) { assert(p); + // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed. if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec) { @@ -635,8 +636,8 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultL mCodec = codecIter->second; if (mCodec) { - if (rate) - *rate = mCodec->samplerate(); + result.mChannels = mCodec->channels(); + result.mSamplerate = mCodec->samplerate(); // Check if it is CNG packet if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6) @@ -654,7 +655,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultL if (mDecodedLength) processDecoded(output, options); } - result = DecodeResult_Ok; + result.mStatus = DecodeResult::Status::Ok; } else { @@ -691,7 +692,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultL processDecoded(output, options); } } - result = mFrameCount > 0 ? DecodeResult_Ok : DecodeResult_Skip; + result.mStatus = mFrameCount > 0 ? DecodeResult::Status::Ok : DecodeResult::Status::Skip; // Check for bitrate counter updateAmrCodecStats(mCodec.get()); @@ -699,7 +700,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultL else { // RTP packet with tail - it should not happen - result = DecodeResult_BadPacket; + result.mStatus = DecodeResult::Status::BadPacket; } } } @@ -707,39 +708,55 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultL return result; } -AudioReceiver::DecodeResult AudioReceiver::decodeNone(Audio::DataWindow& output, DecodeOptions options) +AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options) { - // ICELogDebug(<< "No packet available in jitter buffer"); + // No packet available in jitter buffer - just increase the counter for now mFailedCount++; - return DecodeResult_Skip; + return {.mStatus = DecodeResult::Status::Skip}; } -AudioReceiver::DecodeResult AudioReceiver::getAudio(Audio::DataWindow& output, DecodeOptions options, int* rate) +AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output, DecodeOptions options) { - DecodeResult result = DecodeResult_Skip; + DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; - // Get next packet from buffer - RtpBuffer::ResultList rl; - RtpBuffer::FetchResult fr = mBuffer.fetch(rl); - switch (fr) + size_t initialOffset = output.filled(); // Size in bytes + std::chrono::milliseconds decoded = 0ms; + do { - case RtpBuffer::FetchResult::Gap: result = decodeGap(output, options); break; - case RtpBuffer::FetchResult::NoPacket: result = decodeNone(output, options); break; - case RtpBuffer::FetchResult::RegularPacket: result = decodePacket(rl, output, options, rate); break; - default: - assert(0); - } + // Get next packet from buffer + RtpBuffer::ResultList rl; + RtpBuffer::FetchResult fr = mBuffer.fetch(rl); + switch (fr) + { + case RtpBuffer::FetchResult::Gap: result = decodeGapTo(output, options); break; + case RtpBuffer::FetchResult::NoPacket: result = decodeEmptyTo(output, options); break; + case RtpBuffer::FetchResult::RegularPacket: result = decodePacketTo(output, options, rl); break; + default: + assert(0); + } - if (result == DecodeResult_Ok) + size_t available = output.filled() - initialOffset; + if (!available) + break; + initialOffset = output.filled(); + + // ToDo: calculate how much milliseconds was decoded + int samplerate = options.mResampleToMainRate ? AUDIO_SAMPLERATE : result.mSamplerate; + decoded += std::chrono::milliseconds(available / sizeof(short) / (samplerate / 1000)); + } + while (decoded < options.mElapsed); + + // Time statistics + if (result.mStatus == DecodeResult::Status::Ok) { // Decode statistics - if (!mLastDecodeTimestamp) - mLastDecodeTimestamp = std::chrono::steady_clock::now(); + if (!mDecodeTimestamp) + mDecodeTimestamp = std::chrono::steady_clock::now(); else { auto t = std::chrono::steady_clock::now(); - mStat.mDecodingInterval.process(std::chrono::duration_cast(t - *mLastDecodeTimestamp).count()); - mLastDecodeTimestamp = t; + mStat.mDecodingInterval.process(std::chrono::duration_cast(t - *mDecodeTimestamp).count()); + mDecodeTimestamp = t; } } return result; @@ -795,10 +812,16 @@ void AudioReceiver::updateAmrCodecStats(Codec* c) AmrWbCodec* wb = dynamic_cast(c); if (nb != nullptr) + { mStat.mBitrateSwitchCounter = nb->getSwitchCounter(); + mStat.mCng = nb->getCngCounter(); + } else if (wb != nullptr) + { mStat.mBitrateSwitchCounter = wb->getSwitchCounter(); + mStat.mCng = wb->getCngCounter(); + } #endif } diff --git a/src/engine/media/MT_AudioReceiver.h b/src/engine/media/MT_AudioReceiver.h index 1859bdfd..26c8d6d6 100644 --- a/src/engine/media/MT_AudioReceiver.h +++ b/src/engine/media/MT_AudioReceiver.h @@ -144,19 +144,27 @@ public: struct DecodeOptions { - bool mResampleToMainRate = true; - bool mFillGapByCNG = false; - bool mSkipDecode = false; + bool mResampleToMainRate = true; // Resample all decoded audio to AUDIO_SAMPLERATE + bool mFillGapByCNG = false; // Use CNG information if available + bool mSkipDecode = false; // Don't do decode, just dry run - fetch packets, remove them from the jitter buffer + std::chrono::milliseconds mElapsed = 0ms; // How much milliseconds should be decoded; zero value means "decode just next packet from the buffer" }; - enum DecodeResult + struct DecodeResult { - DecodeResult_Ok, // Decoded ok - DecodeResult_Skip, // Just no data - emit silence instead - DecodeResult_BadPacket // Error happened during the decode + enum class Status + { + Ok, // Decoded ok + Skip, // Just no data - emit silence instead + BadPacket // Error happened during the decode + }; + + Status mStatus = Status::Ok; + int mSamplerate = 0; + int mChannels = 0; }; - DecodeResult getAudio(Audio::DataWindow& output, DecodeOptions options = {.mResampleToMainRate = true, .mFillGapByCNG = false, .mSkipDecode = false}, int* rate = nullptr); + DecodeResult getAudioTo(Audio::DataWindow& output, DecodeOptions options); // Looks for codec by payload type Codec* findCodec(int payloadType); @@ -204,7 +212,7 @@ protected: Audio::PWavFileWriter mDecodedDump; - std::optional mLastDecodeTimestamp; // Time last call happened to codec->decode() + std::optional mDecodeTimestamp; // Time last call happened to codec->decode() float mIntervalSum = 0.0f; int mIntervalCount = 0; @@ -220,9 +228,9 @@ protected: // Calculate bitrate switch statistics for AMR codecs void updateAmrCodecStats(Codec* c); - DecodeResult decodeGap(Audio::DataWindow& output, DecodeOptions options); - DecodeResult decodePacket(const RtpBuffer::ResultList& rl, Audio::DataWindow& output, DecodeOptions options, int* rate = nullptr); - DecodeResult decodeNone(Audio::DataWindow& output, DecodeOptions options); + DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options); + DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl); + DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options); }; class DtmfReceiver: public Receiver diff --git a/src/engine/media/MT_SingleAudioStream.cpp b/src/engine/media/MT_SingleAudioStream.cpp index effdda73..0baa426e 100644 --- a/src/engine/media/MT_SingleAudioStream.cpp +++ b/src/engine/media/MT_SingleAudioStream.cpp @@ -33,9 +33,10 @@ void SingleAudioStream::process(const std::shared_ptr& packe void SingleAudioStream::copyPcmTo(Audio::DataWindow& output, int needed) { + // Packet by packet while (output.filled() < needed) { - if (mReceiver.getAudio(output, {}) != AudioReceiver::DecodeResult_Ok) + if (mReceiver.getAudioTo(output, {}).mStatus != AudioReceiver::DecodeResult::Status::Ok) break; } diff --git a/src/engine/media/MT_Statistics.h b/src/engine/media/MT_Statistics.h index cf2c505e..d637f6e2 100644 --- a/src/engine/media/MT_Statistics.h +++ b/src/engine/media/MT_Statistics.h @@ -86,6 +86,7 @@ public: // AMR codec bitrate switch counter int mBitrateSwitchCounter = 0; + int mCng = 0; std::string mCodecName; float mJitter = 0.0f; // Jitter TestResult mRttDelay; // RTT delay