- fixes + correct decode of DTX/CNG periods

2026-01-11 17:48:42 +03:00
parent fba022c7f6
commit f650eaccb7
5 changed files with 300 additions and 261 deletions
@@ -114,4 +114,7 @@
 // In milliseconds
 #define MT_SEVANA_FRAME_TIME 680
 // Number of samples
 #define MT_MAX_DECODEBUFFER  32768
 #endif
@@ -79,7 +79,7 @@ void RtpBuffer::setHigh(int milliseconds)
    mHigh = milliseconds;
 }
-int RtpBuffer::high()
+int RtpBuffer::high() const
 {
    // Returns mHigh - the value handed to setHigh(), expressed in milliseconds.
    return mHigh;
 }
@@ -89,7 +89,7 @@ void RtpBuffer::setLow(int milliseconds)
    mLow = milliseconds;
 }
-int RtpBuffer::low()
+int RtpBuffer::low() const
 {
    // Returns mLow - the value handed to setLow(), expressed in milliseconds.
    return mLow;
 }
@@ -99,7 +99,7 @@ void RtpBuffer::setPrebuffer(int milliseconds)
    mPrebuffer = milliseconds;
 }
-int RtpBuffer::prebuffer()
+int RtpBuffer::prebuffer() const
 {
    // Returns mPrebuffer - the value handed to setPrebuffer(), expressed in milliseconds.
    return mPrebuffer;
 }
@@ -224,7 +224,7 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
    }
    else
    {
-        if (mLastSeqno.has_value())
+        if (mLastSeqno) // It means we had previous packet
        {
            if (mPacketList.empty())
            {
@@ -237,6 +237,7 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
                auto& packet = *mPacketList.front();
                uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber();
                // Gap between new packet and previous one
                int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1;
                gap = std::min(gap, 127);
@@ -244,9 +245,16 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
                {
                    // std::cout << "Increase the packet loss for SSRC " << std::hex << mSsrc << std::endl;
                    mStat.mPacketLoss++;
-                    auto currentTimestamp = uint64_t(packet.rtp()->GetReceiveTime().GetDouble() * 1000000);
+                    auto currentTimestamp = std::chrono::microseconds(uint64_t(packet.rtp()->GetReceiveTime().GetDouble() * 1000000));
-                    mStat.mPacketLossTimeline.push_back({gap, std::chrono::microseconds(currentTimestamp)});
+
-                    mLastSeqno = *mLastSeqno + 1;
+                    if (mStat.mPacketLossTimeline.empty() || (mStat.mPacketLossTimeline.back().mEndSeqno != seqno))
                        mStat.mPacketLossTimeline.push_back({.mStartSeqno = *mLastSeqno,
                                                             .mEndSeqno = seqno,
                                                             .mGap = gap,
                                                             .mTimestamp = currentTimestamp});
                    mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno
                    result = FetchResult::Gap;
                }
                else
@@ -475,235 +483,249 @@ bool AudioReceiver::add(const std::shared_ptr<jrtplib::RTPPacket>& p, Codec** de
    // Queue packet to buffer
    auto packet = mBuffer.add(p, time_length, samplerate).get();
-    if (packet)
+    return packet;
    {
        // Check if early decoding configured
        if (mEarlyDecode && codec)
        {
            // Move data to packet buffer
            size_t available = decode_packet(*codec, *p, mDecodedFrame, sizeof mDecodedFrame);
            if (available > 0)
            {
                packet->pcm().resize(available / 2);
                memcpy(packet->pcm().data(), mDecodedFrame, available / 2);
            }
        }
        return true;
    }
    else
        return false;
 }
-void AudioReceiver::processDecoded(Audio::DataWindow& output, int options)
+void AudioReceiver::processDecoded(Audio::DataWindow& output, DecodeOptions options)
 {
    // Write to audio dump if requested
    if (mDecodedDump && mDecodedLength)
        mDecodedDump->write(mDecodedFrame, mDecodedLength);
    // Resample to target rate
-    bool resample = !(options & DecodeOptions_DontResample);
+    makeMonoAndResample(options.mResampleToMainRate ? mCodec->samplerate() : 0, mCodec->channels());
    makeMonoAndResample(resample ? mCodec->samplerate() : 0,
                        mCodec->channels());
    // Send to output
    output.add(mResampledFrame, mResampledLength);
 }
-AudioReceiver::DecodeResult AudioReceiver::getAudio(Audio::DataWindow& output, int options, int* rate)
+void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
 {
    // Emits `length` milliseconds of digital silence.
    // mDecodedFrame is zero-filled in slices of at most 10 ms; every slice is handed to processDecoded().
    // Slice sizes are byte counts and scale with the codec channel count, so mono or stereo silence is produced.
    // NOTE(review): mCodec is dereferenced without a null check - the caller has to guarantee it is set.
    size_t chunks = length.count() / 10;
    size_t tail = length.count() % 10;
    // Bytes in one full 10 ms slice and in the shorter trailing slice
    size_t chunk_size = 10 * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
    size_t tail_size = tail * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
    for (size_t i = 0; i < chunks; i++)
    {
        memset(mDecodedFrame, 0, chunk_size);
        mDecodedLength = chunk_size;
        processDecoded(output, options);
    }
    // Remainder shorter than 10 ms
    if (tail)
    {
        memset(mDecodedFrame, 0, tail_size);
        mDecodedLength = tail_size;
        processDecoded(output, options);
    }
 }
 void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
 {
    // Comfort noise is synthesized in slices of at most 100 ms; each non-empty slice goes to processDecoded().
    constexpr int sliceMs = 100;

    // Produces one slice of the given duration. With mSkipDecode set nothing is synthesized
    // and mDecodedLength is just reset to zero.
    const auto emitSlice = [&](int durationMs)
    {
        if (!options.mSkipDecode)
            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), durationMs, reinterpret_cast<short*>(mDecodedFrame), false);
        else
            mDecodedLength = 0;

        if (mDecodedLength)
            processDecoded(output, options);
    };

    // Full 100 ms slices first
    const int fullSlices = length.count() / sliceMs;
    for (int sliceNo = 0; sliceNo < fullSlices; ++sliceNo)
        emitSlice(sliceMs);

    // Then the remainder shorter than 100 ms
    const int remainderMs = length.count() % sliceMs;
    if (remainderMs)
        emitSlice(remainderMs);
 }
 AudioReceiver::DecodeResult AudioReceiver::decodeGap(Audio::DataWindow& output, DecodeOptions options)
 {
    // Fills a gap reported by the jitter buffer (a packet is missing).
    //  - A comfort noise packet is active: synthesize noise for the time length of the last packet.
    //  - Otherwise, when a codec has decoded frames before: run the codec PLC and fall back to silence.
    // Returns DecodeResult_Ok if audio was queued to output, DecodeResult_Skip if nothing was produced.
    ICELogDebug(<< "Gap detected.");

    mDecodedLength = mResampledLength = 0;
    if (mCngPacket && mCodec)
    {
        // Synthesize comfort noise on the codec sample rate into mDecodedFrame.
        // processDecoded() below resamples it and sends it to the output.
        mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
                                             reinterpret_cast<short*>(mDecodedFrame), false);
    }
    else
    if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode)
    {
        // Do PLC to mDecodedFrame/mDecodedLength
        if (options.mSkipDecode)
            mDecodedLength = 0;
        else
        {
            mDecodedLength = mCodec->plc(mFrameCount, mDecodedFrame, sizeof mDecodedFrame);
            if (!mDecodedLength)
            {
                // PLC is not supported or failed, so substitute one frame of silence.
                // mDecodedLength is a byte count: samples (all channels) * sizeof(int16_t).
                // The multiplication by the sample size must happen exactly once, and the channel
                // count is taken into account the same way produceSilence() does it.
                const size_t nr_of_samples = static_cast<size_t>(mCodec->frameTime()) * mCodec->samplerate() / 1000
                                           * mCodec->channels();
                const size_t silence_bytes = nr_of_samples * sizeof(int16_t);

                // Never write past the decode buffer
                mDecodedLength = std::min(silence_bytes, sizeof(mDecodedFrame));
                memset(mDecodedFrame, 0, mDecodedLength);
            }
        }
    }

    if (!mDecodedLength)
        return DecodeResult_Skip;

    processDecoded(output, options);
    return DecodeResult_Ok;
 }
 AudioReceiver::DecodeResult AudioReceiver::decodePacket(const RtpBuffer::ResultList& rl, Audio::DataWindow& output, DecodeOptions options, int* rate)
 {
    // Decodes every packet of the fetched list in order and queues the audio to output.
    // The returned value reflects the last packet that changed the outcome.
    DecodeResult outcome = DecodeResult_Skip;
    mFailedCount = 0;

    for (const std::shared_ptr<RtpBuffer::Packet>& entry: rl)
    {
        assert(entry);

        // If RTP timestamps show a pause longer than the previous packet lasted - fill it.
        // Comfort noise is used when a CNG packet is active and the caller asked for it, silence otherwise.
        if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
        {
            const int elapsedUnits = entry->rtp()->GetTimestamp() - *mLastPacketTimestamp;
            const int elapsedMs = elapsedUnits / (mCodec->samplerate() / 1000);
            if (elapsedMs > mLastPacketTimeLength)
            {
                const std::chrono::milliseconds pause(elapsedMs - mLastPacketTimeLength);
                if (mCngPacket && options.mFillGapByCNG)
                    produceCNG(pause, output, options);
                else
                    produceSilence(pause, output, options);
            }
        }
        mLastPacketTimestamp = entry->rtp()->GetTimestamp();

        // Pick the codec registered for this payload type; packets without codec are ignored
        const int payloadType = entry->rtp()->GetPayloadType();
        mCodec = mCodecMap[payloadType];
        if (!mCodec)
            continue;

        if (rate)
            *rate = mCodec->samplerate();

        // Payload types 0/8 carrying 1..6 bytes are treated as comfort noise packets
        const bool cngPacket = (payloadType == 0 || payloadType == 8)
                            && entry->rtp()->GetPayloadLength() >= 1
                            && entry->rtp()->GetPayloadLength() <= 6;
        if (cngPacket)
        {
            if (!options.mSkipDecode)
            {
                mCngPacket = entry->rtp();
                mCngDecoder.decode3389(entry->rtp()->GetPayloadData(), entry->rtp()->GetPayloadLength());

                // Emit comfort noise for mLastPacketTimeLength milliseconds
                mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
                                                     reinterpret_cast<short*>(mDecodedFrame), true);
                if (mDecodedLength)
                    processDecoded(output, options);
            }
            else
                mDecodedLength = 0;

            outcome = DecodeResult_Ok;
            continue;
        }

        // Regular RTP packet - comfort noise period is over
        mCngPacket.reset();

        // Payload has to hold a whole number of codec frames
        const size_t payloadBytes = entry->rtp()->GetPayloadLength();
        const size_t codecFrameBytes = mCodec->rtpLength();
        const int remainder = codecFrameBytes ? payloadBytes % codecFrameBytes : 0;
        if (remainder)
        {
            // RTP packet with tail - it should not happen
            outcome = DecodeResult_BadPacket;
            continue;
        }

        // Number of frames and size of single frame; zero rtpLength() means "whole payload is one frame"
        mFrameCount = mCodec->rtpLength() ? entry->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1;
        const int bytesPerFrame = mCodec->rtpLength() ? mCodec->rtpLength() : (int)entry->rtp()->GetPayloadLength();

        // Remember how long this packet lasts
        mLastPacketTimeLength = mFrameCount * mCodec->frameTime();

        // Decode frame by frame
        for (int frameNo = 0; frameNo < mFrameCount && !mCodecSettings.mSkipDecode; frameNo++)
        {
            if (!options.mSkipDecode)
            {
                mDecodedLength = mCodec->decode(entry->rtp()->GetPayloadData() + frameNo * mCodec->rtpLength(),
                                                bytesPerFrame, mDecodedFrame, sizeof mDecodedFrame);
                if (mDecodedLength > 0)
                    processDecoded(output, options);
            }
            else
                mDecodedLength = 0;
        }

        if (mFrameCount > 0)
            outcome = DecodeResult_Ok;
        else
            outcome = DecodeResult_Skip;

        // Check for bitrate counter
        updateAmrCodecStats(mCodec.get());
    }

    return outcome;
 }
 AudioReceiver::DecodeResult AudioReceiver::decodeNone(Audio::DataWindow& output, DecodeOptions options)
 {
    // Jitter buffer had nothing to give: log it, count the failed attempt and produce no audio.
    ICELogDebug(<< "No packet available in jitter buffer");

    ++mFailedCount;

    const DecodeResult nothingProduced = DecodeResult_Skip;
    return nothingProduced;
 }
 AudioReceiver::DecodeResult AudioReceiver::getAudio(Audio::DataWindow& output, DecodeOptions options, int* rate)
 {
    DecodeResult result = DecodeResult_Skip;
    bool had_decode = false;
    // Get next packet from buffer
    RtpBuffer::ResultList rl;
    RtpBuffer::FetchResult fr = mBuffer.fetch(rl);
    switch (fr)
    {
-    case RtpBuffer::FetchResult::Gap:
+    case RtpBuffer::FetchResult::Gap:           result = decodeGap(output, options);                break;
-        ICELogDebug(<< "Gap detected.");
+    case RtpBuffer::FetchResult::NoPacket:      result = decodeNone(output, options);               break;
-
+    case RtpBuffer::FetchResult::RegularPacket: result = decodePacket(rl, output, options, rate);   break;
        mDecodedLength = mResampledLength = 0;
        if (mCngPacket && mCodec)
        {
            // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
            // Do not forget to send this noise to analysis
            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
                                                 reinterpret_cast<short*>(mDecodedFrame), false);
        }
        else
        if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode)
        {
            // Do PLC to mDecodedFrame/mDecodedLength
            if (options & DecodeOptions_SkipDecode)
                mDecodedLength = 0;
            else
            {
                mDecodedLength = mCodec->plc(mFrameCount, mDecodedFrame, sizeof mDecodedFrame);
                if (!mDecodedLength)
                {
                    // PLC is not support or failed
                    // So substitute the silence
                    size_t nr_of_samples = mCodec->frameTime() * mCodec->samplerate() / 1000 * sizeof(short);
                    mDecodedLength = nr_of_samples * sizeof(short);
                    memset(mDecodedFrame, 0, mDecodedLength);
                }
            }
        }
        if (mDecodedLength)
        {
            processDecoded(output, options);
            result = DecodeResult_Ok;
        }
        break;
    case RtpBuffer::FetchResult::NoPacket:
        ICELogDebug(<< "No packet available in jitter buffer");
        mFailedCount++;
        break;
    case RtpBuffer::FetchResult::RegularPacket:
        mFailedCount = 0;
        for (std::shared_ptr<RtpBuffer::Packet>& p: rl)
        {
            assert(p);
            // Check if previously CNG packet was detected. Emit CNG audio here if needed.
            if (options & DecodeOptions_FillCngGap && mCngPacket && mCodec)
            {
                // Fill CNG audio is server mode is present
                int units = p->rtp()->GetTimestamp() - mCngPacket->GetTimestamp();
                int milliseconds = units / (mCodec->samplerate() / 1000);
                if (milliseconds > mLastPacketTimeLength)
                {
                    int frames100ms = milliseconds / 100;
                    for (int frameIndex = 0; frameIndex < frames100ms; frameIndex++)
                    {
                        if (options & DecodeOptions_SkipDecode)
                            mDecodedLength = 0;
                        else
                            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), 100,
                                                                 reinterpret_cast<short*>(mDecodedFrame), false);
                        if (mDecodedLength)
                            processDecoded(output, options);
                    }
                    // Do not forget about tail!
                    int tail = milliseconds % 100;
                    if (tail)
                    {
                        if (options & DecodeOptions_SkipDecode)
                            mDecodedLength = 0;
                        else
                            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), tail,
                                                                 reinterpret_cast<short*>(mDecodedFrame), false);
                        if (mDecodedLength)
                            processDecoded(output, options);
                    }
                    result = DecodeResult_Ok;
                }
            }
            if (mEarlyDecode)
            {
                // ToDo - copy the decoded data to output buffer
            }
            else
            {
                // Find codec by payload type
                int ptype = p->rtp()->GetPayloadType();
                mCodec = mCodecMap[ptype];
                if (mCodec)
                {
                    if (rate)
                        *rate = mCodec->samplerate();
                    // Check if it is CNG packet
                    if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6)
                    {
                        if (options & DecodeOptions_SkipDecode)
                            mDecodedLength = 0;
                        else
                        {
                            mCngPacket = p->rtp();
                            mCngDecoder.decode3389(p->rtp()->GetPayloadData(), p->rtp()->GetPayloadLength());
                            // Emit CNG mLastPacketLength milliseconds
                            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
                                                                 (short*)mDecodedFrame, true);
                            if (mDecodedLength)
                                processDecoded(output, options);
                        }
                        result = DecodeResult_Ok;
                    }
                    else
                    {
                        // Reset CNG packet
                        mCngPacket.reset();
                        // Handle here regular RTP packets
                        // Check if payload length is ok
                        size_t payload_length = p->rtp()->GetPayloadLength();
                        size_t rtp_frame_length = mCodec->rtpLength();
                        int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0;
                        if (!tail)
                        {
                            // Find number of frames
                            mFrameCount = mCodec->rtpLength() ? p->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1;
                            int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)p->rtp()->GetPayloadLength();
                            // Save last packet time length
                            mLastPacketTimeLength = mFrameCount * mCodec->frameTime();
                            // Decode
                            for (int i=0; i<mFrameCount && !mCodecSettings.mSkipDecode; i++)
                            {
                                if (options & DecodeOptions_SkipDecode)
                                    mDecodedLength = 0;
                                else
                                {
                                    // Trigger the statistics
                                    had_decode = true;
                                    // Decode frame by frame
                                    mDecodedLength = mCodec->decode(p->rtp()->GetPayloadData() + i * mCodec->rtpLength(),
                                                                    frameLength, mDecodedFrame, sizeof mDecodedFrame);
                                    if (mDecodedLength > 0)
                                        processDecoded(output, options);
                                }
                            }
                            result = mFrameCount > 0 ? DecodeResult_Ok : DecodeResult_Skip;
                            // Check for bitrate counter
                            processStatisticsWithAmrCodec(mCodec.get());
                        }
                        else
                        {
                            result = DecodeResult_BadPacket;
                            // ICELogMedia(<< "RTP packet with tail.");
                        }
                    }
                }
            }
        }
        break;
    default:
        assert(0);
    }
-    if (had_decode)
+    if (result == DecodeResult_Ok)
    {
-        // mStat.mDecodeRequested++;
+        // Decode statistics
-        if (mLastDecodeTime == 0.0)
+        if (!mLastDecodeTimestamp)
-            mLastDecodeTime = now_ms();
+            mLastDecodeTimestamp = std::chrono::steady_clock::now();
        else
        {
-            float t = now_ms();
+            auto t = std::chrono::steady_clock::now();
-            mStat.mDecodingInterval.process(t - mLastDecodeTime);
+            mStat.mDecodingInterval.process(std::chrono::duration_cast<std::chrono::milliseconds>(t - *mLastDecodeTimestamp).count());
-            mLastDecodeTime = t;
+            mLastDecodeTimestamp = t;
        }
    }
    return result;
@@ -752,7 +774,7 @@ Codec* AudioReceiver::findCodec(int payloadType)
 }
-void AudioReceiver::processStatisticsWithAmrCodec(Codec* c)
+void AudioReceiver::updateAmrCodecStats(Codec* c)
 {
 #if !defined(TARGET_ANDROID) && !defined(TARGET_OPENWRT) && !defined(TARGET_WIN) && !defined(TARGET_RPI) && defined(USE_AMR_CODEC)
    AmrNbCodec* nb = dynamic_cast<AmrNbCodec*>(c);
@@ -761,8 +783,8 @@ void AudioReceiver::processStatisticsWithAmrCodec(Codec* c)
    if (nb != nullptr)
        mStat.mBitrateSwitchCounter = nb->getSwitchCounter();
    else
-        if (wb != nullptr)
+    if (wb != nullptr)
-            mStat.mBitrateSwitchCounter = wb->getSwitchCounter();
+        mStat.mBitrateSwitchCounter = wb->getSwitchCounter();
 #endif
 }
@@ -819,13 +841,10 @@ int AudioReceiver::samplerateFor(jrtplib::RTPPacket& p)
 // ----------------------- DtmfReceiver -------------------
 DtmfReceiver::DtmfReceiver(Statistics& stat)
    :Receiver(stat)
-{
+{}
 }
 DtmfReceiver::~DtmfReceiver()
-{
+{}
 }
 void DtmfReceiver::add(std::shared_ptr<RTPPacket> /*p*/)
-{
+{}
 }
@@ -6,6 +6,7 @@
 #ifndef __MT_AUDIO_RECEIVER_H
 #define __MT_AUDIO_RECEIVER_H
 #include "../engine_config.h"
 #include "MT_Stream.h"
 #include "MT_CodecList.h"
 #include "MT_CngHelper.h"
@@ -61,17 +62,17 @@ public:
    RtpBuffer(Statistics& stat);
    ~RtpBuffer();
-    unsigned ssrc();
+    unsigned ssrc() const;
    void setSsrc(unsigned ssrc);
    void setHigh(int milliseconds);
-    int high();
+    int high() const;
    void setLow(int milliseconds);
-    int low();
+    int low() const;
    void setPrebuffer(int milliseconds);
-    int prebuffer();
+    int prebuffer() const;
    int getNumberOfReturnedPackets() const;
    int getNumberOfAddPackets() const;
@@ -88,12 +89,12 @@ public:
    FetchResult fetch(ResultList& rl);
 protected:
-    unsigned mSsrc = 0;
+    unsigned    mSsrc = 0;
-    int mHigh = RTP_BUFFER_HIGH,
+    int         mHigh = RTP_BUFFER_HIGH,
-    mLow = RTP_BUFFER_LOW,
+                mLow = RTP_BUFFER_LOW,
-    mPrebuffer = RTP_BUFFER_PREBUFFER;
+                mPrebuffer = RTP_BUFFER_PREBUFFER;
-    int mReturnedCounter = 0,
+    int         mReturnedCounter = 0,
-    mAddCounter = 0;
+                mAddCounter = 0;
    mutable Mutex mGuard;
    typedef std::vector<std::shared_ptr<Packet>> PacketList;
@@ -105,7 +106,7 @@ protected:
    std::optional<uint32_t> mLastSeqno;
    // To calculate average interval between packet add. It is close to jitter but more useful in debugging.
-    float mLastAddTime = 0.0;
+    float mLastAddTime = 0.0f;
 };
 class Receiver
@@ -125,29 +126,37 @@ public:
    ~AudioReceiver();
    // Update codec settings
-    void setCodecSettings(const CodecList::Settings& codecSettings);
+    void                    setCodecSettings(const CodecList::Settings& codecSettings);
-    CodecList::Settings& getCodecSettings();
+    CodecList::Settings&    getCodecSettings();
    // Returns false when packet is rejected as illegal. codec parameter will show codec which will be used for decoding.
    // Lifetime of pointer to codec is limited by lifetime of AudioReceiver (it is container).
    bool add(const std::shared_ptr<jrtplib::RTPPacket>& p, Codec** codec = nullptr);
    // Returns false when there is no rtp data from jitter
-    enum DecodeOptions
+    /*enum DecodeOptions
    {
        DecodeOptions_ResampleToMainRate = 0,
        DecodeOptions_DontResample = 1,
        DecodeOptions_FillCngGap = 2,
        DecodeOptions_SkipDecode = 4
    };*/
    // Options for a single getAudio() call. Member order matters for designated initializers.
    struct DecodeOptions
    {
        // true - resample to the codec sample rate; false - only downmix to mono, resampling is skipped
        bool mResampleToMainRate{true};

        // true - pauses between packets are filled by comfort noise (if CNG packet was seen); false - by silence
        bool mFillGapByCNG{false};

        // true - packets are consumed but no audio is decoded or synthesized for them
        bool mSkipDecode{false};
    };
    enum DecodeResult
    {
-        DecodeResult_Ok,
+        DecodeResult_Ok,        // Decoded ok
-        DecodeResult_Skip,
+        DecodeResult_Skip,      // Just no data - emit silence instead
-        DecodeResult_BadPacket
+        DecodeResult_BadPacket  // Error happened during the decode
    };
-    DecodeResult getAudio(Audio::DataWindow& output, int options = DecodeOptions_ResampleToMainRate, int* rate = nullptr);
+    DecodeResult getAudio(Audio::DataWindow& output, DecodeOptions options = {.mResampleToMainRate = true, .mFillGapByCNG = false, .mSkipDecode = false}, int* rate = nullptr);
    // Looks for codec by payload type
    Codec* findCodec(int payloadType);
@@ -163,52 +172,57 @@ public:
    int samplerateFor(jrtplib::RTPPacket& p);
 protected:
-    RtpBuffer mBuffer;
+    RtpBuffer                           mBuffer;                // Jitter buffer itself
-    CodecMap mCodecMap;
+    CodecMap                            mCodecMap;
-    PCodec mCodec;
+    PCodec                              mCodec;
-    int mFrameCount = 0;
+    int                                 mFrameCount = 0;
-    CodecList::Settings mCodecSettings;
+    CodecList::Settings                 mCodecSettings;
-    CodecList mCodecList;
+    CodecList                           mCodecList;
-    JitterStatistics mJitterStats;
+    JitterStatistics                    mJitterStats;
    std::shared_ptr<jrtplib::RTPPacket> mCngPacket;
-    CngDecoder mCngDecoder;
+    CngDecoder                          mCngDecoder;
-
+    size_t                              mDTXSamplesToEmit = 0;   // How much silence (or CNG) should be emited before next RTP packet gets into the action
    // Decode RTP early, do not wait for speaker callback
    bool mEarlyDecode = false;
    // Buffer to hold decoded data
-    char mDecodedFrame[65536];
+    int16_t mDecodedFrame[MT_MAX_DECODEBUFFER];
-    int mDecodedLength = 0;
+    size_t mDecodedLength = 0;
-    // Buffer to hold data converted to stereo/mono
+    // Buffer to hold data converted to stereo/mono; there is multiplier 2 as it can be stereo audio
-    char mConvertedFrame[32768];
+    int16_t mConvertedFrame[MT_MAX_DECODEBUFFER * 2];
-    int mConvertedLength = 0;
+    size_t mConvertedLength = 0;
    // Buffer to hold data resampled to AUDIO_SAMPLERATE
-    char mResampledFrame[65536];
+    int16_t mResampledFrame[MT_MAX_DECODEBUFFER];
-    int mResampledLength = 0;
+    size_t mResampledLength = 0;
    // Last packet time length
    int mLastPacketTimeLength = 0;
    std::optional<uint32_t> mLastPacketTimestamp;
    int mFailedCount = 0;
-    Audio::Resampler  mResampler8, mResampler16,
+    Audio::Resampler  mResampler8, mResampler16, mResampler32, mResampler48;
    mResampler32, mResampler48;
    Audio::PWavFileWriter mDecodedDump;
-    float mLastDecodeTime = 0.0; // Time last call happened to codec->decode()
+    std::optional<std::chrono::steady_clock::time_point> mLastDecodeTimestamp; // Time last call happened to codec->decode()
-    float mIntervalSum = 0.0;
+    float mIntervalSum = 0.0f;
    int mIntervalCount = 0;
    // Zero rate will make audio mono but resampling will be skipped
    void makeMonoAndResample(int rate, int channels);
    // Resamples, sends to analysis, writes to dump and queues to output decoded frames from mDecodedFrame
-    void processDecoded(Audio::DataWindow& output, int options);
+    void processDecoded(Audio::DataWindow& output, DecodeOptions options);
    void produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options);
    void produceCNG(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options);
-    void processStatisticsWithAmrCodec(Codec* c);
+    // Calculate bitrate switch statistics for AMR codecs
    void updateAmrCodecStats(Codec* c);
    DecodeResult decodeGap(Audio::DataWindow& output, DecodeOptions options);
    DecodeResult decodePacket(const RtpBuffer::ResultList& rl, Audio::DataWindow& output, DecodeOptions options, int* rate = nullptr);
    DecodeResult decodeNone(Audio::DataWindow& output, DecodeOptions options);
 };
 class DtmfReceiver: public Receiver
@@ -35,7 +35,7 @@ void SingleAudioStream::copyPcmTo(Audio::DataWindow& output, int needed)
 {
    while (output.filled() < needed)
    {
-        if (mReceiver.getAudio(output) != AudioReceiver::DecodeResult_Ok)
+        if (mReceiver.getAudio(output, {}) != AudioReceiver::DecodeResult_Ok)
            break;
    }
@@ -52,7 +52,10 @@ protected:
 struct PacketLossEvent
 {
-    int mGap = 0;
+    // These are extended sequence numbers (not the raw uint16_t seqno)
    uint32_t    mStartSeqno = 0,
                mEndSeqno = 0;
    int         mGap = 0;
    std::chrono::microseconds mTimestamp;
 };