- work to improve the decoding process - many problems fixes + however there are problems yet

2026-02-20 13:16:13 +03:00
parent 94f30b25e9
commit 78d77c4e69
11 changed files with 375 additions and 273 deletions
--- a/src/engine/media/MT_AudioReceiver.cpp
+++ b/src/engine/media/MT_AudioReceiver.cpp
@@ -115,7 +115,7 @@ bool SequenceSort(const std::shared_ptr<RtpBuffer::Packet>& p1, const std::share
    return p1->rtp()->GetExtendedSequenceNumber() < p2->rtp()->GetExtendedSequenceNumber();
 }

-std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPacket> packet, std::chrono::milliseconds timelength, int rate)
+std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib::RTPPacket>& packet, std::chrono::milliseconds timelength, int rate)
 {
    if (!packet)
        return std::shared_ptr<Packet>();
@@ -191,12 +191,11 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPa
    return std::shared_ptr<Packet>();
 }

-RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
+RtpBuffer::FetchResult RtpBuffer::fetch()
 {
    Lock l(mGuard);

-    FetchResult result = FetchResult::NoPacket;
-    rl.clear();
+    FetchResult result;

    // See if there is enough information in buffer
    auto total = findTimelength();
@@ -217,10 +216,10 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
        mStat.mPacketDropped++;
    }

-    if (total < mLow)
+    if (total < mLow || total == 0ms)
    {
        // Still not prebuffered
-        result = FetchResult::NoPacket;
+        result = {FetchResult::Status::NoPacket};
    }
    else
    {
@@ -228,8 +227,8 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
        {
            if (mPacketList.empty())
            {
-                result = FetchResult::NoPacket;
                // Don't increase counter of lost packets here; maybe it is DTX
+                result = {FetchResult::Status::NoPacket};
            }
            else
            {
@@ -237,7 +236,6 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
                auto& packet = *mPacketList.front();
                uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber();

-
                // Gap between new packet and previous on
                int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1;
                gap = std::min(gap, 127);
@@ -255,16 +253,15 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)

                    mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno

-                    result = FetchResult::Gap;
+                    result = {FetchResult::Status::Gap};
                }
                else
                {
-                    result = FetchResult::RegularPacket;
-                    rl.push_back(mPacketList.front());
+                    result = {FetchResult::Status::RegularPacket, mPacketList.front()};

                    // Save last returned normal packet
-                    mFetchedPacket = mPacketList.front();
-                    mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber();
+                    mFetchedPacket = result.mPacket;
+                    mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();

                    // Remove returned packet from the list
                    mPacketList.erase(mPacketList.begin());
@@ -277,14 +274,11 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
            if (findTimelength() >= mPrebuffer && !mPacketList.empty())
            {
                // Normal packet will be returned
-                result = FetchResult::RegularPacket;
-
-                // Put it to output list
-                rl.push_back(mPacketList.front());
+                result = {FetchResult::Status::RegularPacket, mPacketList.front()};

                // Remember returned packet
-                mFetchedPacket = mPacketList.front();
-                mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber();
+                mFetchedPacket = result.mPacket;
+                mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();

                // Remove returned packet from buffer list
                mPacketList.erase(mPacketList.begin());
@@ -292,12 +286,12 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
            else
            {
                ICELogMedia(<< "Jitter buffer was not prebuffered yet; resulting no packet");
-                result = FetchResult::NoPacket;
+                result = {FetchResult::Status::NoPacket};
            }
        }
    }

-    if (result != FetchResult::NoPacket)
+    if (result.mStatus != FetchResult::Status::NoPacket)
        mReturnedCounter++;

    return result;
@@ -333,8 +327,7 @@ Receiver::~Receiver()

 //-------------- AudioReceiver ----------------
 AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat)
-    :Receiver(stat), mBuffer(stat), mCodecSettings(settings),
-      mCodecList(settings)
+    :Receiver(stat), mBuffer(stat), mCodecSettings(settings), mCodecList(settings)
 {
    // Init resamplers
    mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE);
@@ -346,6 +339,8 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics
    mCodecList.setSettings(settings);
    mCodecList.fillCodecMap(mCodecMap);

+    mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short));
+
 #if defined(DUMP_DECODED)
    mDecodedDump = std::make_shared<Audio::WavFileWriter>();
    mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS);
@@ -560,10 +555,14 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
    mDecodedLength = mResampledLength = 0;
    if (mCngPacket && mCodec)
    {
-        // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
-        // Do not forget to send this noise to analysis
-        mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
-                                             reinterpret_cast<short*>(mDecodedFrame), false);
+        if (mCngPacket->rtp()->GetPayloadType() == 13)
+        {
+            // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
+            // Do not forget to send this noise to analysis
+            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame), false);
+        }
+        else
+            decodePacketTo(output, options, mCngPacket);
    }
    else
    if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode)
@@ -594,114 +593,110 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
        return {.mStatus = DecodeResult::Status::Skip};
 }

-AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl)
+AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& packet)
 {
+    if (!packet || !packet->rtp())
+        return {DecodeResult::Status::Skip};
+
    DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
+    auto& rtp = *packet->rtp(); // Syntax sugar

    mFailedCount = 0;
-    for (const std::shared_ptr<RtpBuffer::Packet>& p: rl)
+    // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed.
+    if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
    {
-        assert(p);
-
-        // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed.
-        if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
+        int units = rtp.GetTimestamp() - *mLastPacketTimestamp;
+        int milliseconds = units / (mCodec->samplerate() / 1000);
+        if (milliseconds > mLastPacketTimeLength)
        {
-            int units = p->rtp()->GetTimestamp() - *mLastPacketTimestamp;
-            int milliseconds = units / (mCodec->samplerate() / 1000);
-            if (milliseconds > mLastPacketTimeLength)
-            {
-                auto silenceLength = std::chrono::milliseconds(milliseconds - mLastPacketTimeLength);
+            auto silenceLength = std::chrono::milliseconds(milliseconds - mLastPacketTimeLength);

-                if (mCngPacket && options.mFillGapByCNG)
-                    produceCNG(silenceLength, output, options);
-                else
-                    produceSilence(silenceLength, output, options);
-            }
+            if (mCngPacket && options.mFillGapByCNG)
+                produceCNG(silenceLength, output, options);
+            else
+                produceSilence(silenceLength, output, options);
        }
+    }

-        mLastPacketTimestamp = p->rtp()->GetTimestamp();
+    mLastPacketTimestamp = rtp.GetTimestamp();

-        // Find codec by payload type
-        int ptype = p->rtp()->GetPayloadType();
+    // Find codec by payload type
+    int ptype = rtp.GetPayloadType();

-        // Look into mCodecMap if exists
-        auto codecIter = mCodecMap.find(ptype);
-        if (codecIter == mCodecMap.end())
-            return  {};
+    // Look into mCodecMap if exists
+    auto codecIter = mCodecMap.find(ptype);
+    if (codecIter == mCodecMap.end())
+        return  {};

+    if (!codecIter->second)
+        codecIter->second = mCodecList.createCodecByPayloadType(ptype);

-        if (!codecIter->second)
-            codecIter->second = mCodecList.createCodecByPayloadType(ptype);
+    mCodec = codecIter->second;
+    if (mCodec)
+    {
+        result.mChannels = mCodec->channels();
+        result.mSamplerate = mCodec->samplerate();

-        mCodec = codecIter->second;
-        if (mCodec)
+        // Check if it is CNG packet
+        if (((ptype == 0 || ptype == 8) && rtp.GetPayloadLength() >= 1 && rtp.GetPayloadLength() <= 6) || rtp.GetPayloadType() == 13)
        {
-            result.mChannels = mCodec->channels();
-            result.mSamplerate = mCodec->samplerate();
-
-            // Check if it is CNG packet
-            if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6)
+            if (options.mSkipDecode)
+                mDecodedLength = 0;
+            else
            {
-                if (options.mSkipDecode)
-                    mDecodedLength = 0;
-                else
-                {
-                    mCngPacket = p->rtp();
-                    mCngDecoder.decode3389(p->rtp()->GetPayloadData(), p->rtp()->GetPayloadLength());
+                mCngPacket = packet;
+                mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());

-                    // Emit CNG mLastPacketLength milliseconds
-                    mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
-                                                         (short*)mDecodedFrame, true);
-                    if (mDecodedLength)
-                        processDecoded(output, options);
+                // Emit CNG mLastPacketLength milliseconds
+                mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true);
+                if (mDecodedLength)
+                    processDecoded(output, options);
+            }
+            result.mStatus = DecodeResult::Status::Ok;
+        }
+        else
+        {
+            // Reset CNG packet as we get regular RTP packet
+            mCngPacket.reset();
+
+            // Handle here regular RTP packets
+            // Check if payload length is ok
+            size_t payload_length = rtp.GetPayloadLength();
+            size_t rtp_frame_length = mCodec->rtpLength();
+
+            int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0;
+
+            if (!tail)
+            {
+                // Find number of frames
+                mFrameCount = mCodec->rtpLength() ? rtp.GetPayloadLength() / mCodec->rtpLength() : 1;
+                int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)rtp.GetPayloadLength();
+
+                // Save last packet time length
+                mLastPacketTimeLength = mFrameCount * mCodec->frameTime();
+
+                // Decode
+                for (int i=0; i<mFrameCount && !mCodecSettings.mSkipDecode; i++)
+                {
+                    if (options.mSkipDecode)
+                        mDecodedLength = 0;
+                    else
+                    {
+                        // Decode frame by frame
+                        mDecodedLength = mCodec->decode(rtp.GetPayloadData() + i * mCodec->rtpLength(), frameLength, mDecodedFrame, sizeof mDecodedFrame);
+                        if (mDecodedLength > 0)
+                            processDecoded(output, options);
+                    }
                }
-                result.mStatus = DecodeResult::Status::Ok;
+                result.mStatus = mFrameCount > 0 ? DecodeResult::Status::Ok : DecodeResult::Status::Skip;
+
+                // Check for bitrate counter
+                updateAmrCodecStats(mCodec.get());
            }
            else
            {
-                // Reset CNG packet as we get regular RTP packet
-                mCngPacket.reset();
-
-                // Handle here regular RTP packets
-                // Check if payload length is ok
-                size_t payload_length = p->rtp()->GetPayloadLength();
-                size_t rtp_frame_length = mCodec->rtpLength();
-
-                int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0;
-
-                if (!tail)
-                {
-                    // Find number of frames
-                    mFrameCount = mCodec->rtpLength() ? p->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1;
-                    int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)p->rtp()->GetPayloadLength();
-
-                    // Save last packet time length
-                    mLastPacketTimeLength = mFrameCount * mCodec->frameTime();
-
-                    // Decode
-                    for (int i=0; i<mFrameCount && !mCodecSettings.mSkipDecode; i++)
-                    {
-                        if (options.mSkipDecode)
-                            mDecodedLength = 0;
-                        else
-                        {
-                            // Decode frame by frame
-                            mDecodedLength = mCodec->decode(p->rtp()->GetPayloadData() + i * mCodec->rtpLength(),
-                                                            frameLength, mDecodedFrame, sizeof mDecodedFrame);
-                            if (mDecodedLength > 0)
-                                processDecoded(output, options);
-                        }
-                    }
-                    result.mStatus = mFrameCount > 0 ? DecodeResult::Status::Ok : DecodeResult::Status::Skip;
-
-                    // Check for bitrate counter
-                    updateAmrCodecStats(mCodec.get());
-                }
-                else
-                {
-                    // RTP packet with tail - it should not happen
-                    result.mStatus = DecodeResult::Status::BadPacket;
-                }
+                // RTP packet with tail - it should not happen
+                result.mStatus = DecodeResult::Status::BadPacket;
            }
        }
    }
@@ -710,7 +705,16 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out

 AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options)
 {
-    // No packet available in jitter buffer - just increase the counter for now
+    // No packet available at all (and no previous CNG packet) - so return the silence
+    if (options.mElapsed != 0ms && mCodec)
+    {
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        // Emit silence if codec information is available - it is to properly handle the gaps
+        auto avail = output.getTimeLength(fmt.rate(), fmt.channels());
+        if (options.mElapsed > avail)
+            mAvailable.addZero(fmt.sizeFromTime(options.mElapsed - avail));
+    }
+
    mFailedCount++;
    return {.mStatus = DecodeResult::Status::Skip};
 }
@@ -719,32 +723,71 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
 {
    DecodeResult result = {.mStatus = DecodeResult::Status::Skip};

-    size_t initialOffset = output.filled();   // Size in bytes
+    auto produced = 0ms;
+    if (mAvailable.filled() && mCodec && options.mElapsed != 0ms)
+    {
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        auto initiallyAvailable = mCodec ? mAvailable.getTimeLength(fmt.rate(), fmt.channels()) : 0ms;
+        if (initiallyAvailable != 0ms)
+        {
+            std::chrono::milliseconds resultTime = std::min(initiallyAvailable, options.mElapsed);
+            auto resultLen = fmt.sizeFromTime(resultTime);
+            mAvailable.moveTo(output, resultLen);
+            produced += resultTime;
+
+            // Maybe request is satisfied ?
+            if (produced >= options.mElapsed)
+                return {.mStatus = DecodeResult::Status::Ok, .mSamplerate = fmt.rate(), .mChannels = fmt.channels()};
+        }
+    }
+
    std::chrono::milliseconds decoded = 0ms;
    do
    {
        // Get next packet from buffer
        RtpBuffer::ResultList rl;
-        RtpBuffer::FetchResult fr = mBuffer.fetch(rl);
-        switch (fr)
+        RtpBuffer::FetchResult fr = mBuffer.fetch();
+        // ICELogDebug(<< fr.toString() << " " << mBuffer.findTimelength());
+
+        switch (fr.mStatus)
        {
-        case RtpBuffer::FetchResult::Gap:           result = decodeGapTo(output, options);          break;
-        case RtpBuffer::FetchResult::NoPacket:      result = decodeEmptyTo(output, options);        break;
-        case RtpBuffer::FetchResult::RegularPacket: result = decodePacketTo(output, options, rl);   break;
+        case RtpBuffer::FetchResult::Status::Gap:           result = decodeGapTo(mAvailable, options);                  break;
+        case RtpBuffer::FetchResult::Status::NoPacket:      result = decodeEmptyTo(mAvailable, options);                break;
+        case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options, fr.mPacket);   break;
        default:
            assert(0);
        }

-        size_t available = output.filled() - initialOffset;
-        if (!available)
-            break;
-        initialOffset  = output.filled();
+        // Was there decoding at all ?
+        if (!mCodec)
+            break; // No sense to continue - we have no information at all

-        // ToDo: calculate how much milliseconds was decoded
-        int samplerate = options.mResampleToMainRate ? AUDIO_SAMPLERATE : result.mSamplerate;
-        decoded += std::chrono::milliseconds(available / sizeof(short) / (samplerate / 1000));
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        result.mSamplerate = fmt.rate();
+        result.mChannels = fmt.channels();
+
+        // Have we anything interesting in the buffer ?
+        auto bufferAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels());
+        if (bufferAvailable == 0ms)
+            break; // No sense to continue - decoding / CNG / PLC stopped totally
+
+        // How much data should be moved to result buffer ?
+        if (options.mElapsed != 0ms)
+        {
+            std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced);
+            auto resultLen = fmt.sizeFromTime(resultTime);
+            mAvailable.moveTo(output, resultLen);
+            produced += resultTime;
+        }
+        else
+            mAvailable.moveTo(output, mAvailable.filled());
+
+        decoded += bufferAvailable;
    }
-    while (decoded < options.mElapsed);
+    while (produced < options.mElapsed);
+
+    if (produced != 0ms)
+        result.mStatus = DecodeResult::Status::Ok;

    // Time statistics
    if (result.mStatus == DecodeResult::Status::Ok)