- work to improve the decoding process - many problems fixed; however, some problems still remain

2026-02-20 13:16:13 +03:00
parent 94f30b25e9
commit 78d77c4e69
11 changed files with 375 additions and 273 deletions
@@ -11,7 +11,7 @@ using namespace Audio;
+// Creates an empty window: no data pointer, zero filled bytes and zero capacity.
 DataWindow::DataWindow()
 {
    mFilled = 0;
-    mData = NULL;
+    mData = nullptr;
    mCapacity = 0;
 }

@@ -166,6 +166,25 @@ void DataWindow::zero(int length)
    memset(mData, 0, mFilled);
 }

+// Hands up to `size` bytes over to `dst`: the bytes starting at mData are passed to dst.add()
+// and then removed from this window with erase().
+// Returns the number of bytes handed over (never more than filled()).
+size_t DataWindow::moveTo(DataWindow& dst, size_t size)
+{
+    Lock l(mMutex);
+
+    const size_t count = std::min(size, (size_t)filled());
+    if (count == 0)
+        return 0;
+
+    dst.add(mData, count);
+    erase(count);
+    return count;
+}
+
+// Returns the duration of the buffered data, treating it as interleaved 16-bit PCM
+// with the given sample rate (Hz) and channel count.
+// Returns 0ms when samplerate or channels is not positive (avoids division by zero).
+std::chrono::milliseconds DataWindow::getTimeLength(int samplerate, int channels) const
+{
+    if (samplerate <= 0 || channels <= 0)
+        return std::chrono::milliseconds(0);
+
+    Lock l(mMutex);
+    const long long samplesPerChannel = mFilled / static_cast<long long>(sizeof(short)) / channels;
+
+    // Multiply before dividing: (samplerate / 1000) truncates for rates such as 44100
+    // and is zero for rates below 1000.
+    return std::chrono::milliseconds(samplesPerChannel * 1000 / samplerate);
+}
+
 void DataWindow::makeStereoFromMono(DataWindow& dst, DataWindow& src)
 {
    Lock lockDst(dst.mMutex), lockSrc(src.mMutex);
@@ -11,9 +11,9 @@

 namespace Audio
 {
-  class DataWindow
-  {
-  public:
+class DataWindow
+{
+public:
    DataWindow();
    ~DataWindow();

@@ -34,14 +34,17 @@ namespace Audio
    short       shortAt(int index) const;
    void        setShortAt(short value, int index);
    void        zero(int length);
+    size_t      moveTo(DataWindow& dst, size_t size);
+
+    std::chrono::milliseconds getTimeLength(int samplerate, int channels) const;

    static void makeStereoFromMono(DataWindow& dst, DataWindow& src);

-  protected:
+protected:
    mutable Mutex mMutex;
    char* mData;
    int mFilled;
    int mCapacity;
-  };
+};
 }
 #endif
@@ -51,6 +51,11 @@ struct Format
        return float((milliseconds * mRate) / 500.0 * mChannels);
    }

+    // Overload for std::chrono::milliseconds: forwards the tick count to the numeric sizeFromTime()
+    size_t sizeFromTime(std::chrono::milliseconds ms) const
+    {
+        const auto millis = ms.count();
+        return sizeFromTime(millis);
+    }
+
    std::string toString()
    {
        char buffer[64];
@@ -50,13 +50,11 @@
 #define MT_MAXRTPPACKET      1500
 #define MT_DTMF_END_PACKETS  3

-#define RTP_BUFFER_HIGH      0
-#define RTP_BUFFER_LOW       0
-#define RTP_BUFFER_PREBUFFER 0
+// RTP buffer thresholds, in milliseconds
+#define RTP_BUFFER_HIGH      (2000)
+#define RTP_BUFFER_LOW       (0)
+#define RTP_BUFFER_PREBUFFER (100)

-// #define RTP_BUFFER_HIGH      160
-// #define RTP_BUFFER_LOW       10
-// #define RTP_BUFFER_PREBUFFER 160
 #define RTP_DECODED_CAPACITY 2048

 #define DEFAULT_SUBSCRIPTION_TIME 1200
@@ -31,30 +31,30 @@ const uint16_t amrwb_framelenbits[10] =

+// Input descriptor handed to parseAmrPayload().
+// Every field has a default member initializer, so a default-constructed instance is fully defined.
 struct AmrPayloadInfo
 {
-    const uint8_t*  mPayload;
-    int             mPayloadLength;
-    bool            mOctetAligned;
-    bool            mInterleaving;
-    bool            mWideband;
-    uint64_t        mCurrentTimestamp;
+    const uint8_t*  mPayload = nullptr;
+    int             mPayloadLength = 0;
+    bool            mOctetAligned = false;
+    bool            mInterleaving = false;
+    bool            mWideband = false;
+    uint64_t        mCurrentTimestamp = 0;
 };


+// A single frame of an AMR payload; AmrPayload::mFrames stores a list of these.
+// Scalar fields are zero/false-initialized; mData starts as an empty shared_ptr.
 struct AmrFrame
 {
-    uint8_t               mFrameType;
-    uint8_t               mMode;
-    bool                  mGoodQuality;
-    uint64_t              mTimestamp;
+    uint8_t               mFrameType = 0;
+    uint8_t               mMode = 0;
+    bool                  mGoodQuality = false;
+    uint64_t              mTimestamp = 0;
    std::shared_ptr<ByteBuffer> mData;
-    uint8_t               mSTI;
+    uint8_t               mSTI = 0;
 };

+// Return type of parseAmrPayload(): the frame list (mFrames) plus packet-level fields.
 struct AmrPayload
 {
-    uint8_t               mCodeModeRequest;
+    uint8_t               mCodeModeRequest = 0;
    std::vector<AmrFrame> mFrames;
-    bool                  mDiscardPacket;
+    bool                  mDiscardPacket = false;
 };

 // ARM RTP payload has next structure
@@ -148,10 +148,10 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input, size_t& cngCounter)
            continue;
        }

-        if (input.mWideband && f.mMode == 0xFF /* CNG */)
-        {
-            int a = 1;
-        }
+        // if (input.mWideband && f.mMode == 0xFF /* CNG */)
+        // {
+        //     int a = 1;
+        // }

        if (input.mWideband && f.mFrameType == 15)
        {
@@ -628,21 +628,30 @@ int AmrWbCodec::decodePlain(std::span<const uint8_t> input, std::span<uint8_t> o
        return 0;
    }

-    // Check for output buffer capacity
-    if (output.size() < (int)ap.mFrames.size() * pcmLength())
+    // Find the required output capacity
+    size_t capacity = 0;
+    for (AmrFrame& frame: ap.mFrames)
+        capacity += frame.mMode == 0xFF /* CNG */ ? pcmLength() * 8 : pcmLength();
+
+    if (output.size() < capacity)
        return 0;

    short* dataOut = (short*)output.data();
    size_t dataOutSizeInBytes = 0;
    for (AmrFrame& frame: ap.mFrames)
    {
-        memset(dataOut, 0, static_cast<size_t>(pcmLength()));
+        size_t frameOutputSize = frame.mMode == 0xFF ? pcmLength() * 8 : pcmLength();
+        memset(dataOut, 0, frameOutputSize);

        if (frame.mData)
        {
+            if (frame.mMode == 0xFF)
+            {
+                // int bp = 1;
+            }
            D_IF_decode(mDecoderCtx, (const unsigned char*)frame.mData->data(), (short*)dataOut, 0);
-            dataOut += pcmLength() / 2;
-            dataOutSizeInBytes += pcmLength();
+            dataOut += frameOutputSize / 2;
+            dataOutSizeInBytes += frameOutputSize;
        }
    }
    return dataOutSizeInBytes;
@@ -635,10 +635,10 @@ int IlbcCodec::samplerate()
    return 8000;
 }

-int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity)
+Codec::EncodeResult IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity)
 {
    if (inputBytes % pcmLength())
-        return 0;
+        return {};

    // Declare the data input pointer
    short *dataIn = (short *)input;
@@ -657,10 +657,10 @@ int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int
        dataOut += rtpLength();
    }

-    return frames * rtpLength();
+    return {frames * rtpLength()};
 }

-int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outputCapacity)
+Codec::DecodeResult IlbcCodec::decode(const void* input, int inputBytes, void* output, int outputCapacity)
 {
    unsigned frames = inputBytes / rtpLength();

@@ -675,12 +675,12 @@ int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outpu
        dataOut += pcmLength() / 2;
    }

-    return frames * pcmLength();
+    return {frames * pcmLength()};
 }

-int IlbcCodec::plc(int lostFrames, void* output, int outputCapacity)
+// Runs iLBC packet loss concealment for `lostFrames` frames, writing 16-bit PCM into `output`.
+// Returns the number of bytes produced, or 0 if nothing was produced.
+// The return type is size_t to match the class declaration (size_t plc(int, std::span<uint8_t>) override).
+size_t IlbcCodec::plc(int lostFrames, std::span<uint8_t> output)
 {
-    return 2 * WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output, lostFrames);
+    // Nothing to do for a non-positive frame count; refuse to write past the end of the output span
+    if (lostFrames <= 0 || output.size() < static_cast<size_t>(lostFrames) * pcmLength())
+        return 0;
+
+    // Keep the sample count signed: in case the decoder reports an error as a negative value,
+    // it must not wrap around to a huge size_t.
+    const int samples = WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output.data(), lostFrames);
+    return samples > 0 ? sizeof(short) * static_cast<size_t>(samples) : 0;
 }

 // --- IlbcFactory ---
@@ -58,9 +58,10 @@ public:
    int frameTime() override;
    int samplerate() override;
    int channels() override;
-    int encode(const void* input, int inputBytes, void* output, int outputCapacity) override;
-    int decode(const void* input, int inputBytes, void* output, int outputCapacity) override;
-    int plc(int lostFrames, void* output, int outputCapacity) override;
+
+    EncodeResult    encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    DecodeResult    decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    size_t          plc(int lostFrames, std::span<uint8_t> output) override;
 };

 class OpusCodec: public Codec
@@ -112,9 +113,10 @@ public:
    int frameTime();
    int samplerate();
    int channels();
-    int encode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int decode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int plc(int lostFrames, void* output, int outputCapacity);
+
+    EncodeResult    encode(std::span<const uint8_t> input, std::span<uint8_t> output);
+    DecodeResult    decode(std::span<const uint8_t> input, std::span<uint8_t> output);
+    size_t          plc(int lostFrames, std::span<uint8_t> output);
 };


@@ -146,14 +148,15 @@ public:

    IlbcCodec(int packetTime);
    virtual ~IlbcCodec();
-    const char* name();
-    int pcmLength();
-    int rtpLength();
-    int frameTime();
-    int samplerate();
-    int encode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int decode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int plc(int lostFrames, void* output, int outputCapacity);
+    const char* name() override;
+    int pcmLength() override;
+    int rtpLength() override;
+    int frameTime() override;
+    int samplerate() override;
+
+    EncodeResult    encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    DecodeResult    decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    size_t          plc(int lostFrames, std::span<uint8_t> output) override;
 };

 class G711Codec: public Codec
@@ -186,15 +189,15 @@ public:
    G711Codec(int type);
    ~G711Codec();
    
-    const char* name();
-    int    pcmLength();
-    int    frameTime();
-    int    rtpLength();
-    int    samplerate();
+    const char* name()  override;
+    int    pcmLength()  override;
+    int    frameTime()  override;
+    int    rtpLength()  override;
+    int    samplerate()  override;

-    int encode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int decode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int plc(int lostSamples, void* output, int outputCapacity);
+    EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    size_t       plc(int lostSamples, std::span<uint8_t> output) override ;

 protected:
    int mType;    /// Determines if it is u-law or a-law codec. Its value is ALaw or ULaw.
@@ -237,15 +240,15 @@ public:
    IsacCodec(int sampleRate);
    ~IsacCodec();
    
-    const char* name();
-    int pcmLength();
-    int rtpLength();
-    int frameTime();
-    int samplerate();
+    const char* name() override;
+    int pcmLength() override;
+    int rtpLength() override;
+    int frameTime() override;
+    int samplerate() override;

-    int encode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int decode(const void* input, int inputBytes, void* output, int outputCapacity);
-    int plc(int lostFrames, void* output, int outputCapacity);
+    EncodeResult    encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    DecodeResult    decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
+    size_t          plc(int lostFrames, std::span<uint8_t> output) override;
 };


@@ -311,11 +314,11 @@ public:
    /*! Destructor. */
    virtual ~GsmCodec();

-    const char* name();
-    int pcmLength();
-    int rtpLength();
-    int frameTime();
-    int samplerate();
+    const char* name() override;
+    int pcmLength() override;
+    int rtpLength() override;
+    int frameTime() override;
+    int samplerate() override;

    int encode(const void* input, int inputBytes, void* output, int outputCapacity);
    int decode(const void* input, int inputBytes, void* output, int outputCapacity);
@@ -115,7 +115,7 @@ bool SequenceSort(const std::shared_ptr<RtpBuffer::Packet>& p1, const std::share
    return p1->rtp()->GetExtendedSequenceNumber() < p2->rtp()->GetExtendedSequenceNumber();
 }

-std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPacket> packet, std::chrono::milliseconds timelength, int rate)
+std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib::RTPPacket>& packet, std::chrono::milliseconds timelength, int rate)
 {
    if (!packet)
        return std::shared_ptr<Packet>();
@@ -191,12 +191,11 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPa
    return std::shared_ptr<Packet>();
 }

-RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
+RtpBuffer::FetchResult RtpBuffer::fetch()
 {
    Lock l(mGuard);

-    FetchResult result = FetchResult::NoPacket;
-    rl.clear();
+    FetchResult result;

    // See if there is enough information in buffer
    auto total = findTimelength();
@@ -217,10 +216,10 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
        mStat.mPacketDropped++;
    }

-    if (total < mLow)
+    if (total < mLow || total == 0ms)
    {
        // Still not prebuffered
-        result = FetchResult::NoPacket;
+        result = {FetchResult::Status::NoPacket};
    }
    else
    {
@@ -228,8 +227,8 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
        {
            if (mPacketList.empty())
            {
-                result = FetchResult::NoPacket;
                // Don't increase counter of lost packets here; maybe it is DTX
+                result = {FetchResult::Status::NoPacket};
            }
            else
            {
@@ -237,7 +236,6 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
                auto& packet = *mPacketList.front();
                uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber();

-
                // Gap between new packet and previous on
                int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1;
                gap = std::min(gap, 127);
@@ -255,16 +253,15 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)

                    mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno

-                    result = FetchResult::Gap;
+                    result = {FetchResult::Status::Gap};
                }
                else
                {
-                    result = FetchResult::RegularPacket;
-                    rl.push_back(mPacketList.front());
+                    result = {FetchResult::Status::RegularPacket, mPacketList.front()};

                    // Save last returned normal packet
-                    mFetchedPacket = mPacketList.front();
-                    mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber();
+                    mFetchedPacket = result.mPacket;
+                    mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();

                    // Remove returned packet from the list
                    mPacketList.erase(mPacketList.begin());
@@ -277,14 +274,11 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
            if (findTimelength() >= mPrebuffer && !mPacketList.empty())
            {
                // Normal packet will be returned
-                result = FetchResult::RegularPacket;
-
-                // Put it to output list
-                rl.push_back(mPacketList.front());
+                result = {FetchResult::Status::RegularPacket, mPacketList.front()};

                // Remember returned packet
-                mFetchedPacket = mPacketList.front();
-                mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber();
+                mFetchedPacket = result.mPacket;
+                mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();

                // Remove returned packet from buffer list
                mPacketList.erase(mPacketList.begin());
@@ -292,12 +286,12 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
            else
            {
                ICELogMedia(<< "Jitter buffer was not prebuffered yet; resulting no packet");
-                result = FetchResult::NoPacket;
+                result = {FetchResult::Status::NoPacket};
            }
        }
    }

-    if (result != FetchResult::NoPacket)
+    if (result.mStatus != FetchResult::Status::NoPacket)
        mReturnedCounter++;

    return result;
@@ -333,8 +327,7 @@ Receiver::~Receiver()

 //-------------- AudioReceiver ----------------
 AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat)
-    :Receiver(stat), mBuffer(stat), mCodecSettings(settings),
-      mCodecList(settings)
+    :Receiver(stat), mBuffer(stat), mCodecSettings(settings), mCodecList(settings)
 {
    // Init resamplers
    mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE);
@@ -346,6 +339,8 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics
    mCodecList.setSettings(settings);
    mCodecList.fillCodecMap(mCodecMap);

+    mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short));
+
 #if defined(DUMP_DECODED)
    mDecodedDump = std::make_shared<Audio::WavFileWriter>();
    mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS);
@@ -559,11 +554,15 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output

    mDecodedLength = mResampledLength = 0;
    if (mCngPacket && mCodec)
+    {
+        if (mCngPacket->rtp()->GetPayloadType() == 13)
        {
            // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
            // Do not forget to send this noise to analysis
-        mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
-                                             reinterpret_cast<short*>(mDecodedFrame), false);
+            mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame), false);
+        }
+        else
+            decodePacketTo(output, options, mCngPacket);
    }
    else
    if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode)
@@ -594,19 +593,19 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
        return {.mStatus = DecodeResult::Status::Skip};
 }

-AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl)
+AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& packet)
 {
+    if (!packet || !packet->rtp())
+        return {DecodeResult::Status::Skip};
+
    DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
+    auto& rtp = *packet->rtp(); // Syntax sugar

    mFailedCount = 0;
-    for (const std::shared_ptr<RtpBuffer::Packet>& p: rl)
-    {
-        assert(p);
-
    // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed.
    if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
    {
-            int units = p->rtp()->GetTimestamp() - *mLastPacketTimestamp;
+        int units = rtp.GetTimestamp() - *mLastPacketTimestamp;
        int milliseconds = units / (mCodec->samplerate() / 1000);
        if (milliseconds > mLastPacketTimeLength)
        {
@@ -619,17 +618,16 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
        }
    }

-        mLastPacketTimestamp = p->rtp()->GetTimestamp();
+    mLastPacketTimestamp = rtp.GetTimestamp();

    // Find codec by payload type
-        int ptype = p->rtp()->GetPayloadType();
+    int ptype = rtp.GetPayloadType();

    // Look into mCodecMap if exists
    auto codecIter = mCodecMap.find(ptype);
    if (codecIter == mCodecMap.end())
        return  {};

-
    if (!codecIter->second)
        codecIter->second = mCodecList.createCodecByPayloadType(ptype);

@@ -640,18 +638,17 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
        result.mSamplerate = mCodec->samplerate();

        // Check if it is CNG packet
-            if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6)
+        if (((ptype == 0 || ptype == 8) && rtp.GetPayloadLength() >= 1 && rtp.GetPayloadLength() <= 6) || rtp.GetPayloadType() == 13)
        {
            if (options.mSkipDecode)
                mDecodedLength = 0;
            else
            {
-                    mCngPacket = p->rtp();
-                    mCngDecoder.decode3389(p->rtp()->GetPayloadData(), p->rtp()->GetPayloadLength());
+                mCngPacket = packet;
+                mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());

                // Emit CNG mLastPacketLength milliseconds
-                    mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength,
-                                                         (short*)mDecodedFrame, true);
+                mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true);
                if (mDecodedLength)
                    processDecoded(output, options);
            }
@@ -664,7 +661,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out

            // Handle here regular RTP packets
            // Check if payload length is ok
-                size_t payload_length = p->rtp()->GetPayloadLength();
+            size_t payload_length = rtp.GetPayloadLength();
            size_t rtp_frame_length = mCodec->rtpLength();

            int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0;
@@ -672,8 +669,8 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
            if (!tail)
            {
                // Find number of frames
-                    mFrameCount = mCodec->rtpLength() ? p->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1;
-                    int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)p->rtp()->GetPayloadLength();
+                mFrameCount = mCodec->rtpLength() ? rtp.GetPayloadLength() / mCodec->rtpLength() : 1;
+                int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)rtp.GetPayloadLength();

                // Save last packet time length
                mLastPacketTimeLength = mFrameCount * mCodec->frameTime();
@@ -686,8 +683,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
                    else
                    {
                        // Decode frame by frame
-                            mDecodedLength = mCodec->decode(p->rtp()->GetPayloadData() + i * mCodec->rtpLength(),
-                                                            frameLength, mDecodedFrame, sizeof mDecodedFrame);
+                        mDecodedLength = mCodec->decode(rtp.GetPayloadData() + i * mCodec->rtpLength(), frameLength, mDecodedFrame, sizeof mDecodedFrame);
                        if (mDecodedLength > 0)
                            processDecoded(output, options);
                    }
@@ -704,13 +700,21 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
            }
        }
    }
-    }
    return result;
 }

 AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options)
 {
-    // No packet available in jitter buffer - just increase the counter for now
+    // No packet available at all (and no previous CNG packet) - so return the silence
+    if (options.mElapsed != 0ms && mCodec)
+    {
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        // Emit silence if codec information is available - it is to properly handle the gaps
+        auto avail = output.getTimeLength(fmt.rate(), fmt.channels());
+        if (options.mElapsed > avail)
+            mAvailable.addZero(fmt.sizeFromTime(options.mElapsed - avail));
+    }
+
    mFailedCount++;
    return {.mStatus = DecodeResult::Status::Skip};
 }
@@ -719,32 +723,71 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
 {
    DecodeResult result = {.mStatus = DecodeResult::Status::Skip};

-    size_t initialOffset = output.filled();   // Size in bytes
+    auto produced = 0ms;
+    if (mAvailable.filled() && mCodec && options.mElapsed != 0ms)
+    {
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        auto initiallyAvailable = mCodec ? mAvailable.getTimeLength(fmt.rate(), fmt.channels()) : 0ms;
+        if (initiallyAvailable != 0ms)
+        {
+            std::chrono::milliseconds resultTime = std::min(initiallyAvailable, options.mElapsed);
+            auto resultLen = fmt.sizeFromTime(resultTime);
+            mAvailable.moveTo(output, resultLen);
+            produced += resultTime;
+
+            // Maybe request is satisfied ?
+            if (produced >= options.mElapsed)
+                return {.mStatus = DecodeResult::Status::Ok, .mSamplerate = fmt.rate(), .mChannels = fmt.channels()};
+        }
+    }
+
    std::chrono::milliseconds decoded = 0ms;
    do
    {
        // Get next packet from buffer
        RtpBuffer::ResultList rl;
-        RtpBuffer::FetchResult fr = mBuffer.fetch(rl);
-        switch (fr)
+        RtpBuffer::FetchResult fr = mBuffer.fetch();
+        // ICELogDebug(<< fr.toString() << " " << mBuffer.findTimelength());
+
+        switch (fr.mStatus)
        {
-        case RtpBuffer::FetchResult::Gap:           result = decodeGapTo(output, options);          break;
-        case RtpBuffer::FetchResult::NoPacket:      result = decodeEmptyTo(output, options);        break;
-        case RtpBuffer::FetchResult::RegularPacket: result = decodePacketTo(output, options, rl);   break;
+        case RtpBuffer::FetchResult::Status::Gap:           result = decodeGapTo(mAvailable, options);                  break;
+        case RtpBuffer::FetchResult::Status::NoPacket:      result = decodeEmptyTo(mAvailable, options);                break;
+        case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options, fr.mPacket);   break;
        default:
            assert(0);
        }

-        size_t available = output.filled() - initialOffset;
-        if (!available)
-            break;
-        initialOffset  = output.filled();
+        // Was there decoding at all ?
+        if (!mCodec)
+            break; // No sense to continue - we have no information at all

-        // ToDo: calculate how much milliseconds was decoded
-        int samplerate = options.mResampleToMainRate ? AUDIO_SAMPLERATE : result.mSamplerate;
-        decoded += std::chrono::milliseconds(available / sizeof(short) / (samplerate / 1000));
+        Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
+        result.mSamplerate = fmt.rate();
+        result.mChannels = fmt.channels();
+
+        // Have we anything interesting in the buffer ?
+        auto bufferAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels());
+        if (bufferAvailable == 0ms)
+            break; // No sense to continue - decoding / CNG / PLC stopped totally
+
+        // How much data should be moved to result buffer ?
+        if (options.mElapsed != 0ms)
+        {
+            std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced);
+            auto resultLen = fmt.sizeFromTime(resultTime);
+            mAvailable.moveTo(output, resultLen);
+            produced += resultTime;
        }
-    while (decoded < options.mElapsed);
+        else
+            mAvailable.moveTo(output, mAvailable.filled());
+
+        decoded += bufferAvailable;
+    }
+    while (produced < options.mElapsed);
+
+    if (produced != 0ms)
+        result.mStatus = DecodeResult::Status::Ok;

    // Time statistics
    if (result.mStatus == DecodeResult::Status::Ok)
@@ -28,13 +28,6 @@ using jrtplib::RTPPacket;
 class RtpBuffer
 {
 public:
-    enum class FetchResult
-    {
-        RegularPacket,
-        Gap,
-        NoPacket
-    };
-
    // Owns rtp packet data
    class Packet
    {
@@ -59,6 +52,29 @@ public:
        std::chrono::microseconds mTimestamp = 0us;
    };

+    // Outcome of RtpBuffer::fetch(): a status plus, for a regular packet, the packet itself.
+    struct FetchResult
+    {
+        enum class Status
+        {
+            RegularPacket,
+            Gap,
+            NoPacket
+        };
+
+        Status mStatus = Status::NoPacket;
+        std::shared_ptr<Packet> mPacket;    // Filled in by fetch() for Status::RegularPacket
+
+        // Short textual form of mStatus
+        std::string toString() const
+        {
+            switch (mStatus)
+            {
+                case Status::RegularPacket: return "packet";
+                case Status::Gap:           return "gap";
+                case Status::NoPacket:      return "empty";
+            }
+            // Not reached for valid enumerators; without this return, flowing off the end
+            // of a non-void function would be undefined behaviour.
+            return "unknown";
+        }
+    };
+
    RtpBuffer(Statistics& stat);
    ~RtpBuffer();

@@ -81,12 +97,12 @@ public:
    int getCount() const;

    // Returns false if packet was not add - maybe too old or too new or duplicate
-    std::shared_ptr<Packet> add(std::shared_ptr<RTPPacket> packet, std::chrono::milliseconds timelength, int rate);
+    std::shared_ptr<Packet> add(const std::shared_ptr<RTPPacket>& packet, std::chrono::milliseconds timelength, int rate);

    typedef std::vector<std::shared_ptr<Packet>> ResultList;
    typedef std::shared_ptr<ResultList> PResultList;

-    FetchResult fetch(ResultList& rl);
+    FetchResult fetch();
    
 protected:
    unsigned    mSsrc = 0;
@@ -133,15 +149,6 @@ public:
    // Lifetime of pointer to codec is limited by lifetime of AudioReceiver (it is container).
    bool add(const std::shared_ptr<jrtplib::RTPPacket>& p, Codec** codec = nullptr);

-    // Returns false when there is no rtp data from jitter
-    /*enum DecodeOptions
-    {
-        DecodeOptions_ResampleToMainRate = 0,
-        DecodeOptions_DontResample = 1,
-        DecodeOptions_FillCngGap = 2,
-        DecodeOptions_SkipDecode = 4
-    };*/
-
    struct DecodeOptions
    {
        bool mResampleToMainRate = true;                // Resample all decoded audio to AUDIO_SAMPLERATE
@@ -187,11 +194,14 @@ protected:
    CodecList::Settings                 mCodecSettings;
    CodecList                           mCodecList;
    JitterStatistics                    mJitterStats;
-    std::shared_ptr<jrtplib::RTPPacket> mCngPacket;
+    std::shared_ptr<RtpBuffer::Packet>  mCngPacket;
    CngDecoder                          mCngDecoder;
    size_t                              mDTXSamplesToEmit = 0;   // How much silence (or CNG) should be emited before next RTP packet gets into the action

-    // Buffer to hold decoded data
+    // Already decoded data that can be returned without further decoding - this happens because getAudioTo() may be limited by a time interval
+    Audio::DataWindow mAvailable;
+
+    // Temporary buffer to hold decoded data (better than allocating it on the stack)
    int16_t mDecodedFrame[MT_MAX_DECODEBUFFER];
    size_t mDecodedLength = 0;

@@ -208,7 +218,10 @@ protected:
    std::optional<uint32_t> mLastPacketTimestamp;

    int mFailedCount = 0;
-    Audio::Resampler  mResampler8, mResampler16, mResampler32, mResampler48;
+    Audio::Resampler  mResampler8,
+                      mResampler16,
+                      mResampler32,
+                      mResampler48;

    Audio::PWavFileWriter mDecodedDump;

@@ -229,7 +242,7 @@ protected:
    void updateAmrCodecStats(Codec* c);

    DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options);
-    DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl);
+    DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& p);
    DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options);
 };

@@ -1,4 +1,4 @@
-/* Copyright(C) 2007-2014 VoIP objects (voipobjects.com)
+/* Copyright(C) 2007-2026 VoIP objects (voipobjects.com)
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
@@ -10,7 +10,7 @@
 #include "../helper/HL_Types.h"
 #include <map>
 #include "../helper/HL_Pointer.h"
-
+#include "../audio/Audio_Interface.h"

 namespace MT
 {
@@ -18,8 +18,7 @@ class Codec;
 typedef std::shared_ptr<Codec> PCodec;

 class CodecMap: public std::map<int, PCodec>
-{
-};
+{};

 class Codec
 {
@@ -58,18 +57,28 @@ public:
    // Number of audio channels
    virtual int channels() { return 1; }

-
    // Returns size of encoded data (RTP) in bytes
-    virtual int encode(const void* input, int inputBytes, void* output, int outputCapacity) = 0;
+    // Result type of encode()
+    struct EncodeResult
+    {
+        size_t mEncoded = 0; // Number of encoded bytes
+    };
+    virtual EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) = 0;

    // Returns size of decoded data (PCM signed short) in bytes
-    virtual int decode(const void* input, int inputBytes, void* output, int outputCapacity) = 0;
+    // Result type of decode()
+    struct DecodeResult
+    {
+        size_t  mDecoded = 0;    // Number of decoded bytes
+        bool    mIsCng = false;    // Whether this packet should be used as CNG (used for AMR codecs)
+    };
+    virtual DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) = 0;

    // Returns size of produced data (PCM signed short) in bytes
-    virtual int plc(int lostFrames, void* output, int outputCapacity) = 0;
+    virtual size_t plc(int lostFrames, std::span<uint8_t> output) = 0;

    // Returns size of codec in memory
-    virtual int getSize() const { return 0; };
+    virtual size_t getSize() const { return 0; };
+
+    // Builds an Audio::Format from this codec's samplerate() and channels()
+    virtual Audio::Format getAudioFormat()
+    {
+        return Audio::Format(samplerate(), channels());
+    }
 };
 }
 #endif