diff --git a/src/engine/audio/Audio_DataWindow.cpp b/src/engine/audio/Audio_DataWindow.cpp index f4036154..c33f5040 100644 --- a/src/engine/audio/Audio_DataWindow.cpp +++ b/src/engine/audio/Audio_DataWindow.cpp @@ -11,7 +11,7 @@ using namespace Audio; DataWindow::DataWindow() { mFilled = 0; - mData = NULL; + mData = nullptr; mCapacity = 0; } @@ -166,6 +166,31 @@ void DataWindow::zero(int length) memset(mData, 0, mFilled); } +size_t DataWindow::moveTo(DataWindow& dst, size_t size) +{ + Lock l(mMutex); + + size_t avail = std::min(size, (size_t)filled()); + if (avail != 0) + { + dst.add(mData, avail); + erase(avail); + } + return avail; +} + +// Duration of the buffered 16-bit PCM data for the given format, rounded down to whole milliseconds +std::chrono::milliseconds DataWindow::getTimeLength(int samplerate, int channels) const +{ + Lock l(mMutex); + // Invalid format: report an empty window rather than dividing by zero + if (samplerate <= 0 || channels <= 0) + return std::chrono::milliseconds(0); + + // Multiply before dividing: (samplerate / 1000) truncates for rates like 44100 and is zero below 1000 Hz + return std::chrono::milliseconds((long long)mFilled * 1000 / ((long long)sizeof(short) * channels * samplerate)); +} + void DataWindow::makeStereoFromMono(DataWindow& dst, DataWindow& src) { Lock lockDst(dst.mMutex), lockSrc(src.mMutex); diff --git a/src/engine/audio/Audio_DataWindow.h b/src/engine/audio/Audio_DataWindow.h index a0e7f306..1d13e52e 100644 --- a/src/engine/audio/Audio_DataWindow.h +++ b/src/engine/audio/Audio_DataWindow.h @@ -11,37 +11,40 @@ namespace Audio { - class DataWindow - { - public: +class DataWindow +{ +public: DataWindow(); ~DataWindow(); - void setCapacity(int capacity); - int capacity() const; + void setCapacity(int capacity); + int capacity() const; - void addZero(int length); - void add(const void* data, int length); - void add(short sample); - int read(void* buffer, int length); - void erase(int length = -1); + void addZero(int length); + void add(const void* data, int length); + void add(short sample); + int read(void* buffer, int length); + void erase(int length = -1); const char* data() const; - char* mutableData(); - int filled() const; - void setFilled(int filled); - void clear(); + char* mutableData(); + int filled() const; + void setFilled(int filled); + void clear(); - short shortAt(int index) const; -
void setShortAt(short value, int index); - void zero(int length); + short shortAt(int index) const; + void setShortAt(short value, int index); + void zero(int length); + size_t moveTo(DataWindow& dst, size_t size); + + std::chrono::milliseconds getTimeLength(int samplerate, int channels) const; static void makeStereoFromMono(DataWindow& dst, DataWindow& src); - protected: +protected: mutable Mutex mMutex; char* mData; int mFilled; int mCapacity; - }; +}; } #endif diff --git a/src/engine/audio/Audio_Interface.h b/src/engine/audio/Audio_Interface.h index 82f0f864..e08f763d 100644 --- a/src/engine/audio/Audio_Interface.h +++ b/src/engine/audio/Audio_Interface.h @@ -51,6 +51,11 @@ struct Format return float((milliseconds * mRate) / 500.0 * mChannels); } + size_t sizeFromTime(std::chrono::milliseconds ms) const + { + return sizeFromTime(ms.count()); + } + std::string toString() { char buffer[64]; diff --git a/src/engine/engine_config.h b/src/engine/engine_config.h index 8c78d164..3433e1e1 100644 --- a/src/engine/engine_config.h +++ b/src/engine/engine_config.h @@ -38,11 +38,11 @@ //#define AUDIO_DUMPOUTPUT -#define UA_REGISTRATION_TIME 3600 -#define UA_MEDIA_PORT_START 20000 -#define UA_MEDIA_PORT_FINISH 30000 -#define UA_MAX_UDP_PACKET_SIZE 576 -#define UA_PUBLICATION_ID "314" +#define UA_REGISTRATION_TIME 3600 +#define UA_MEDIA_PORT_START 20000 +#define UA_MEDIA_PORT_FINISH 30000 +#define UA_MAX_UDP_PACKET_SIZE 576 +#define UA_PUBLICATION_ID "314" #define MT_SAMPLERATE AUDIO_SAMPLERATE @@ -50,13 +50,11 @@ #define MT_MAXRTPPACKET 1500 #define MT_DTMF_END_PACKETS 3 -#define RTP_BUFFER_HIGH 0 -#define RTP_BUFFER_LOW 0 -#define RTP_BUFFER_PREBUFFER 0 +// RTP buffer thresholds (high / low / prebuffer), in milliseconds +#define RTP_BUFFER_HIGH (2000) +#define RTP_BUFFER_LOW (0) +#define RTP_BUFFER_PREBUFFER (100) -// #define RTP_BUFFER_HIGH 160 -// #define RTP_BUFFER_LOW 10 -// #define RTP_BUFFER_PREBUFFER 160 #define RTP_DECODED_CAPACITY 2048 #define DEFAULT_SUBSCRIPTION_TIME 1200 diff --git
a/src/engine/media/MT_AmrCodec.cpp b/src/engine/media/MT_AmrCodec.cpp index f7136b6a..4085360a 100644 --- a/src/engine/media/MT_AmrCodec.cpp +++ b/src/engine/media/MT_AmrCodec.cpp @@ -31,30 +31,30 @@ const uint16_t amrwb_framelenbits[10] = struct AmrPayloadInfo { - const uint8_t* mPayload; - int mPayloadLength; - bool mOctetAligned; - bool mInterleaving; - bool mWideband; - uint64_t mCurrentTimestamp; + const uint8_t* mPayload = nullptr; + int mPayloadLength = 0; + bool mOctetAligned = false; + bool mInterleaving = false; + bool mWideband = false; + uint64_t mCurrentTimestamp = 0; }; struct AmrFrame { - uint8_t mFrameType; - uint8_t mMode; - bool mGoodQuality; - uint64_t mTimestamp; + uint8_t mFrameType = 0; + uint8_t mMode = 0; + bool mGoodQuality = false; + uint64_t mTimestamp = 0; std::shared_ptr mData; - uint8_t mSTI; + uint8_t mSTI = 0; }; struct AmrPayload { - uint8_t mCodeModeRequest; + uint8_t mCodeModeRequest = 0; std::vector mFrames; - bool mDiscardPacket; + bool mDiscardPacket = false; }; // ARM RTP payload has next structure @@ -148,10 +148,10 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input, size_t& cngCounter) continue; } - if (input.mWideband && f.mMode == 0xFF /* CNG */) - { - int a = 1; - } + // if (input.mWideband && f.mMode == 0xFF /* CNG */) + // { + // int a = 1; + // } if (input.mWideband && f.mFrameType == 15) { @@ -600,12 +600,12 @@ int AmrWbCodec::decodeIuup(std::span input, std::span ou int AmrWbCodec::decodePlain(std::span input, std::span output) { AmrPayloadInfo info; - info.mCurrentTimestamp = mCurrentDecoderTimestamp; - info.mOctetAligned = mConfig.mOctetAligned; - info.mPayload = input.data(); - info.mPayloadLength = input.size(); - info.mWideband = true; - info.mInterleaving = false; + info.mCurrentTimestamp = mCurrentDecoderTimestamp; + info.mOctetAligned = mConfig.mOctetAligned; + info.mPayload = input.data(); + info.mPayloadLength = input.size(); + info.mWideband = true; + info.mInterleaving = false; AmrPayload ap; try 
@@ -628,21 +628,30 @@ int AmrWbCodec::decodePlain(std::span input, std::span o return 0; } - // Check for output buffer capacity - if (output.size() < (int)ap.mFrames.size() * pcmLength()) + // Find the required output capacity + size_t capacity = 0; + for (AmrFrame& frame: ap.mFrames) + capacity += frame.mMode == 0xFF /* CNG */ ? pcmLength() * 8 : pcmLength(); + + if (output.size() < capacity) return 0; short* dataOut = (short*)output.data(); size_t dataOutSizeInBytes = 0; for (AmrFrame& frame: ap.mFrames) { - memset(dataOut, 0, static_cast(pcmLength())); + size_t frameOutputSize = frame.mMode == 0xFF ? pcmLength() * 8 : pcmLength(); + memset(dataOut, 0, frameOutputSize); if (frame.mData) { + if (frame.mMode == 0xFF) + { + // int bp = 1; + } D_IF_decode(mDecoderCtx, (const unsigned char*)frame.mData->data(), (short*)dataOut, 0); - dataOut += pcmLength() / 2; - dataOutSizeInBytes += pcmLength(); + dataOut += frameOutputSize / 2; + dataOutSizeInBytes += frameOutputSize; } } return dataOutSizeInBytes; diff --git a/src/engine/media/MT_AudioCodec.cpp b/src/engine/media/MT_AudioCodec.cpp index 36ee8fd7..e08e65f4 100644 --- a/src/engine/media/MT_AudioCodec.cpp +++ b/src/engine/media/MT_AudioCodec.cpp @@ -635,10 +635,10 @@ int IlbcCodec::samplerate() return 8000; } -int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity) +Codec::EncodeResult IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity) { if (inputBytes % pcmLength()) - return 0; + return {}; // Declare the data input pointer short *dataIn = (short *)input; @@ -657,10 +657,10 @@ int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int dataOut += rtpLength(); } - return frames * rtpLength(); + return {frames * rtpLength()}; } -int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outputCapacity) +Codec::DecodeResult IlbcCodec::decode(const void* input, int inputBytes, void* output, int 
outputCapacity) { unsigned frames = inputBytes / rtpLength(); @@ -675,12 +675,13 @@ int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outpu dataOut += pcmLength() / 2; } - return frames * pcmLength(); + return {frames * pcmLength()}; } -int IlbcCodec::plc(int lostFrames, void* output, int outputCapacity) +// Runs packet loss concealment for lostFrames frames into output; the result is in bytes, as declared by Codec::plc +size_t IlbcCodec::plc(int lostFrames, std::span output) { - return 2 * WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output, lostFrames); + return sizeof(short) * WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output.data(), lostFrames); } // --- IlbcFactory --- diff --git a/src/engine/media/MT_AudioCodec.h b/src/engine/media/MT_AudioCodec.h index 049d24b4..da11bec4 100644 --- a/src/engine/media/MT_AudioCodec.h +++ b/src/engine/media/MT_AudioCodec.h @@ -58,9 +58,10 @@ public: int frameTime() override; int samplerate() override; int channels() override; - int encode(const void* input, int inputBytes, void* output, int outputCapacity) override; - int decode(const void* input, int inputBytes, void* output, int outputCapacity) override; - int plc(int lostFrames, void* output, int outputCapacity) override; + + EncodeResult encode(std::span input, std::span output) override; + DecodeResult decode(std::span input, std::span output) override; + size_t plc(int lostFrames, std::span output) override; }; class OpusCodec: public Codec @@ -112,9 +113,10 @@ public: int frameTime(); int samplerate(); int channels(); - int encode(const void* input, int inputBytes, void* output, int outputCapacity); - int decode(const void* input, int inputBytes, void* output, int outputCapacity); - int plc(int lostFrames, void* output, int outputCapacity); + + EncodeResult encode(std::span input, std::span output); + DecodeResult decode(std::span input, std::span output); + size_t plc(int lostFrames, std::span output); }; @@ -146,14 +148,15 @@ public: IlbcCodec(int packetTime); virtual ~IlbcCodec(); - const char* name(); - int pcmLength(); - int rtpLength(); -
int frameTime(); - int samplerate(); - int encode(const void* input, int inputBytes, void* output, int outputCapacity); - int decode(const void* input, int inputBytes, void* output, int outputCapacity); - int plc(int lostFrames, void* output, int outputCapacity); + const char* name() override; + int pcmLength() override; + int rtpLength() override; + int frameTime() override; + int samplerate() override; + + EncodeResult encode(std::span input, std::span output) override; + DecodeResult decode(std::span input, std::span output) override; + size_t plc(int lostFrames, std::span output) override; }; class G711Codec: public Codec @@ -186,15 +189,15 @@ public: G711Codec(int type); ~G711Codec(); - const char* name(); - int pcmLength(); - int frameTime(); - int rtpLength(); - int samplerate(); + const char* name() override; + int pcmLength() override; + int frameTime() override; + int rtpLength() override; + int samplerate() override; - int encode(const void* input, int inputBytes, void* output, int outputCapacity); - int decode(const void* input, int inputBytes, void* output, int outputCapacity); - int plc(int lostSamples, void* output, int outputCapacity); + EncodeResult encode(std::span input, std::span output) override; + DecodeResult decode(std::span input, std::span output) override; + size_t plc(int lostSamples, std::span output) override ; protected: int mType; /// Determines if it is u-law or a-law codec. Its value is ALaw or ULaw. 
@@ -237,15 +240,15 @@ public: IsacCodec(int sampleRate); ~IsacCodec(); - const char* name(); - int pcmLength(); - int rtpLength(); - int frameTime(); - int samplerate(); + const char* name() override; + int pcmLength() override; + int rtpLength() override; + int frameTime() override; + int samplerate() override; - int encode(const void* input, int inputBytes, void* output, int outputCapacity); - int decode(const void* input, int inputBytes, void* output, int outputCapacity); - int plc(int lostFrames, void* output, int outputCapacity); + EncodeResult encode(std::span input, std::span output) override; + DecodeResult decode(std::span input, std::span output) override; + size_t plc(int lostFrames, std::span output) override; }; @@ -311,11 +314,11 @@ public: /*! Destructor. */ virtual ~GsmCodec(); - const char* name(); - int pcmLength(); - int rtpLength(); - int frameTime(); - int samplerate(); + const char* name() override; + int pcmLength() override; + int rtpLength() override; + int frameTime() override; + int samplerate() override; int encode(const void* input, int inputBytes, void* output, int outputCapacity); int decode(const void* input, int inputBytes, void* output, int outputCapacity); diff --git a/src/engine/media/MT_AudioReceiver.cpp b/src/engine/media/MT_AudioReceiver.cpp index 6021687f..6f21e5c2 100644 --- a/src/engine/media/MT_AudioReceiver.cpp +++ b/src/engine/media/MT_AudioReceiver.cpp @@ -115,7 +115,7 @@ bool SequenceSort(const std::shared_ptr& p1, const std::share return p1->rtp()->GetExtendedSequenceNumber() < p2->rtp()->GetExtendedSequenceNumber(); } -std::shared_ptr RtpBuffer::add(std::shared_ptr packet, std::chrono::milliseconds timelength, int rate) +std::shared_ptr RtpBuffer::add(const std::shared_ptr& packet, std::chrono::milliseconds timelength, int rate) { if (!packet) return std::shared_ptr(); @@ -191,12 +191,11 @@ std::shared_ptr RtpBuffer::add(std::shared_ptr(); } -RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) 
+RtpBuffer::FetchResult RtpBuffer::fetch() { Lock l(mGuard); - FetchResult result = FetchResult::NoPacket; - rl.clear(); + FetchResult result; // See if there is enough information in buffer auto total = findTimelength(); @@ -217,10 +216,10 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) mStat.mPacketDropped++; } - if (total < mLow) + if (total < mLow || total == 0ms) { // Still not prebuffered - result = FetchResult::NoPacket; + result = {FetchResult::Status::NoPacket}; } else { @@ -228,8 +227,8 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) { if (mPacketList.empty()) { - result = FetchResult::NoPacket; // Don't increase counter of lost packets here; maybe it is DTX + result = {FetchResult::Status::NoPacket}; } else { @@ -237,7 +236,6 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) auto& packet = *mPacketList.front(); uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber(); - // Gap between new packet and previous on int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1; gap = std::min(gap, 127); @@ -255,16 +253,15 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno - result = FetchResult::Gap; + result = {FetchResult::Status::Gap}; } else { - result = FetchResult::RegularPacket; - rl.push_back(mPacketList.front()); + result = {FetchResult::Status::RegularPacket, mPacketList.front()}; // Save last returned normal packet - mFetchedPacket = mPacketList.front(); - mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber(); + mFetchedPacket = result.mPacket; + mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber(); // Remove returned packet from the list mPacketList.erase(mPacketList.begin()); @@ -277,14 +274,11 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) if (findTimelength() >= mPrebuffer && !mPacketList.empty()) { // Normal packet will be returned - result = FetchResult::RegularPacket; 
- - // Put it to output list - rl.push_back(mPacketList.front()); + result = {FetchResult::Status::RegularPacket, mPacketList.front()}; // Remember returned packet - mFetchedPacket = mPacketList.front(); - mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber(); + mFetchedPacket = result.mPacket; + mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber(); // Remove returned packet from buffer list mPacketList.erase(mPacketList.begin()); @@ -292,12 +286,12 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) else { ICELogMedia(<< "Jitter buffer was not prebuffered yet; resulting no packet"); - result = FetchResult::NoPacket; + result = {FetchResult::Status::NoPacket}; } } } - if (result != FetchResult::NoPacket) + if (result.mStatus != FetchResult::Status::NoPacket) mReturnedCounter++; return result; @@ -333,8 +327,7 @@ Receiver::~Receiver() //-------------- AudioReceiver ---------------- AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat) - :Receiver(stat), mBuffer(stat), mCodecSettings(settings), - mCodecList(settings) + :Receiver(stat), mBuffer(stat), mCodecSettings(settings), mCodecList(settings) { // Init resamplers mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE); @@ -346,6 +339,8 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics mCodecList.setSettings(settings); mCodecList.fillCodecMap(mCodecMap); + mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short)); + #if defined(DUMP_DECODED) mDecodedDump = std::make_shared(); mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS); @@ -560,10 +555,14 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output mDecodedLength = mResampledLength = 0; if (mCngPacket && mCodec) { - // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer. 
- // Do not forget to send this noise to analysis - mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, - reinterpret_cast(mDecodedFrame), false); + if (mCngPacket->rtp()->GetPayloadType() == 13) + { + // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer. + // Do not forget to send this noise to analysis + mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast(mDecodedFrame), false); + } + else + decodePacketTo(output, options, mCngPacket); } else if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode) @@ -594,114 +593,110 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output return {.mStatus = DecodeResult::Status::Skip}; } -AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl) +AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr& packet) { + if (!packet || !packet->rtp()) + return {DecodeResult::Status::Skip}; + DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; + auto& rtp = *packet->rtp(); // Syntax sugar mFailedCount = 0; - for (const std::shared_ptr& p: rl) + // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed. + if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec) { - assert(p); - - // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed. 
- if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec) + int units = rtp.GetTimestamp() - *mLastPacketTimestamp; + int milliseconds = units / (mCodec->samplerate() / 1000); + if (milliseconds > mLastPacketTimeLength) { - int units = p->rtp()->GetTimestamp() - *mLastPacketTimestamp; - int milliseconds = units / (mCodec->samplerate() / 1000); - if (milliseconds > mLastPacketTimeLength) - { - auto silenceLength = std::chrono::milliseconds(milliseconds - mLastPacketTimeLength); + auto silenceLength = std::chrono::milliseconds(milliseconds - mLastPacketTimeLength); - if (mCngPacket && options.mFillGapByCNG) - produceCNG(silenceLength, output, options); - else - produceSilence(silenceLength, output, options); - } + if (mCngPacket && options.mFillGapByCNG) + produceCNG(silenceLength, output, options); + else + produceSilence(silenceLength, output, options); } + } - mLastPacketTimestamp = p->rtp()->GetTimestamp(); + mLastPacketTimestamp = rtp.GetTimestamp(); - // Find codec by payload type - int ptype = p->rtp()->GetPayloadType(); + // Find codec by payload type + int ptype = rtp.GetPayloadType(); - // Look into mCodecMap if exists - auto codecIter = mCodecMap.find(ptype); - if (codecIter == mCodecMap.end()) - return {}; + // Look into mCodecMap if exists + auto codecIter = mCodecMap.find(ptype); + if (codecIter == mCodecMap.end()) + return {}; + if (!codecIter->second) + codecIter->second = mCodecList.createCodecByPayloadType(ptype); - if (!codecIter->second) - codecIter->second = mCodecList.createCodecByPayloadType(ptype); + mCodec = codecIter->second; + if (mCodec) + { + result.mChannels = mCodec->channels(); + result.mSamplerate = mCodec->samplerate(); - mCodec = codecIter->second; - if (mCodec) + // Check if it is CNG packet + if (((ptype == 0 || ptype == 8) && rtp.GetPayloadLength() >= 1 && rtp.GetPayloadLength() <= 6) || rtp.GetPayloadType() == 13) { - result.mChannels = mCodec->channels(); - result.mSamplerate = mCodec->samplerate(); - - // Check if it is 
CNG packet - if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6) + if (options.mSkipDecode) + mDecodedLength = 0; + else { - if (options.mSkipDecode) - mDecodedLength = 0; - else - { - mCngPacket = p->rtp(); - mCngDecoder.decode3389(p->rtp()->GetPayloadData(), p->rtp()->GetPayloadLength()); + mCngPacket = packet; + mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength()); - // Emit CNG mLastPacketLength milliseconds - mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, - (short*)mDecodedFrame, true); - if (mDecodedLength) - processDecoded(output, options); + // Emit CNG mLastPacketLength milliseconds + mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true); + if (mDecodedLength) + processDecoded(output, options); + } + result.mStatus = DecodeResult::Status::Ok; + } + else + { + // Reset CNG packet as we get regular RTP packet + mCngPacket.reset(); + + // Handle here regular RTP packets + // Check if payload length is ok + size_t payload_length = rtp.GetPayloadLength(); + size_t rtp_frame_length = mCodec->rtpLength(); + + int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0; + + if (!tail) + { + // Find number of frames + mFrameCount = mCodec->rtpLength() ? rtp.GetPayloadLength() / mCodec->rtpLength() : 1; + int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)rtp.GetPayloadLength(); + + // Save last packet time length + mLastPacketTimeLength = mFrameCount * mCodec->frameTime(); + + // Decode + for (int i=0; idecode(rtp.GetPayloadData() + i * mCodec->rtpLength(), frameLength, mDecodedFrame, sizeof mDecodedFrame); + if (mDecodedLength > 0) + processDecoded(output, options); + } } - result.mStatus = DecodeResult::Status::Ok; + result.mStatus = mFrameCount > 0 ? 
DecodeResult::Status::Ok : DecodeResult::Status::Skip; + + // Check for bitrate counter + updateAmrCodecStats(mCodec.get()); } else { - // Reset CNG packet as we get regular RTP packet - mCngPacket.reset(); - - // Handle here regular RTP packets - // Check if payload length is ok - size_t payload_length = p->rtp()->GetPayloadLength(); - size_t rtp_frame_length = mCodec->rtpLength(); - - int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0; - - if (!tail) - { - // Find number of frames - mFrameCount = mCodec->rtpLength() ? p->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1; - int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)p->rtp()->GetPayloadLength(); - - // Save last packet time length - mLastPacketTimeLength = mFrameCount * mCodec->frameTime(); - - // Decode - for (int i=0; idecode(p->rtp()->GetPayloadData() + i * mCodec->rtpLength(), - frameLength, mDecodedFrame, sizeof mDecodedFrame); - if (mDecodedLength > 0) - processDecoded(output, options); - } - } - result.mStatus = mFrameCount > 0 ? DecodeResult::Status::Ok : DecodeResult::Status::Skip; - - // Check for bitrate counter - updateAmrCodecStats(mCodec.get()); - } - else - { - // RTP packet with tail - it should not happen - result.mStatus = DecodeResult::Status::BadPacket; - } + // RTP packet with tail - it should not happen + result.mStatus = DecodeResult::Status::BadPacket; } } } @@ -710,7 +705,16 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options) { - // No packet available in jitter buffer - just increase the counter for now + // No packet available at all (and no previous CNG packet) - so return the silence + if (options.mElapsed != 0ms && mCodec) + { + Audio::Format fmt = options.mResampleToMainRate ? 
Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat(); + // Emit silence if codec information is available - it is to properly handle the gaps + auto avail = output.getTimeLength(fmt.rate(), fmt.channels()); + if (options.mElapsed > avail) + output.addZero(fmt.sizeFromTime(options.mElapsed - avail)); + } + mFailedCount++; return {.mStatus = DecodeResult::Status::Skip}; } @@ -719,32 +723,71 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output, { DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; - size_t initialOffset = output.filled(); // Size in bytes + auto produced = 0ms; + if (mAvailable.filled() && mCodec && options.mElapsed != 0ms) + { + Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat(); + auto initiallyAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels()); + if (initiallyAvailable != 0ms) + { + std::chrono::milliseconds resultTime = std::min(initiallyAvailable, options.mElapsed); + auto resultLen = fmt.sizeFromTime(resultTime); + mAvailable.moveTo(output, resultLen); + produced += resultTime; + + // Maybe request is satisfied ?
+ if (produced >= options.mElapsed) + return {.mStatus = DecodeResult::Status::Ok, .mSamplerate = fmt.rate(), .mChannels = fmt.channels()}; + } + } + std::chrono::milliseconds decoded = 0ms; do { // Get next packet from buffer RtpBuffer::ResultList rl; - RtpBuffer::FetchResult fr = mBuffer.fetch(rl); - switch (fr) + RtpBuffer::FetchResult fr = mBuffer.fetch(); + // ICELogDebug(<< fr.toString() << " " << mBuffer.findTimelength()); + + switch (fr.mStatus) { - case RtpBuffer::FetchResult::Gap: result = decodeGapTo(output, options); break; - case RtpBuffer::FetchResult::NoPacket: result = decodeEmptyTo(output, options); break; - case RtpBuffer::FetchResult::RegularPacket: result = decodePacketTo(output, options, rl); break; + case RtpBuffer::FetchResult::Status::Gap: result = decodeGapTo(mAvailable, options); break; + case RtpBuffer::FetchResult::Status::NoPacket: result = decodeEmptyTo(mAvailable, options); break; + case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options, fr.mPacket); break; default: assert(0); } - size_t available = output.filled() - initialOffset; - if (!available) - break; - initialOffset = output.filled(); + // Was there decoding at all ? + if (!mCodec) + break; // No sense to continue - we have no information at all - // ToDo: calculate how much milliseconds was decoded - int samplerate = options.mResampleToMainRate ? AUDIO_SAMPLERATE : result.mSamplerate; - decoded += std::chrono::milliseconds(available / sizeof(short) / (samplerate / 1000)); + Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat(); + result.mSamplerate = fmt.rate(); + result.mChannels = fmt.channels(); + + // Have we anything interesting in the buffer ? 
+ auto bufferAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels()); + if (bufferAvailable == 0ms) + break; // No sense to continue - decoding / CNG / PLC stopped totally + + // How much data should be moved to result buffer ? + if (options.mElapsed != 0ms) + { + std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced); + auto resultLen = fmt.sizeFromTime(resultTime); + mAvailable.moveTo(output, resultLen); + produced += resultTime; + } + else + mAvailable.moveTo(output, mAvailable.filled()); + + decoded += bufferAvailable; } - while (decoded < options.mElapsed); + while (produced < options.mElapsed); + + if (produced != 0ms) + result.mStatus = DecodeResult::Status::Ok; // Time statistics if (result.mStatus == DecodeResult::Status::Ok) diff --git a/src/engine/media/MT_AudioReceiver.h b/src/engine/media/MT_AudioReceiver.h index 26c8d6d6..ed76c310 100644 --- a/src/engine/media/MT_AudioReceiver.h +++ b/src/engine/media/MT_AudioReceiver.h @@ -28,13 +28,6 @@ using jrtplib::RTPPacket; class RtpBuffer { public: - enum class FetchResult - { - RegularPacket, - Gap, - NoPacket - }; - // Owns rtp packet data class Packet { @@ -59,6 +52,29 @@ public: std::chrono::microseconds mTimestamp = 0us; }; + struct FetchResult + { + enum class Status + { + RegularPacket, + Gap, + NoPacket + }; + + Status mStatus = Status::NoPacket; + std::shared_ptr mPacket; + + std::string toString() const + { + // Every path returns a value: anything that is neither a packet nor a gap is reported as empty + if (mStatus == Status::RegularPacket) + return "packet"; + if (mStatus == Status::Gap) + return "gap"; + return "empty"; + } + }; + RtpBuffer(Statistics& stat); ~RtpBuffer(); @@ -81,12 +97,12 @@ public: int getCount() const; // Returns false if packet was not add - maybe too old or too new or duplicate - std::shared_ptr add(std::shared_ptr packet, std::chrono::milliseconds timelength, int rate); + std::shared_ptr add(const std::shared_ptr& packet, std::chrono::milliseconds timelength, int rate); typedef std::vector>
ResultList; typedef std::shared_ptr PResultList; - FetchResult fetch(ResultList& rl); + FetchResult fetch(); protected: unsigned mSsrc = 0; @@ -133,15 +149,6 @@ public: // Lifetime of pointer to codec is limited by lifetime of AudioReceiver (it is container). bool add(const std::shared_ptr& p, Codec** codec = nullptr); - // Returns false when there is no rtp data from jitter - /*enum DecodeOptions - { - DecodeOptions_ResampleToMainRate = 0, - DecodeOptions_DontResample = 1, - DecodeOptions_FillCngGap = 2, - DecodeOptions_SkipDecode = 4 - };*/ - struct DecodeOptions { bool mResampleToMainRate = true; // Resample all decoded audio to AUDIO_SAMPLERATE @@ -168,7 +175,7 @@ public: // Looks for codec by payload type Codec* findCodec(int payloadType); - RtpBuffer& getRtpBuffer() { return mBuffer; } + RtpBuffer& getRtpBuffer() { return mBuffer; } // Returns size of AudioReceiver's instance in bytes (including size of all data + codecs + etc.) int getSize() const; @@ -187,11 +194,14 @@ protected: CodecList::Settings mCodecSettings; CodecList mCodecList; JitterStatistics mJitterStats; - std::shared_ptr mCngPacket; + std::shared_ptr mCngPacket; CngDecoder mCngDecoder; size_t mDTXSamplesToEmit = 0; // How much silence (or CNG) should be emited before next RTP packet gets into the action - // Buffer to hold decoded data + // Already decoded data that can be retrieved without actual decoding - it may happen because of getAudioTo() may be limited by time interval + Audio::DataWindow mAvailable; + + // Temporary buffer to hold decoded data (it is better than allocate data on stack) int16_t mDecodedFrame[MT_MAX_DECODEBUFFER]; size_t mDecodedLength = 0; @@ -208,7 +218,10 @@ protected: std::optional mLastPacketTimestamp; int mFailedCount = 0; - Audio::Resampler mResampler8, mResampler16, mResampler32, mResampler48; + Audio::Resampler mResampler8, + mResampler16, + mResampler32, + mResampler48; Audio::PWavFileWriter mDecodedDump; @@ -229,7 +242,7 @@ protected: void 
updateAmrCodecStats(Codec* c); DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options); - DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl); + DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr& p); DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options); }; diff --git a/src/engine/media/MT_Codec.cpp b/src/engine/media/MT_Codec.cpp index 134291c7..69336264 100644 --- a/src/engine/media/MT_Codec.cpp +++ b/src/engine/media/MT_Codec.cpp @@ -1,4 +1,4 @@ -/* Copyright(C) 2007-2014 VoIP objects (voipobjects.com) +/* Copyright(C) 2007-2026 VoIP objects (voipobjects.com) * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ @@ -9,31 +9,31 @@ using namespace MT; int Codec::Factory::channels() { - return 1; + return 1; } void Codec::Factory::create(CodecMap& codecs) { - codecs[payloadType()] = std::shared_ptr(create()); + codecs[payloadType()] = std::shared_ptr(create()); } void Codec::Factory::updateSdp(resip::SdpContents::Session::Medium::CodecContainer& codecs, SdpDirection direction) { - codecs.push_back(resipCodec()); + codecs.push_back(resipCodec()); } resip::Codec Codec::Factory::resipCodec() { - resip::Codec c(this->name(), this->payloadType(), this->samplerate()); - return c; + resip::Codec c(this->name(), this->payloadType(), this->samplerate()); + return c; } int Codec::Factory::processSdp(const resip::SdpContents::Session::Medium::CodecContainer& codecs, SdpDirection direction) { - for (resip::SdpContents::Session::Medium::CodecContainer::const_iterator codecIter = codecs.begin(); codecIter != codecs.end(); ++codecIter) - { - if (resipCodec() == *codecIter) - return codecIter->payloadType(); - } - return -1; + for 
(resip::SdpContents::Session::Medium::CodecContainer::const_iterator codecIter = codecs.begin(); codecIter != codecs.end(); ++codecIter) + { + if (resipCodec() == *codecIter) + return codecIter->payloadType(); + } + return -1; } diff --git a/src/engine/media/MT_Codec.h b/src/engine/media/MT_Codec.h index 0dd04676..ea43f100 100644 --- a/src/engine/media/MT_Codec.h +++ b/src/engine/media/MT_Codec.h @@ -10,7 +10,7 @@ #include "../helper/HL_Types.h" #include #include "../helper/HL_Pointer.h" - +#include "../audio/Audio_Interface.h" namespace MT { @@ -18,8 +18,7 @@ class Codec; typedef std::shared_ptr PCodec; class CodecMap: public std::map -{ -}; +{}; class Codec { @@ -58,18 +57,28 @@ public: // Number of audio channels virtual int channels() { return 1; } - // Returns size of encoded data (RTP) in bytes - virtual int encode(const void* input, int inputBytes, void* output, int outputCapacity) = 0; + struct EncodeResult + { + size_t mEncoded = 0; // Number of encoded bytes + }; + virtual EncodeResult encode(std::span input, std::span output) = 0; // Returns size of decoded data (PCM signed short) in bytes - virtual int decode(const void* input, int inputBytes, void* output, int outputCapacity) = 0; + struct DecodeResult + { + size_t mDecoded = 0; // Number of decoded bytes + bool mIsCng = false; // Should this packet be used as CNG? (used for AMR codecs) + }; + virtual DecodeResult decode(std::span input, std::span output) = 0; // Returns size of produced data (PCM signed short) in bytes - virtual int plc(int lostFrames, void* output, int outputCapacity) = 0; + virtual size_t plc(int lostFrames, std::span output) = 0; // Returns size of codec in memory - virtual int getSize() const { return 0; }; + virtual size_t getSize() const { return 0; }; + + virtual Audio::Format getAudioFormat() { return Audio::Format(this->samplerate(), this->channels());}; }; } #endif