- work to improve the decoding process: many problems are fixed, but some problems still remain

This commit is contained in:
2026-02-20 13:16:13 +03:00
parent 94f30b25e9
commit 78d77c4e69
11 changed files with 375 additions and 273 deletions

View File

@@ -11,7 +11,7 @@ using namespace Audio;
DataWindow::DataWindow() DataWindow::DataWindow()
{ {
mFilled = 0; mFilled = 0;
mData = NULL; mData = nullptr;
mCapacity = 0; mCapacity = 0;
} }
@@ -166,6 +166,25 @@ void DataWindow::zero(int length)
memset(mData, 0, mFilled); memset(mData, 0, mFilled);
} }
// Transfers up to `size` bytes from the beginning of this window to `dst`:
// the bytes are appended to `dst` via add() and then erased from this window.
// Returns the number of bytes actually transferred, which is smaller than `size`
// when fewer bytes are stored here (and 0 when nothing is stored).
size_t DataWindow::moveTo(DataWindow& dst, size_t size)
{
    Lock guard(mMutex);

    // Never hand over more than is currently stored
    const size_t stored = (size_t)filled();
    const size_t moved = stored < size ? stored : size;
    if (moved == 0)
        return 0;

    dst.add(mData, moved);
    erase(moved);
    return moved;
}
// Returns the duration of the stored data, treating it as samples of sizeof(short) bytes
// interleaved over `channels` channels at `samplerate` Hz. The result is rounded down
// to whole milliseconds. Returns 0 ms when nothing is stored or when `samplerate` /
// `channels` is not positive.
std::chrono::milliseconds DataWindow::getTimeLength(int samplerate, int channels) const
{
    Lock l(mMutex);

    // Reject values that would divide by zero; a negative fill level is treated as empty
    // instead of being promoted to a huge unsigned number by the sizeof() division.
    if (samplerate <= 0 || channels <= 0 || mFilled <= 0)
        return std::chrono::milliseconds(0);

    // Multiply before dividing: dividing by (samplerate / 1000) is a division by zero for
    // rates below 1000 Hz and rounds rates such as 11025 / 22050 / 44100 down to whole kHz,
    // which overstates the duration. For rates that are multiples of 1000 the result is unchanged.
    const long long frames = static_cast<long long>(mFilled) / static_cast<long long>(sizeof(short)) / channels;
    return std::chrono::milliseconds(frames * 1000 / samplerate);
}
void DataWindow::makeStereoFromMono(DataWindow& dst, DataWindow& src) void DataWindow::makeStereoFromMono(DataWindow& dst, DataWindow& src)
{ {
Lock lockDst(dst.mMutex), lockSrc(src.mMutex); Lock lockDst(dst.mMutex), lockSrc(src.mMutex);

View File

@@ -11,9 +11,9 @@
namespace Audio namespace Audio
{ {
class DataWindow class DataWindow
{ {
public: public:
DataWindow(); DataWindow();
~DataWindow(); ~DataWindow();
@@ -34,14 +34,17 @@ namespace Audio
short shortAt(int index) const; short shortAt(int index) const;
void setShortAt(short value, int index); void setShortAt(short value, int index);
void zero(int length); void zero(int length);
size_t moveTo(DataWindow& dst, size_t size);
std::chrono::milliseconds getTimeLength(int samplerate, int channels) const;
static void makeStereoFromMono(DataWindow& dst, DataWindow& src); static void makeStereoFromMono(DataWindow& dst, DataWindow& src);
protected: protected:
mutable Mutex mMutex; mutable Mutex mMutex;
char* mData; char* mData;
int mFilled; int mFilled;
int mCapacity; int mCapacity;
}; };
} }
#endif #endif

View File

@@ -51,6 +51,11 @@ struct Format
return float((milliseconds * mRate) / 500.0 * mChannels); return float((milliseconds * mRate) / 500.0 * mChannels);
} }
size_t sizeFromTime(std::chrono::milliseconds ms) const
{
return sizeFromTime(ms.count());
}
std::string toString() std::string toString()
{ {
char buffer[64]; char buffer[64];

View File

@@ -50,13 +50,11 @@
#define MT_MAXRTPPACKET 1500 #define MT_MAXRTPPACKET 1500
#define MT_DTMF_END_PACKETS 3 #define MT_DTMF_END_PACKETS 3
#define RTP_BUFFER_HIGH 0 // Milliseconds before
#define RTP_BUFFER_LOW 0 #define RTP_BUFFER_HIGH (2000)
#define RTP_BUFFER_PREBUFFER 0 #define RTP_BUFFER_LOW (0)
#define RTP_BUFFER_PREBUFFER (100)
// #define RTP_BUFFER_HIGH 160
// #define RTP_BUFFER_LOW 10
// #define RTP_BUFFER_PREBUFFER 160
#define RTP_DECODED_CAPACITY 2048 #define RTP_DECODED_CAPACITY 2048
#define DEFAULT_SUBSCRIPTION_TIME 1200 #define DEFAULT_SUBSCRIPTION_TIME 1200

View File

@@ -31,30 +31,30 @@ const uint16_t amrwb_framelenbits[10] =
struct AmrPayloadInfo struct AmrPayloadInfo
{ {
const uint8_t* mPayload; const uint8_t* mPayload = nullptr;
int mPayloadLength; int mPayloadLength = 0;
bool mOctetAligned; bool mOctetAligned = false;
bool mInterleaving; bool mInterleaving = false;
bool mWideband; bool mWideband = false;
uint64_t mCurrentTimestamp; uint64_t mCurrentTimestamp = 0;
}; };
struct AmrFrame struct AmrFrame
{ {
uint8_t mFrameType; uint8_t mFrameType = 0;
uint8_t mMode; uint8_t mMode = 0;
bool mGoodQuality; bool mGoodQuality = false;
uint64_t mTimestamp; uint64_t mTimestamp = 0;
std::shared_ptr<ByteBuffer> mData; std::shared_ptr<ByteBuffer> mData;
uint8_t mSTI; uint8_t mSTI = 0;
}; };
struct AmrPayload struct AmrPayload
{ {
uint8_t mCodeModeRequest; uint8_t mCodeModeRequest = 0;
std::vector<AmrFrame> mFrames; std::vector<AmrFrame> mFrames;
bool mDiscardPacket; bool mDiscardPacket = false;
}; };
// ARM RTP payload has next structure // ARM RTP payload has next structure
@@ -148,10 +148,10 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input, size_t& cngCounter)
continue; continue;
} }
if (input.mWideband && f.mMode == 0xFF /* CNG */) // if (input.mWideband && f.mMode == 0xFF /* CNG */)
{ // {
int a = 1; // int a = 1;
} // }
if (input.mWideband && f.mFrameType == 15) if (input.mWideband && f.mFrameType == 15)
{ {
@@ -628,21 +628,30 @@ int AmrWbCodec::decodePlain(std::span<const uint8_t> input, std::span<uint8_t> o
return 0; return 0;
} }
// Check for output buffer capacity // Find the required output capacity
if (output.size() < (int)ap.mFrames.size() * pcmLength()) size_t capacity = 0;
for (AmrFrame& frame: ap.mFrames)
capacity += frame.mMode == 0xFF /* CNG */ ? pcmLength() * 8 : pcmLength();
if (output.size() < capacity)
return 0; return 0;
short* dataOut = (short*)output.data(); short* dataOut = (short*)output.data();
size_t dataOutSizeInBytes = 0; size_t dataOutSizeInBytes = 0;
for (AmrFrame& frame: ap.mFrames) for (AmrFrame& frame: ap.mFrames)
{ {
memset(dataOut, 0, static_cast<size_t>(pcmLength())); size_t frameOutputSize = frame.mMode == 0xFF ? pcmLength() * 8 : pcmLength();
memset(dataOut, 0, frameOutputSize);
if (frame.mData) if (frame.mData)
{ {
if (frame.mMode == 0xFF)
{
// int bp = 1;
}
D_IF_decode(mDecoderCtx, (const unsigned char*)frame.mData->data(), (short*)dataOut, 0); D_IF_decode(mDecoderCtx, (const unsigned char*)frame.mData->data(), (short*)dataOut, 0);
dataOut += pcmLength() / 2; dataOut += frameOutputSize / 2;
dataOutSizeInBytes += pcmLength(); dataOutSizeInBytes += frameOutputSize;
} }
} }
return dataOutSizeInBytes; return dataOutSizeInBytes;

View File

@@ -635,10 +635,10 @@ int IlbcCodec::samplerate()
return 8000; return 8000;
} }
int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity) Codec::EncodeResult IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int outputCapacity)
{ {
if (inputBytes % pcmLength()) if (inputBytes % pcmLength())
return 0; return {};
// Declare the data input pointer // Declare the data input pointer
short *dataIn = (short *)input; short *dataIn = (short *)input;
@@ -657,10 +657,10 @@ int IlbcCodec::encode(const void *input, int inputBytes, void* outputBuffer, int
dataOut += rtpLength(); dataOut += rtpLength();
} }
return frames * rtpLength(); return {frames * rtpLength()};
} }
int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outputCapacity) Codec::DecodeResult IlbcCodec::decode(const void* input, int inputBytes, void* output, int outputCapacity)
{ {
unsigned frames = inputBytes / rtpLength(); unsigned frames = inputBytes / rtpLength();
@@ -675,12 +675,12 @@ int IlbcCodec::decode(const void* input, int inputBytes, void* output, int outpu
dataOut += pcmLength() / 2; dataOut += pcmLength() / 2;
} }
return frames * pcmLength(); return {frames * pcmLength()};
} }
int IlbcCodec::plc(int lostFrames, void* output, int outputCapacity) int IlbcCodec::plc(int lostFrames, std::span<uint8_t> output)
{ {
return 2 * WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output, lostFrames); return sizeof(short) * WebRtcIlbcfix_DecodePlc(mDecoderCtx, (WebRtc_Word16*)output.data(), lostFrames);
} }
// --- IlbcFactory --- // --- IlbcFactory ---

View File

@@ -58,9 +58,10 @@ public:
int frameTime() override; int frameTime() override;
int samplerate() override; int samplerate() override;
int channels() override; int channels() override;
int encode(const void* input, int inputBytes, void* output, int outputCapacity) override;
int decode(const void* input, int inputBytes, void* output, int outputCapacity) override; EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int plc(int lostFrames, void* output, int outputCapacity) override; DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
size_t plc(int lostFrames, std::span<uint8_t> output) override;
}; };
class OpusCodec: public Codec class OpusCodec: public Codec
@@ -112,9 +113,10 @@ public:
int frameTime(); int frameTime();
int samplerate(); int samplerate();
int channels(); int channels();
int encode(const void* input, int inputBytes, void* output, int outputCapacity);
int decode(const void* input, int inputBytes, void* output, int outputCapacity); EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output);
int plc(int lostFrames, void* output, int outputCapacity); DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output);
size_t plc(int lostFrames, std::span<uint8_t> output);
}; };
@@ -146,14 +148,15 @@ public:
IlbcCodec(int packetTime); IlbcCodec(int packetTime);
virtual ~IlbcCodec(); virtual ~IlbcCodec();
const char* name(); const char* name() override;
int pcmLength(); int pcmLength() override;
int rtpLength(); int rtpLength() override;
int frameTime(); int frameTime() override;
int samplerate(); int samplerate() override;
int encode(const void* input, int inputBytes, void* output, int outputCapacity);
int decode(const void* input, int inputBytes, void* output, int outputCapacity); EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int plc(int lostFrames, void* output, int outputCapacity); DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
size_t plc(int lostFrames, std::span<uint8_t> output) override;
}; };
class G711Codec: public Codec class G711Codec: public Codec
@@ -186,15 +189,15 @@ public:
G711Codec(int type); G711Codec(int type);
~G711Codec(); ~G711Codec();
const char* name(); const char* name() override;
int pcmLength(); int pcmLength() override;
int frameTime(); int frameTime() override;
int rtpLength(); int rtpLength() override;
int samplerate(); int samplerate() override;
int encode(const void* input, int inputBytes, void* output, int outputCapacity); EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int decode(const void* input, int inputBytes, void* output, int outputCapacity); DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int plc(int lostSamples, void* output, int outputCapacity); size_t plc(int lostSamples, std::span<uint8_t> output) override ;
protected: protected:
int mType; /// Determines if it is u-law or a-law codec. Its value is ALaw or ULaw. int mType; /// Determines if it is u-law or a-law codec. Its value is ALaw or ULaw.
@@ -237,15 +240,15 @@ public:
IsacCodec(int sampleRate); IsacCodec(int sampleRate);
~IsacCodec(); ~IsacCodec();
const char* name(); const char* name() override;
int pcmLength(); int pcmLength() override;
int rtpLength(); int rtpLength() override;
int frameTime(); int frameTime() override;
int samplerate(); int samplerate() override;
int encode(const void* input, int inputBytes, void* output, int outputCapacity); EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int decode(const void* input, int inputBytes, void* output, int outputCapacity); DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) override;
int plc(int lostFrames, void* output, int outputCapacity); size_t plc(int lostFrames, std::span<uint8_t> output) override;
}; };
@@ -311,11 +314,11 @@ public:
/*! Destructor. */ /*! Destructor. */
virtual ~GsmCodec(); virtual ~GsmCodec();
const char* name(); const char* name() override;
int pcmLength(); int pcmLength() override;
int rtpLength(); int rtpLength() override;
int frameTime(); int frameTime() override;
int samplerate(); int samplerate() override;
int encode(const void* input, int inputBytes, void* output, int outputCapacity); int encode(const void* input, int inputBytes, void* output, int outputCapacity);
int decode(const void* input, int inputBytes, void* output, int outputCapacity); int decode(const void* input, int inputBytes, void* output, int outputCapacity);

View File

@@ -115,7 +115,7 @@ bool SequenceSort(const std::shared_ptr<RtpBuffer::Packet>& p1, const std::share
return p1->rtp()->GetExtendedSequenceNumber() < p2->rtp()->GetExtendedSequenceNumber(); return p1->rtp()->GetExtendedSequenceNumber() < p2->rtp()->GetExtendedSequenceNumber();
} }
std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPacket> packet, std::chrono::milliseconds timelength, int rate) std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib::RTPPacket>& packet, std::chrono::milliseconds timelength, int rate)
{ {
if (!packet) if (!packet)
return std::shared_ptr<Packet>(); return std::shared_ptr<Packet>();
@@ -191,12 +191,11 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(std::shared_ptr<jrtplib::RTPPa
return std::shared_ptr<Packet>(); return std::shared_ptr<Packet>();
} }
RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl) RtpBuffer::FetchResult RtpBuffer::fetch()
{ {
Lock l(mGuard); Lock l(mGuard);
FetchResult result = FetchResult::NoPacket; FetchResult result;
rl.clear();
// See if there is enough information in buffer // See if there is enough information in buffer
auto total = findTimelength(); auto total = findTimelength();
@@ -217,10 +216,10 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
mStat.mPacketDropped++; mStat.mPacketDropped++;
} }
if (total < mLow) if (total < mLow || total == 0ms)
{ {
// Still not prebuffered // Still not prebuffered
result = FetchResult::NoPacket; result = {FetchResult::Status::NoPacket};
} }
else else
{ {
@@ -228,8 +227,8 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
{ {
if (mPacketList.empty()) if (mPacketList.empty())
{ {
result = FetchResult::NoPacket;
// Don't increase counter of lost packets here; maybe it is DTX // Don't increase counter of lost packets here; maybe it is DTX
result = {FetchResult::Status::NoPacket};
} }
else else
{ {
@@ -237,7 +236,6 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
auto& packet = *mPacketList.front(); auto& packet = *mPacketList.front();
uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber(); uint32_t seqno = packet.rtp()->GetExtendedSequenceNumber();
// Gap between new packet and previous on // Gap between new packet and previous on
int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1; int gap = (int64_t)seqno - (int64_t)*mLastSeqno - 1;
gap = std::min(gap, 127); gap = std::min(gap, 127);
@@ -255,16 +253,15 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno mLastSeqno = *mLastSeqno + 1; // As we deal with the audio gap - return the silence and increase last seqno
result = FetchResult::Gap; result = {FetchResult::Status::Gap};
} }
else else
{ {
result = FetchResult::RegularPacket; result = {FetchResult::Status::RegularPacket, mPacketList.front()};
rl.push_back(mPacketList.front());
// Save last returned normal packet // Save last returned normal packet
mFetchedPacket = mPacketList.front(); mFetchedPacket = result.mPacket;
mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber(); mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();
// Remove returned packet from the list // Remove returned packet from the list
mPacketList.erase(mPacketList.begin()); mPacketList.erase(mPacketList.begin());
@@ -277,14 +274,11 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
if (findTimelength() >= mPrebuffer && !mPacketList.empty()) if (findTimelength() >= mPrebuffer && !mPacketList.empty())
{ {
// Normal packet will be returned // Normal packet will be returned
result = FetchResult::RegularPacket; result = {FetchResult::Status::RegularPacket, mPacketList.front()};
// Put it to output list
rl.push_back(mPacketList.front());
// Remember returned packet // Remember returned packet
mFetchedPacket = mPacketList.front(); mFetchedPacket = result.mPacket;
mLastSeqno = mPacketList.front()->rtp()->GetExtendedSequenceNumber(); mLastSeqno = result.mPacket->rtp()->GetExtendedSequenceNumber();
// Remove returned packet from buffer list // Remove returned packet from buffer list
mPacketList.erase(mPacketList.begin()); mPacketList.erase(mPacketList.begin());
@@ -292,12 +286,12 @@ RtpBuffer::FetchResult RtpBuffer::fetch(ResultList& rl)
else else
{ {
ICELogMedia(<< "Jitter buffer was not prebuffered yet; resulting no packet"); ICELogMedia(<< "Jitter buffer was not prebuffered yet; resulting no packet");
result = FetchResult::NoPacket; result = {FetchResult::Status::NoPacket};
} }
} }
} }
if (result != FetchResult::NoPacket) if (result.mStatus != FetchResult::Status::NoPacket)
mReturnedCounter++; mReturnedCounter++;
return result; return result;
@@ -333,8 +327,7 @@ Receiver::~Receiver()
//-------------- AudioReceiver ---------------- //-------------- AudioReceiver ----------------
AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat) AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat)
:Receiver(stat), mBuffer(stat), mCodecSettings(settings), :Receiver(stat), mBuffer(stat), mCodecSettings(settings), mCodecList(settings)
mCodecList(settings)
{ {
// Init resamplers // Init resamplers
mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE); mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE);
@@ -346,6 +339,8 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics
mCodecList.setSettings(settings); mCodecList.setSettings(settings);
mCodecList.fillCodecMap(mCodecMap); mCodecList.fillCodecMap(mCodecMap);
mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short));
#if defined(DUMP_DECODED) #if defined(DUMP_DECODED)
mDecodedDump = std::make_shared<Audio::WavFileWriter>(); mDecodedDump = std::make_shared<Audio::WavFileWriter>();
mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS); mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS);
@@ -559,11 +554,15 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
mDecodedLength = mResampledLength = 0; mDecodedLength = mResampledLength = 0;
if (mCngPacket && mCodec) if (mCngPacket && mCodec)
{
if (mCngPacket->rtp()->GetPayloadType() == 13)
{ {
// Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer. // Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
// Do not forget to send this noise to analysis // Do not forget to send this noise to analysis
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame), false);
reinterpret_cast<short*>(mDecodedFrame), false); }
else
decodePacketTo(output, options, mCngPacket);
} }
else else
if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode) if (mCodec && mFrameCount && !mCodecSettings.mSkipDecode)
@@ -594,19 +593,19 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
return {.mStatus = DecodeResult::Status::Skip}; return {.mStatus = DecodeResult::Status::Skip};
} }
AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl) AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& packet)
{ {
if (!packet || !packet->rtp())
return {DecodeResult::Status::Skip};
DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
auto& rtp = *packet->rtp(); // Syntax sugar
mFailedCount = 0; mFailedCount = 0;
for (const std::shared_ptr<RtpBuffer::Packet>& p: rl)
{
assert(p);
// Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed. // Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed.
if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec) if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
{ {
int units = p->rtp()->GetTimestamp() - *mLastPacketTimestamp; int units = rtp.GetTimestamp() - *mLastPacketTimestamp;
int milliseconds = units / (mCodec->samplerate() / 1000); int milliseconds = units / (mCodec->samplerate() / 1000);
if (milliseconds > mLastPacketTimeLength) if (milliseconds > mLastPacketTimeLength)
{ {
@@ -619,17 +618,16 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
} }
} }
mLastPacketTimestamp = p->rtp()->GetTimestamp(); mLastPacketTimestamp = rtp.GetTimestamp();
// Find codec by payload type // Find codec by payload type
int ptype = p->rtp()->GetPayloadType(); int ptype = rtp.GetPayloadType();
// Look into mCodecMap if exists // Look into mCodecMap if exists
auto codecIter = mCodecMap.find(ptype); auto codecIter = mCodecMap.find(ptype);
if (codecIter == mCodecMap.end()) if (codecIter == mCodecMap.end())
return {}; return {};
if (!codecIter->second) if (!codecIter->second)
codecIter->second = mCodecList.createCodecByPayloadType(ptype); codecIter->second = mCodecList.createCodecByPayloadType(ptype);
@@ -640,18 +638,17 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
result.mSamplerate = mCodec->samplerate(); result.mSamplerate = mCodec->samplerate();
// Check if it is CNG packet // Check if it is CNG packet
if ((ptype == 0 || ptype == 8) && p->rtp()->GetPayloadLength() >= 1 && p->rtp()->GetPayloadLength() <= 6) if (((ptype == 0 || ptype == 8) && rtp.GetPayloadLength() >= 1 && rtp.GetPayloadLength() <= 6) || rtp.GetPayloadType() == 13)
{ {
if (options.mSkipDecode) if (options.mSkipDecode)
mDecodedLength = 0; mDecodedLength = 0;
else else
{ {
mCngPacket = p->rtp(); mCngPacket = packet;
mCngDecoder.decode3389(p->rtp()->GetPayloadData(), p->rtp()->GetPayloadLength()); mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());
// Emit CNG mLastPacketLength milliseconds // Emit CNG mLastPacketLength milliseconds
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true);
(short*)mDecodedFrame, true);
if (mDecodedLength) if (mDecodedLength)
processDecoded(output, options); processDecoded(output, options);
} }
@@ -664,7 +661,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
// Handle here regular RTP packets // Handle here regular RTP packets
// Check if payload length is ok // Check if payload length is ok
size_t payload_length = p->rtp()->GetPayloadLength(); size_t payload_length = rtp.GetPayloadLength();
size_t rtp_frame_length = mCodec->rtpLength(); size_t rtp_frame_length = mCodec->rtpLength();
int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0; int tail = rtp_frame_length ? payload_length % rtp_frame_length : 0;
@@ -672,8 +669,8 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
if (!tail) if (!tail)
{ {
// Find number of frames // Find number of frames
mFrameCount = mCodec->rtpLength() ? p->rtp()->GetPayloadLength() / mCodec->rtpLength() : 1; mFrameCount = mCodec->rtpLength() ? rtp.GetPayloadLength() / mCodec->rtpLength() : 1;
int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)p->rtp()->GetPayloadLength(); int frameLength = mCodec->rtpLength() ? mCodec->rtpLength() : (int)rtp.GetPayloadLength();
// Save last packet time length // Save last packet time length
mLastPacketTimeLength = mFrameCount * mCodec->frameTime(); mLastPacketTimeLength = mFrameCount * mCodec->frameTime();
@@ -686,8 +683,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
else else
{ {
// Decode frame by frame // Decode frame by frame
mDecodedLength = mCodec->decode(p->rtp()->GetPayloadData() + i * mCodec->rtpLength(), mDecodedLength = mCodec->decode(rtp.GetPayloadData() + i * mCodec->rtpLength(), frameLength, mDecodedFrame, sizeof mDecodedFrame);
frameLength, mDecodedFrame, sizeof mDecodedFrame);
if (mDecodedLength > 0) if (mDecodedLength > 0)
processDecoded(output, options); processDecoded(output, options);
} }
@@ -704,13 +700,21 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
} }
} }
} }
}
return result; return result;
} }
AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options) AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options)
{ {
// No packet available in jitter buffer - just increase the counter for now // No packet available at all (and no previous CNG packet) - so return the silence
// Pad `output` with silence up to the requested duration so that gaps keep their length.
// This is possible only when a codec is known, because the audio format is derived from it.
if (options.mElapsed != 0ms && mCodec)
{
    Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();

    // The shortfall is measured on `output`, so the silence must be appended to `output` as well
    // (previously it was appended to mAvailable regardless of which buffer the caller passed).
    auto avail = output.getTimeLength(fmt.rate(), fmt.channels());
    if (options.mElapsed > avail)
        output.addZero(fmt.sizeFromTime(options.mElapsed - avail));
}
mFailedCount++; mFailedCount++;
return {.mStatus = DecodeResult::Status::Skip}; return {.mStatus = DecodeResult::Status::Skip};
} }
@@ -719,32 +723,71 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
{ {
DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
size_t initialOffset = output.filled(); // Size in bytes auto produced = 0ms;
if (mAvailable.filled() && mCodec && options.mElapsed != 0ms)
{
Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
auto initiallyAvailable = mCodec ? mAvailable.getTimeLength(fmt.rate(), fmt.channels()) : 0ms;
if (initiallyAvailable != 0ms)
{
std::chrono::milliseconds resultTime = std::min(initiallyAvailable, options.mElapsed);
auto resultLen = fmt.sizeFromTime(resultTime);
mAvailable.moveTo(output, resultLen);
produced += resultTime;
// Maybe request is satisfied ?
if (produced >= options.mElapsed)
return {.mStatus = DecodeResult::Status::Ok, .mSamplerate = fmt.rate(), .mChannels = fmt.channels()};
}
}
std::chrono::milliseconds decoded = 0ms; std::chrono::milliseconds decoded = 0ms;
do do
{ {
// Get next packet from buffer // Get next packet from buffer
RtpBuffer::ResultList rl; RtpBuffer::ResultList rl;
RtpBuffer::FetchResult fr = mBuffer.fetch(rl); RtpBuffer::FetchResult fr = mBuffer.fetch();
switch (fr) // ICELogDebug(<< fr.toString() << " " << mBuffer.findTimelength());
switch (fr.mStatus)
{ {
case RtpBuffer::FetchResult::Gap: result = decodeGapTo(output, options); break; case RtpBuffer::FetchResult::Status::Gap: result = decodeGapTo(mAvailable, options); break;
case RtpBuffer::FetchResult::NoPacket: result = decodeEmptyTo(output, options); break; case RtpBuffer::FetchResult::Status::NoPacket: result = decodeEmptyTo(mAvailable, options); break;
case RtpBuffer::FetchResult::RegularPacket: result = decodePacketTo(output, options, rl); break; case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options, fr.mPacket); break;
default: default:
assert(0); assert(0);
} }
size_t available = output.filled() - initialOffset; // Was there decoding at all ?
if (!available) if (!mCodec)
break; break; // No sense to continue - we have no information at all
initialOffset = output.filled();
// ToDo: calculate how much milliseconds was decoded Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
int samplerate = options.mResampleToMainRate ? AUDIO_SAMPLERATE : result.mSamplerate; result.mSamplerate = fmt.rate();
decoded += std::chrono::milliseconds(available / sizeof(short) / (samplerate / 1000)); result.mChannels = fmt.channels();
// Have we anything interesting in the buffer ?
auto bufferAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels());
if (bufferAvailable == 0ms)
break; // No sense to continue - decoding / CNG / PLC stopped totally
// How much data should be moved to result buffer ?
if (options.mElapsed != 0ms)
{
std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced);
auto resultLen = fmt.sizeFromTime(resultTime);
mAvailable.moveTo(output, resultLen);
produced += resultTime;
} }
while (decoded < options.mElapsed); else
mAvailable.moveTo(output, mAvailable.filled());
decoded += bufferAvailable;
}
while (produced < options.mElapsed);
if (produced != 0ms)
result.mStatus = DecodeResult::Status::Ok;
// Time statistics // Time statistics
if (result.mStatus == DecodeResult::Status::Ok) if (result.mStatus == DecodeResult::Status::Ok)

View File

@@ -28,13 +28,6 @@ using jrtplib::RTPPacket;
class RtpBuffer class RtpBuffer
{ {
public: public:
enum class FetchResult
{
RegularPacket,
Gap,
NoPacket
};
// Owns rtp packet data // Owns rtp packet data
class Packet class Packet
{ {
@@ -59,6 +52,29 @@ public:
std::chrono::microseconds mTimestamp = 0us; std::chrono::microseconds mTimestamp = 0us;
}; };
// Outcome of RtpBuffer::fetch(): a status and, for a regular packet, the packet itself.
struct FetchResult
{
    enum class Status
    {
        RegularPacket,  // A packet was taken from the buffer; it is available in mPacket
        Gap,            // A sequence number gap was detected; no packet is attached
        NoPacket        // Nothing to return (buffer is empty or not prebuffered yet)
    };

    Status mStatus = Status::NoPacket;
    std::shared_ptr<Packet> mPacket;    // Set only for Status::RegularPacket

    // Short human-readable name of the status (intended for logging).
    std::string toString() const
    {
        switch (mStatus)
        {
        case Status::RegularPacket: return "packet";
        case Status::Gap:           return "gap";
        case Status::NoPacket:      return "empty";
        }

        // Reached only when mStatus holds a value outside the enumeration. Without this
        // return the function would flow off its end, which is undefined behaviour.
        return "unknown";
    }
};
RtpBuffer(Statistics& stat); RtpBuffer(Statistics& stat);
~RtpBuffer(); ~RtpBuffer();
@@ -81,12 +97,12 @@ public:
int getCount() const; int getCount() const;
// Returns false if packet was not add - maybe too old or too new or duplicate // Returns false if packet was not add - maybe too old or too new or duplicate
std::shared_ptr<Packet> add(std::shared_ptr<RTPPacket> packet, std::chrono::milliseconds timelength, int rate); std::shared_ptr<Packet> add(const std::shared_ptr<RTPPacket>& packet, std::chrono::milliseconds timelength, int rate);
typedef std::vector<std::shared_ptr<Packet>> ResultList; typedef std::vector<std::shared_ptr<Packet>> ResultList;
typedef std::shared_ptr<ResultList> PResultList; typedef std::shared_ptr<ResultList> PResultList;
FetchResult fetch(ResultList& rl); FetchResult fetch();
protected: protected:
unsigned mSsrc = 0; unsigned mSsrc = 0;
@@ -133,15 +149,6 @@ public:
// Lifetime of pointer to codec is limited by lifetime of AudioReceiver (it is container). // Lifetime of pointer to codec is limited by lifetime of AudioReceiver (it is container).
bool add(const std::shared_ptr<jrtplib::RTPPacket>& p, Codec** codec = nullptr); bool add(const std::shared_ptr<jrtplib::RTPPacket>& p, Codec** codec = nullptr);
// Returns false when there is no rtp data from jitter
/*enum DecodeOptions
{
DecodeOptions_ResampleToMainRate = 0,
DecodeOptions_DontResample = 1,
DecodeOptions_FillCngGap = 2,
DecodeOptions_SkipDecode = 4
};*/
struct DecodeOptions struct DecodeOptions
{ {
bool mResampleToMainRate = true; // Resample all decoded audio to AUDIO_SAMPLERATE bool mResampleToMainRate = true; // Resample all decoded audio to AUDIO_SAMPLERATE
@@ -187,11 +194,14 @@ protected:
CodecList::Settings mCodecSettings; CodecList::Settings mCodecSettings;
CodecList mCodecList; CodecList mCodecList;
JitterStatistics mJitterStats; JitterStatistics mJitterStats;
std::shared_ptr<jrtplib::RTPPacket> mCngPacket; std::shared_ptr<RtpBuffer::Packet> mCngPacket;
CngDecoder mCngDecoder; CngDecoder mCngDecoder;
size_t mDTXSamplesToEmit = 0; // How much silence (or CNG) should be emited before next RTP packet gets into the action size_t mDTXSamplesToEmit = 0; // How much silence (or CNG) should be emited before next RTP packet gets into the action
// Buffer to hold decoded data // Already decoded data that can be retrieved without actual decoding - it may happen because of getAudioTo() may be limited by time interval
Audio::DataWindow mAvailable;
// Temporary buffer to hold decoded data (it is better than allocate data on stack)
int16_t mDecodedFrame[MT_MAX_DECODEBUFFER]; int16_t mDecodedFrame[MT_MAX_DECODEBUFFER];
size_t mDecodedLength = 0; size_t mDecodedLength = 0;
@@ -208,7 +218,10 @@ protected:
std::optional<uint32_t> mLastPacketTimestamp; std::optional<uint32_t> mLastPacketTimestamp;
int mFailedCount = 0; int mFailedCount = 0;
Audio::Resampler mResampler8, mResampler16, mResampler32, mResampler48; Audio::Resampler mResampler8,
mResampler16,
mResampler32,
mResampler48;
Audio::PWavFileWriter mDecodedDump; Audio::PWavFileWriter mDecodedDump;
@@ -229,7 +242,7 @@ protected:
void updateAmrCodecStats(Codec* c); void updateAmrCodecStats(Codec* c);
DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options); DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options);
DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const RtpBuffer::ResultList& rl); DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& p);
DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options); DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options);
}; };

View File

@@ -1,4 +1,4 @@
/* Copyright(C) 2007-2014 VoIP objects (voipobjects.com) /* Copyright(C) 2007-2026 VoIP objects (voipobjects.com)
* This Source Code Form is subject to the terms of the Mozilla Public * This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

View File

@@ -10,7 +10,7 @@
#include "../helper/HL_Types.h" #include "../helper/HL_Types.h"
#include <map> #include <map>
#include "../helper/HL_Pointer.h" #include "../helper/HL_Pointer.h"
#include "../audio/Audio_Interface.h"
namespace MT namespace MT
{ {
@@ -18,8 +18,7 @@ class Codec;
typedef std::shared_ptr<Codec> PCodec; typedef std::shared_ptr<Codec> PCodec;
class CodecMap: public std::map<int, PCodec> class CodecMap: public std::map<int, PCodec>
{ {};
};
class Codec class Codec
{ {
@@ -58,18 +57,28 @@ public:
// Number of audio channels // Number of audio channels
virtual int channels() { return 1; } virtual int channels() { return 1; }
// Returns size of encoded data (RTP) in bytes // Returns size of encoded data (RTP) in bytes
virtual int encode(const void* input, int inputBytes, void* output, int outputCapacity) = 0; struct EncodeResult
{
size_t mEncoded = 0; // Number of encoded bytes
};
virtual EncodeResult encode(std::span<const uint8_t> input, std::span<uint8_t> output) = 0;
// Returns size of decoded data (PCM signed short) in bytes // Returns size of decoded data (PCM signed short) in bytes
virtual int decode(const void* input, int inputBytes, void* output, int outputCapacity) = 0; struct DecodeResult
{
size_t mDecoded = 0; // Number of decoded bytes
bool mIsCng = false; // Should this packet to be used as CNG ? (used for AMR codecs)
};
virtual DecodeResult decode(std::span<const uint8_t> input, std::span<uint8_t> output) = 0;
// Returns size of produced data (PCM signed short) in bytes // Returns size of produced data (PCM signed short) in bytes
virtual int plc(int lostFrames, void* output, int outputCapacity) = 0; virtual size_t plc(int lostFrames, std::span<uint8_t> output) = 0;
// Returns size of codec in memory // Returns size of codec in memory
virtual int getSize() const { return 0; }; virtual size_t getSize() const { return 0; };
virtual Audio::Format getAudioFormat() { return Audio::Format(this->samplerate(), this->channels());};
}; };
} }
#endif #endif