- fix DTX decoding

This commit is contained in:
2026-05-30 18:11:03 +03:00
parent e7757fa08b
commit 27eefb34fe
3 changed files with 149 additions and 129 deletions
+129 -128
View File
@@ -28,8 +28,7 @@ using namespace MT;
// ----------------- RtpBuffer::Packet --------------
RtpBuffer::Packet::Packet(const std::shared_ptr<RTPPacket>& packet, std::chrono::milliseconds timelength, int samplerate)
:mRtp(packet), mTimelength(timelength), mSamplerate(samplerate)
{
}
{}
std::shared_ptr<RTPPacket> RtpBuffer::Packet::rtp() const
{
@@ -66,6 +65,7 @@ RtpBuffer::RtpBuffer(Statistics& stat)
RtpBuffer::~RtpBuffer()
{
if (mAddCounter)
ICELogDebug(<< "Number of add packets: " << mAddCounter << ", number of retrieved packets " << mReturnedCounter);
}
@@ -129,7 +129,7 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib:
mStat.mSsrc = static_cast<uint16_t>(packet->GetSSRC());
// Update jitter
ICELogMedia(<< "Adding new packet into jitter buffer");
ICELogMedia(<< "Adding new packet seqno " << packet->GetSequenceNumber() << " into jitter buffer");
mAddCounter++;
// Look for maximum&minimal sequence number; check for dublicates
@@ -138,7 +138,7 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib:
// New sequence number
unsigned newSeqno = packet->GetExtendedSequenceNumber();
for (std::shared_ptr<Packet>& p: mPacketList)
for (auto& p: mPacketList)
{
unsigned seqno = p->rtp()->GetExtendedSequenceNumber();
@@ -171,7 +171,7 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib:
available = findTimelength();
if (available > mHigh)
ICELogMedia(<< "Available " << available << "ms with limit " << mHigh << "ms");
ICELogMedia(<< "Available " << available << " with limit " << mHigh);
return p;
}
@@ -346,16 +346,14 @@ int RtpBuffer::getNumberOfAddPackets() const
//-------------- Receiver ---------------
Receiver::Receiver(Statistics& stat)
:mStat(stat)
{
}
{}
Receiver::~Receiver()
{
}
{}
//-------------- AudioReceiver ----------------
AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat)
:Receiver(stat), mBuffer(stat), mDtmfBuffer(stat), mCodecSettings(settings), mCodecList(settings), mDtmfReceiver(stat)
:Receiver(stat), mRtpBuffer(stat), mDtmfBuffer(stat), mCodecSettings(settings), mCodecList(settings), mDtmfReceiver(stat)
{
// Init resamplers
mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE);
@@ -367,12 +365,17 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics
mCodecList.setSettings(settings);
mCodecList.fillCodecMap(mCodecMap);
mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short));
// 10 seconds is the maximum length of decoded audio in single step
// It is important - DTX may produce silence up to few seconds easily
mAvailable.setCapacity(AUDIO_SAMPLERATE * 10 * sizeof(short));
mDtmfBuffer.setPrebuffer(0ms);
mDtmfBuffer.setLow(0ms);
mDtmfBuffer.setHigh(1ms);
// Avoid collecting too much data
mRtpBuffer.setHigh(240ms);
#if defined(DUMP_DECODED)
mDecodedDump = std::make_shared<Audio::WavFileWriter>();
mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS);
@@ -386,6 +389,11 @@ AudioReceiver::~AudioReceiver()
mResampler32.stop();
mResampler48.stop();
mDecodedDump.reset();
if (mRequestedAudio != 0ms)
ICELogDebug(<< "Requested " << mRequestedAudio << ", produced " << mProducedAudio);
if (mDecodeCount)
ICELogDebug(<< "Average interval between packet decoding " << mIntervalBetweenDecode / mDecodeCount);
}
// Update codec settings
@@ -450,7 +458,7 @@ Codec* AudioReceiver::add(const std::shared_ptr<jrtplib::RTPPacket>& p)
payloadLength = p->GetPayloadLength(),
ptype = p->GetPayloadType();
ICELogMedia(<< "Adding packet No " << p->GetSequenceNumber());
// ICELogMedia(<< "Adding packet No " << p->GetSequenceNumber());
// Increase codec counter
mStat.mCodecCount[ptype]++;
@@ -508,12 +516,12 @@ Codec* AudioReceiver::add(const std::shared_ptr<jrtplib::RTPPacket>& p)
{
// It will cause statistics to report about bad RTP packet
// I have to replay last packet payload here to avoid report about lost packet
mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate);
mRtpBuffer.add(p, std::chrono::milliseconds(time_length), samplerate);
return nullptr;
}
// Queue packet to buffer
mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate).get();
mRtpBuffer.add(p, std::chrono::milliseconds(time_length), samplerate).get();
}
return codec;
}
@@ -533,8 +541,12 @@ void AudioReceiver::processDecoded(Audio::DataWindow& output, DecodeOptions opti
void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
{
if (!mCodec)
return;
// Fill mDecodeBuffer as much as needed and call processDecoded()
// Depending on used codec mono or stereo silence should be produced
size_t chunks = length.count() / 10;
size_t tail = length.count() % 10;
size_t chunk_size = 10 * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
@@ -635,7 +647,8 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
auto& rtp = *packet->rtp(); // Syntax sugar
mFailedCount = 0;
// Check if we need to emit silence or CNG - previously CNG packet was detected. Emit CNG audio here if needed.
// Check if we need to emit silence - it may happen in the case if next packet has RTP timestamp much beyond the previous one; maybe DTX was active.
if (mLastPacketTimestamp && mLastPacketTimeLength && mCodec)
{
int units = rtp.GetTimestamp() - *mLastPacketTimestamp;
@@ -643,7 +656,8 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
if (milliseconds > mLastPacketTimeLength)
{
auto silenceLength = std::chrono::milliseconds(milliseconds - mLastPacketTimeLength);
ICELogDebug(<< "Emit " << silenceLength << " silence while requested " << options.mElapsed);
silenceLength = std::min(silenceLength, options.mElapsed);
if (mCngPacket && options.mFillGapByCNG)
produceCNG(silenceLength, output, options);
else
@@ -677,6 +691,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
mDecodedLength = 0;
else
{
ICELogDebug(<< "Decoding CNG");
mCngPacket = packet;
mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());
@@ -775,7 +790,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& outp
else
{
// Emit silence if codec information is available - it is to properly handle the gaps
auto avail = output.getTimeLength(fmt.rate(), fmt.channels());
auto avail = output.getTimeLength(fmt);
if (options.mElapsed > avail)
output.addZero(fmt.sizeFromTime(options.mElapsed - avail));
}
@@ -785,86 +800,18 @@ AudioReceiver::DecodeResult AudioReceiver::decodeEmptyTo(Audio::DataWindow& outp
return {.mStatus = DecodeResult::Status::Skip};
}
AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output, DecodeOptions options)
void MT::AudioReceiver::processDtmf()
{
DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
// Process RFC2833 here; it doesn't result in any audio - only callbacks and statistics
if (mDtmfBuffer.getCount())
{
auto fr = mDtmfBuffer.fetch();
if (fr.mPacket && fr.mStatus == RtpBuffer::FetchResult::Status::RegularPacket)
mDtmfReceiver.add(fr.mPacket->rtp());
auto produced = 0ms;
if (mAvailable.filled() && mCodec && options.mElapsed != 0ms)
{
Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
auto initiallyAvailable = mCodec ? mAvailable.getTimeLength(fmt.rate(), fmt.channels()) : 0ms;
if (initiallyAvailable != 0ms)
{
std::chrono::milliseconds resultTime = std::min(initiallyAvailable, options.mElapsed);
auto resultLen = fmt.sizeFromTime(resultTime);
mAvailable.moveTo(output, resultLen);
produced += resultTime;
// Maybe request is satisfied ?
if (produced >= options.mElapsed)
return {.mStatus = DecodeResult::Status::Ok, .mSamplerate = fmt.rate(), .mChannels = fmt.channels()};
}
}
}
std::chrono::milliseconds decoded = 0ms;
do
{
// Get next packet from buffer
RtpBuffer::ResultList rl;
RtpBuffer::FetchResult fr = mBuffer.fetch();
// ICELogDebug(<< fr.toString() << " " << mBuffer.findTimelength());
switch (fr.mStatus)
{
case RtpBuffer::FetchResult::Status::Gap: result = decodeGapTo(mAvailable, options); break;
case RtpBuffer::FetchResult::Status::NoPacket: result = decodeEmptyTo(mAvailable, options); break;
case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options, fr.mPacket); break;
default:
assert(0);
}
// Was there decoding at all ?
if (!mCodec)
break; // No sense to continue - we have no information at all
Audio::Format fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
result.mSamplerate = fmt.rate();
result.mChannels = fmt.channels();
// Have we anything interesting in the buffer ?
auto bufferAvailable = mAvailable.getTimeLength(fmt.rate(), fmt.channels());
if (bufferAvailable == 0ms)
break; // No sense to continue - decoding / CNG / PLC stopped totally
// How much data should be moved to result buffer ?
if (options.mElapsed != 0ms)
{
std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced);
auto resultLen = fmt.sizeFromTime(resultTime);
mAvailable.moveTo(output, resultLen);
produced += resultTime;
}
else
mAvailable.moveTo(output, mAvailable.filled());
decoded += bufferAvailable;
}
while (produced < options.mElapsed);
if (produced != 0ms)
result.mStatus = DecodeResult::Status::Ok;
// Time statistics
if (result.mStatus == DecodeResult::Status::Ok)
{
// Decode statistics
void MT::AudioReceiver::updateDecodingTimeStatistics()
{
if (!mDecodeTimestamp)
mDecodeTimestamp = std::chrono::steady_clock::now();
else
@@ -873,7 +820,88 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
mStat.mDecodingInterval.process(std::chrono::duration_cast<std::chrono::milliseconds>(t - *mDecodeTimestamp).count());
mDecodeTimestamp = t;
}
}
AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output, DecodeOptions options)
{
// ICELogDebug(<< "getAudioTo() for " << options.mElapsed);
assert (options.mElapsed != 0ms);
// Increase counter of requested audio
mRequestedAudio += options.mElapsed;
DecodeResult result = {.mStatus = DecodeResult::Status::Skip};
// Process RFC2833 here; it doesn't result in any audio - only callbacks and statistics
processDtmf();
// How much time length audio we produced here
auto produced = 0ms;
Audio::Format fmt;
// Have we anything from the previous decode attempts ?
if (mAvailable.filled())
{
// Find what audio format is used in mAvailable data
fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
// How much milliseconds are available ?
auto availTime = mAvailable.getTimeLength(fmt);
if (availTime != 0ms)
{
// How much we can consume from the mAvailable buffer ?
std::chrono::milliseconds resultTime = std::min(availTime, options.mElapsed);
// Number of bytes
mAvailable.moveTo(output, fmt.sizeFromTime(resultTime));
// Increase the counter of produced milliseconds
produced += resultTime;
}
}
while (produced < options.mElapsed)
{
// Get next packet from buffer
RtpBuffer::FetchResult fr = mRtpBuffer.fetch();
// Decode to mAvailable buffer
switch (fr.mStatus)
{
case RtpBuffer::FetchResult::Status::Gap: result = decodeGapTo(mAvailable, options.decreaseElapsedBy(produced)); break;
case RtpBuffer::FetchResult::Status::NoPacket: result = decodeEmptyTo(mAvailable, options.decreaseElapsedBy(produced)); break;
case RtpBuffer::FetchResult::Status::RegularPacket: result = decodePacketTo(mAvailable, options.decreaseElapsedBy(produced), fr.mPacket); updateDecodeIntervalStatistics(); break;
default:
assert(0);
}
// Was there decoding at all ?
if (!mCodec)
break; // No sense to continue - we have no information at all
fmt = options.mResampleToMainRate ? Audio::Format(AUDIO_SAMPLERATE, 1) : mCodec->getAudioFormat();
result.mSamplerate = fmt.rate();
result.mChannels = fmt.channels();
// How much milliseconds we have in audio buffer ?
auto bufferAvailable = mAvailable.getTimeLength(fmt);
if (bufferAvailable == 0ms)
break; // No sense to continue - decoding / CNG / PLC stopped totally
// How much data should be moved to result buffer ?
std::chrono::milliseconds resultTime = std::min(bufferAvailable, options.mElapsed - produced);
mAvailable.moveTo(output, fmt.sizeFromTime(resultTime));
produced += resultTime;
}
if (produced != 0ms)
{
result.mStatus = DecodeResult::Status::Ok;
updateDecodingTimeStatistics();
}
mProducedAudio += produced;
// ICELogDebug(<< "Requested " << options.mElapsed << ", produced " << produced << ", remains " << mAvailable.getTimeLength(fmt) << ", packets " << getRtpBuffer().getCount());
return result;
}
@@ -987,43 +1015,16 @@ AudioReceiver::MediaInfo AudioReceiver::infoFor(jrtplib::RTPPacket& p)
return {packetTime, codec->samplerate()};
}
// int AudioReceiver::timelengthFor(jrtplib::RTPPacket& p)
// {
// CodecMap::iterator codecIter = mCodecMap.find(p.GetPayloadType());
// if (codecIter == mCodecMap.end())
// return 0;
// PCodec codec = codecIter->second;
// if (codec)
// {
// int frame_count = 0;
// if (codec->rtpLength() != 0)
// {
// frame_count = static_cast<int>(p.GetPayloadLength() / codec->rtpLength());
// if (p.GetPayloadType() == 9/*G729A silence*/ && p.GetPayloadLength() % codec->rtpLength())
// frame_count++;
// }
// else
// frame_count = 1;
// return frame_count * codec->frameTime();
// }
// else
// return 0;
// }
// int AudioReceiver::samplerateFor(jrtplib::RTPPacket& p)
// {
// CodecMap::iterator codecIter = mCodecMap.find(p.GetPayloadType());
// if (codecIter != mCodecMap.end())
// {
// PCodec codec = codecIter->second;
// if (codec)
// return codec->samplerate();
// }
// return 8000;
// }
void AudioReceiver::updateDecodeIntervalStatistics()
{
auto now = std::chrono::steady_clock::now();
if (mLastDecodeTimestamp)
{
mIntervalBetweenDecode += std::chrono::duration_cast<std::chrono::microseconds>(now - *mLastDecodeTimestamp);
mDecodeCount ++;
}
mLastDecodeTimestamp = now;
}
// ----------------------- DtmfReceiver -------------------
DtmfReceiver::DtmfReceiver(Statistics& stat)
+26 -6
View File
@@ -122,6 +122,7 @@ protected:
std::optional<uint32_t> mLastSeqno;
std::optional<jrtplib::RTPTime> mLastReceiveTime;
// To calculate average interval between packet add. It is close to jitter but more useful in debugging.
float mLastAddTime = 0.0f;
};
@@ -169,10 +170,22 @@ public:
struct DecodeOptions
{
bool mRealtimeProcessing = false; // Target PCAP parsing by default
bool mResampleToMainRate = true; // Resample all decoded audio to AUDIO_SAMPLERATE
bool mFillGapByCNG = false; // Use CNG information if available
bool mSkipDecode = false; // Don't do decode, just dry run - fetch packets, remove them from the jitter buffer
std::chrono::milliseconds mElapsed = 0ms; // How much milliseconds should be decoded; zero value means "decode just next packet from the buffer"
DecodeOptions decreaseElapsedBy(std::chrono::milliseconds delta)
{
return
{
.mRealtimeProcessing = mRealtimeProcessing,
.mResampleToMainRate = mResampleToMainRate,
.mFillGapByCNG = mFillGapByCNG,
.mSkipDecode = mSkipDecode,
.mElapsed = std::max(mElapsed - delta, 0ms)
};
}
};
struct DecodeResult
@@ -193,7 +206,7 @@ public:
// Looks for codec by payload type
Codec* findCodec(int payloadType);
RtpBuffer& getRtpBuffer() { return mBuffer; }
RtpBuffer& getRtpBuffer() { return mRtpBuffer; }
// Returns size of AudioReceiver's instance in bytes (including size of all data + codecs + etc.)
int getSize() const;
@@ -205,14 +218,12 @@ public:
};
MediaInfo infoFor(jrtplib::RTPPacket& p);
// // Returns timelength for given packet
// int timelengthFor(jrtplib::RTPPacket& p);
void processDtmf();
// // Return samplerate for given packet
// int samplerateFor(jrtplib::RTPPacket& p);
void updateDecodingTimeStatistics();
protected:
RtpBuffer mBuffer; // Jitter buffer itself
RtpBuffer mRtpBuffer; // RTP jitter buffer itself; here are audio packets
RtpBuffer mDtmfBuffer; // These two (mDtmfBuffer / mDtmfReceiver) are for our analyzer stack only; in normal softphone logic DTMF packets goes via SingleAudioStream::mDtmfReceiver
DtmfReceiver mDtmfReceiver;
@@ -258,6 +269,9 @@ protected:
float mIntervalSum = 0.0f;
int mIntervalCount = 0;
std::chrono::milliseconds mRequestedAudio = 0ms;
std::chrono::milliseconds mProducedAudio = 0ms;
// Zero rate will make audio mono but resampling will be skipped
void makeMonoAndResample(int rate, int channels);
@@ -272,6 +286,12 @@ protected:
DecodeResult decodeGapTo(Audio::DataWindow& output, DecodeOptions options);
DecodeResult decodePacketTo(Audio::DataWindow& output, DecodeOptions options, const std::shared_ptr<RtpBuffer::Packet>& p);
DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options);
std::optional<std::chrono::steady_clock::time_point> mLastDecodeTimestamp;
std::chrono::microseconds mIntervalBetweenDecode = 0us;
size_t mDecodeCount = 0;
void updateDecodeIntervalStatistics();
};
}
-1
View File
@@ -137,7 +137,6 @@ std::string CodecList::Settings::toString() const
oss << "OPUS ptype: " << spec.mPayloadType << ", rate: " << spec.mRate << ", channels: " << spec.mChannels << std::endl;
}
return oss.str();
}