- performance optimizations

This commit is contained in:
2026-06-17 14:18:31 +03:00
parent 40f3b34b16
commit 2cb2a93d59
7 changed files with 177 additions and 32 deletions
+30 -3
View File
@@ -265,8 +265,20 @@ PCodec AmrNbCodec::CodecFactory::create()
// Constructs an AMR-NB codec bound to the given configuration.
//
// No opencore-amr encoder/decoder context is allocated here. Contexts are
// created lazily by ensureEncoder()/ensureDecoder(), so a codec that is
// resolved only for network-MOS metadata never allocates them.
//
// NOTE(review): the destructor and any other user of mEncoderCtx/mDecoderCtx
// must tolerate null contexts - confirm.
AmrNbCodec::AmrNbCodec(const AmrCodecConfig& config)
    :mConfig(config)
{
    // Start from null so the null checks in ensureEncoder()/ensureDecoder()
    // trigger the first (and only) allocation. Allocating eagerly here would
    // turn both helpers into permanent no-ops and defeat the lazy scheme.
    mEncoderCtx = nullptr;
    mDecoderCtx = nullptr;
}
// Creates the AMR-NB encoder context on first use; a no-op once it exists.
void AmrNbCodec::ensureEncoder()
{
    // Already allocated by an earlier call - nothing to do.
    if (mEncoderCtx)
        return;

    mEncoderCtx = Encoder_Interface_init(1);
}
// Creates the AMR-NB decoder context on first use; a no-op once it exists.
void AmrNbCodec::ensureDecoder()
{
    // Already allocated by an earlier call - nothing to do.
    if (mDecoderCtx)
        return;

    mDecoderCtx = Decoder_Interface_init();
}
AmrNbCodec::~AmrNbCodec()
@@ -298,6 +310,8 @@ Codec::Info AmrNbCodec::info()
Codec::EncodeResult AmrNbCodec::encode(std::span<const uint8_t> input, std::span<uint8_t> output)
{
ensureEncoder();
if (input.size_bytes() % pcmLength())
return {.mEncoded = 0};
@@ -324,6 +338,8 @@ Codec::EncodeResult AmrNbCodec::encode(std::span<const uint8_t> input, std::span
#define AMR_BITRATE_DTX 15
Codec::DecodeResult AmrNbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
{
ensureDecoder();
if (mConfig.mOctetAligned)
return {.mDecoded = 0};
@@ -427,6 +443,8 @@ Codec::DecodeResult AmrNbCodec::decode(std::span<const uint8_t> input, std::span
size_t AmrNbCodec::plc(int lostFrames, std::span<uint8_t> output)
{
ensureDecoder();
if (output.size_bytes() < lostFrames * pcmLength())
return 0;
@@ -496,7 +514,14 @@ AmrWbStatistics MT::GAmrWbStatistics;
// Constructs an AMR-WB codec bound to the given configuration.
//
// The decoder context is not allocated here. It is created lazily by
// ensureDecoder(), so a codec resolved only for network-MOS metadata never
// allocates the AMR-WB decoder state.
//
// NOTE(review): every path that dereferences mDecoderCtx (the destructor, and
// presumably plc) must either call ensureDecoder() first or tolerate a null
// context - confirm.
AmrWbCodec::AmrWbCodec(const AmrCodecConfig& config)
    :mConfig(config)
{
    // Start from null so ensureDecoder() performs the first allocation.
    // Calling D_IF_init() eagerly here would make ensureDecoder() a permanent
    // no-op and defeat the lazy scheme.
    mDecoderCtx = nullptr;
}
// Creates the AMR-WB decoder context on first use; a no-op once it exists.
void AmrWbCodec::ensureDecoder()
{
    // Already allocated by an earlier call - nothing to do.
    if (mDecoderCtx)
        return;

    mDecoderCtx = D_IF_init();
}
AmrWbCodec::~AmrWbCodec()
@@ -630,6 +655,8 @@ Codec::DecodeResult AmrWbCodec::decodePlain(std::span<const uint8_t> input, std:
Codec::DecodeResult AmrWbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
{
ensureDecoder();
if (mConfig.mIuUP)
return decodeIuup(input, output);
else
+11
View File
@@ -33,6 +33,13 @@ protected:
int mPreviousPacketLength = 0;
size_t mCngCounter = 0;
size_t mSwitchCounter = 0;
// opencore-amr encoder/decoder state is allocated lazily on first encode/decode.
// Network-MOS-only streams resolve codec metadata (name/samplerate/frame timing)
// but never decode, so they must not pay for a context they never use. At scale
// this saves roughly one decoder state (several KB) per network-only stream.
void ensureEncoder();
void ensureDecoder();
public:
class CodecFactory: public Factory
{
@@ -85,6 +92,10 @@ protected:
int mPreviousPacketLength;
// Decoder state is allocated lazily on first decode/plc (see AmrNbCodec) so
// network-MOS-only streams never instantiate the AMR-WB decoder.
void ensureDecoder();
DecodeResult decodeIuup(std::span<const uint8_t> input, std::span<uint8_t> output);
DecodeResult decodePlain(std::span<const uint8_t> input, std::span<uint8_t> output);
+53 -21
View File
@@ -184,16 +184,18 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib:
return std::shared_ptr<Packet>();
}
RtpBuffer::FetchResult RtpBuffer::fetch()
void RtpBuffer::trimToHighWater(size_t maxPackets)
{
Lock l(mGuard);
FetchResult result;
// See if there is enough information in buffer
auto total = findTimelength();
while (total > mHigh && mPacketList.size() > 1 && 0ms != mHigh)
// Drop the oldest packet while either bound is exceeded: the time-based
// high-water mark (mHigh, when set) or, if maxPackets != 0, the packet-count
// cap. Always keep at least one packet so loss/gap accounting has a reference.
while (mPacketList.size() > 1 &&
((0ms != mHigh && total > mHigh) ||
(maxPackets != 0 && mPacketList.size() > maxPackets)))
{
ICELogMedia( << "Dropping RTP packets from jitter buffer");
total -= mPacketList.front()->timelength();
@@ -233,6 +235,19 @@ RtpBuffer::FetchResult RtpBuffer::fetch()
// Increase number in statistics
mStat.mPacketDropped++;
}
}
RtpBuffer::FetchResult RtpBuffer::fetch()
{
Lock l(mGuard);
FetchResult result;
// Bound the buffer to the high-water mark before fetching.
trimToHighWater();
// See how much audio is buffered now.
auto total = findTimelength();
if (total < mLow || total == 0ms)
{
@@ -494,13 +509,13 @@ void AudioReceiver::processDecoded(Audio::DataWindow& output, DecodeOptions opti
{
// Write to audio dump if requested
if (mDecodedDump && mDecodedLength)
mDecodedDump->write(mDecodedFrame, mDecodedLength);
mDecodedDump->write(mDecodedFrame.data(), mDecodedLength);
// Resample to target rate
makeMonoAndResample(options.mResampleToMainRate ? mCodec->samplerate() : 0, mCodec->channels());
// Send to output
output.add(mResampledFrame, mResampledLength);
output.add(mResampledFrame.data(), mResampledLength);
}
void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
@@ -517,13 +532,13 @@ void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::Data
size_t tail_size = tail * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
for (size_t i = 0; i < chunks; i++)
{
memset(mDecodedFrame, 0, chunk_size);
memset(mDecodedFrame.data(), 0, chunk_size);
mDecodedLength = chunk_size;
processDecoded(output, options);
}
if (tail)
{
memset(mDecodedFrame, 0, tail_size);
memset(mDecodedFrame.data(), 0, tail_size);
mDecodedLength = tail_size;
processDecoded(output, options);
}
@@ -537,7 +552,7 @@ void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWind
if (options.mSkipDecode)
mDecodedLength = 0;
else
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), 100, mDecodedFrame, false);
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), 100, mDecodedFrame.data(), false);
if (mDecodedLength)
processDecoded(output, options);
@@ -550,7 +565,7 @@ void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWind
if (options.mSkipDecode)
mDecodedLength = 0;
else
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), tail, reinterpret_cast<short*>(mDecodedFrame), false);
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), tail, reinterpret_cast<short*>(mDecodedFrame.data()), false);
if (mDecodedLength)
processDecoded(output, options);
@@ -568,7 +583,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
{
// Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
// Do not forget to send this noise to analysis
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame), false);
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame.data()), false);
}
else
decodePacketTo(output, options, mCngPacket);
@@ -581,14 +596,14 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
mDecodedLength = 0;
else
{
mDecodedLength = mCodec->plc(mFrameCount, {(uint8_t*)mDecodedFrame, sizeof mDecodedFrame});
mDecodedLength = mCodec->plc(mFrameCount, {(uint8_t*)mDecodedFrame.data(), mDecodedFrame.size() * sizeof(int16_t)});
if (!mDecodedLength)
{
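// PLC is not supported or failed,
// so substitute silence.
// NOTE(review): nr_of_samples below is already multiplied by sizeof(short), so
// mDecodedLength appears to end up twice one mono frame's byte size - confirm intent.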
size_t nr_of_samples = mCodec->frameTime() * mCodec->samplerate() / 1000 * sizeof(short);
mDecodedLength = nr_of_samples * sizeof(short);
memset(mDecodedFrame, 0, mDecodedLength);
memset(mDecodedFrame.data(), 0, mDecodedLength);
}
}
}
@@ -660,7 +675,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());
// Emit CNG mLastPacketLength milliseconds
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true);
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame.data(), true);
if (mDecodedLength)
processDecoded(output, options);
}
@@ -696,7 +711,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
{
// Decode frame by frame
auto codecInput = std::span{rtp.GetPayloadData() + i * mCodec->rtpLength(), (size_t)frameLength};
auto codecOutput = std::span{(uint8_t*)mDecodedFrame, sizeof mDecodedFrame};
auto codecOutput = std::span{(uint8_t*)mDecodedFrame.data(), mDecodedFrame.size() * sizeof(int16_t)};
auto r = mCodec->decode(codecInput, codecOutput);
mDecodedLength = r.mDecoded;
if (mDecodedLength > 0)
@@ -798,6 +813,10 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
// ICELogDebug(<< "getAudioTo() for " << options.mElapsed);
assert (options.mElapsed != 0ms);
// First decode on this receiver: allocate the scratch buffers. Network-MOS-only
// streams never reach this point, so they never pay for them.
ensureDecodeBuffers();
// Increase counter of requested audio
mRequestedAudio += options.mElapsed;
@@ -876,6 +895,19 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
return result;
}
// Sizes the decode / convert / resample scratch buffers to their full capacity
// the first time it is called; later calls return immediately.
void AudioReceiver::ensureDecodeBuffers()
{
    // The three buffers are always allocated together, so a non-empty
    // mDecodedFrame means all of them are already in place.
    if (!mDecodedFrame.empty())
        return;

    mDecodedFrame.resize(MT_MAX_DECODEBUFFER);
    // The converted buffer is twice as large as the other two.
    mConvertedFrame.resize(MT_MAX_DECODEBUFFER * 2);
    mResampledFrame.resize(MT_MAX_DECODEBUFFER);
}
void AudioReceiver::makeMonoAndResample(int rate, int channels)
{
// Make mono from stereo - engine works with mono only for now
@@ -883,12 +915,12 @@ void AudioReceiver::makeMonoAndResample(int rate, int channels)
if (channels != AUDIO_CHANNELS)
{
if (channels == 1)
mConvertedLength = Audio::ChannelConverter::monoToStereo(mDecodedFrame, mDecodedLength, mConvertedFrame, mDecodedLength * 2);
mConvertedLength = Audio::ChannelConverter::monoToStereo(mDecodedFrame.data(), mDecodedLength, mConvertedFrame.data(), mDecodedLength * 2);
else
mDecodedLength = Audio::ChannelConverter::stereoToMono(mDecodedFrame, mDecodedLength, mDecodedFrame, mDecodedLength / 2);
mDecodedLength = Audio::ChannelConverter::stereoToMono(mDecodedFrame.data(), mDecodedLength, mDecodedFrame.data(), mDecodedLength / 2);
}
void* frames = mConvertedLength ? mConvertedFrame : mDecodedFrame;
void* frames = mConvertedLength ? (void*)mConvertedFrame.data() : (void*)mDecodedFrame.data();
unsigned length = mConvertedLength ? mConvertedLength : mDecodedLength;
Audio::Resampler* r = nullptr;
@@ -899,13 +931,13 @@ void AudioReceiver::makeMonoAndResample(int rate, int channels)
case 32000: r = &mResampler32; break;
case 48000: r = &mResampler48; break;
default:
memcpy(mResampledFrame, frames, length);
memcpy(mResampledFrame.data(), frames, length);
mResampledLength = length;
return;
}
size_t processedInput = 0;
mResampledLength = r->processBuffer(frames, length, processedInput, mResampledFrame, r->getDestLength(length));
mResampledLength = r->processBuffer(frames, length, processedInput, mResampledFrame.data(), r->getDestLength(length));
// processedInput result value is ignored - it is always equal to length as internal sample rate is 8/16/32/48K
}
+31 -5
View File
@@ -20,6 +20,8 @@
#include <optional>
#include <chrono>
#include <vector>
#include <cstdint>
using namespace std::chrono_literals;
namespace MT
@@ -103,7 +105,19 @@ public:
typedef std::shared_ptr<ResultList> PResultList;
FetchResult fetch();
// Drop oldest packets so buffered audio stays within the high-water mark,
// recording packet-loss events for any sequence gaps crossed (the same
// accounting fetch() performs). Used to bound memory on streams that never
// call fetch() - i.e. network-MOS-only streams with audio decode disabled,
// which would otherwise retain every packet for the whole call.
//
// maxPackets, when non-zero, additionally caps the buffer to that many packets
// regardless of buffered time. The decode path (fetch()) leaves it 0 so jitter
// tolerance stays governed by the time-based high-water mark; the network-only
// path passes a small cap since those packets are never decoded.
void trimToHighWater(size_t maxPackets = 0);
protected:
unsigned mSsrc = 0;
std::chrono::milliseconds mHigh = std::chrono::milliseconds(RTP_BUFFER_HIGH),
@@ -240,16 +254,22 @@ protected:
// Already decoded data that can be retrieved without actual decoding - it may happen because of getAudioTo() may be limited by time interval
Audio::DataWindow mAvailable;
// Temporary buffer to hold decoded data (it is better than allocate data on stack)
int16_t mDecodedFrame[MT_MAX_DECODEBUFFER];
// Decode/convert/resample scratch buffers. These were inline arrays
// (MT_MAX_DECODEBUFFER * {1,2,1} * int16_t = 256 KB total) carried by every
// AudioReceiver, hence by every StreamDecoder - including network-MOS-only
// streams that never decode. They are now allocated lazily on the first
// getAudioTo() call via ensureDecodeBuffers(); non-decoding streams keep them
// empty. Once allocated they are sized to full capacity and reused, so decode
// behaviour is unchanged.
std::vector<int16_t> mDecodedFrame; // sized to MT_MAX_DECODEBUFFER
size_t mDecodedLength = 0;
// Buffer to hold data converted to stereo/mono; there is multiplier 2 as it can be stereo audio
int16_t mConvertedFrame[MT_MAX_DECODEBUFFER * 2];
std::vector<int16_t> mConvertedFrame; // sized to MT_MAX_DECODEBUFFER * 2
size_t mConvertedLength = 0;
// Buffer to hold data resampled to AUDIO_SAMPLERATE
int16_t mResampledFrame[MT_MAX_DECODEBUFFER];
std::vector<int16_t> mResampledFrame; // sized to MT_MAX_DECODEBUFFER
size_t mResampledLength = 0;
// Last packet time length
@@ -272,6 +292,12 @@ protected:
std::chrono::milliseconds mRequestedAudio = 0ms;
std::chrono::milliseconds mProducedAudio = 0ms;
// Lazily allocate the decode/convert/resample scratch buffers (mDecodedFrame,
// mConvertedFrame, mResampledFrame) to full capacity on the first decode. A
// no-op once allocated. Called at the top of getAudioTo(); network-MOS-only
// streams never reach it, so they never pay the 256 KB.
void ensureDecodeBuffers();
// Zero rate will make audio mono but resampling will be skipped
void makeMonoAndResample(int rate, int channels);
+25 -2
View File
@@ -152,6 +152,27 @@ EVSCodec::EVSCodec(const StreamParameters &sp)
{
EVSCodec::sp = sp;
// Metadata only - the heavy decoder state is created lazily (ensureDecoder()).
mOutputFs = outputFsFromBw(sp.bw);
}
// Translates an EVS bandwidth identifier (NB/WB/SWB/FB) into the matching
// output sample rate in Hz. Any other value yields 0.
int EVSCodec::outputFsFromBw(int bw)
{
    int sampleRateHz = 0; // result for an unrecognised bandwidth

    switch (bw)
    {
    case NB:
        sampleRateHz = 8000;
        break;
    case WB:
        sampleRateHz = 16000;
        break;
    case SWB:
        sampleRateHz = 32000;
        break;
    case FB:
        sampleRateHz = 48000;
        break;
    }

    return sampleRateHz;
}
void EVSCodec::ensureDecoder()
{
if (st_dec)
return;
if ((st_dec = reinterpret_cast<evs::Decoder_State*>(malloc(sizeof(evs::Decoder_State)))) == nullptr)
throw std::bad_alloc();
@@ -170,9 +191,9 @@ EVSCodec::~EVSCodec()
Codec::Info EVSCodec::info() {
return {
.mName = MT_EVS_CODECNAME,
.mSamplerate = st_dec->output_Fs,
.mSamplerate = mOutputFs,
.mChannels = 1,
.mPcmLength = st_dec->output_Fs / 1000 * sp.ptime * 2,
.mPcmLength = mOutputFs / 1000 * sp.ptime * 2,
.mFrameTime = sp.ptime,
.mRtpLength = 0
};
@@ -187,6 +208,8 @@ Codec::EncodeResult EVSCodec::encode(std::span<const uint8_t> input, std::span<u
Codec::DecodeResult EVSCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
{
ensureDecoder();
if (output.size_bytes() < pcmLength())
return {.mDecoded = 0};
+14
View File
@@ -57,7 +57,21 @@ public:
private:
evs::Decoder_State* st_dec = nullptr;
StreamParameters sp;
// Output sample rate, derived from the negotiated bandwidth (sp.bw) at
// construction. Cached so info()/samplerate()/pcmLength() work for network-MOS
// metadata without allocating the (large) EVS decoder state - see ensureDecoder.
int mOutputFs = 0;
void initDecoder(const StreamParameters& sp);
// Allocate + initialize the EVS decoder state lazily on first decode().
// Network-MOS-only streams resolve metadata but never decode, so they never
// pay for the EVS decoder (Decoder_State + CLDFB/FD-CNG sub-allocations).
void ensureDecoder();
// Maps an EVS bandwidth (NB/WB/SWB/FB) to its output sample rate in Hz.
static int outputFsFromBw(int bw);
};
} // End of namespace
+13 -1
View File
@@ -44,7 +44,19 @@
#include "rtptypes.h"
#include "rtpmemoryobject.h"
// Number of buckets in the per-RTPSession SSRC->source hash table. This is an
// inline array of pointers in every RTPSources instance (sizeof == hashsize *
// sizeof(void*)), so it is paid by every RTPSession object regardless of how many
// sources it actually tracks. The original jrtplib default (8317) targets RTP
// mixers/conferences that demultiplex thousands of distinct SSRCs on one session;
// it costs ~65 KB per session. Sevana's per-stream capture sessions carry ~1 SSRC,
// so a far smaller table is ample - collisions are resolved by linked lists, so a
// small size only affects lookup cost (negligible at our source counts), never
// correctness. Overridable at build time for products that genuinely need many
// sources per session.
//
// The macro must NOT be defined unconditionally before this guard: an earlier
// "#define RTPSOURCES_HASHSIZE 8317" would make the #ifndef below dead and
// silently keep the large table.
#ifndef RTPSOURCES_HASHSIZE
#define RTPSOURCES_HASHSIZE 251
#endif
namespace jrtplib
{