- performance optimizations
This commit is contained in:
@@ -265,8 +265,20 @@ PCodec AmrNbCodec::CodecFactory::create()
|
|||||||
// Constructs an AMR-NB codec for the given configuration.
//
// No encoder/decoder context is created here: ensureEncoder() and
// ensureDecoder() create them on first use, so a codec that is only queried
// for metadata never allocates them.
AmrNbCodec::AmrNbCodec(const AmrCodecConfig& config)
    :mConfig(config)
{
    // ensureEncoder()/ensureDecoder() decide whether to allocate by testing
    // these pointers, so they must hold a well-defined null value from the
    // moment the object exists. Assigning here is harmless if the members
    // already carry in-class initializers.
    mEncoderCtx = nullptr;
    mDecoderCtx = nullptr;
}
|
||||||
|
|
||||||
|
void AmrNbCodec::ensureEncoder()
|
||||||
|
{
|
||||||
|
if (!mEncoderCtx)
|
||||||
|
mEncoderCtx = Encoder_Interface_init(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void AmrNbCodec::ensureDecoder()
|
||||||
|
{
|
||||||
|
if (!mDecoderCtx)
|
||||||
|
mDecoderCtx = Decoder_Interface_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
AmrNbCodec::~AmrNbCodec()
|
AmrNbCodec::~AmrNbCodec()
|
||||||
@@ -298,6 +310,8 @@ Codec::Info AmrNbCodec::info()
|
|||||||
|
|
||||||
Codec::EncodeResult AmrNbCodec::encode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
Codec::EncodeResult AmrNbCodec::encode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
||||||
{
|
{
|
||||||
|
ensureEncoder();
|
||||||
|
|
||||||
if (input.size_bytes() % pcmLength())
|
if (input.size_bytes() % pcmLength())
|
||||||
return {.mEncoded = 0};
|
return {.mEncoded = 0};
|
||||||
|
|
||||||
@@ -324,6 +338,8 @@ Codec::EncodeResult AmrNbCodec::encode(std::span<const uint8_t> input, std::span
|
|||||||
#define AMR_BITRATE_DTX 15
|
#define AMR_BITRATE_DTX 15
|
||||||
Codec::DecodeResult AmrNbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
Codec::DecodeResult AmrNbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
||||||
{
|
{
|
||||||
|
ensureDecoder();
|
||||||
|
|
||||||
if (mConfig.mOctetAligned)
|
if (mConfig.mOctetAligned)
|
||||||
return {.mDecoded = 0};
|
return {.mDecoded = 0};
|
||||||
|
|
||||||
@@ -427,6 +443,8 @@ Codec::DecodeResult AmrNbCodec::decode(std::span<const uint8_t> input, std::span
|
|||||||
|
|
||||||
size_t AmrNbCodec::plc(int lostFrames, std::span<uint8_t> output)
|
size_t AmrNbCodec::plc(int lostFrames, std::span<uint8_t> output)
|
||||||
{
|
{
|
||||||
|
ensureDecoder();
|
||||||
|
|
||||||
if (output.size_bytes() < lostFrames * pcmLength())
|
if (output.size_bytes() < lostFrames * pcmLength())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@@ -496,7 +514,14 @@ AmrWbStatistics MT::GAmrWbStatistics;
|
|||||||
// Constructs an AMR-WB codec for the given configuration.
//
// The decoder context is not created here: ensureDecoder() creates it on
// first use, so a codec that is only queried for metadata never allocates
// the AMR-WB decoder state.
AmrWbCodec::AmrWbCodec(const AmrCodecConfig& config)
    :mConfig(config)
{
    // ensureDecoder() decides whether to allocate by testing this pointer, so
    // it must hold a well-defined null value from the moment the object
    // exists. Assigning here is harmless if the member already carries an
    // in-class initializer.
    mDecoderCtx = nullptr;
}
|
||||||
|
|
||||||
|
void AmrWbCodec::ensureDecoder()
|
||||||
|
{
|
||||||
|
if (!mDecoderCtx)
|
||||||
|
mDecoderCtx = D_IF_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
AmrWbCodec::~AmrWbCodec()
|
AmrWbCodec::~AmrWbCodec()
|
||||||
@@ -630,6 +655,8 @@ Codec::DecodeResult AmrWbCodec::decodePlain(std::span<const uint8_t> input, std:
|
|||||||
|
|
||||||
Codec::DecodeResult AmrWbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
Codec::DecodeResult AmrWbCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
||||||
{
|
{
|
||||||
|
ensureDecoder();
|
||||||
|
|
||||||
if (mConfig.mIuUP)
|
if (mConfig.mIuUP)
|
||||||
return decodeIuup(input, output);
|
return decodeIuup(input, output);
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -33,6 +33,13 @@ protected:
|
|||||||
int mPreviousPacketLength = 0;
|
int mPreviousPacketLength = 0;
|
||||||
size_t mCngCounter = 0;
|
size_t mCngCounter = 0;
|
||||||
size_t mSwitchCounter = 0;
|
size_t mSwitchCounter = 0;
|
||||||
|
|
||||||
|
// opencore-amr encoder/decoder state is allocated lazily on first encode/decode.
|
||||||
|
// Network-MOS-only streams resolve codec metadata (name/samplerate/frame timing)
|
||||||
|
// but never decode, so they must not pay for a context they never use - at scale
|
||||||
|
// this is ~a decoder state (several KB) saved per network-only stream.
|
||||||
|
void ensureEncoder();
|
||||||
|
void ensureDecoder();
|
||||||
public:
|
public:
|
||||||
class CodecFactory: public Factory
|
class CodecFactory: public Factory
|
||||||
{
|
{
|
||||||
@@ -85,6 +92,10 @@ protected:
|
|||||||
|
|
||||||
int mPreviousPacketLength;
|
int mPreviousPacketLength;
|
||||||
|
|
||||||
|
// Decoder state is allocated lazily on first decode/plc (see AmrNbCodec) so
|
||||||
|
// network-MOS-only streams never instantiate the AMR-WB decoder.
|
||||||
|
void ensureDecoder();
|
||||||
|
|
||||||
DecodeResult decodeIuup(std::span<const uint8_t> input, std::span<uint8_t> output);
|
DecodeResult decodeIuup(std::span<const uint8_t> input, std::span<uint8_t> output);
|
||||||
DecodeResult decodePlain(std::span<const uint8_t> input, std::span<uint8_t> output);
|
DecodeResult decodePlain(std::span<const uint8_t> input, std::span<uint8_t> output);
|
||||||
|
|
||||||
|
|||||||
@@ -184,16 +184,18 @@ std::shared_ptr<RtpBuffer::Packet> RtpBuffer::add(const std::shared_ptr<jrtplib:
|
|||||||
return std::shared_ptr<Packet>();
|
return std::shared_ptr<Packet>();
|
||||||
}
|
}
|
||||||
|
|
||||||
RtpBuffer::FetchResult RtpBuffer::fetch()
|
void RtpBuffer::trimToHighWater(size_t maxPackets)
|
||||||
{
|
{
|
||||||
Lock l(mGuard);
|
Lock l(mGuard);
|
||||||
|
|
||||||
FetchResult result;
|
|
||||||
|
|
||||||
// See if there is enough information in buffer
|
|
||||||
auto total = findTimelength();
|
auto total = findTimelength();
|
||||||
|
|
||||||
while (total > mHigh && mPacketList.size() > 1 && 0ms != mHigh)
|
// Drop the oldest packet while either bound is exceeded: the time-based
|
||||||
|
// high-water mark (mHigh, when set) or, if maxPackets != 0, the packet-count
|
||||||
|
// cap. Always keep at least one packet so loss/gap accounting has a reference.
|
||||||
|
while (mPacketList.size() > 1 &&
|
||||||
|
((0ms != mHigh && total > mHigh) ||
|
||||||
|
(maxPackets != 0 && mPacketList.size() > maxPackets)))
|
||||||
{
|
{
|
||||||
ICELogMedia( << "Dropping RTP packets from jitter buffer");
|
ICELogMedia( << "Dropping RTP packets from jitter buffer");
|
||||||
total -= mPacketList.front()->timelength();
|
total -= mPacketList.front()->timelength();
|
||||||
@@ -233,6 +235,19 @@ RtpBuffer::FetchResult RtpBuffer::fetch()
|
|||||||
// Increase number in statistics
|
// Increase number in statistics
|
||||||
mStat.mPacketDropped++;
|
mStat.mPacketDropped++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RtpBuffer::FetchResult RtpBuffer::fetch()
|
||||||
|
{
|
||||||
|
Lock l(mGuard);
|
||||||
|
|
||||||
|
FetchResult result;
|
||||||
|
|
||||||
|
// Bound the buffer to the high-water mark before fetching.
|
||||||
|
trimToHighWater();
|
||||||
|
|
||||||
|
// See how much audio is buffered now.
|
||||||
|
auto total = findTimelength();
|
||||||
|
|
||||||
if (total < mLow || total == 0ms)
|
if (total < mLow || total == 0ms)
|
||||||
{
|
{
|
||||||
@@ -494,13 +509,13 @@ void AudioReceiver::processDecoded(Audio::DataWindow& output, DecodeOptions opti
|
|||||||
{
|
{
|
||||||
// Write to audio dump if requested
|
// Write to audio dump if requested
|
||||||
if (mDecodedDump && mDecodedLength)
|
if (mDecodedDump && mDecodedLength)
|
||||||
mDecodedDump->write(mDecodedFrame, mDecodedLength);
|
mDecodedDump->write(mDecodedFrame.data(), mDecodedLength);
|
||||||
|
|
||||||
// Resample to target rate
|
// Resample to target rate
|
||||||
makeMonoAndResample(options.mResampleToMainRate ? mCodec->samplerate() : 0, mCodec->channels());
|
makeMonoAndResample(options.mResampleToMainRate ? mCodec->samplerate() : 0, mCodec->channels());
|
||||||
|
|
||||||
// Send to output
|
// Send to output
|
||||||
output.add(mResampledFrame, mResampledLength);
|
output.add(mResampledFrame.data(), mResampledLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
|
void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::DataWindow& output, DecodeOptions options)
|
||||||
@@ -517,13 +532,13 @@ void AudioReceiver::produceSilence(std::chrono::milliseconds length, Audio::Data
|
|||||||
size_t tail_size = tail * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
|
size_t tail_size = tail * sizeof(int16_t) * mCodec->samplerate() / 1000 * mCodec->channels();
|
||||||
for (size_t i = 0; i < chunks; i++)
|
for (size_t i = 0; i < chunks; i++)
|
||||||
{
|
{
|
||||||
memset(mDecodedFrame, 0, chunk_size);
|
memset(mDecodedFrame.data(), 0, chunk_size);
|
||||||
mDecodedLength = chunk_size;
|
mDecodedLength = chunk_size;
|
||||||
processDecoded(output, options);
|
processDecoded(output, options);
|
||||||
}
|
}
|
||||||
if (tail)
|
if (tail)
|
||||||
{
|
{
|
||||||
memset(mDecodedFrame, 0, tail_size);
|
memset(mDecodedFrame.data(), 0, tail_size);
|
||||||
mDecodedLength = tail_size;
|
mDecodedLength = tail_size;
|
||||||
processDecoded(output, options);
|
processDecoded(output, options);
|
||||||
}
|
}
|
||||||
@@ -537,7 +552,7 @@ void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWind
|
|||||||
if (options.mSkipDecode)
|
if (options.mSkipDecode)
|
||||||
mDecodedLength = 0;
|
mDecodedLength = 0;
|
||||||
else
|
else
|
||||||
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), 100, mDecodedFrame, false);
|
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), 100, mDecodedFrame.data(), false);
|
||||||
|
|
||||||
if (mDecodedLength)
|
if (mDecodedLength)
|
||||||
processDecoded(output, options);
|
processDecoded(output, options);
|
||||||
@@ -550,7 +565,7 @@ void AudioReceiver::produceCNG(std::chrono::milliseconds length, Audio::DataWind
|
|||||||
if (options.mSkipDecode)
|
if (options.mSkipDecode)
|
||||||
mDecodedLength = 0;
|
mDecodedLength = 0;
|
||||||
else
|
else
|
||||||
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), tail, reinterpret_cast<short*>(mDecodedFrame), false);
|
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), tail, reinterpret_cast<short*>(mDecodedFrame.data()), false);
|
||||||
|
|
||||||
if (mDecodedLength)
|
if (mDecodedLength)
|
||||||
processDecoded(output, options);
|
processDecoded(output, options);
|
||||||
@@ -568,7 +583,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
|
|||||||
{
|
{
|
||||||
// Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
|
// Synthesize comfort noise. It will be done on AUDIO_SAMPLERATE rate directly to mResampledFrame buffer.
|
||||||
// Do not forget to send this noise to analysis
|
// Do not forget to send this noise to analysis
|
||||||
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame), false);
|
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, reinterpret_cast<short*>(mDecodedFrame.data()), false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
decodePacketTo(output, options, mCngPacket);
|
decodePacketTo(output, options, mCngPacket);
|
||||||
@@ -581,14 +596,14 @@ AudioReceiver::DecodeResult AudioReceiver::decodeGapTo(Audio::DataWindow& output
|
|||||||
mDecodedLength = 0;
|
mDecodedLength = 0;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mDecodedLength = mCodec->plc(mFrameCount, {(uint8_t*)mDecodedFrame, sizeof mDecodedFrame});
|
mDecodedLength = mCodec->plc(mFrameCount, {(uint8_t*)mDecodedFrame.data(), mDecodedFrame.size() * sizeof(int16_t)});
|
||||||
if (!mDecodedLength)
|
if (!mDecodedLength)
|
||||||
{
|
{
|
||||||
// PLC is not support or failed
|
// PLC is not support or failed
|
||||||
// So substitute the silence
|
// So substitute the silence
|
||||||
size_t nr_of_samples = mCodec->frameTime() * mCodec->samplerate() / 1000 * sizeof(short);
|
size_t nr_of_samples = mCodec->frameTime() * mCodec->samplerate() / 1000 * sizeof(short);
|
||||||
mDecodedLength = nr_of_samples * sizeof(short);
|
mDecodedLength = nr_of_samples * sizeof(short);
|
||||||
memset(mDecodedFrame, 0, mDecodedLength);
|
memset(mDecodedFrame.data(), 0, mDecodedLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -660,7 +675,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
|
|||||||
mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());
|
mCngDecoder.decode3389(rtp.GetPayloadData(), rtp.GetPayloadLength());
|
||||||
|
|
||||||
// Emit CNG mLastPacketLength milliseconds
|
// Emit CNG mLastPacketLength milliseconds
|
||||||
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame, true);
|
mDecodedLength = mCngDecoder.produce(mCodec->samplerate(), mLastPacketTimeLength, (short*)mDecodedFrame.data(), true);
|
||||||
if (mDecodedLength)
|
if (mDecodedLength)
|
||||||
processDecoded(output, options);
|
processDecoded(output, options);
|
||||||
}
|
}
|
||||||
@@ -696,7 +711,7 @@ AudioReceiver::DecodeResult AudioReceiver::decodePacketTo(Audio::DataWindow& out
|
|||||||
{
|
{
|
||||||
// Decode frame by frame
|
// Decode frame by frame
|
||||||
auto codecInput = std::span{rtp.GetPayloadData() + i * mCodec->rtpLength(), (size_t)frameLength};
|
auto codecInput = std::span{rtp.GetPayloadData() + i * mCodec->rtpLength(), (size_t)frameLength};
|
||||||
auto codecOutput = std::span{(uint8_t*)mDecodedFrame, sizeof mDecodedFrame};
|
auto codecOutput = std::span{(uint8_t*)mDecodedFrame.data(), mDecodedFrame.size() * sizeof(int16_t)};
|
||||||
auto r = mCodec->decode(codecInput, codecOutput);
|
auto r = mCodec->decode(codecInput, codecOutput);
|
||||||
mDecodedLength = r.mDecoded;
|
mDecodedLength = r.mDecoded;
|
||||||
if (mDecodedLength > 0)
|
if (mDecodedLength > 0)
|
||||||
@@ -798,6 +813,10 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
|
|||||||
// ICELogDebug(<< "getAudioTo() for " << options.mElapsed);
|
// ICELogDebug(<< "getAudioTo() for " << options.mElapsed);
|
||||||
assert (options.mElapsed != 0ms);
|
assert (options.mElapsed != 0ms);
|
||||||
|
|
||||||
|
// First decode on this receiver: allocate the scratch buffers. Network-MOS-only
|
||||||
|
// streams never reach this point, so they never pay for them.
|
||||||
|
ensureDecodeBuffers();
|
||||||
|
|
||||||
// Increase counter of requested audio
|
// Increase counter of requested audio
|
||||||
mRequestedAudio += options.mElapsed;
|
mRequestedAudio += options.mElapsed;
|
||||||
|
|
||||||
@@ -876,6 +895,19 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AudioReceiver::ensureDecodeBuffers()
|
||||||
|
{
|
||||||
|
// Allocate the decode/convert/resample scratch buffers to full capacity on the
|
||||||
|
// first decode. mDecodedFrame being empty means none are allocated yet; they
|
||||||
|
// are always allocated together, so checking one is enough.
|
||||||
|
if (mDecodedFrame.empty())
|
||||||
|
{
|
||||||
|
mDecodedFrame.resize(MT_MAX_DECODEBUFFER);
|
||||||
|
mConvertedFrame.resize(MT_MAX_DECODEBUFFER * 2);
|
||||||
|
mResampledFrame.resize(MT_MAX_DECODEBUFFER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AudioReceiver::makeMonoAndResample(int rate, int channels)
|
void AudioReceiver::makeMonoAndResample(int rate, int channels)
|
||||||
{
|
{
|
||||||
// Make mono from stereo - engine works with mono only for now
|
// Make mono from stereo - engine works with mono only for now
|
||||||
@@ -883,12 +915,12 @@ void AudioReceiver::makeMonoAndResample(int rate, int channels)
|
|||||||
if (channels != AUDIO_CHANNELS)
|
if (channels != AUDIO_CHANNELS)
|
||||||
{
|
{
|
||||||
if (channels == 1)
|
if (channels == 1)
|
||||||
mConvertedLength = Audio::ChannelConverter::monoToStereo(mDecodedFrame, mDecodedLength, mConvertedFrame, mDecodedLength * 2);
|
mConvertedLength = Audio::ChannelConverter::monoToStereo(mDecodedFrame.data(), mDecodedLength, mConvertedFrame.data(), mDecodedLength * 2);
|
||||||
else
|
else
|
||||||
mDecodedLength = Audio::ChannelConverter::stereoToMono(mDecodedFrame, mDecodedLength, mDecodedFrame, mDecodedLength / 2);
|
mDecodedLength = Audio::ChannelConverter::stereoToMono(mDecodedFrame.data(), mDecodedLength, mDecodedFrame.data(), mDecodedLength / 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void* frames = mConvertedLength ? mConvertedFrame : mDecodedFrame;
|
void* frames = mConvertedLength ? (void*)mConvertedFrame.data() : (void*)mDecodedFrame.data();
|
||||||
unsigned length = mConvertedLength ? mConvertedLength : mDecodedLength;
|
unsigned length = mConvertedLength ? mConvertedLength : mDecodedLength;
|
||||||
|
|
||||||
Audio::Resampler* r = nullptr;
|
Audio::Resampler* r = nullptr;
|
||||||
@@ -899,13 +931,13 @@ void AudioReceiver::makeMonoAndResample(int rate, int channels)
|
|||||||
case 32000: r = &mResampler32; break;
|
case 32000: r = &mResampler32; break;
|
||||||
case 48000: r = &mResampler48; break;
|
case 48000: r = &mResampler48; break;
|
||||||
default:
|
default:
|
||||||
memcpy(mResampledFrame, frames, length);
|
memcpy(mResampledFrame.data(), frames, length);
|
||||||
mResampledLength = length;
|
mResampledLength = length;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t processedInput = 0;
|
size_t processedInput = 0;
|
||||||
mResampledLength = r->processBuffer(frames, length, processedInput, mResampledFrame, r->getDestLength(length));
|
mResampledLength = r->processBuffer(frames, length, processedInput, mResampledFrame.data(), r->getDestLength(length));
|
||||||
// processedInput result value is ignored - it is always equal to length as internal sample rate is 8/16/32/48K
|
// processedInput result value is ignored - it is always equal to length as internal sample rate is 8/16/32/48K
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,8 @@
|
|||||||
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdint>
|
||||||
using namespace std::chrono_literals;
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
namespace MT
|
namespace MT
|
||||||
@@ -103,7 +105,19 @@ public:
|
|||||||
typedef std::shared_ptr<ResultList> PResultList;
|
typedef std::shared_ptr<ResultList> PResultList;
|
||||||
|
|
||||||
FetchResult fetch();
|
FetchResult fetch();
|
||||||
|
|
||||||
|
// Drop oldest packets so buffered audio stays within the high-water mark,
|
||||||
|
// recording packet-loss events for any sequence gaps crossed (the same
|
||||||
|
// accounting fetch() performs). Used to bound memory on streams that never
|
||||||
|
// call fetch() - i.e. network-MOS-only streams with audio decode disabled,
|
||||||
|
// which would otherwise retain every packet for the whole call.
|
||||||
|
//
|
||||||
|
// maxPackets, when non-zero, additionally caps the buffer to that many packets
|
||||||
|
// regardless of buffered time. The decode path (fetch()) leaves it 0 so jitter
|
||||||
|
// tolerance stays governed by the time-based high-water mark; the network-only
|
||||||
|
// path passes a small cap since those packets are never decoded.
|
||||||
|
void trimToHighWater(size_t maxPackets = 0);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
unsigned mSsrc = 0;
|
unsigned mSsrc = 0;
|
||||||
std::chrono::milliseconds mHigh = std::chrono::milliseconds(RTP_BUFFER_HIGH),
|
std::chrono::milliseconds mHigh = std::chrono::milliseconds(RTP_BUFFER_HIGH),
|
||||||
@@ -240,16 +254,22 @@ protected:
|
|||||||
// Already decoded data that can be retrieved without actual decoding - it may happen because of getAudioTo() may be limited by time interval
|
// Already decoded data that can be retrieved without actual decoding - it may happen because of getAudioTo() may be limited by time interval
|
||||||
Audio::DataWindow mAvailable;
|
Audio::DataWindow mAvailable;
|
||||||
|
|
||||||
// Temporary buffer to hold decoded data (it is better than allocate data on stack)
|
// Decode/convert/resample scratch buffers. These were inline arrays
|
||||||
int16_t mDecodedFrame[MT_MAX_DECODEBUFFER];
|
// (MT_MAX_DECODEBUFFER * {1,2,1} * int16_t = 256 KB total) carried by every
|
||||||
|
// AudioReceiver, hence by every StreamDecoder - including network-MOS-only
|
||||||
|
// streams that never decode. They are now allocated lazily on the first
|
||||||
|
// getAudioTo() call via ensureDecodeBuffers(); non-decoding streams keep them
|
||||||
|
// empty. Once allocated they are sized to full capacity and reused, so decode
|
||||||
|
// behaviour is unchanged.
|
||||||
|
std::vector<int16_t> mDecodedFrame; // sized to MT_MAX_DECODEBUFFER
|
||||||
size_t mDecodedLength = 0;
|
size_t mDecodedLength = 0;
|
||||||
|
|
||||||
// Buffer to hold data converted to stereo/mono; there is multiplier 2 as it can be stereo audio
|
// Buffer to hold data converted to stereo/mono; there is multiplier 2 as it can be stereo audio
|
||||||
int16_t mConvertedFrame[MT_MAX_DECODEBUFFER * 2];
|
std::vector<int16_t> mConvertedFrame; // sized to MT_MAX_DECODEBUFFER * 2
|
||||||
size_t mConvertedLength = 0;
|
size_t mConvertedLength = 0;
|
||||||
|
|
||||||
// Buffer to hold data resampled to AUDIO_SAMPLERATE
|
// Buffer to hold data resampled to AUDIO_SAMPLERATE
|
||||||
int16_t mResampledFrame[MT_MAX_DECODEBUFFER];
|
std::vector<int16_t> mResampledFrame; // sized to MT_MAX_DECODEBUFFER
|
||||||
size_t mResampledLength = 0;
|
size_t mResampledLength = 0;
|
||||||
|
|
||||||
// Last packet time length
|
// Last packet time length
|
||||||
@@ -272,6 +292,12 @@ protected:
|
|||||||
std::chrono::milliseconds mRequestedAudio = 0ms;
|
std::chrono::milliseconds mRequestedAudio = 0ms;
|
||||||
std::chrono::milliseconds mProducedAudio = 0ms;
|
std::chrono::milliseconds mProducedAudio = 0ms;
|
||||||
|
|
||||||
|
// Lazily allocate the decode/convert/resample scratch buffers (mDecodedFrame,
|
||||||
|
// mConvertedFrame, mResampledFrame) to full capacity on the first decode. A
|
||||||
|
// no-op once allocated. Called at the top of getAudioTo(); network-MOS-only
|
||||||
|
// streams never reach it, so they never pay the 256 KB.
|
||||||
|
void ensureDecodeBuffers();
|
||||||
|
|
||||||
// Zero rate will make audio mono but resampling will be skipped
|
// Zero rate will make audio mono but resampling will be skipped
|
||||||
void makeMonoAndResample(int rate, int channels);
|
void makeMonoAndResample(int rate, int channels);
|
||||||
|
|
||||||
|
|||||||
@@ -152,6 +152,27 @@ EVSCodec::EVSCodec(const StreamParameters &sp)
|
|||||||
{
|
{
|
||||||
EVSCodec::sp = sp;
|
EVSCodec::sp = sp;
|
||||||
|
|
||||||
|
// Metadata only - the heavy decoder state is created lazily (ensureDecoder()).
|
||||||
|
mOutputFs = outputFsFromBw(sp.bw);
|
||||||
|
}
|
||||||
|
|
||||||
|
int EVSCodec::outputFsFromBw(int bw)
|
||||||
|
{
|
||||||
|
switch (bw)
|
||||||
|
{
|
||||||
|
case NB: return 8000;
|
||||||
|
case WB: return 16000;
|
||||||
|
case SWB: return 32000;
|
||||||
|
case FB: return 48000;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EVSCodec::ensureDecoder()
|
||||||
|
{
|
||||||
|
if (st_dec)
|
||||||
|
return;
|
||||||
|
|
||||||
if ((st_dec = reinterpret_cast<evs::Decoder_State*>(malloc(sizeof(evs::Decoder_State)))) == nullptr)
|
if ((st_dec = reinterpret_cast<evs::Decoder_State*>(malloc(sizeof(evs::Decoder_State)))) == nullptr)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
|
|
||||||
@@ -170,9 +191,9 @@ EVSCodec::~EVSCodec()
|
|||||||
Codec::Info EVSCodec::info() {
|
Codec::Info EVSCodec::info() {
|
||||||
return {
|
return {
|
||||||
.mName = MT_EVS_CODECNAME,
|
.mName = MT_EVS_CODECNAME,
|
||||||
.mSamplerate = st_dec->output_Fs,
|
.mSamplerate = mOutputFs,
|
||||||
.mChannels = 1,
|
.mChannels = 1,
|
||||||
.mPcmLength = st_dec->output_Fs / 1000 * sp.ptime * 2,
|
.mPcmLength = mOutputFs / 1000 * sp.ptime * 2,
|
||||||
.mFrameTime = sp.ptime,
|
.mFrameTime = sp.ptime,
|
||||||
.mRtpLength = 0
|
.mRtpLength = 0
|
||||||
};
|
};
|
||||||
@@ -187,6 +208,8 @@ Codec::EncodeResult EVSCodec::encode(std::span<const uint8_t> input, std::span<u
|
|||||||
|
|
||||||
Codec::DecodeResult EVSCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
Codec::DecodeResult EVSCodec::decode(std::span<const uint8_t> input, std::span<uint8_t> output)
|
||||||
{
|
{
|
||||||
|
ensureDecoder();
|
||||||
|
|
||||||
if (output.size_bytes() < pcmLength())
|
if (output.size_bytes() < pcmLength())
|
||||||
return {.mDecoded = 0};
|
return {.mDecoded = 0};
|
||||||
|
|
||||||
|
|||||||
@@ -57,7 +57,21 @@ public:
|
|||||||
private:
|
private:
|
||||||
evs::Decoder_State* st_dec = nullptr;
|
evs::Decoder_State* st_dec = nullptr;
|
||||||
StreamParameters sp;
|
StreamParameters sp;
|
||||||
|
|
||||||
|
// Output sample rate, derived from the negotiated bandwidth (sp.bw) at
|
||||||
|
// construction. Cached so info()/samplerate()/pcmLength() work for network-MOS
|
||||||
|
// metadata without allocating the (large) EVS decoder state - see ensureDecoder.
|
||||||
|
int mOutputFs = 0;
|
||||||
|
|
||||||
void initDecoder(const StreamParameters& sp);
|
void initDecoder(const StreamParameters& sp);
|
||||||
|
|
||||||
|
// Allocate + initialize the EVS decoder state lazily on first decode().
|
||||||
|
// Network-MOS-only streams resolve metadata but never decode, so they never
|
||||||
|
// pay for the EVS decoder (Decoder_State + CLDFB/FD-CNG sub-allocations).
|
||||||
|
void ensureDecoder();
|
||||||
|
|
||||||
|
// Maps an EVS bandwidth (NB/WB/SWB/FB) to its output sample rate in Hz.
|
||||||
|
static int outputFsFromBw(int bw);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End of namespace
|
} // End of namespace
|
||||||
|
|||||||
@@ -44,7 +44,19 @@
|
|||||||
#include "rtptypes.h"
|
#include "rtptypes.h"
|
||||||
#include "rtpmemoryobject.h"
|
#include "rtpmemoryobject.h"
|
||||||
|
|
||||||
#define RTPSOURCES_HASHSIZE 8317
|
// Number of buckets in the per-RTPSession SSRC->source hash table. This is an
|
||||||
|
// inline array of pointers in every RTPSources instance (sizeof == hashsize *
|
||||||
|
// sizeof(void*)), so it is paid by every RTPSession object regardless of how many
|
||||||
|
// sources it actually tracks. The original jrtplib default (8317) targets RTP
|
||||||
|
// mixers/conferences that demultiplex thousands of distinct SSRCs on one session;
|
||||||
|
// it costs ~65 KB per session. Sevana's per-stream capture sessions carry ~1 SSRC,
|
||||||
|
// so a far smaller table is ample - collisions are resolved by linked lists, so a
|
||||||
|
// small size only affects lookup cost (negligible at our source counts), never
|
||||||
|
// correctness. Overridable at build time for products that genuinely need many
|
||||||
|
// sources per session.
|
||||||
|
#ifndef RTPSOURCES_HASHSIZE
|
||||||
|
#define RTPSOURCES_HASHSIZE 251
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace jrtplib
|
namespace jrtplib
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user