diff --git a/src/engine/media/MT_AudioReceiver.cpp b/src/engine/media/MT_AudioReceiver.cpp index 45becfce..9c2da62c 100644 --- a/src/engine/media/MT_AudioReceiver.cpp +++ b/src/engine/media/MT_AudioReceiver.cpp @@ -201,7 +201,7 @@ RtpBuffer::FetchResult RtpBuffer::fetch() // See if there is enough information in buffer auto total = findTimelength(); - while (total > mHigh && mPacketList.size() && 0ms != mHigh) + while (total > mHigh && mPacketList.size() > 1 && 0ms != mHigh) { ICELogMedia( << "Dropping RTP packets from jitter buffer"); total -= mPacketList.front()->timelength(); @@ -337,7 +337,7 @@ Receiver::~Receiver() //-------------- AudioReceiver ---------------- AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics &stat) - :Receiver(stat), mBuffer(stat), mCodecSettings(settings), mCodecList(settings) + :Receiver(stat), mBuffer(stat), mDtmfBuffer(stat), mCodecSettings(settings), mCodecList(settings), mDtmfReceiver(stat) { // Init resamplers mResampler8.start(AUDIO_CHANNELS, 8000, AUDIO_SAMPLERATE); @@ -351,6 +351,10 @@ AudioReceiver::AudioReceiver(const CodecList::Settings& settings, MT::Statistics mAvailable.setCapacity(AUDIO_SAMPLERATE * sizeof(short)); + mDtmfBuffer.setPrebuffer(0ms); + mDtmfBuffer.setLow(0ms); + mDtmfBuffer.setHigh(1ms); + #if defined(DUMP_DECODED) mDecodedDump = std::make_shared(); mDecodedDump->open("decoded.wav", 8000 /*G711*/, AUDIO_CHANNELS); @@ -431,66 +435,72 @@ bool AudioReceiver::add(const std::shared_ptr& p, Codec** de // Increase codec counter mStat.mCodecCount[ptype]++; - // Check if codec can be handled - Codec* codec = nullptr; - auto codecIter = mCodecMap.find(ptype); - if (codecIter == mCodecMap.end()) + // Check if we deal with telephone-event + if (p->GetPayloadType() == mCodecSettings.mTelephoneEvent) { - // Well, there is no information about the codec; skip this packet + *detectedCodec = nullptr; + mDtmfBuffer.add(p, 10ms, 8000); } else { - // Check if codec is creating lazily - if (!codecIter->second) + // Look for codec + // Check if codec can be handled + Codec* codec = nullptr; + auto codecIter = mCodecMap.find(ptype); + if (codecIter != mCodecMap.end()) { - codecIter->second = mCodecList.createCodecByPayloadType(ptype); + // Check if codec is creating lazily + if (!codecIter->second) + { + codecIter->second = mCodecList.createCodecByPayloadType(ptype); + } + codec = codecIter->second.get(); + + // Return pointer to codec if needed.get() + if (detectedCodec) + *detectedCodec = codec; + + if (mStat.mCodecName.empty() && codec) + mStat.mCodecName = codec->name(); + + + if (!codec) + time_length = 10; + else + if (!codec->rtpLength()) + time_length = codec->frameTime(); + else + time_length = lround(double(payloadLength) / codec->rtpLength() * codec->frameTime()); + + if (codec) + samplerate = codec->samplerate(); } - codec = codecIter->second.get(); - - // Return pointer to codec if needed.get() - if (detectedCodec) - *detectedCodec = codec; - - if (mStat.mCodecName.empty() && codec) - mStat.mCodecName = codec->name(); + // Process jitter anyway - can we decode payload or not + mJitterStats.process(p.get(), samplerate); + mStat.mJitter = static_cast(mJitterStats.get()); if (!codec) - time_length = 10; + return false; // There is no sense to add this packet into jitter buffer - we can't decode this + + // Check if packet is CNG + if (payloadLength >= 1 && payloadLength <= 6 && (ptype == 0 || ptype == 8)) + time_length = mLastPacketTimeLength ? mLastPacketTimeLength : 20; else - if (!codec->rtpLength()) - time_length = codec->frameTime(); - else - time_length = lround(double(payloadLength) / codec->rtpLength() * codec->frameTime()); + // Check if packet is too short from time length side + if (time_length < 2) + { + // It will cause statistics to report about bad RTP packet + // I have to replay last packet payload here to avoid report about lost packet + mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate); + return false; + } - if (codec) - samplerate = codec->samplerate(); + // Queue packet to buffer + auto packet = mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate).get(); + return packet; } - - // Process jitter - mJitterStats.process(p.get(), samplerate); - mStat.mJitter = static_cast(mJitterStats.get()); - - if (!codec) - return false; // There is no sense to add this packet into jitter buffer - we can't decode this - - // Check if packet is CNG - if (payloadLength >= 1 && payloadLength <= 6 && (ptype == 0 || ptype == 8)) - time_length = mLastPacketTimeLength ? mLastPacketTimeLength : 20; - else - // Check if packet is too short from time length side - if (time_length < 2) - { - // It will cause statistics to report about bad RTP packet - // I have to replay last packet payload here to avoid report about lost packet - mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate); - return false; - } - - // Queue packet to buffer - auto packet = mBuffer.add(p, std::chrono::milliseconds(time_length), samplerate).get(); - - return packet; + return {}; } void AudioReceiver::processDecoded(Audio::DataWindow& output, DecodeOptions options) @@ -764,6 +774,12 @@ AudioReceiver::DecodeResult AudioReceiver::getAudioTo(Audio::DataWindow& output, { DecodeResult result = {.mStatus = DecodeResult::Status::Skip}; + // Process RFC2833 here; it doesn't result in any audio - only callbacks and statistics + auto fr = mDtmfBuffer.fetch(); + if (fr.mPacket && fr.mStatus == RtpBuffer::FetchResult::Status::RegularPacket) + mDtmfReceiver.add(fr.mPacket->rtp()); + + auto produced = 0ms; if (mAvailable.filled() && mCodec && options.mElapsed != 0ms) { @@ -968,23 +984,22 @@ DtmfReceiver::~DtmfReceiver() void DtmfReceiver::add(const std::shared_ptr& p) { - // This receiver always work in context of single RTP stream; so there is no need to put SSRC map and so on - if (p->GetPayloadType() != 101) - return; - auto ev = DtmfBuilder::parseRfc2833({p->GetPayloadData(), p->GetPayloadLength()}); if (ev.mTone != mEvent || ev.mEnd != mEventEnded) { - // New tone is here - if (mCallback) - mCallback(ev.mTone); + if (!(mEvent == ev.mTone && !mEventEnded && ev.mEnd)) + { + // New tone is here + if (mCallback) + mCallback(ev.mTone); - // Queue statistics item - mStat.mDtmf2833Timeline.emplace_back(Dtmf2833Event{.mTone = ev.mTone, - .mTimestamp = RtpHelper::toMicroseconds(p->GetReceiveTime())}); + // Queue statistics item + mStat.mDtmf2833Timeline.emplace_back(Dtmf2833Event{.mTone = ev.mTone, + .mTimestamp = RtpHelper::toMicroseconds(p->GetReceiveTime())}); - // Store to avoid triggering on the packet - mEvent = ev.mTone; - mEventEnded = ev.mEnd; + // Store to avoid triggering on the packet + mEvent = ev.mTone; + mEventEnded = ev.mEnd; + } } } diff --git a/src/engine/media/MT_AudioReceiver.h b/src/engine/media/MT_AudioReceiver.h index 3efb8b09..0c567800 100644 --- a/src/engine/media/MT_AudioReceiver.h +++ b/src/engine/media/MT_AudioReceiver.h @@ -136,6 +136,23 @@ protected: Statistics& mStat; }; +class DtmfReceiver: public Receiver +{ +private: + char mEvent = 0; + bool mEventEnded = false; + std::chrono::milliseconds mEventStart = 0ms; + std::function mCallback; + +public: + DtmfReceiver(Statistics& stat); + ~DtmfReceiver(); + + void add(const std::shared_ptr& p); + void setCallback(std::function callback); +}; + + class AudioReceiver: public Receiver { public: @@ -189,6 +206,9 @@ public: protected: RtpBuffer mBuffer; // Jitter buffer itself + RtpBuffer mDtmfBuffer; // These two (mDtmfBuffer / mDtmfReceiver) are for our analyzer stack only; in normal softphone logic DTMF packets goes via SingleAudioStream::mDtmfReceiver + DtmfReceiver mDtmfReceiver; + CodecMap mCodecMap; PCodec mCodec; int mFrameCount = 0; @@ -247,21 +267,6 @@ protected: DecodeResult decodeEmptyTo(Audio::DataWindow& output, DecodeOptions options); }; -class DtmfReceiver: public Receiver -{ -private: - char mEvent = 0; - bool mEventEnded = false; - std::chrono::milliseconds mEventStart = 0ms; - std::function mCallback; - -public: - DtmfReceiver(Statistics& stat); - ~DtmfReceiver(); - - void add(const std::shared_ptr& p); - void setCallback(std::function callback); -}; } #endif diff --git a/src/engine/media/MT_CodecList.cpp b/src/engine/media/MT_CodecList.cpp index 2ee9282d..f4b25742 100644 --- a/src/engine/media/MT_CodecList.cpp +++ b/src/engine/media/MT_CodecList.cpp @@ -65,6 +65,8 @@ bool CodecList::Settings::contains(int ptype) const if (mGsmEfrPayloadType == ptype || mGsmFrPayloadType == ptype || mGsmHrPayloadType == ptype) return true; + if (mTelephoneEvent == ptype) + return true; return false; } @@ -122,6 +124,9 @@ std::string CodecList::Settings::toString() const if (mGsmEfrPayloadType != -1) oss << "GSM EFR ptype: " << mGsmEfrPayloadType << " "; + if (mTelephoneEvent != -1) + oss << "RFC2833 DTMF ptype: " << mTelephoneEvent; + for (auto& spec: mEvsSpec) { oss << "EVS ptype: " << spec.mPayloadType << ", bw: " << spec.mBandwidth << ", enc: " << (spec.mEncodingType == EvsSpec::Encoding_MIME ? "mime" : "g192") << " "; @@ -132,6 +137,7 @@ std::string CodecList::Settings::toString() const oss << "OPUS ptype: " << spec.mPayloadType << ", rate: " << spec.mRate << ", channels: " << spec.mChannels << std::endl; } + return oss.str(); } @@ -151,6 +157,7 @@ void CodecList::Settings::clear() mGsmEfrPayloadType = -1; mGsmFrPayloadType = -1; mGsmHrPayloadType = -1; + mTelephoneEvent = -1; } bool CodecList::Settings::EvsSpec::isValid() const @@ -268,15 +275,16 @@ CodecList::Settings CodecList::Settings::parseSdp(const std::list& } } else if (codec_name == "EVS") { r.mEvsSpec.push_back({ptype}); - } + } else if (codec_name == "TELEPHONE-EVENT") + r.mTelephoneEvent = ptype; } return r; } bool CodecList::Settings::operator == (const Settings& rhs) const { - if (std::tie(mWrapIuUP, mSkipDecode, mIsac16KPayloadType, mIsac32KPayloadType, mIlbc20PayloadType, mIlbc30PayloadType, mGsmFrPayloadType, mGsmFrPayloadLength, mGsmEfrPayloadType, mGsmHrPayloadType) != - std::tie(rhs.mWrapIuUP, rhs.mSkipDecode, rhs.mIsac16KPayloadType, rhs.mIsac32KPayloadType, rhs.mIlbc20PayloadType, rhs.mIlbc30PayloadType, rhs.mGsmFrPayloadType, rhs.mGsmFrPayloadLength, rhs.mGsmEfrPayloadType, rhs.mGsmHrPayloadType)) + if (std::tie(mWrapIuUP, mSkipDecode, mIsac16KPayloadType, mIsac32KPayloadType, mIlbc20PayloadType, mIlbc30PayloadType, mGsmFrPayloadType, mGsmFrPayloadLength, mGsmEfrPayloadType, mGsmHrPayloadType, mTelephoneEvent) != + std::tie(rhs.mWrapIuUP, rhs.mSkipDecode, rhs.mIsac16KPayloadType, rhs.mIsac32KPayloadType, rhs.mIlbc20PayloadType, rhs.mIlbc30PayloadType, rhs.mGsmFrPayloadType, rhs.mGsmFrPayloadLength, rhs.mGsmEfrPayloadType, rhs.mGsmHrPayloadType, rhs.mTelephoneEvent)) return false; if (mAmrNbOctetPayloadType != rhs.mAmrNbOctetPayloadType) @@ -306,6 +314,9 @@ bool CodecList::Settings::operator == (const Settings& rhs) const if (mOpusSpec[i] != rhs.mOpusSpec[i]) return false; + if (mTelephoneEvent != rhs.mTelephoneEvent) + return false; + return true; } diff --git a/src/engine/media/MT_CodecList.h b/src/engine/media/MT_CodecList.h index fdfa69ce..c8a107c0 100644 --- a/src/engine/media/MT_CodecList.h +++ b/src/engine/media/MT_CodecList.h @@ -28,6 +28,9 @@ public: bool mWrapIuUP = false; bool mSkipDecode = false; + // RFC2833 DTMF + int mTelephoneEvent = -1; + // AMR payload types std::set mAmrWbPayloadType = { }; std::set mAmrNbPayloadType = { }; diff --git a/src/engine/media/MT_SingleAudioStream.cpp b/src/engine/media/MT_SingleAudioStream.cpp index 3aa5702d..1c3c737a 100644 --- a/src/engine/media/MT_SingleAudioStream.cpp +++ b/src/engine/media/MT_SingleAudioStream.cpp @@ -23,7 +23,7 @@ SingleAudioStream::~SingleAudioStream() void SingleAudioStream::process(const std::shared_ptr& packet) { ICELogMedia(<< "Processing incoming RTP/RTCP packet"); - if (packet->GetPayloadType() == 101/*resip::Codec::TelephoneEvent.payloadType()*/) + if (packet->GetPayloadType() == mReceiver.getCodecSettings().mTelephoneEvent) mDtmfReceiver.add(packet); else mReceiver.add(packet);