From e7946b57e3afdf6677fed48002711d336cfd2e0d Mon Sep 17 00:00:00 2001
From: "dmytro.bogovych" <dmytro.bogovych@gmail.com>
Date: Thu, 21 Mar 2019 12:57:53 +0200
Subject: [PATCH] - fix for AMR codec - fix for .wav file write rate issue

---
 src/engine/media/MT_AmrCodec.cpp      | 71 +++++++++++++++++++--------
 src/engine/media/MT_AudioReceiver.cpp |  2 +-
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/src/engine/media/MT_AmrCodec.cpp b/src/engine/media/MT_AmrCodec.cpp
index e18d1bfc..185fe5ed 100644
--- a/src/engine/media/MT_AmrCodec.cpp
+++ b/src/engine/media/MT_AmrCodec.cpp
@@ -119,13 +119,23 @@ struct AmrPayload
 static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
 {
     AmrPayload result;
+    // Do not discard the packet by default; it is assumed valid until parsing proves otherwise.
     result.mDiscardPacket = false;
 
     // Wrap incoming data with ByteArray to make bit dequeuing easy
-    ByteBuffer dataIn(input.mPayload, input.mPayloadLength);
-    BitReader br(input.mPayload, input.mPayloadLength);
+    ByteBuffer dataIn(input.mPayload, static_cast<size_t>(input.mPayloadLength));
+    BitReader br(input.mPayload, static_cast<size_t>(input.mPayloadLength));
 
-    result.mCodeModeRequest = br.readBits(4);
+    // In bandwidth-efficient mode, the payload header simply consists of a
+    //   4-bit codec mode request:
+    // CMR (4 bits): Indicates a codec mode request sent to the speech
+    //      encoder at the site of the receiver of this payload.  The value of
+    //      the CMR field is set to the frame type index of the corresponding
+    //      speech mode being requested.  The frame type index may be 0-7 for
+    //      AMR, as defined in Table 1a in [2], or 0-8 for AMR-WB, as defined
+    //      in Table 1a in [4].  CMR value 15 indicates that no mode request
+    //      is present, and other values are for future use.
+    result.mCodeModeRequest = static_cast<uint8_t>(br.readBits(4));
     //ICELogMedia(<< "CMR: " << result.mCodeModeRequest);
 
     // Consume extra 4 bits for octet aligned profile
@@ -136,6 +146,7 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
     if (input.mInterleaving && input.mOctetAligned)
         br.readBits(8);
 
+    // Frame type index of the SID (comfort noise) frame; it differs for wideband and narrowband codecs
     uint8_t SID_FT = input.mWideband ? 9 : 8;
 
     // Table of contents
@@ -143,11 +154,29 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
 
     do
     {
+        // Read the TOC entry. It still belongs to the RTP payload packing of AMR frames, not to the AMR frame itself.
+        //    F (1 bit): If set to 1, indicates that this frame is followed by
+        // another speech frame in this payload; if set to 0, indicates that
+        // this frame is the last frame in this payload.
         F = br.readBit();
-        FT = br.readBits(4);
+
+        //    FT (4 bits): Frame type index, indicating either the AMR or AMR-WB
+        // speech coding mode or comfort noise (SID) mode of the
+        // corresponding frame carried in this payload.
+        FT = static_cast<uint8_t>(br.readBits(4));
+
+        //   Q (1 bit): Frame quality indicator.  If set to 0, indicates the
+        // corresponding frame is severely damaged, and the receiver should
+        // set the RX_TYPE (see [6]) to either SPEECH_BAD or SID_BAD
+        // depending on the frame type (FT).
         Q = br.readBit();
 
-        if (FT > SID_FT && FT < 14)
+        // If receiving a ToC entry with a FT value in the range 9-14 for AMR or
+        //   10-13 for AMR-WB, the whole packet SHOULD be discarded.  This is to
+        //   avoid the loss of data synchronization in the depacketization
+        //   process, which can result in a huge degradation in speech quality.
+        if ((input.mWideband && (FT >= 10 && FT <= 13)) ||
+            (!input.mWideband && (FT >= 9 && FT <= 14)))
         {
             ICELogMedia(<< "Discard corrupted packet");
             // Discard bad packet
@@ -161,10 +190,8 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
 
         AmrFrame frame;
         frame.mFrameType = FT;
-        if (frame.mFrameType < 10)
-            throw std::runtime_error("Failed to parse AMR frame type");
 
-        frame.mMode = FT < SID_FT ? FT : -1;
+        frame.mMode = FT < SID_FT ? FT : 0xFF;
         frame.mGoodQuality = Q == 1;
         frame.mTimestamp = input.mCurrentTimestamp;
 
@@ -174,28 +201,28 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
     }
     while (F != 0);
 
-    for (int frameIndex=0; frameIndex < (int)result.mFrames.size(); frameIndex++)
+    for (size_t frameIndex=0; frameIndex < result.mFrames.size(); frameIndex++)
     {
         AmrFrame& frame = result.mFrames[frameIndex];
-        int bitsLength = input.mWideband ? amrwb_framelenbits[frame.mFrameType] : amrnb_framelenbits[frame.mFrameType];
-        int byteLength = input.mWideband ? amrwb_framelen[frame.mFrameType] : amrnb_framelen[frame.mFrameType];
-        ICELogMedia(<< "New AMR speech frame: frame type = " << FT << ", mode = " << frame.mMode <<
-                    ", good quality = " << frame.mGoodQuality << ", timestamp = " << (int)frame.mTimestamp <<
-                    ", bits length = " << bitsLength << ", byte length =" << byteLength <<
-                    ", remaining packet length = " << (int)dataIn.size() );
+        size_t bitsLength = input.mWideband ? amrwb_framelenbits[frame.mFrameType] : amrnb_framelenbits[frame.mFrameType];
+        size_t byteLength = input.mWideband ? amrwb_framelen[frame.mFrameType] : amrnb_framelen[frame.mFrameType];
+        ICELogMedia(<< "New AMR speech frame: frame type = " << FT <<
+                    ", mode = " << frame.mMode <<
+                    ", timestamp = " << static_cast<int>(frame.mTimestamp) <<
+                    ", bits length = " << static_cast<int>(bitsLength) <<
+                    ", byte length =" << static_cast<int>(byteLength) <<
+                    ", remaining packet length = " << static_cast<int>(dataIn.size()));
 
         if (bitsLength > 0)
         {
             if (input.mOctetAligned)
             {
-                if ((int)dataIn.size() < byteLength)
-                {
+                if (dataIn.size() < byteLength)
                     frame.mGoodQuality = false;
-                }
                 else
                 {
                     // It is octet aligned scheme, so we are on byte boundary now
-                    int byteOffset = br.count() / 8;
+                    size_t byteOffset = br.count() / 8;
 
                     // Copy data of AMR frame
                     frame.mData = std::make_shared<ByteBuffer>(input.mPayload + byteOffset, byteLength);
@@ -218,9 +245,9 @@ static AmrPayload parseAmrPayload(AmrPayloadInfo& input)
     }
     // Padding bits are skipped
 
-    if (br.count() / 8 != br.position() / 8 &&
+    /*if (br.count() / 8 != br.position() / 8 &&
             br.count() / 8 != br.position() / 8 + 1)
-        throw std::runtime_error("Failed to parse AMR frame");
+        throw std::runtime_error("Failed to parse AMR frame");*/
 
     return result;
 }
@@ -662,6 +689,8 @@ int AmrWbCodec::decode(const void* input, int inputBytes, void* output, int outp
         short* dataOut = (short*)output;
         for (AmrFrame& frame: ap.mFrames)
         {
+            memset(dataOut, 0, static_cast<size_t>(pcmLength()));
+
             if (frame.mData)
             {
                 D_IF_decode(mDecoderCtx, (const unsigned char*)frame.mData->data(), (short*)dataOut, 0);
diff --git a/src/engine/media/MT_AudioReceiver.cpp b/src/engine/media/MT_AudioReceiver.cpp
index 9616392c..8473e23a 100644
--- a/src/engine/media/MT_AudioReceiver.cpp
+++ b/src/engine/media/MT_AudioReceiver.cpp
@@ -604,7 +604,7 @@ void AudioReceiver::makeMonoAndResample(int rate, int channels)
     void* frames = mConvertedLength ? mConvertedFrame : mDecodedFrame;
     unsigned length = mConvertedLength ? mConvertedLength : mDecodedLength;
 
-    Audio::Resampler* r = NULL;
+    Audio::Resampler* r = nullptr;
     switch (rate)
     {
     case 8000:     r = &mResampler8; break;