3 files changed, 335 insertions, 28 deletions
diff --git a/dom/media/platforms/agnostic/bytestreams/Adts.cpp b/dom/media/platforms/agnostic/bytestreams/Adts.cpp
index 5f31904d9c..71c9f15308 100644
--- a/dom/media/platforms/agnostic/bytestreams/Adts.cpp
+++ b/dom/media/platforms/agnostic/bytestreams/Adts.cpp
@@ -4,37 +4,56 @@
 
 #include "Adts.h"
 #include "MediaData.h"
+#include "PlatformDecoderModule.h"
 #include "mozilla/Array.h"
 #include "mozilla/ArrayUtils.h"
+#include "mozilla/Logging.h"
+#include "ADTSDemuxer.h"
+
+extern mozilla::LazyLogModule gMediaDemuxerLog;
+#define LOG(msg, ...) \
+  MOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__)
+#define ADTSLOG(msg, ...) \
+  DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__)
+#define ADTSLOGV(msg, ...) \
+  DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, msg, ##__VA_ARGS__)
 
 namespace mozilla {
+namespace ADTS {
 
 static const int kADTSHeaderSize = 7;
 
-int8_t Adts::GetFrequencyIndex(uint32_t aSamplesPerSecond) {
-  static const uint32_t freq_lookup[] = {96000, 88200, 64000, 48000, 44100,
-                                         32000, 24000, 22050, 16000, 12000,
-                                         11025, 8000,  7350,  0};
+constexpr std::array FREQ_LOOKUP{96000, 88200, 64000, 48000, 44100,
+                                 32000, 24000, 22050, 16000, 12000,
+                                 11025, 8000,  7350};  // Valid indices: 0-12.
 
-  int8_t i = 0;
-  while (freq_lookup[i] && aSamplesPerSecond < freq_lookup[i]) {
-    i++;
-  }
+Result<uint8_t, bool> GetFrequencyIndex(uint32_t aSamplesPerSecond) {
+  auto found =
+      std::find(FREQ_LOOKUP.begin(), FREQ_LOOKUP.end(), aSamplesPerSecond);
 
-  if (!freq_lookup[i]) {
-    return -1;
+  if (found == FREQ_LOOKUP.end()) {
+    return Err(false);
   }
 
-  return i;
+  return std::distance(FREQ_LOOKUP.begin(), found);
 }
 
-bool Adts::ConvertSample(uint16_t aChannelCount, int8_t aFrequencyIndex,
-                         int8_t aProfile, MediaRawData* aSample) {
+bool ConvertSample(uint16_t aChannelCount, uint8_t aFrequencyIndex,
+                   uint8_t aProfile, MediaRawData* aSample) {
   size_t newSize = aSample->Size() + kADTSHeaderSize;
 
+  MOZ_LOG(sPDMLog, LogLevel::Debug,
+          ("Converting sample to ADTS format: newSize: %zu, ch: %u, "
+           "profile: %u, freq index: %d",
+           newSize, aChannelCount, aProfile, aFrequencyIndex));
+
   // ADTS header uses 13 bits for packet size.
-  if (newSize >= (1 << 13) || aChannelCount > 15 || aFrequencyIndex < 0 ||
-      aProfile < 1 || aProfile > 4) {
+  if (newSize >= (1 << 13) || aChannelCount > 15 || aProfile < 1 ||
+      aProfile > 4 || aFrequencyIndex >= FREQ_LOOKUP.size()) {
+    MOZ_LOG(sPDMLog, LogLevel::Debug,
+            ("Couldn't convert sample to ADTS format: newSize: %zu, ch: %u, "
+             "profile: %u, freq index: %d",
+             newSize, aChannelCount, aProfile, aFrequencyIndex));
     return false;
   }
 
@@ -66,7 +85,36 @@ bool Adts::ConvertSample(uint16_t aChannelCount, int8_t aFrequencyIndex,
   return true;
 }
 
-bool Adts::RevertSample(MediaRawData* aSample) {
+// Strips the leading ADTS header (7 bytes, or 9 with a CRC) from aSample.
+bool StripHeader(MediaRawData* aSample) {
+  if (aSample->Size() < kADTSHeaderSize) {
+    return false;
+  }
+
+  FrameHeader header;
+  auto data = Span{aSample->Data(), aSample->Size()};
+  MOZ_ASSERT(FrameHeader::MatchesSync(data),
+             "Don't attempt to strip the ADTS header of a raw AAC packet.");
+  // Parse the header: a default-constructed FrameHeader never reports a CRC.
+  const bool crcPresent = header.Parse(data) && header.mHaveCrc;
+  LOG(("Stripping ADTS, crc %spresent", crcPresent ? "" : "not "));
+  const size_t toStrip = crcPresent ? kADTSHeaderSize + 2 : kADTSHeaderSize;
+  if (aSample->Size() < toStrip) {
+    return false;
+  }
+  UniquePtr<MediaRawDataWriter> writer(aSample->CreateWriter());
+  writer->PopFront(toStrip);
+
+  // Keep the first clear-byte count in sync with the bytes removed above.
+  if (aSample->mCrypto.IsEncrypted() &&
+      aSample->mCrypto.mPlainSizes.Length() > 0 &&
+      writer->mCrypto.mPlainSizes[0] >= toStrip) {
+    writer->mCrypto.mPlainSizes[0] -= toStrip;
+  }
+  return true;
+}
+
+bool RevertSample(MediaRawData* aSample) {
   if (aSample->Size() < kADTSHeaderSize) {
     return false;
   }
@@ -91,4 +139,156 @@ bool Adts::RevertSample(MediaRawData* aSample) {
 
   return true;
 }
-}  // namespace mozilla
+
+bool FrameHeader::MatchesSync(const Span<const uint8_t>& aData) {
+  return aData.Length() >= 2 && aData[0] == 0xFF && (aData[1] & 0xF6) == 0xF0;
+}
+
+FrameHeader::FrameHeader() { Reset(); }
+
+// Returns the header size in bytes: 9 when a CRC is present, 7 otherwise.
+uint64_t FrameHeader::HeaderSize() const { return (mHaveCrc) ? 9 : 7; }
+
+bool FrameHeader::IsValid() const { return mFrameLength > 0; }
+
+// Resets the state to allow for a new parsing session.
+void FrameHeader::Reset() { PodZero(this); }
+
+// Parses the 7-byte ADTS header at the start of aData; false if short/invalid.
+bool FrameHeader::Parse(const Span<const uint8_t>& aData) {
+  if (aData.Length() < kADTSHeaderSize || !MatchesSync(aData)) {
+    return false;
+  }
+
+  // AAC has 1024 samples per frame per channel.
+  mSamples = 1024;
+
+  mHaveCrc = !(aData[1] & 0x01);
+  mObjectType = ((aData[2] & 0xC0) >> 6) + 1;
+  mSamplingIndex = (aData[2] & 0x3C) >> 2;
+  mChannelConfig = (aData[2] & 0x01) << 2 | (aData[3] & 0xC0) >> 6;
+  mFrameLength =
+      static_cast<uint32_t>((aData[3] & 0x03) << 11 | (aData[4] & 0xFF) << 3 |
+                            (aData[5] & 0xE0) >> 5);
+  mNumAACFrames = (aData[6] & 0x03) + 1;
+
+  static const uint32_t SAMPLE_RATES[] = {96000, 88200, 64000, 48000, 44100,
+                                          32000, 24000, 22050, 16000, 12000,
+                                          11025, 8000,  7350};
+  if (mSamplingIndex >= ArrayLength(SAMPLE_RATES)) {
+    LOG(("ADTS: Init() failure: invalid sample-rate index value: %" PRIu32 ".",
+         mSamplingIndex));
+    // This marks the header as invalid.
+    mFrameLength = 0;
+    return false;
+  }
+  mSampleRate = SAMPLE_RATES[mSamplingIndex];
+
+  MOZ_ASSERT(mChannelConfig < 8);
+  mChannels = (mChannelConfig == 7) ? 8 : mChannelConfig;
+
+  return true;
+}
+
+Frame::Frame() : mOffset(0), mHeader() {}
+uint64_t Frame::Offset() const { return mOffset; }
+// Returns the frame length in bytes as stored in the parsed header, or 0
+// when no valid header has been parsed.
+size_t Frame::Length() const {
+  // TODO: If fields are zero'd when invalid, this check wouldn't be
+  // necessary.
+  const bool usable = mHeader.IsValid();
+  const size_t declared = mHeader.mFrameLength;
+  return usable ? declared : 0;
+}
+
+// Returns the offset to the start of frame's raw data.
+uint64_t Frame::PayloadOffset() const { return mOffset + mHeader.HeaderSize(); }
+
+// Returns the length of the frame's raw data (excluding the header) in bytes,
+// or 0 when the header is invalid or the frame is shorter than its header.
+size_t Frame::PayloadLength() const {
+  const uint64_t headerSize = mHeader.HeaderSize();
+  // mFrameLength is read from the stream; don't let the subtraction wrap.
+  if (!mHeader.IsValid() || mHeader.mFrameLength < headerSize) {
+    return 0;
+  }
+  return mHeader.mFrameLength - headerSize;
+}
+
+// Returns the parsed frame header.
+const FrameHeader& Frame::Header() const { return mHeader; }
+
+bool Frame::IsValid() const { return mHeader.IsValid(); }
+
+// Resets the frame header and data.
+void Frame::Reset() {
+  mHeader.Reset();
+  mOffset = 0;
+}
+
+// Scans [aStart, aEnd) for an ADTS header; returns whether one was found.
+bool Frame::Parse(uint64_t aOffset, const uint8_t* aStart,
+                  const uint8_t* aEnd) {
+  MOZ_ASSERT(aStart && aEnd && aStart <= aEnd);
+
+  bool found = false;
+  const uint8_t* ptr = aStart;
+  // Only probe positions followed by more than 7 bytes. Compare distances
+  // rather than forming aEnd - 7, which is out of bounds for short buffers.
+  while (aEnd - ptr > 7 && !found) {
+    found = mHeader.Parse(Span(ptr, aEnd));
+    ptr++;
+  }
+
+  mOffset = aOffset + (static_cast<size_t>(ptr - aStart)) - 1u;
+
+  return found;
+}
+
+const Frame& FrameParser::CurrentFrame() { return mFrame; }
+
+const Frame& FrameParser::FirstFrame() const { return mFirstFrame; }
+
+void FrameParser::Reset() {
+  EndFrameSession();
+  mFirstFrame.Reset();
+}
+
+void FrameParser::EndFrameSession() { mFrame.Reset(); }
+
+// Parses the next frame and remembers the first one with a non-zero length.
+bool FrameParser::Parse(uint64_t aOffset, const uint8_t* aStart,
+                        const uint8_t* aEnd) {
+  const bool headerFound = mFrame.Parse(aOffset, aStart, aEnd);
+  const bool needFirstFrame = mFirstFrame.Length() == 0;
+  if (needFirstFrame && mFrame.Length() != 0) {
+    mFirstFrame = mFrame;
+  }
+  return headerFound;
+}
+
+// Appends the two-byte AAC AudioSpecificConfig built from the frame's header
+// (object type, sampling frequency index, channel config) to aBuffer.
+void InitAudioSpecificConfig(const ADTS::Frame& frame,
+                             MediaByteBuffer* aBuffer) {
+  const ADTS::FrameHeader& hdr = frame.Header();
+  MOZ_ASSERT(hdr.IsValid());
+
+  const int objectType = hdr.mObjectType & 0x1F;
+  const int freqIndex = hdr.mSamplingIndex;
+  const int channels = hdr.mChannelConfig & 0x0F;
+
+  // The sampling frequency index straddles the byte boundary.
+  const uint8_t config[2] = {
+      static_cast<uint8_t>(objectType << 3 | (freqIndex & 0x0E) >> 1),
+      static_cast<uint8_t>((freqIndex & 0x01) << 7 | channels << 3)};
+  aBuffer->AppendElements(config, 2);
+}
+
+}  // namespace ADTS
+}  // namespace mozilla
+
+#undef LOG
+#undef ADTSLOG
+#undef ADTSLOGV
diff --git a/dom/media/platforms/agnostic/bytestreams/Adts.h b/dom/media/platforms/agnostic/bytestreams/Adts.h
index c2b6b558b6..e6d20806ab 100644
--- a/dom/media/platforms/agnostic/bytestreams/Adts.h
+++ b/dom/media/platforms/agnostic/bytestreams/Adts.h
@@ -6,17 +6,124 @@
 #define ADTS_H_
 
 #include <stdint.h>
+#include "MediaData.h"
+#include "mozilla/Result.h"
 
 namespace mozilla {
 class MediaRawData;
 
-class Adts {
+namespace ADTS {
+
+// ADTS::FrameHeader - Holds the ADTS frame header and its parsing
+// state.
+//
+// ADTS Frame Structure
+//
+// 11111111 1111BCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP(QQQQQQQQ
+// QQQQQQQQ)
+//
+// Header consists of 7 or 9 bytes(without or with CRC).
+// Letter   Length(bits)  Description
+// { sync } 12            syncword 0xFFF, all bits must be 1
+// B        1             MPEG Version: 0 for MPEG-4, 1 for MPEG-2
+// C        2             Layer: always 0
+// D        1             protection absent, Warning, set to 1 if there is no
+//                        CRC and 0 if there is CRC
+// E        2             profile, the MPEG-4 Audio Object Type minus 1
+// F        4             MPEG-4 Sampling Frequency Index (15 is forbidden)
+// H        3             MPEG-4 Channel Configuration (in the case of 0, the
+//                        channel configuration is sent via an in-band PCE)
+// M        13            frame length, this value must include 7 or 9 bytes of
+//                        header length: FrameLength =
+//                          (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
+// O        11            Buffer fullness
+// P        2             Number of AAC frames(RDBs) in ADTS frame minus 1, for
+//                        maximum compatibility always use 1 AAC frame per ADTS
+//                        frame
+// Q        16            CRC if protection absent is 0
+class FrameHeader {
  public:
-  static int8_t GetFrequencyIndex(uint32_t aSamplesPerSecond);
-  static bool ConvertSample(uint16_t aChannelCount, int8_t aFrequencyIndex,
-                            int8_t aProfile, mozilla::MediaRawData* aSample);
-  static bool RevertSample(MediaRawData* aSample);
+  uint32_t mFrameLength{};
+  uint32_t mSampleRate{};
+  uint32_t mSamples{};
+  uint32_t mChannels{};
+  uint8_t mObjectType{};
+  uint8_t mSamplingIndex{};
+  uint8_t mChannelConfig{};
+  uint8_t mNumAACFrames{};
+  bool mHaveCrc{};
+
+  // Returns whether aData starts with a valid ADTS header sync marker.
+  static bool MatchesSync(const Span<const uint8_t>& aData);
+  FrameHeader();
+  // Header size
+  uint64_t HeaderSize() const;
+  bool IsValid() const;
+  // Resets the state to allow for a new parsing session.
+  void Reset();
+
+  // Parses the ADTS header at the start of aData; returns whether it is valid.
+  bool Parse(const Span<const uint8_t>& aData);
 };
+class Frame {
+ public:
+  Frame();
+
+  uint64_t Offset() const;
+  size_t Length() const;
+  // Returns the offset to the start of frame's raw data.
+  uint64_t PayloadOffset() const;
+
+  size_t PayloadLength() const;
+  // Returns the parsed frame header.
+  const FrameHeader& Header() const;
+  bool IsValid() const;
+  // Resets the frame header and data.
+  void Reset();
+  // Searches [aStart, aEnd) for a frame header; returns whether one was found.
+  bool Parse(uint64_t aOffset, const uint8_t* aStart, const uint8_t* aEnd);
+
+ private:
+  // The offset to the start of the header.
+  uint64_t mOffset;
+  // The currently parsed frame header.
+  FrameHeader mHeader;
+};
+
+class FrameParser {
+ public:
+  // Returns the currently parsed frame. Reset via Reset or EndFrameSession.
+  const Frame& CurrentFrame();
+  // Returns the first parsed frame. Reset via Reset.
+  const Frame& FirstFrame() const;
+  // Resets the parser. Don't use between frames as first frame data is reset.
+  void Reset();
+  // Clears the last parsed frame to allow for next frame parsing:
+  // - resets the CurrentFrame
+  // - leaves the FirstFrame as is; use Reset to clear
+  //   it as well
+  void EndFrameSession();
+  // Searches the bytes in [aStart, aEnd) for a valid frame header and returns
+  // true if one was found. aOffset is the offset corresponding to aStart and
+  // is used to compute the frame's Offset. The first frame parsed with a
+  // non-zero length is also kept as FirstFrame.
+  bool Parse(uint64_t aOffset, const uint8_t* aStart, const uint8_t* aEnd);
+
+ private:
+  // We keep the first parsed frame around for static info access and the
+  // currently parsed frame.
+  Frame mFirstFrame;
+  Frame mFrame;
+};
+
+// Extract the audiospecificconfig from an ADTS header
+void InitAudioSpecificConfig(const Frame& aFrame, MediaByteBuffer* aBuffer);
+bool StripHeader(MediaRawData* aSample);
+Result<uint8_t, bool> GetFrequencyIndex(uint32_t aSamplesPerSecond);
+bool ConvertSample(uint16_t aChannelCount, uint8_t aFrequencyIndex,
+                   uint8_t aProfile, mozilla::MediaRawData* aSample);
+bool RevertSample(MediaRawData* aSample);
+}  // namespace ADTS
 }  // namespace mozilla
 
 #endif
diff --git a/dom/media/platforms/agnostic/bytestreams/AnnexB.cpp b/dom/media/platforms/agnostic/bytestreams/AnnexB.cpp
index 086936dcc6..4721ddefc3 100644
--- a/dom/media/platforms/agnostic/bytestreams/AnnexB.cpp
+++ b/dom/media/platforms/agnostic/bytestreams/AnnexB.cpp
@@ -256,21 +256,21 @@ static Result<Ok, nsresult> FindStartCodeInternal(BufferReader& aBr) {
   while (aBr.Remaining() >= 6) {
     uint32_t x32;
     MOZ_TRY_VAR(x32, aBr.PeekU32());
-    if ((x32 - 0x01010101) & (~x32) & 0x80808080) {
-      if ((x32 >> 8) == 0x000001) {
+    if ((x32 - 0x01010101) & (~x32) & 0x80808080) {  // Has 0x00 byte(s).
+      if ((x32 >> 8) == 0x000001) {                  // 0x000001??
         return Ok();
       }
-      if (x32 == 0x000001) {
+      if ((x32 & 0xffffff) == 0x000001) {  // 0x??000001
         mozilla::Unused << aBr.Read(1);
         return Ok();
       }
-      if ((x32 & 0xff) == 0) {
+      if ((x32 & 0xff) == 0) {  // 0x??????00
         const uint8_t* p = aBr.Peek(1);
-        if ((x32 & 0xff00) == 0 && p[4] == 1) {
+        if ((x32 & 0xff00) == 0 && p[4] == 1) {  // 0x????0000,01
           mozilla::Unused << aBr.Read(2);
           return Ok();
         }
-        if (p[4] == 0 && p[5] == 1) {
+        if (p[4] == 0 && p[5] == 1) {  // 0x??????00,00,01
           mozilla::Unused << aBr.Read(3);
           return Ok();
         }