From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- dom/media/ogg/OggCodecState.cpp | 1800 ++++++++++++++++++++++++++++++++ dom/media/ogg/OggCodecState.h | 628 +++++++++++ dom/media/ogg/OggCodecStore.cpp | 31 + dom/media/ogg/OggCodecStore.h | 37 + dom/media/ogg/OggDecoder.cpp | 82 ++ dom/media/ogg/OggDecoder.h | 29 + dom/media/ogg/OggDemuxer.cpp | 2172 +++++++++++++++++++++++++++++++++++++++ dom/media/ogg/OggDemuxer.h | 363 +++++++ dom/media/ogg/OggRLBox.h | 30 + dom/media/ogg/OggRLBoxTypes.h | 17 + dom/media/ogg/OggWriter.cpp | 197 ++++ dom/media/ogg/OggWriter.h | 55 + dom/media/ogg/OpusParser.cpp | 217 ++++ dom/media/ogg/OpusParser.h | 48 + dom/media/ogg/moz.build | 32 + 15 files changed, 5738 insertions(+) create mode 100644 dom/media/ogg/OggCodecState.cpp create mode 100644 dom/media/ogg/OggCodecState.h create mode 100644 dom/media/ogg/OggCodecStore.cpp create mode 100644 dom/media/ogg/OggCodecStore.h create mode 100644 dom/media/ogg/OggDecoder.cpp create mode 100644 dom/media/ogg/OggDecoder.h create mode 100644 dom/media/ogg/OggDemuxer.cpp create mode 100644 dom/media/ogg/OggDemuxer.h create mode 100644 dom/media/ogg/OggRLBox.h create mode 100644 dom/media/ogg/OggRLBoxTypes.h create mode 100644 dom/media/ogg/OggWriter.cpp create mode 100644 dom/media/ogg/OggWriter.h create mode 100644 dom/media/ogg/OpusParser.cpp create mode 100644 dom/media/ogg/OpusParser.h create mode 100644 dom/media/ogg/moz.build (limited to 'dom/media/ogg') diff --git a/dom/media/ogg/OggCodecState.cpp b/dom/media/ogg/OggCodecState.cpp new file mode 100644 index 0000000000..c20a6a17bc --- /dev/null +++ b/dom/media/ogg/OggCodecState.cpp @@ -0,0 +1,1800 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the 
Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include + +#include "mozilla/EndianUtils.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" +#include +#include +#include + +#include "OggCodecState.h" +#include "OggRLBox.h" +#include "OpusDecoder.h" +#include "OpusParser.h" +#include "VideoUtils.h" +#include "XiphExtradata.h" +#include "nsDebug.h" +#include "opus/opus_multistream.h" + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; +#define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) + +using media::TimeUnit; + +/** Decoder base class for Ogg-encapsulated streams. */ +UniquePtr OggCodecState::Create( + rlbox_sandbox_ogg* aSandbox, tainted_opaque_ogg aPage, + uint32_t aSerial) { + NS_ASSERTION(sandbox_invoke(*aSandbox, ogg_page_bos, aPage) + .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION), + "Only call on BOS page!"); + UniquePtr codecState; + tainted_ogg aPage_t = rlbox::from_opaque(aPage); + const char codec_reason[] = + "These conditions set the type of codec. Since we are relying on " + "ogg_page to determine the codec type, the library could lie about " + "this. 
We allow this as it does not directly allow renderer " + "vulnerabilities if this is incorrect."; + long body_len = aPage_t->body_len.unverified_safe_because(codec_reason); + + if (body_len > 6 && rlbox::memcmp(*aSandbox, aPage_t->body + 1, "theora", 6u) + .unverified_safe_because(codec_reason) == 0) { + codecState = MakeUnique(aSandbox, aPage, aSerial); + } else if (body_len > 6 && + rlbox::memcmp(*aSandbox, aPage_t->body + 1, "vorbis", 6u) + .unverified_safe_because(codec_reason) == 0) { + codecState = MakeUnique(aSandbox, aPage, aSerial); + } else if (body_len > 8 && + rlbox::memcmp(*aSandbox, aPage_t->body, "OpusHead", 8u) + .unverified_safe_because(codec_reason) == 0) { + codecState = MakeUnique(aSandbox, aPage, aSerial); + } else if (body_len > 8 && + rlbox::memcmp(*aSandbox, aPage_t->body, "fishead\0", 8u) + .unverified_safe_because(codec_reason) == 0) { + codecState = MakeUnique(aSandbox, aPage, aSerial); + } else if (body_len > 5 && + rlbox::memcmp(*aSandbox, aPage_t->body, "\177FLAC", 5u) + .unverified_safe_because(codec_reason) == 0) { + codecState = MakeUnique(aSandbox, aPage, aSerial); + } else { + // Can't use MakeUnique here, OggCodecState is protected. 
+ codecState.reset(new OggCodecState(aSandbox, aPage, aSerial, false)); + } + + if (!codecState->OggCodecState::InternalInit()) { + codecState.reset(); + } + + return codecState; +} + +OggCodecState::OggCodecState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial, bool aActive) + : mPacketCount(0), + mSerial(aSerial), + mActive(aActive), + mDoneReadingHeaders(!aActive), + mSandbox(aSandbox) { + MOZ_COUNT_CTOR(OggCodecState); + tainted_ogg state = + mSandbox->malloc_in_sandbox(); + MOZ_RELEASE_ASSERT(state != nullptr); + rlbox::memset(*mSandbox, state, 0, sizeof(ogg_stream_state)); + mState = state.to_opaque(); +} + +OggCodecState::~OggCodecState() { + MOZ_COUNT_DTOR(OggCodecState); + Reset(); +#ifdef DEBUG + int ret = +#endif + sandbox_invoke(*mSandbox, ogg_stream_clear, mState) + .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION); + NS_ASSERTION(ret == 0, "ogg_stream_clear failed"); + mSandbox->free_in_sandbox(rlbox::from_opaque(mState)); + tainted_ogg nullval = nullptr; + mState = nullval.to_opaque(); +} + +nsresult OggCodecState::Reset() { + if (sandbox_invoke(*mSandbox, ogg_stream_reset, mState) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0) { + return NS_ERROR_FAILURE; + } + mPackets.Erase(); + ClearUnstamped(); + return NS_OK; +} + +void OggCodecState::ClearUnstamped() { mUnstamped.Clear(); } + +bool OggCodecState::InternalInit() { + int ret = sandbox_invoke(*mSandbox, ogg_stream_init, mState, mSerial) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); + return ret == 0; +} + +bool OggCodecState::IsValidVorbisTagName(nsCString& aName) { + // Tag names must consist of ASCII 0x20 through 0x7D, + // excluding 0x3D '=' which is the separator. 
+ uint32_t length = aName.Length(); + const char* data = aName.Data(); + for (uint32_t i = 0; i < length; i++) { + if (data[i] < 0x20 || data[i] > 0x7D || data[i] == '=') { + return false; + } + } + return true; +} + +bool OggCodecState::AddVorbisComment(UniquePtr& aTags, + const char* aComment, uint32_t aLength) { + const char* div = (const char*)memchr(aComment, '=', aLength); + if (!div) { + LOG(LogLevel::Debug, ("Skipping comment: no separator")); + return false; + } + nsCString key = nsCString(aComment, div - aComment); + if (!IsValidVorbisTagName(key)) { + LOG(LogLevel::Debug, ("Skipping comment: invalid tag name")); + return false; + } + uint32_t valueLength = aLength - (div - aComment); + nsCString value = nsCString(div + 1, valueLength); + if (!IsUtf8(value)) { + LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value")); + return false; + } + aTags->InsertOrUpdate(key, value); + return true; +} + +bool OggCodecState::SetCodecSpecificConfig(MediaByteBuffer* aBuffer, + OggPacketQueue& aHeaders) { + nsTArray headers; + nsTArray headerLens; + for (size_t i = 0; i < aHeaders.Length(); i++) { + headers.AppendElement(aHeaders[i]->packet); + headerLens.AppendElement(aHeaders[i]->bytes); + } + // Save header packets for the decoder + if (!XiphHeadersToExtradata(aBuffer, headers, headerLens)) { + return false; + } + aHeaders.Erase(); + return true; +} + +void VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket, + long aSamples) { +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + mVorbisPacketSamples[aPacket] = aSamples; +#endif +} + +void VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket, + long aSamples) { +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples, + "Decoded samples for Vorbis packet don't match expected!"); + mVorbisPacketSamples.erase(aPacket); +#endif +} + +void VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) { +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + 
NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1, + "Must have recorded packet samples"); +#endif +} + +// Clone the given packet from memory accessible to the sandboxed libOgg to +// memory accessible only to the Firefox renderer +static OggPacketPtr CloneOutOfSandbox(tainted_ogg aPacket) { + ogg_packet* clone = + aPacket.copy_and_verify([](std::unique_ptr> val) { + const char packet_reason[] = + "Packets have no guarantees on what data they hold. The renderer's " + "safety is not compromised even if packets return garbage data."; + + ogg_packet* p = new ogg_packet(); + p->bytes = val->bytes.unverified_safe_because(packet_reason); + p->b_o_s = val->b_o_s.unverified_safe_because(packet_reason); + p->e_o_s = val->e_o_s.unverified_safe_because(packet_reason); + p->granulepos = val->granulepos.unverified_safe_because(packet_reason); + p->packetno = val->packetno.unverified_safe_because(packet_reason); + if (p->bytes == 0) { + p->packet = nullptr; + } else { + p->packet = val->packet.copy_and_verify_range( + [](std::unique_ptr packet) { + return packet.release(); + }, + p->bytes); + } + return p; + }); + return OggPacketPtr(clone); +} + +void OggPacketQueue::Append(OggPacketPtr aPacket) { + nsDeque::Push(aPacket.release()); +} + +bool OggCodecState::IsPacketReady() { return !mPackets.IsEmpty(); } + +OggPacketPtr OggCodecState::PacketOut() { + if (mPackets.IsEmpty()) { + return nullptr; + } + return mPackets.PopFront(); +} + +ogg_packet* OggCodecState::PacketPeek() { + if (mPackets.IsEmpty()) { + return nullptr; + } + return mPackets.PeekFront(); +} + +void OggCodecState::PushFront(OggPacketQueue&& aOther) { + while (!aOther.IsEmpty()) { + mPackets.PushFront(aOther.Pop()); + } +} + +already_AddRefed OggCodecState::PacketOutAsMediaRawData() { + OggPacketPtr packet = PacketOut(); + if (!packet) { + return nullptr; + } + + NS_ASSERTION( + !IsHeader(packet.get()), + "PacketOutAsMediaRawData can only be called on non-header packets"); + RefPtr sample = new 
MediaRawData(packet->packet, packet->bytes); + if (packet->bytes && !sample->Data()) { + // OOM. + return nullptr; + } + + int64_t end_tstamp = Time(packet->granulepos); + NS_ASSERTION(end_tstamp >= 0, "timestamp invalid"); + + int64_t duration = PacketDuration(packet.get()); + NS_ASSERTION(duration >= 0, "duration invalid"); + + sample->mTimecode = TimeUnit::FromMicroseconds(packet->granulepos); + sample->mTime = TimeUnit::FromMicroseconds(end_tstamp - duration); + sample->mDuration = TimeUnit::FromMicroseconds(duration); + sample->mKeyframe = IsKeyframe(packet.get()); + sample->mEOS = packet->e_o_s; + + return sample.forget(); +} + +nsresult OggCodecState::PageIn(tainted_opaque_ogg aPage) { + if (!mActive) { + return NS_OK; + } + NS_ASSERTION((rlbox::sandbox_static_cast(sandbox_invoke( + *mSandbox, ogg_page_serialno, aPage)) == mSerial) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), + "Page must be for this stream!"); + if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { + return NS_ERROR_FAILURE; + } + int r; + tainted_ogg packet = mSandbox->malloc_in_sandbox(); + if (!packet) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto clean_packet = MakeScopeExit([&] { mSandbox->free_in_sandbox(packet); }); + + do { + r = sandbox_invoke(*mSandbox, ogg_stream_packetout, mState, packet) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); + if (r == 1) { + mPackets.Append(CloneOutOfSandbox(packet)); + } + } while (r != 0); + if (sandbox_invoke(*mSandbox, ogg_stream_check, mState) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) { + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +nsresult OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) { + tainted_ogg r; + aFoundGranulepos = false; + // Extract packets from the sync state until either no more packets + // come out, or we get a data packet with 
non -1 granulepos. + tainted_ogg packet = mSandbox->malloc_in_sandbox(); + if (!packet) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto clean_packet = MakeScopeExit([&] { mSandbox->free_in_sandbox(packet); }); + + do { + r = sandbox_invoke(*mSandbox, ogg_stream_packetout, mState, packet); + if (r.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 1) { + OggPacketPtr clone = CloneOutOfSandbox(packet); + if (IsHeader(clone.get())) { + // Header packets go straight into the packet queue. + mPackets.Append(std::move(clone)); + } else { + // We buffer data packets until we encounter a granulepos. We'll + // then use the granulepos to figure out the granulepos of the + // preceeding packets. + aFoundGranulepos = clone.get()->granulepos > 0; + mUnstamped.AppendElement(std::move(clone)); + } + } + } while (r.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0 && + !aFoundGranulepos); + if (sandbox_invoke(*mSandbox, ogg_stream_check, mState) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) { + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +TheoraState::TheoraState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial) + : OggCodecState(aSandbox, aBosPage, aSerial, true), + mSetup(nullptr), + mCtx(nullptr) { + MOZ_COUNT_CTOR(TheoraState); + th_info_init(&mTheoraInfo); + th_comment_init(&mComment); +} + +TheoraState::~TheoraState() { + MOZ_COUNT_DTOR(TheoraState); + th_setup_free(mSetup); + th_decode_free(mCtx); + th_comment_clear(&mComment); + th_info_clear(&mTheoraInfo); + Reset(); +} + +bool TheoraState::Init() { + if (!mActive) { + return false; + } + + int64_t n = mTheoraInfo.aspect_numerator; + int64_t d = mTheoraInfo.aspect_denominator; + + float aspectRatio = + (n == 0 || d == 0) ? 1.0f : static_cast(n) / static_cast(d); + + // Ensure the frame and picture regions aren't larger than our prescribed + // maximum, or zero sized. 
+ gfx::IntSize frame(mTheoraInfo.frame_width, mTheoraInfo.frame_height); + gfx::IntRect picture(mTheoraInfo.pic_x, mTheoraInfo.pic_y, + mTheoraInfo.pic_width, mTheoraInfo.pic_height); + gfx::IntSize display(mTheoraInfo.pic_width, mTheoraInfo.pic_height); + ScaleDisplayByAspectRatio(display, aspectRatio); + if (!IsValidVideoRegion(frame, picture, display)) { + return mActive = false; + } + + mCtx = th_decode_alloc(&mTheoraInfo, mSetup); + if (!mCtx) { + return mActive = false; + } + + // Video track's frame sizes will not overflow. Activate the video track. + mInfo.mMimeType = "video/theora"_ns; + mInfo.mDisplay = display; + mInfo.mImage = frame; + mInfo.SetImageRect(picture); + + return mActive = SetCodecSpecificConfig(mInfo.mCodecSpecificConfig, mHeaders); +} + +nsresult TheoraState::Reset() { + mHeaders.Erase(); + return OggCodecState::Reset(); +} + +bool TheoraState::DecodeHeader(OggPacketPtr aPacket) { + ogg_packet* packet = aPacket.get(); // Will be owned by mHeaders. + mHeaders.Append(std::move(aPacket)); + mPacketCount++; + int ret = th_decode_headerin(&mTheoraInfo, &mComment, &mSetup, packet); + + // We must determine when we've read the last header packet. + // th_decode_headerin() does not tell us when it's read the last header, so + // we must keep track of the headers externally. + // + // There are 3 header packets, the Identification, Comment, and Setup + // headers, which must be in that order. If they're out of order, the file + // is invalid. If we've successfully read a header, and it's the setup + // header, then we're done reading headers. The first byte of each packet + // determines it's type as follows: + // 0x80 -> Identification header + // 0x81 -> Comment header + // 0x82 -> Setup header + // See http://www.theora.org/doc/Theora.pdf Chapter 6, "Bitstream Headers", + // for more details of the Ogg/Theora containment scheme. 
+ bool isSetupHeader = packet->bytes > 0 && packet->packet[0] == 0x82; + if (ret < 0 || mPacketCount > 3) { + // We've received an error, or the first three packets weren't valid + // header packets. Assume bad input. + // Our caller will deactivate the bitstream. + return false; + } else if (ret > 0 && isSetupHeader && mPacketCount == 3) { + // Successfully read the three header packets. + mDoneReadingHeaders = true; + } + return true; +} + +int64_t TheoraState::Time(int64_t granulepos) { + if (!mActive) { + return -1; + } + return TheoraState::Time(&mTheoraInfo, granulepos); +} + +bool TheoraState::IsHeader(ogg_packet* aPacket) { + return th_packet_isheader(aPacket); +} + +#define TH_VERSION_CHECK(_info, _maj, _min, _sub) \ + (((_info)->version_major > (_maj) || (_info)->version_major == (_maj)) && \ + (((_info)->version_minor > (_min) || (_info)->version_minor == (_min)) && \ + (_info)->version_subminor >= (_sub))) + +int64_t TheoraState::Time(th_info* aInfo, int64_t aGranulepos) { + if (aGranulepos < 0 || aInfo->fps_numerator == 0) { + return -1; + } + // Implementation of th_granule_frame inlined here to operate + // on the th_info structure instead of the theora_state. + int shift = aInfo->keyframe_granule_shift; + ogg_int64_t iframe = aGranulepos >> shift; + ogg_int64_t pframe = aGranulepos - (iframe << shift); + int64_t frameno = iframe + pframe - TH_VERSION_CHECK(aInfo, 3, 2, 1); + CheckedInt64 t = + ((CheckedInt64(frameno) + 1) * USECS_PER_S) * aInfo->fps_denominator; + if (!t.isValid()) { + return -1; + } + t /= aInfo->fps_numerator; + return t.isValid() ? 
t.value() : -1; +} + +int64_t TheoraState::StartTime(int64_t granulepos) { + if (granulepos < 0 || !mActive || mTheoraInfo.fps_numerator == 0) { + return -1; + } + CheckedInt64 t = + (CheckedInt64(th_granule_frame(mCtx, granulepos)) * USECS_PER_S) * + mTheoraInfo.fps_denominator; + if (!t.isValid()) { + return -1; + } + return t.value() / mTheoraInfo.fps_numerator; +} + +int64_t TheoraState::PacketDuration(ogg_packet* aPacket) { + if (!mActive || mTheoraInfo.fps_numerator == 0) { + return -1; + } + CheckedInt64 t = SaferMultDiv(mTheoraInfo.fps_denominator, USECS_PER_S, + mTheoraInfo.fps_numerator); + return t.isValid() ? t.value() : -1; +} + +int64_t TheoraState::MaxKeyframeOffset() { + // Determine the maximum time in microseconds by which a key frame could + // offset for the theora bitstream. Theora granulepos encode time as: + // ((key_frame_number << granule_shift) + frame_offset). + // Therefore the maximum possible time by which any frame could be offset + // from a keyframe is the duration of (1 << granule_shift) - 1) frames. + int64_t frameDuration; + + // Max number of frames keyframe could possibly be offset. + int64_t keyframeDiff = (1 << mTheoraInfo.keyframe_granule_shift) - 1; + + // Length of frame in usecs. + frameDuration = + (mTheoraInfo.fps_denominator * USECS_PER_S) / mTheoraInfo.fps_numerator; + + // Total time in usecs keyframe can be offset from any given frame. 
+ return frameDuration * keyframeDiff; +} + +bool TheoraState::IsKeyframe(ogg_packet* pkt) { + // first bit of packet is 1 for header, 0 for data + // second bit of packet is 1 for inter frame, 0 for intra frame + return (pkt->bytes >= 1 && (pkt->packet[0] & 0x40) == 0x00); +} + +nsresult TheoraState::PageIn(tainted_opaque_ogg aPage) { + if (!mActive) return NS_OK; + NS_ASSERTION((rlbox::sandbox_static_cast(sandbox_invoke( + *mSandbox, ogg_page_serialno, aPage)) == mSerial) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), + "Page must be for this stream!"); + if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { + return NS_ERROR_FAILURE; + } + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) return res; + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. + ReconstructTheoraGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + OggPacketPtr packet = std::move(mUnstamped[i]); +#ifdef DEBUG + NS_ASSERTION(!IsHeader(packet.get()), + "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); +#endif + mPackets.Append(std::move(packet)); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +// Returns 1 if the Theora info struct is decoding a media of Theora +// version (maj,min,sub) or later, otherwise returns 0. +int TheoraVersion(th_info* info, unsigned char maj, unsigned char min, + unsigned char sub) { + ogg_uint32_t ver = (maj << 16) + (min << 8) + sub; + ogg_uint32_t th_ver = (info->version_major << 16) + + (info->version_minor << 8) + info->version_subminor; + return (th_ver >= ver) ? 
1 : 0; +} + +void TheoraState::ReconstructTheoraGranulepos() { + if (mUnstamped.Length() == 0) { + return; + } + ogg_int64_t lastGranulepos = mUnstamped[mUnstamped.Length() - 1]->granulepos; + NS_ASSERTION(lastGranulepos != -1, "Must know last granulepos"); + + // Reconstruct the granulepos (and thus timestamps) of the decoded + // frames. Granulepos are stored as ((keyframe<> shift; + + // The lastFrame, firstFrame, keyframe variables, as well as the frame + // variable in the loop below, store the frame number for Theora + // version >= 3.2.1 streams, and store the frame index for Theora + // version < 3.2.1 streams. + for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { + ogg_int64_t frame = firstFrame + i; + ogg_int64_t granulepos; + auto& packet = mUnstamped[i]; + bool isKeyframe = th_packet_iskeyframe(packet.get()) == 1; + + if (isKeyframe) { + granulepos = frame << shift; + keyframe = frame; + } else if (frame >= keyframe && + frame - keyframe < ((ogg_int64_t)1 << shift)) { + // (frame - keyframe) won't overflow the "offset" segment of the + // granulepos, so it's safe to calculate the granulepos. + granulepos = (keyframe << shift) + (frame - keyframe); + } else { + // (frame - keyframeno) will overflow the "offset" segment of the + // granulepos, so we take "keyframe" to be the max possible offset + // frame instead. + ogg_int64_t k = + std::max(frame - (((ogg_int64_t)1 << shift) - 1), version_3_2_1); + granulepos = (k << shift) + (frame - k); + } + // Theora 3.2.1+ granulepos store frame number [1..N], so granulepos + // should be > 0. + // Theora 3.2.0 granulepos store the frame index [0..(N-1)], so + // granulepos should be >= 0. + NS_ASSERTION(granulepos >= version_3_2_1, + "Invalid granulepos for Theora version"); + + // Check that the frame's granule number is one more than the + // previous frame's. 
+ NS_ASSERTION( + i == 0 || th_granule_frame(mCtx, granulepos) == + th_granule_frame(mCtx, mUnstamped[i - 1]->granulepos) + 1, + "Granulepos calculation is incorrect!"); + + packet->granulepos = granulepos; + } + + // Check that the second to last frame's granule number is one less than + // the last frame's (the known granule number). If not our granulepos + // recovery missed a beat. + NS_ASSERTION(mUnstamped.Length() < 2 || + (th_granule_frame( + mCtx, mUnstamped[mUnstamped.Length() - 2]->granulepos) + + 1) == th_granule_frame(mCtx, lastGranulepos), + "Granulepos recovery should catch up with packet->granulepos!"); +} + +nsresult VorbisState::Reset() { + nsresult res = NS_OK; + if (mActive && vorbis_synthesis_restart(&mDsp) != 0) { + res = NS_ERROR_FAILURE; + } + mHeaders.Erase(); + if (NS_FAILED(OggCodecState::Reset())) { + return NS_ERROR_FAILURE; + } + + mGranulepos = 0; + mPrevVorbisBlockSize = 0; + + return res; +} + +VorbisState::VorbisState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial) + : OggCodecState(aSandbox, aBosPage, aSerial, true), + mPrevVorbisBlockSize(0), + mGranulepos(0) { + MOZ_COUNT_CTOR(VorbisState); + vorbis_info_init(&mVorbisInfo); + vorbis_comment_init(&mComment); + memset(&mDsp, 0, sizeof(vorbis_dsp_state)); + memset(&mBlock, 0, sizeof(vorbis_block)); +} + +VorbisState::~VorbisState() { + MOZ_COUNT_DTOR(VorbisState); + Reset(); + vorbis_block_clear(&mBlock); + vorbis_dsp_clear(&mDsp); + vorbis_info_clear(&mVorbisInfo); + vorbis_comment_clear(&mComment); +} + +bool VorbisState::DecodeHeader(OggPacketPtr aPacket) { + ogg_packet* packet = aPacket.get(); // Will be owned by mHeaders. + mHeaders.Append(std::move(aPacket)); + mPacketCount++; + int ret = vorbis_synthesis_headerin(&mVorbisInfo, &mComment, packet); + // We must determine when we've read the last header packet. + // vorbis_synthesis_headerin() does not tell us when it's read the last + // header, so we must keep track of the headers externally. 
+ // + // There are 3 header packets, the Identification, Comment, and Setup + // headers, which must be in that order. If they're out of order, the file + // is invalid. If we've successfully read a header, and it's the setup + // header, then we're done reading headers. The first byte of each packet + // determines it's type as follows: + // 0x1 -> Identification header + // 0x3 -> Comment header + // 0x5 -> Setup header + // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I + // Specification, Chapter 4, Codec Setup and Packet Decode: + // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004 + + bool isSetupHeader = packet->bytes > 0 && packet->packet[0] == 0x5; + + if (ret < 0 || mPacketCount > 3) { + // We've received an error, or the first three packets weren't valid + // header packets. Assume bad input. Our caller will deactivate the + // bitstream. + return false; + } else if (!ret && isSetupHeader && mPacketCount == 3) { + // Successfully read the three header packets. + // The bitstream remains active. 
+ mDoneReadingHeaders = true; + } + + return true; +} + +bool VorbisState::Init() { + if (!mActive) { + return false; + } + + int ret = vorbis_synthesis_init(&mDsp, &mVorbisInfo); + if (ret != 0) { + NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream"); + return mActive = false; + } + ret = vorbis_block_init(&mDsp, &mBlock); + if (ret != 0) { + NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream"); + if (mActive) { + vorbis_dsp_clear(&mDsp); + } + return mActive = false; + } + + nsTArray headers; + nsTArray headerLens; + for (size_t i = 0; i < mHeaders.Length(); i++) { + headers.AppendElement(mHeaders[i]->packet); + headerLens.AppendElement(mHeaders[i]->bytes); + } + // Save header packets for the decoder + VorbisCodecSpecificData vorbisCodecSpecificData{}; + if (!XiphHeadersToExtradata(vorbisCodecSpecificData.mHeadersBinaryBlob, + headers, headerLens)) { + return mActive = false; + } + mHeaders.Erase(); + mInfo.mMimeType = "audio/vorbis"_ns; + mInfo.mRate = mVorbisInfo.rate; + mInfo.mChannels = mVorbisInfo.channels; + mInfo.mBitDepth = 16; + mInfo.mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(vorbisCodecSpecificData)}; + + return true; +} + +int64_t VorbisState::Time(int64_t granulepos) { + if (!mActive) { + return -1; + } + + return VorbisState::Time(&mVorbisInfo, granulepos); +} + +int64_t VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) { + if (aGranulepos == -1 || aInfo->rate == 0) { + return -1; + } + CheckedInt64 t = SaferMultDiv(aGranulepos, USECS_PER_S, aInfo->rate); + return t.isValid() ? t.value() : 0; +} + +int64_t VorbisState::PacketDuration(ogg_packet* aPacket) { + if (!mActive) { + return -1; + } + if (aPacket->granulepos == -1) { + return -1; + } + // @FIXME store these in a more stable place + if (mVorbisPacketSamples.count(aPacket) == 0) { + // We haven't seen this packet, don't know its size? 
+ return -1; + } + + long samples = mVorbisPacketSamples[aPacket]; + return Time(samples); +} + +bool VorbisState::IsHeader(ogg_packet* aPacket) { + // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05, + // i.e. the first bit is odd. Audio data packets have their first bit as 0x0. + // Any packet with its first bit set cannot be a data packet, it's a + // (possibly invalid) header packet. + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1 + return aPacket->bytes > 0 ? (aPacket->packet[0] & 0x1) : false; +} + +UniquePtr VorbisState::GetTags() { + NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!"); + NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!"); + auto tags = MakeUnique(); + for (int i = 0; i < mComment.comments; i++) { + AddVorbisComment(tags, mComment.user_comments[i], + mComment.comment_lengths[i]); + } + return tags; +} + +nsresult VorbisState::PageIn(tainted_opaque_ogg aPage) { + if (!mActive) { + return NS_OK; + } + NS_ASSERTION((rlbox::sandbox_static_cast(sandbox_invoke( + *mSandbox, ogg_page_serialno, aPage)) == mSerial) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), + "Page must be for this stream!"); + if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { + return NS_ERROR_FAILURE; + } + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) { + return res; + } + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. 
+ ReconstructVorbisGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + OggPacketPtr packet = std::move(mUnstamped[i]); + AssertHasRecordedPacketSamples(packet.get()); + NS_ASSERTION(!IsHeader(packet.get()), + "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); + mPackets.Append(std::move(packet)); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +void VorbisState::ReconstructVorbisGranulepos() { + // The number of samples in a Vorbis packet is: + // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4 + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2 + // So we maintain mPrevVorbisBlockSize, the block size of the last packet + // encountered. We also maintain mGranulepos, which is the granulepos of + // the last encountered packet. This enables us to give granulepos to + // packets when the last packet in mUnstamped doesn't have a granulepos + // (for example if the stream was truncated). + // + // We validate our prediction of the number of samples decoded when + // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted + // number of samples, and verifing we extract that many when decoding + // each packet. + + NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0"); + auto& last = mUnstamped.LastElement(); + NS_ASSERTION(last->e_o_s || last->granulepos >= 0, + "Must know last granulepos!"); + if (mUnstamped.Length() == 1) { + auto& packet = mUnstamped[0]; + long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get()); + if (blockSize < 0) { + // On failure vorbis_packet_blocksize returns < 0. If we've got + // a bad packet, we just assume that decode will have to skip this + // packet, i.e. assume 0 samples are decodable from this packet. 
+ blockSize = 0; + mPrevVorbisBlockSize = 0; + } + long samples = mPrevVorbisBlockSize / 4 + blockSize / 4; + mPrevVorbisBlockSize = blockSize; + if (packet->granulepos == -1) { + packet->granulepos = mGranulepos + samples; + } + + // Account for a partial last frame + if (packet->e_o_s && packet->granulepos >= mGranulepos) { + samples = packet->granulepos - mGranulepos; + } + + mGranulepos = packet->granulepos; + RecordVorbisPacketSamples(packet.get(), samples); + return; + } + + bool unknownGranulepos = last->granulepos == -1; + int totalSamples = 0; + for (int32_t i = mUnstamped.Length() - 1; i > 0; i--) { + auto& packet = mUnstamped[i]; + auto& prev = mUnstamped[i - 1]; + ogg_int64_t granulepos = packet->granulepos; + NS_ASSERTION(granulepos != -1, "Must know granulepos!"); + long prevBlockSize = vorbis_packet_blocksize(&mVorbisInfo, prev.get()); + long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get()); + + if (blockSize < 0 || prevBlockSize < 0) { + // On failure vorbis_packet_blocksize returns < 0. If we've got + // a bad packet, we just assume that decode will have to skip this + // packet, i.e. assume 0 samples are decodable from this packet. + blockSize = 0; + prevBlockSize = 0; + } + + long samples = prevBlockSize / 4 + blockSize / 4; + totalSamples += samples; + prev->granulepos = granulepos - samples; + RecordVorbisPacketSamples(packet.get(), samples); + } + + if (unknownGranulepos) { + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { + mUnstamped[i]->granulepos += mGranulepos + totalSamples + 1; + } + } + + auto& first = mUnstamped[0]; + long blockSize = vorbis_packet_blocksize(&mVorbisInfo, first.get()); + if (blockSize < 0) { + mPrevVorbisBlockSize = 0; + blockSize = 0; + } + + long samples = (mPrevVorbisBlockSize == 0) + ? 
0 + : mPrevVorbisBlockSize / 4 + blockSize / 4; + int64_t start = first->granulepos - samples; + RecordVorbisPacketSamples(first.get(), samples); + + if (last->e_o_s && start < mGranulepos) { + // We've calculated that there are more samples in this page than its + // granulepos claims, and it's the last page in the stream. This is legal, + // and we will need to prune the trailing samples when we come to decode it. + // We must correct the timestamps so that they follow the last Vorbis page's + // samples. + int64_t pruned = mGranulepos - start; + for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) { + mUnstamped[i]->granulepos += pruned; + } +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + mVorbisPacketSamples[last.get()] -= pruned; +#endif + } + + mPrevVorbisBlockSize = vorbis_packet_blocksize(&mVorbisInfo, last.get()); + mPrevVorbisBlockSize = std::max(static_cast(0), mPrevVorbisBlockSize); + mGranulepos = last->granulepos; +} + +OpusState::OpusState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, uint32_t aSerial) + : OggCodecState(aSandbox, aBosPage, aSerial, true), + mParser(nullptr), + mDecoder(nullptr), + mPrevPacketGranulepos(0), + mPrevPageGranulepos(0) { + MOZ_COUNT_CTOR(OpusState); +} + +OpusState::~OpusState() { + MOZ_COUNT_DTOR(OpusState); + Reset(); + + if (mDecoder) { + opus_multistream_decoder_destroy(mDecoder); + mDecoder = nullptr; + } +} + +nsresult OpusState::Reset() { return Reset(false); } + +nsresult OpusState::Reset(bool aStart) { + nsresult res = NS_OK; + + if (mActive && mDecoder) { + // Reset the decoder. + opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); + // This lets us distinguish the first page being the last page vs. just + // not having processed the previous page when we encounter the last page. + mPrevPageGranulepos = aStart ? 0 : -1; + mPrevPacketGranulepos = aStart ? 0 : -1; + } + + // Clear queued data. 
+ if (NS_FAILED(OggCodecState::Reset())) { + return NS_ERROR_FAILURE; + } + + LOG(LogLevel::Debug, ("Opus decoder reset")); + + return res; +} + +bool OpusState::Init(void) { + if (!mActive) { + return false; + } + + int error; + + NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder"); + + mDecoder = opus_multistream_decoder_create( + mParser->mRate, mParser->mChannels, mParser->mStreams, + mParser->mCoupledStreams, mParser->mMappingTable, &error); + + mInfo.mMimeType = "audio/opus"_ns; + mInfo.mRate = mParser->mRate; + mInfo.mChannels = mParser->mChannels; + mInfo.mBitDepth = 16; + // Save preskip & the first header packet for the Opus decoder + OpusCodecSpecificData opusData; + opusData.mContainerCodecDelayMicroSeconds = Time(0, mParser->mPreSkip); + + if (!mHeaders.PeekFront()) { + return false; + } + opusData.mHeadersBinaryBlob->AppendElements(mHeaders.PeekFront()->packet, + mHeaders.PeekFront()->bytes); + mInfo.mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(opusData)}; + + mHeaders.Erase(); + LOG(LogLevel::Debug, ("Opus decoder init")); + + return error == OPUS_OK; +} + +bool OpusState::DecodeHeader(OggPacketPtr aPacket) { + switch (mPacketCount++) { + // Parse the id header. + case 0: + mParser = MakeUnique(); + if (!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { + return false; + } + mHeaders.Append(std::move(aPacket)); + break; + + // Parse the metadata header. + case 1: + if (!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { + return false; + } + break; + + // We made it to the first data packet (which includes reconstructing + // timestamps for it in PageIn). Success! + default: + mDoneReadingHeaders = true; + // Put it back on the queue so we can decode it. 
+ mPackets.PushFront(std::move(aPacket)); + break; + } + return true; +} + +/* Construct and return a tags hashmap from our internal array */ +UniquePtr OpusState::GetTags() { + auto tags = MakeUnique(); + for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { + AddVorbisComment(tags, mParser->mTags[i].Data(), + mParser->mTags[i].Length()); + } + + return tags; +} + +/* Return the timestamp (in microseconds) equivalent to a granulepos. */ +int64_t OpusState::Time(int64_t aGranulepos) { + if (!mActive) { + return -1; + } + + return Time(mParser->mPreSkip, aGranulepos); +} + +int64_t OpusState::Time(int aPreSkip, int64_t aGranulepos) { + if (aGranulepos < 0) { + return -1; + } + + // Ogg Opus always runs at a granule rate of 48 kHz. + CheckedInt64 t = SaferMultDiv(aGranulepos - aPreSkip, USECS_PER_S, 48000); + return t.isValid() ? t.value() : -1; +} + +bool OpusState::IsHeader(ogg_packet* aPacket) { + return aPacket->bytes >= 16 && (!memcmp(aPacket->packet, "OpusHead", 8) || + !memcmp(aPacket->packet, "OpusTags", 8)); +} + +nsresult OpusState::PageIn(tainted_opaque_ogg aPage) { + if (!mActive) { + return NS_OK; + } + NS_ASSERTION((rlbox::sandbox_static_cast(sandbox_invoke( + *mSandbox, ogg_page_serialno, aPage)) == mSerial) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), + "Page must be for this stream!"); + if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { + return NS_ERROR_FAILURE; + } + + bool haveGranulepos; + nsresult rv = PacketOutUntilGranulepos(haveGranulepos); + if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) { + return rv; + } + if (!ReconstructOpusGranulepos()) { + return NS_ERROR_FAILURE; + } + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { + OggPacketPtr packet = std::move(mUnstamped[i]); + NS_ASSERTION(!IsHeader(packet.get()), "Don't try to play a header packet"); + NS_ASSERTION(packet->granulepos != -1, "Packet should have a granulepos"); + 
mPackets.Append(std::move(packet)); + } + mUnstamped.Clear(); + return NS_OK; +} + +// Helper method to return the change in granule position due to an Opus packet +// (as distinct from the number of samples in the packet, which depends on the +// decoder rate). It should work with a multistream Opus file, and continue to +// work should we ever allow the decoder to decode at a rate other than 48 kHz. +// It even works before we've created the actual Opus decoder. +static int GetOpusDeltaGP(ogg_packet* packet) { + int nframes; + nframes = opus_packet_get_nb_frames(packet->packet, packet->bytes); + if (nframes > 0) { + return nframes * opus_packet_get_samples_per_frame(packet->packet, 48000); + } + NS_WARNING("Invalid Opus packet."); + return nframes; +} + +int64_t OpusState::PacketDuration(ogg_packet* aPacket) { + CheckedInt64 t = SaferMultDiv(GetOpusDeltaGP(aPacket), USECS_PER_S, 48000); + return t.isValid() ? t.value() : -1; +} + +bool OpusState::ReconstructOpusGranulepos(void) { + NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); + NS_ASSERTION(mUnstamped.LastElement()->e_o_s || + mUnstamped.LastElement()->granulepos > 0, + "Must know last granulepos!"); + int64_t gp; + // If this is the last page, and we've seen at least one previous page (or + // this is the first page)... + if (mUnstamped.LastElement()->e_o_s) { + auto& last = mUnstamped.LastElement(); + if (mPrevPageGranulepos != -1) { + // If this file only has one page and the final granule position is + // smaller than the pre-skip amount, we MUST reject the stream. + if (!mDoneReadingHeaders && last->granulepos < mParser->mPreSkip) + return false; + int64_t last_gp = last->granulepos; + gp = mPrevPageGranulepos; + // Loop through the packets forwards, adding the current packet's + // duration to the previous granulepos to get the value for the + // current packet. 
+ for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { + auto& packet = mUnstamped[i]; + int offset = GetOpusDeltaGP(packet.get()); + // Check for error (negative offset) and overflow. + if (offset >= 0 && gp <= INT64_MAX - offset) { + gp += offset; + if (gp >= last_gp) { + NS_WARNING("Opus end trimming removed more than a full packet."); + // We were asked to remove a full packet's worth of data or more. + // Encoders SHOULD NOT produce streams like this, but we'll handle + // it for them anyway. + gp = last_gp; + mUnstamped.RemoveLastElements(mUnstamped.Length() - (i + 1)); + packet->e_o_s = 1; + } + } + packet->granulepos = gp; + } + mPrevPageGranulepos = last_gp; + return true; + } else { + NS_WARNING("No previous granule position to use for Opus end trimming."); + // If we don't have a previous granule position, fall through. + // We simply won't trim any samples from the end. + // TODO: Are we guaranteed to have seen a previous page if there is one? + } + } + + auto& last = mUnstamped.LastElement(); + gp = last->granulepos; + // Loop through the packets backwards, subtracting the next + // packet's duration from its granulepos to get the value + // for the current packet. + for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { + int offset = GetOpusDeltaGP(mUnstamped[i].get()); + // Check for error (negative offset) and overflow. + if (offset >= 0) { + if (offset <= gp) { + gp -= offset; + } else { + // If the granule position of the first data page is smaller than the + // number of decodable audio samples on that page, then we MUST reject + // the stream. + if (!mDoneReadingHeaders) return false; + // It's too late to reject the stream. + // If we get here, this almost certainly means the file has screwed-up + // timestamps somewhere after the first page. 
+ NS_WARNING("Clamping negative Opus granulepos to zero."); + gp = 0; + } + } + mUnstamped[i - 1]->granulepos = gp; + } + + // Check to make sure the first granule position is at least as large as the + // total number of samples decodable from the first page with completed + // packets. This requires looking at the duration of the first packet, too. + // We MUST reject such streams. + if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0].get()) > gp) { + return false; + } + mPrevPageGranulepos = last->granulepos; + return true; +} + +already_AddRefed OpusState::PacketOutAsMediaRawData() { + ogg_packet* packet = PacketPeek(); + if (!packet) { + return nullptr; + } + + uint32_t frames = 0; + const int64_t endFrame = packet->granulepos; + + if (packet->e_o_s) { + frames = GetOpusDeltaGP(packet); + } + + RefPtr data = OggCodecState::PacketOutAsMediaRawData(); + if (!data) { + return nullptr; + } + + if (data->mEOS && mPrevPacketGranulepos != -1) { + // If this is the last packet, perform end trimming. + int64_t startFrame = mPrevPacketGranulepos; + frames -= std::max( + 0, std::min(endFrame - startFrame, static_cast(frames))); + data->mDiscardPadding = frames; + } + + // Save this packet's granule position in case we need to perform end + // trimming on the next packet. 
+ mPrevPacketGranulepos = endFrame; + + return data.forget(); +} + +FlacState::FlacState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, uint32_t aSerial) + : OggCodecState(aSandbox, aBosPage, aSerial, true) {} + +bool FlacState::DecodeHeader(OggPacketPtr aPacket) { + if (mParser.DecodeHeaderBlock(aPacket->packet, aPacket->bytes).isErr()) { + return false; + } + if (mParser.HasFullMetadata()) { + mDoneReadingHeaders = true; + } + return true; +} + +int64_t FlacState::Time(int64_t granulepos) { + if (!mParser.mInfo.IsValid()) { + return -1; + } + CheckedInt64 t = SaferMultDiv(granulepos, USECS_PER_S, mParser.mInfo.mRate); + if (!t.isValid()) { + return -1; + } + return t.value(); +} + +int64_t FlacState::PacketDuration(ogg_packet* aPacket) { + return mParser.BlockDuration(aPacket->packet, aPacket->bytes); +} + +bool FlacState::IsHeader(ogg_packet* aPacket) { + auto res = mParser.IsHeaderBlock(aPacket->packet, aPacket->bytes); + return res.isOk() ? res.unwrap() : false; +} + +nsresult FlacState::PageIn(tainted_opaque_ogg aPage) { + if (!mActive) { + return NS_OK; + } + NS_ASSERTION((rlbox::sandbox_static_cast(sandbox_invoke( + *mSandbox, ogg_page_serialno, aPage)) == mSerial) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), + "Page must be for this stream!"); + if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { + return NS_ERROR_FAILURE; + } + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) { + return res; + } + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. 
+ ReconstructFlacGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + OggPacketPtr packet = std::move(mUnstamped[i]); + NS_ASSERTION(!IsHeader(packet.get()), + "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); + mPackets.Append(std::move(packet)); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +// Return a hash table with tag metadata. +UniquePtr FlacState::GetTags() { return mParser.GetTags(); } + +const TrackInfo* FlacState::GetInfo() const { return &mParser.mInfo; } + +bool FlacState::ReconstructFlacGranulepos(void) { + NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); + auto& last = mUnstamped.LastElement(); + NS_ASSERTION(last->e_o_s || last->granulepos > 0, + "Must know last granulepos!"); + int64_t gp; + + gp = last->granulepos; + // Loop through the packets backwards, subtracting the next + // packet's duration from its granulepos to get the value + // for the current packet. + for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { + int offset = + mParser.BlockDuration(mUnstamped[i]->packet, mUnstamped[i]->bytes); + // Check for error (negative offset) and overflow. + if (offset >= 0) { + if (offset <= gp) { + gp -= offset; + } else { + // If the granule position of the first data page is smaller than the + // number of decodable audio samples on that page, then we MUST reject + // the stream. + if (!mDoneReadingHeaders) { + return false; + } + // It's too late to reject the stream. + // If we get here, this almost certainly means the file has screwed-up + // timestamps somewhere after the first page. 
+ NS_WARNING("Clamping negative granulepos to zero."); + gp = 0; + } + } + mUnstamped[i - 1]->granulepos = gp; + } + + return true; +} + +SkeletonState::SkeletonState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial) + : OggCodecState(aSandbox, aBosPage, aSerial, true), + mVersion(0), + mPresentationTime(0), + mLength(0) { + MOZ_COUNT_CTOR(SkeletonState); +} + +SkeletonState::~SkeletonState() { MOZ_COUNT_DTOR(SkeletonState); } + +// Support for Ogg Skeleton 4.0, as per specification at: +// http://wiki.xiph.org/Ogg_Skeleton_4 + +// Minimum length in bytes of a Skeleton header packet. +static const long SKELETON_MIN_HEADER_LEN = 28; +static const long SKELETON_4_0_MIN_HEADER_LEN = 80; + +// Minimum length in bytes of a Skeleton 4.0 index packet. +static const long SKELETON_4_0_MIN_INDEX_LEN = 42; + +// Minimum length in bytes of a Skeleton 3.0/4.0 Fisbone packet. +static const long SKELETON_MIN_FISBONE_LEN = 52; + +// Minimum possible size of a compressed index keypoint. +static const size_t MIN_KEY_POINT_SIZE = 2; + +// Byte offset of the major and minor version numbers in the +// Ogg Skeleton 4.0 header packet. +static const size_t SKELETON_VERSION_MAJOR_OFFSET = 8; +static const size_t SKELETON_VERSION_MINOR_OFFSET = 10; + +// Byte-offsets of the presentation time numerator and denominator +static const size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12; +static const size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20; + +// Byte-offsets of the length of file field in the Skeleton 4.0 header packet. +static const size_t SKELETON_FILE_LENGTH_OFFSET = 64; + +// Byte-offsets of the fields in the Skeleton index packet. 
+static const size_t INDEX_SERIALNO_OFFSET = 6; +static const size_t INDEX_NUM_KEYPOINTS_OFFSET = 10; +static const size_t INDEX_TIME_DENOM_OFFSET = 18; +static const size_t INDEX_FIRST_NUMER_OFFSET = 26; +static const size_t INDEX_LAST_NUMER_OFFSET = 34; +static const size_t INDEX_KEYPOINT_OFFSET = 42; + +// Byte-offsets of the fields in the Skeleton Fisbone packet. +static const size_t FISBONE_MSG_FIELDS_OFFSET = 8; +static const size_t FISBONE_SERIALNO_OFFSET = 12; + +static bool IsSkeletonBOS(ogg_packet* aPacket) { + static_assert(SKELETON_MIN_HEADER_LEN >= 8, + "Minimum length of skeleton BOS header incorrect"); + return aPacket->bytes >= SKELETON_MIN_HEADER_LEN && + memcmp(reinterpret_cast(aPacket->packet), "fishead", 8) == 0; +} + +static bool IsSkeletonIndex(ogg_packet* aPacket) { + static_assert(SKELETON_4_0_MIN_INDEX_LEN >= 5, + "Minimum length of skeleton index header incorrect"); + return aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN && + memcmp(reinterpret_cast(aPacket->packet), "index", 5) == 0; +} + +static bool IsSkeletonFisbone(ogg_packet* aPacket) { + static_assert(SKELETON_MIN_FISBONE_LEN >= 8, + "Minimum length of skeleton fisbone header incorrect"); + return aPacket->bytes >= SKELETON_MIN_FISBONE_LEN && + memcmp(reinterpret_cast(aPacket->packet), "fisbone", 8) == 0; +} + +// Reads a variable length encoded integer at p. Will not read +// past aLimit. Returns pointer to character after end of integer. 
+static const unsigned char* ReadVariableLengthInt(const unsigned char* p, + const unsigned char* aLimit, + int64_t& n) { + int shift = 0; + int64_t byte = 0; + n = 0; + while (p < aLimit && (byte & 0x80) != 0x80 && shift < 57) { + byte = static_cast(*p); + n |= ((byte & 0x7f) << shift); + shift += 7; + p++; + } + return p; +} + +bool SkeletonState::DecodeIndex(ogg_packet* aPacket) { + NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN, + "Index must be at least minimum size"); + if (!mActive) { + return false; + } + + uint32_t serialno = + LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET); + int64_t numKeyPoints = + LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET); + + int64_t endTime = 0, startTime = 0; + const unsigned char* p = aPacket->packet; + + int64_t timeDenom = + LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET); + if (timeDenom == 0) { + LOG(LogLevel::Debug, ("Ogg Skeleton Index packet for stream %u has 0 " + "timestamp denominator.", + serialno)); + return (mActive = false); + } + + // Extract the start time. + int64_t timeRawInt = LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET); + CheckedInt64 t = SaferMultDiv(timeRawInt, USECS_PER_S, timeDenom); + if (!t.isValid()) { + return (mActive = false); + } else { + startTime = t.value(); + } + + // Extract the end time. + timeRawInt = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET); + t = SaferMultDiv(timeRawInt, USECS_PER_S, timeDenom); + if (!t.isValid()) { + return (mActive = false); + } else { + endTime = t.value(); + } + + // Check the numKeyPoints value read, ensure we're not going to run out of + // memory while trying to decode the index packet. 
+ CheckedInt64 minPacketSize = + (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET; + if (!minPacketSize.isValid()) { + return (mActive = false); + } + + int64_t sizeofIndex = aPacket->bytes - INDEX_KEYPOINT_OFFSET; + int64_t maxNumKeyPoints = sizeofIndex / MIN_KEY_POINT_SIZE; + if (aPacket->bytes < minPacketSize.value() || + numKeyPoints > maxNumKeyPoints || numKeyPoints < 0) { + // Packet size is less than the theoretical minimum size, or the packet is + // claiming to store more keypoints than it's capable of storing. This means + // that the numKeyPoints field is too large or small for the packet to + // possibly contain as many packets as it claims to, so the numKeyPoints + // field is possibly malicious. Don't try decoding this index, we may run + // out of memory. + LOG(LogLevel::Debug, ("Possibly malicious number of key points reported " + "(%" PRId64 ") in index packet for stream %u.", + numKeyPoints, serialno)); + return (mActive = false); + } + + UniquePtr keyPoints(new nsKeyFrameIndex(startTime, endTime)); + + p = aPacket->packet + INDEX_KEYPOINT_OFFSET; + const unsigned char* limit = aPacket->packet + aPacket->bytes; + int64_t numKeyPointsRead = 0; + CheckedInt64 offset = 0; + CheckedInt64 time = 0; + while (p < limit && numKeyPointsRead < numKeyPoints) { + int64_t delta = 0; + p = ReadVariableLengthInt(p, limit, delta); + offset += delta; + if (p == limit || !offset.isValid() || offset.value() > mLength || + offset.value() < 0) { + return (mActive = false); + } + p = ReadVariableLengthInt(p, limit, delta); + time += delta; + if (!time.isValid() || time.value() > endTime || time.value() < startTime) { + return (mActive = false); + } + CheckedInt64 timeUsecs = SaferMultDiv(time.value(), USECS_PER_S, timeDenom); + if (!timeUsecs.isValid()) { + return (mActive = false); + } + keyPoints->Add(offset.value(), timeUsecs.value()); + numKeyPointsRead++; + } + + int32_t keyPointsRead = keyPoints->Length(); + if (keyPointsRead > 0) { + 
mIndex.InsertOrUpdate(serialno, std::move(keyPoints)); + } + + LOG(LogLevel::Debug, ("Loaded %d keypoints for Skeleton on stream %u", + keyPointsRead, serialno)); + return true; +} + +nsresult SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno, + int64_t aTarget, + nsKeyPoint& aResult) { + nsKeyFrameIndex* index = nullptr; + mIndex.Get(aSerialno, &index); + + if (!index || index->Length() == 0 || aTarget < index->mStartTime || + aTarget > index->mEndTime) { + return NS_ERROR_FAILURE; + } + + // Binary search to find the last key point with time less than target. + int start = 0; + int end = index->Length() - 1; + while (end > start) { + int mid = start + ((end - start + 1) >> 1); + if (index->Get(mid).mTime == aTarget) { + start = mid; + break; + } else if (index->Get(mid).mTime < aTarget) { + start = mid; + } else { + end = mid - 1; + } + } + + aResult = index->Get(start); + NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target"); + return NS_OK; +} + +nsresult SkeletonState::IndexedSeekTarget(int64_t aTarget, + nsTArray& aTracks, + nsSeekTarget& aResult) { + if (!mActive || mVersion < SKELETON_VERSION(4, 0)) { + return NS_ERROR_FAILURE; + } + // Loop over all requested tracks' indexes, and get the keypoint for that + // seek target. Record the keypoint with the lowest offset, this will be + // our seek result. User must seek to the one with lowest offset to ensure we + // pass "keyframes" on all tracks when we decode forwards to the seek target. 
+ nsSeekTarget r; + for (uint32_t i = 0; i < aTracks.Length(); i++) { + nsKeyPoint k; + if (NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[i], aTarget, k)) && + k.mOffset < r.mKeyPoint.mOffset) { + r.mKeyPoint = k; + r.mSerial = aTracks[i]; + } + } + if (r.IsNull()) { + return NS_ERROR_FAILURE; + } + LOG(LogLevel::Debug, + ("Indexed seek target for time %" PRId64 " is offset %" PRId64, aTarget, + r.mKeyPoint.mOffset)); + aResult = r; + return NS_OK; +} + +nsresult SkeletonState::GetDuration(const nsTArray& aTracks, + int64_t& aDuration) { + if (!mActive || mVersion < SKELETON_VERSION(4, 0) || !HasIndex() || + aTracks.Length() == 0) { + return NS_ERROR_FAILURE; + } + int64_t endTime = INT64_MIN; + int64_t startTime = INT64_MAX; + for (uint32_t i = 0; i < aTracks.Length(); i++) { + nsKeyFrameIndex* index = nullptr; + mIndex.Get(aTracks[i], &index); + if (!index) { + // Can't get the timestamps for one of the required tracks, fail. + return NS_ERROR_FAILURE; + } + if (index->mEndTime > endTime) { + endTime = index->mEndTime; + } + if (index->mStartTime < startTime) { + startTime = index->mStartTime; + } + } + NS_ASSERTION(endTime > startTime, "Duration must be positive"); + CheckedInt64 duration = CheckedInt64(endTime) - startTime; + aDuration = duration.isValid() ? duration.value() : 0; + return duration.isValid() ? 
NS_OK : NS_ERROR_FAILURE; +} + +bool SkeletonState::DecodeFisbone(ogg_packet* aPacket) { + if (aPacket->bytes < static_cast(FISBONE_MSG_FIELDS_OFFSET + 4)) { + return false; + } + uint32_t offsetMsgField = + LittleEndian::readUint32(aPacket->packet + FISBONE_MSG_FIELDS_OFFSET); + + if (aPacket->bytes < static_cast(FISBONE_SERIALNO_OFFSET + 4)) { + return false; + } + uint32_t serialno = + LittleEndian::readUint32(aPacket->packet + FISBONE_SERIALNO_OFFSET); + + CheckedUint32 checked_fields_pos = + CheckedUint32(FISBONE_MSG_FIELDS_OFFSET) + offsetMsgField; + if (!checked_fields_pos.isValid() || + aPacket->bytes < static_cast(checked_fields_pos.value())) { + return false; + } + int64_t msgLength = aPacket->bytes - checked_fields_pos.value(); + char* msgProbe = (char*)aPacket->packet + checked_fields_pos.value(); + char* msgHead = msgProbe; + UniquePtr field(new MessageField()); + + const static FieldPatternType kFieldTypeMaps[] = { + {"Content-Type:", eContentType}, + {"Role:", eRole}, + {"Name:", eName}, + {"Language:", eLanguage}, + {"Title:", eTitle}, + {"Display-hint:", eDisplayHint}, + {"Altitude:", eAltitude}, + {"TrackOrder:", eTrackOrder}, + {"Track dependencies:", eTrackDependencies}}; + + bool isContentTypeParsed = false; + while (msgLength > 1) { + if (*msgProbe == '\r' && *(msgProbe + 1) == '\n') { + nsAutoCString strMsg(msgHead, msgProbe - msgHead); + for (size_t i = 0; i < ArrayLength(kFieldTypeMaps); i++) { + if (strMsg.Find(kFieldTypeMaps[i].mPatternToRecognize) != -1) { + // The content of message header fields follows [RFC2822], and the + // mandatory message field must be encoded in US-ASCII, others + // must be be encoded in UTF-8. "Content-Type" must come first + // for all of message header fields. + // See + // http://svn.annodex.net/standards/draft-pfeiffer-oggskeleton-current.txt. 
+ if (i != 0 && !isContentTypeParsed) { + return false; + } + + if ((i == 0 && IsAscii(strMsg)) || (i != 0 && IsUtf8(strMsg))) { + EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType; + Unused << field->mValuesStore.LookupOrInsertWith( + eHeaderType, [i, msgHead, msgProbe]() { + uint32_t nameLen = + strlen(kFieldTypeMaps[i].mPatternToRecognize); + return MakeUnique(msgHead + nameLen, + msgProbe - msgHead - nameLen); + }); + isContentTypeParsed = i == 0 ? true : isContentTypeParsed; + } + break; + } + } + msgProbe += 2; + msgLength -= 2; + msgHead = msgProbe; + continue; + } + msgLength--; + msgProbe++; + } + + return mMsgFieldStore.WithEntryHandle(serialno, [&](auto&& entry) { + if (entry) { + // mMsgFieldStore has an entry for serialno already. + return false; + } + entry.Insert(std::move(field)); + return true; + }); +} + +bool SkeletonState::DecodeHeader(OggPacketPtr aPacket) { + if (IsSkeletonBOS(aPacket.get())) { + uint16_t verMajor = LittleEndian::readUint16(aPacket->packet + + SKELETON_VERSION_MAJOR_OFFSET); + uint16_t verMinor = LittleEndian::readUint16(aPacket->packet + + SKELETON_VERSION_MINOR_OFFSET); + + // Read the presentation time. We read this before the version check as the + // presentation time exists in all versions. + int64_t n = LittleEndian::readInt64( + aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET); + int64_t d = LittleEndian::readInt64( + aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET); + mPresentationTime = + d == 0 ? 0 + : (static_cast(n) / static_cast(d)) * USECS_PER_S; + + mVersion = SKELETON_VERSION(verMajor, verMinor); + // We can only care to parse Skeleton version 4.0+. + if (mVersion < SKELETON_VERSION(4, 0) || + mVersion >= SKELETON_VERSION(5, 0) || + aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) { + return false; + } + + // Extract the segment length. 
+ mLength = + LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); + + LOG(LogLevel::Debug, ("Skeleton segment length: %" PRId64, mLength)); + + // Initialize the serialno-to-index map. + return true; + } + if (IsSkeletonIndex(aPacket.get()) && mVersion >= SKELETON_VERSION(4, 0)) { + return DecodeIndex(aPacket.get()); + } + if (IsSkeletonFisbone(aPacket.get())) { + return DecodeFisbone(aPacket.get()); + } + if (aPacket->e_o_s) { + mDoneReadingHeaders = true; + } + return true; +} + +#undef LOG + +} // namespace mozilla diff --git a/dom/media/ogg/OggCodecState.h b/dom/media/ogg/OggCodecState.h new file mode 100644 index 0000000000..b8a3857875 --- /dev/null +++ b/dom/media/ogg/OggCodecState.h @@ -0,0 +1,628 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggCodecState_h_) +# define OggCodecState_h_ + +# include +// For MOZ_SAMPLE_TYPE_* +# include "FlacFrameParser.h" +# include "OggRLBoxTypes.h" +# include "VideoUtils.h" +# include +# include +# include + +# include +# ifdef MOZ_TREMOR +# include +# else +# include +# endif + +// Uncomment the following to validate that we're predicting the number +// of Vorbis samples in each packet correctly. +# define VALIDATE_VORBIS_SAMPLE_CALCULATION +# ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION +# include +# endif + +struct OpusMSDecoder; + +namespace mozilla { + +inline constexpr char RLBOX_SAFE_DEBUG_ASSERTION[] = + "Tainted data is being inspected only for debugging purposes. This is not " + "a condition that is critical for safety of the renderer."; + +inline constexpr char RLBOX_OGG_STATE_ASSERT_REASON[] = + "Tainted data is being inspected only to check the internal state of " + "libogg structures. 
This is not a condition that is critical for safety of " + "the renderer."; + +inline constexpr char RLBOX_OGG_PAGE_SERIAL_REASON[] = + "We are checking the serial of the page. If libogg is operating correctly, " + "we check serial numbers to make sure the Firefox renderer is correctly " + "passing streams to the correct source. If libogg has been corrupted, it " + "could return an incorrect serial, however this would mean that an OGG " + "file has intentionally corrupted data across multiple logical streams. " + "This however cannot compromise memory safety of the renderer."; + +class OpusParser; + +struct OggPacketDeletePolicy { + void operator()(ogg_packet* aPacket) const { + delete[] aPacket->packet; + delete aPacket; + } +}; + +using OggPacketPtr = UniquePtr; + +// Deallocates a packet, used in OggPacketQueue below. +class OggPacketDeallocator : public nsDequeFunctor { + virtual void operator()(ogg_packet* aPacket) override { + OggPacketDeletePolicy()(aPacket); + } +}; + +// A queue of ogg_packets. When we read a page, we extract the page's packets +// and buffer them in the owning stream's OggCodecState. This is because +// if we're skipping up to the next keyframe in very large frame sized videos, +// there may be several megabytes of data between keyframes, and the +// ogg_stream_state would end up resizing its buffer every time we added a +// new 4KB page to the bitstream, which kills performance on Windows. This +// also gives us the option to timestamp packets rather than decoded +// frames/samples, reducing the amount of frames/samples we must decode to +// determine start-time at a particular offset, and gives us finer control +// over memory usage. 
+class OggPacketQueue : private nsDeque { + public: + OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} + ~OggPacketQueue() { Erase(); } + bool IsEmpty() { return nsDeque::GetSize() == 0; } + void Append(OggPacketPtr aPacket); + OggPacketPtr PopFront() { + return OggPacketPtr(nsDeque::PopFront()); + } + ogg_packet* PeekFront() { return nsDeque::PeekFront(); } + OggPacketPtr Pop() { return OggPacketPtr(nsDeque::Pop()); } + ogg_packet* operator[](size_t aIndex) const { + return nsDeque::ObjectAt(aIndex); + } + size_t Length() const { return nsDeque::GetSize(); } + void PushFront(OggPacketPtr aPacket) { + nsDeque::PushFront(aPacket.release()); + } + void Erase() { nsDeque::Erase(); } +}; + +// Encapsulates the data required for decoding an ogg bitstream and for +// converting granulepos to timestamps. +class OggCodecState { + public: + typedef mozilla::MetadataTags MetadataTags; + // Ogg types we know about + enum CodecType { + TYPE_VORBIS = 0, + TYPE_THEORA, + TYPE_OPUS, + TYPE_SKELETON, + TYPE_FLAC, + TYPE_UNKNOWN + }; + + virtual ~OggCodecState(); + + // Factory for creating nsCodecStates. Use instead of constructor. + // aPage should be a beginning-of-stream page. + static UniquePtr Create(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aPage, + uint32_t aSerial); + + virtual CodecType GetType() { return TYPE_UNKNOWN; } + + // Reads a header packet. Returns false if an error was encountered + // while reading header packets. Callers should check DoneReadingHeaders() + // to determine if the last header has been read. + // This function takes ownership of the packet and is responsible for + // releasing it or queuing it for later processing. + virtual bool DecodeHeader(OggPacketPtr aPacket) { + return (mDoneReadingHeaders = true); + } + + // Build a hash table with tag metadata parsed from the stream. + virtual UniquePtr GetTags() { return nullptr; } + + // Returns the end time that a granulepos represents. 
+ virtual int64_t Time(int64_t granulepos) { return -1; } + + // Returns the start time that a granulepos represents. + virtual int64_t StartTime(int64_t granulepos) { return -1; } + + // Returns the duration of the given packet, if it can be determined. + virtual int64_t PacketDuration(ogg_packet* aPacket) { return -1; } + + // Returns the start time of the given packet, if it can be determined. + virtual int64_t PacketStartTime(ogg_packet* aPacket) { + if (aPacket->granulepos < 0) { + return -1; + } + int64_t endTime = Time(aPacket->granulepos); + int64_t duration = PacketDuration(aPacket); + if (duration > endTime) { + // Audio preskip may eat a whole packet or more. + return 0; + } else { + return endTime - duration; + } + } + + // Initializes the codec state. + virtual bool Init() { return true; } + + // Returns true when this bitstream has finished reading all its + // header packets. + bool DoneReadingHeaders() { return mDoneReadingHeaders; } + + // Deactivates the bitstream. Only the primary video and audio bitstreams + // should be active. + void Deactivate() { + mActive = false; + mDoneReadingHeaders = true; + Reset(); + } + + // Resets decoding state. + virtual nsresult Reset(); + + // Returns true if the OggCodecState thinks this packet is a header + // packet. Note this does not verify the validity of the header packet, + // it just guarantees that the packet is marked as a header packet (i.e. + // it is definitely not a data packet). Do not use this to identify + // streams, use it to filter header packets from data packets while + // decoding. + virtual bool IsHeader(ogg_packet* aPacket) { return false; } + + // Returns true if the OggCodecState thinks this packet represents a + // keyframe, from which decoding can restart safely. + virtual bool IsKeyframe(ogg_packet* aPacket) { return true; } + + // Returns true if there is a packet available for dequeueing in the stream. 
+ bool IsPacketReady(); + + // Returns the next raw packet in the stream, or nullptr if there are no more + // packets buffered in the packet queue. More packets can be buffered by + // inserting one or more pages into the stream by calling PageIn(). + // The packet will have a valid granulepos. + OggPacketPtr PacketOut(); + + // Returns the next raw packet in the stream, or nullptr if there are no more + // packets buffered in the packet queue, without consuming it. + // The packet will have a valid granulepos. + ogg_packet* PacketPeek(); + + // Moves all raw packets from aOther to the front of the current packet queue. + void PushFront(OggPacketQueue&& aOther); + + // Returns the next packet in the stream as a MediaRawData, or nullptr + // if there are no more packets buffered in the packet queue. More packets + // can be buffered by inserting one or more pages into the stream by calling + // PageIn(). The packet will have a valid granulepos. + virtual already_AddRefed PacketOutAsMediaRawData(); + + // Extracts all packets from the page, and inserts them into the packet + // queue. They can be extracted by calling PacketOut(). Packets from an + // inactive stream are not buffered, i.e. this call has no effect for + // inactive streams. Multiple pages may need to be inserted before + // PacketOut() starts to return packets, as granulepos may need to be + // captured. + virtual nsresult PageIn(tainted_opaque_ogg aPage); + + // Returns the maximum number of microseconds which a keyframe can be offset + // from any given interframe. + virtual int64_t MaxKeyframeOffset() { return 0; } + // Public access for mTheoraInfo.keyframe_granule_shift + virtual int32_t KeyFrameGranuleJobs() { return 0; } + + // Number of packets read. + uint64_t mPacketCount; + + // Serial number of the bitstream. + uint32_t mSerial; + + // Ogg specific state. + tainted_opaque_ogg mState; + + // Queue of as yet undecoded packets. Packets are guaranteed to have + // a valid granulepos. 
+ OggPacketQueue mPackets; + + // Is the bitstream active; whether we're decoding and playing this bitstream. + bool mActive; + + // True when all headers packets have been read. + bool mDoneReadingHeaders; + + // All invocations of libogg functionality from the demuxer is sandboxed using + // wasm library sandboxes on supported platforms. This is the sandbox + // instance. + rlbox_sandbox_ogg* mSandbox; + + virtual const TrackInfo* GetInfo() const { + MOZ_RELEASE_ASSERT(false, "Can't be called directly"); + return nullptr; + } + + // Validation utility for vorbis-style tag names. + static bool IsValidVorbisTagName(nsCString& aName); + + // Utility method to parse and add a vorbis-style comment + // to a metadata hash table. Most Ogg-encapsulated codecs + // use the vorbis comment format for metadata. + static bool AddVorbisComment(UniquePtr& aTags, + const char* aComment, uint32_t aLength); + + protected: + // Constructs a new OggCodecState. aActive denotes whether the stream is + // active. For streams of unsupported or unknown types, aActive should be + // false. + OggCodecState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, uint32_t aSerial, + bool aActive); + + // Deallocates all packets stored in mUnstamped, and clears the array. + void ClearUnstamped(); + + // Extracts packets out of mState until a data packet with a non -1 + // granulepos is encountered, or no more packets are readable. Header + // packets are pushed into the packet queue immediately, and data packets + // are buffered in mUnstamped. Once a non -1 granulepos packet is read + // the granulepos of the packets in mUnstamped can be inferred, and they + // can be pushed over to mPackets. Used by PageIn() implementations in + // subclasses. + nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); + + // Temporary buffer in which to store packets while we're reading packets + // in order to capture granulepos. 
+ nsTArray mUnstamped; + + bool SetCodecSpecificConfig(MediaByteBuffer* aBuffer, + OggPacketQueue& aHeaders); + + private: + bool InternalInit(); +}; + +class VorbisState : public OggCodecState { + public: + explicit VorbisState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial); + virtual ~VorbisState(); + + CodecType GetType() override { return TYPE_VORBIS; } + bool DecodeHeader(OggPacketPtr aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + nsresult Reset() override; + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(tainted_opaque_ogg aPage) override; + const TrackInfo* GetInfo() const override { return &mInfo; } + + // Return a hash table with tag metadata. + UniquePtr GetTags() override; + + private: + AudioInfo mInfo; + vorbis_info mVorbisInfo; + vorbis_comment mComment; + vorbis_dsp_state mDsp; + vorbis_block mBlock; + OggPacketQueue mHeaders; + + // Returns the end time that a granulepos represents. + static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); + + // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped + // array. + void ReconstructVorbisGranulepos(); + + // The "block size" of the previously decoded Vorbis packet, or 0 if we've + // not yet decoded anything. This is used to calculate the number of samples + // in a Vorbis packet, since each Vorbis packet depends on the previous + // packet while being decoded. + long mPrevVorbisBlockSize; + + // Granulepos (end sample) of the last decoded Vorbis packet. This is used + // to calculate the Vorbis granulepos when we don't find a granulepos to + // back-propagate from. 
+ int64_t mGranulepos; + +# ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + // When validating that we've correctly predicted Vorbis packets' number + // of samples, we store each packet's predicted number of samples in this + // map, and verify we decode the predicted number of samples. + std::map mVorbisPacketSamples; +# endif + + // Records that aPacket is predicted to have aSamples samples. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); + + // Verifies that aPacket has had its number of samples predicted. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void AssertHasRecordedPacketSamples(ogg_packet* aPacket); + + public: + // Asserts that the number of samples predicted for aPacket is aSamples. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); +}; + +// Returns 1 if the Theora info struct is decoding a media of Theora +// version (maj,min,sub) or later, otherwise returns 0. 
+int TheoraVersion(th_info* info, unsigned char maj, unsigned char min, + unsigned char sub); + +class TheoraState : public OggCodecState { + public: + explicit TheoraState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial); + virtual ~TheoraState(); + + CodecType GetType() override { return TYPE_THEORA; } + bool DecodeHeader(OggPacketPtr aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t StartTime(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + nsresult Reset() override; + bool IsHeader(ogg_packet* aPacket) override; + bool IsKeyframe(ogg_packet* aPacket) override; + nsresult PageIn(tainted_opaque_ogg aPage) override; + const TrackInfo* GetInfo() const override { return &mInfo; } + int64_t MaxKeyframeOffset() override; + int32_t KeyFrameGranuleJobs() override { + return mTheoraInfo.keyframe_granule_shift; + } + + private: + // Returns the end time that a granulepos represents. + static int64_t Time(th_info* aInfo, int64_t aGranulePos); + + th_info mTheoraInfo; + th_comment mComment; + th_setup_info* mSetup; + th_dec_ctx* mCtx; + + VideoInfo mInfo; + OggPacketQueue mHeaders; + + // Reconstructs the granulepos of Theora packets stored in the + // mUnstamped array. mUnstamped must be filled with consecutive packets from + // the stream, with the last packet having a known granulepos. Using this + // known granulepos, and the known frame numbers, we recover the granulepos + // of all frames in the array. This enables us to determine their timestamps. 
+ void ReconstructTheoraGranulepos(); +}; + +class OpusState : public OggCodecState { + public: + explicit OpusState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, uint32_t aSerial); + virtual ~OpusState(); + + CodecType GetType() override { return TYPE_OPUS; } + bool DecodeHeader(OggPacketPtr aPacket) override; + int64_t Time(int64_t aGranulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + nsresult Reset() override; + nsresult Reset(bool aStart); + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(tainted_opaque_ogg aPage) override; + already_AddRefed PacketOutAsMediaRawData() override; + const TrackInfo* GetInfo() const override { return &mInfo; } + + // Returns the end time that a granulepos represents. + static int64_t Time(int aPreSkip, int64_t aGranulepos); + + // Construct and return a table of tags from the metadata header. + UniquePtr GetTags() override; + + private: + UniquePtr mParser; + OpusMSDecoder* mDecoder; + + // Granule position (end sample) of the last decoded Opus packet. This is + // used to calculate the amount we should trim from the last packet. + int64_t mPrevPacketGranulepos; + + // Reconstructs the granulepos of Opus packets stored in the + // mUnstamped array. mUnstamped must be filled with consecutive packets from + // the stream, with the last packet having a known granulepos. Using this + // known granulepos, and the known frame numbers, we recover the granulepos + // of all frames in the array. This enables us to determine their timestamps. + bool ReconstructOpusGranulepos(); + + // Granule position (end sample) of the last decoded Opus page. This is + // used to calculate the Opus per-packet granule positions on the last page, + // where we may need to trim some samples from the end. + int64_t mPrevPageGranulepos; + AudioInfo mInfo; + OggPacketQueue mHeaders; +}; + +// Constructs a 32bit version number out of two 16 bit major,minor +// version numbers. 
+# define SKELETON_VERSION(major, minor) (((major) << 16) | (minor)) + +enum EMsgHeaderType { + eContentType, + eRole, + eName, + eLanguage, + eTitle, + eDisplayHint, + eAltitude, + eTrackOrder, + eTrackDependencies +}; + +struct FieldPatternType { + const char* mPatternToRecognize; + EMsgHeaderType mMsgHeaderType; +}; + +// Stores the message information for different logical bitstream. +struct MessageField { + nsClassHashtable mValuesStore; +}; + +class SkeletonState : public OggCodecState { + public: + explicit SkeletonState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, + uint32_t aSerial); + ~SkeletonState(); + + nsClassHashtable mMsgFieldStore; + + CodecType GetType() override { return TYPE_SKELETON; } + bool DecodeHeader(OggPacketPtr aPacket) override; + int64_t Time(int64_t granulepos) override { return -1; } + bool IsHeader(ogg_packet* aPacket) override { return true; } + + // Return true if the given time (in milliseconds) is within + // the presentation time defined in the skeleton track. + bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } + + // Stores the offset of the page on which a keyframe starts, + // and its presentation time. + class nsKeyPoint { + public: + nsKeyPoint() : mOffset(INT64_MAX), mTime(INT64_MAX) {} + + nsKeyPoint(int64_t aOffset, int64_t aTime) + : mOffset(aOffset), mTime(aTime) {} + + // Offset from start of segment/link-in-the-chain in bytes. + int64_t mOffset; + + // Presentation time in usecs. + int64_t mTime; + + bool IsNull() { return mOffset == INT64_MAX && mTime == INT64_MAX; } + }; + + // Stores a keyframe's byte-offset, presentation time and the serialno + // of the stream it belongs to. 
+ class nsSeekTarget { + public: + nsSeekTarget() : mSerial(0) {} + nsKeyPoint mKeyPoint; + uint32_t mSerial; + bool IsNull() { return mKeyPoint.IsNull() && mSerial == 0; } + }; + + // Determines from the seek index the keyframe which you must seek back to + // in order to get all keyframes required to render all streams with + // serialnos in aTracks, at time aTarget. + nsresult IndexedSeekTarget(int64_t aTarget, nsTArray& aTracks, + nsSeekTarget& aResult); + + bool HasIndex() const { return mIndex.Count() > 0; } + + // Returns the duration of the active tracks in the media, if we have + // an index. aTracks must be filled with the serialnos of the active tracks. + // The duration is calculated as the greatest end time of all active tracks, + // minus the smallest start time of all the active tracks. + nsresult GetDuration(const nsTArray& aTracks, int64_t& aDuration); + + private: + // Decodes an index packet. Returns false on failure. + bool DecodeIndex(ogg_packet* aPacket); + // Decodes a fisbone packet. Returns false on failure. + bool DecodeFisbone(ogg_packet* aPacket); + + // Gets the keypoint you must seek to in order to get the keyframe required + // to render the stream at time aTarget on stream with serial aSerialno. + nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, int64_t aTarget, + nsKeyPoint& aResult); + + // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. + uint32_t mVersion; + + // Presentation time of the resource in milliseconds + int64_t mPresentationTime; + + // Length of the resource in bytes. + int64_t mLength; + + // Stores the keyframe index and duration information for a particular + // stream. 
+ class nsKeyFrameIndex { + public: + nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) + : mStartTime(aStartTime), mEndTime(aEndTime) { + MOZ_COUNT_CTOR(nsKeyFrameIndex); + } + + MOZ_COUNTED_DTOR(nsKeyFrameIndex) + + void Add(int64_t aOffset, int64_t aTimeMs) { + mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); + } + + const nsKeyPoint& Get(uint32_t aIndex) const { return mKeyPoints[aIndex]; } + + uint32_t Length() const { return mKeyPoints.Length(); } + + // Presentation time of the first sample in this stream in usecs. + const int64_t mStartTime; + + // End time of the last sample in this stream in usecs. + const int64_t mEndTime; + + private: + nsTArray mKeyPoints; + }; + + // Maps Ogg serialnos to the index-keypoint list. + nsClassHashtable mIndex; +}; + +class FlacState : public OggCodecState { + public: + explicit FlacState(rlbox_sandbox_ogg* aSandbox, + tainted_opaque_ogg aBosPage, uint32_t aSerial); + + CodecType GetType() override { return TYPE_FLAC; } + bool DecodeHeader(OggPacketPtr aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(tainted_opaque_ogg aPage) override; + + // Return a hash table with tag metadata. + UniquePtr GetTags() override; + + const TrackInfo* GetInfo() const override; + + private: + bool ReconstructFlacGranulepos(void); + + FlacFrameParser mParser; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggCodecStore.cpp b/dom/media/ogg/OggCodecStore.cpp new file mode 100644 index 0000000000..ef3498adec --- /dev/null +++ b/dom/media/ogg/OggCodecStore.cpp @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/DebugOnly.h" + +#include "OggCodecStore.h" + +namespace mozilla { + +OggCodecStore::OggCodecStore() : mMonitor("CodecStore") {} + +OggCodecState* OggCodecStore::Add(uint32_t serial, + UniquePtr codecState) { + MonitorAutoLock mon(mMonitor); + return mCodecStates.InsertOrUpdate(serial, std::move(codecState)).get(); +} + +bool OggCodecStore::Contains(uint32_t serial) { + MonitorAutoLock mon(mMonitor); + return mCodecStates.Get(serial, nullptr); +} + +OggCodecState* OggCodecStore::Get(uint32_t serial) { + MonitorAutoLock mon(mMonitor); + return mCodecStates.Get(serial); +} + +} // namespace mozilla diff --git a/dom/media/ogg/OggCodecStore.h b/dom/media/ogg/OggCodecStore.h new file mode 100644 index 0000000000..bcde8bed00 --- /dev/null +++ b/dom/media/ogg/OggCodecStore.h @@ -0,0 +1,37 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggCodecStore_h_) +# define OggCodecStore_h_ + +# include + +# include "OggCodecState.h" +# include "VideoUtils.h" +# include "mozilla/Monitor.h" + +namespace mozilla { + +// Thread safe container to store the codec information and the serial for each +// streams. +class OggCodecStore { + public: + OggCodecStore(); + OggCodecState* Add(uint32_t serial, UniquePtr codecState); + bool Contains(uint32_t serial); + OggCodecState* Get(uint32_t serial); + bool IsKnownStream(uint32_t aSerial); + + private: + // Maps Ogg serialnos to OggStreams. + nsClassHashtable mCodecStates; + + // Protects the |mCodecStates| and the |mKnownStreams| members. 
+ Monitor mMonitor MOZ_UNANNOTATED; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggDecoder.cpp b/dom/media/ogg/OggDecoder.cpp new file mode 100644 index 0000000000..5f6d61f694 --- /dev/null +++ b/dom/media/ogg/OggDecoder.cpp @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "OggDecoder.h" +#include "MediaContainerType.h" +#include "MediaDecoder.h" +#include "mozilla/StaticPrefs_media.h" +#include "nsMimeTypes.h" + +namespace mozilla { + +/* static */ +bool OggDecoder::IsSupportedType(const MediaContainerType& aContainerType) { + if (!StaticPrefs::media_ogg_enabled()) { + return false; + } + + if (aContainerType.Type() != MEDIAMIMETYPE(AUDIO_OGG) && + aContainerType.Type() != MEDIAMIMETYPE(VIDEO_OGG) && + aContainerType.Type() != MEDIAMIMETYPE("application/ogg")) { + return false; + } + + const bool isOggVideo = (aContainerType.Type() != MEDIAMIMETYPE(AUDIO_OGG)); + + const MediaCodecs& codecs = aContainerType.ExtendedType().Codecs(); + if (codecs.IsEmpty()) { + // Ogg guarantees that the only codecs it contained are supported. + return true; + } + // Verify that all the codecs specified are ones that we expect that + // we can play. + for (const auto& codec : codecs.Range()) { + if ((MediaDecoder::IsOpusEnabled() && codec.EqualsLiteral("opus")) || + codec.EqualsLiteral("vorbis") || codec.EqualsLiteral("flac")) { + continue; + } + // Note: Only accept Theora in a video container type, not in an audio + // container type. + if (isOggVideo && codec.EqualsLiteral("theora")) { + continue; + } + // Some unsupported codec. 
+ return false; + } + return true; +} + +/* static */ +nsTArray> OggDecoder::GetTracksInfo( + const MediaContainerType& aType) { + nsTArray> tracks; + if (!IsSupportedType(aType)) { + return tracks; + } + + const MediaCodecs& codecs = aType.ExtendedType().Codecs(); + if (codecs.IsEmpty()) { + // Codecs must be specified for ogg as it can't be implied. + return tracks; + } + + for (const auto& codec : codecs.Range()) { + if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("vorbis") || + codec.EqualsLiteral("flac")) { + tracks.AppendElement( + CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "audio/"_ns + NS_ConvertUTF16toUTF8(codec), aType)); + } else { + MOZ_ASSERT(codec.EqualsLiteral("theora")); + tracks.AppendElement( + CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "video/"_ns + NS_ConvertUTF16toUTF8(codec), aType)); + } + } + return tracks; +} + +} // namespace mozilla diff --git a/dom/media/ogg/OggDecoder.h b/dom/media/ogg/OggDecoder.h new file mode 100644 index 0000000000..95e8663746 --- /dev/null +++ b/dom/media/ogg/OggDecoder.h @@ -0,0 +1,29 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggDecoder_h_) +# define OggDecoder_h_ + +# include "mozilla/UniquePtr.h" +# include "nsTArray.h" + +namespace mozilla { + +class MediaContainerType; +class TrackInfo; + +class OggDecoder { + public: + // Returns true if aContainerType is an Ogg type that we think we can render + // with an enabled platform decoder backend. + // If provided, codecs are checked for support. 
+ static bool IsSupportedType(const MediaContainerType& aContainerType); + static nsTArray> GetTracksInfo( + const MediaContainerType& aType); +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggDemuxer.cpp b/dom/media/ogg/OggDemuxer.cpp new file mode 100644 index 0000000000..2d1fdd3097 --- /dev/null +++ b/dom/media/ogg/OggDemuxer.cpp @@ -0,0 +1,2172 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "OggDemuxer.h" +#include "OggRLBox.h" +#include "MediaDataDemuxer.h" +#include "OggCodecState.h" +#include "XiphExtradata.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/Atomics.h" +#include "mozilla/PodOperations.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/SchedulerGroup.h" +#include "mozilla/SharedThreadPool.h" +#include "mozilla/Telemetry.h" +#include "mozilla/TimeStamp.h" +#ifdef MOZ_WASM_SANDBOXING_OGG +# include "mozilla/ipc/LibrarySandboxPreload.h" +#endif +#include "nsAutoRef.h" +#include "nsError.h" + +#include + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define OGG_DEBUG(arg, ...) \ + DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \ + __func__, ##__VA_ARGS__) + +// Un-comment to enable logging of seek bisections. 
+// #define SEEK_LOGGING +#ifdef SEEK_LOGGING +# define SEEK_LOG(type, msg) MOZ_LOG(gMediaDemuxerLog, type, msg) +#else +# define SEEK_LOG(type, msg) +#endif + +// Calls copy_and_verify on the tainted value |t|. The verifier sets |*failed| +// to true when |cond| does not hold, and returns the copied value |val| +// either way. +#define CopyAndVerifyOrFail(t, cond, failed) \ + (t).copy_and_verify([&](auto val) { \ + if (!(cond)) { \ + *(failed) = true; \ + } \ + return val; \ + }) + +namespace mozilla { + +using media::TimeInterval; +using media::TimeIntervals; +using media::TimeUnit; + +// The number of microseconds of "fuzz" we use in a bisection search over +// HTTP. When we're seeking with fuzz, we'll stop the search if a bisection +// lands between the seek target and OGG_SEEK_FUZZ_USECS microseconds before the +// seek target. This is because it's usually quicker to just keep downloading +// from an existing connection than to do another bisection inside that +// small range, which would open a new HTTP connection. +static const uint32_t OGG_SEEK_FUZZ_USECS = 500000; + +// The number of microseconds of "pre-roll" we use for Opus streams. +// The specification recommends 80 ms. 
+static const TimeUnit OGG_SEEK_OPUS_PREROLL = TimeUnit::FromMicroseconds(80000); + +static Atomic sStreamSourceID(0u); + +OggDemuxer::nsAutoOggSyncState::nsAutoOggSyncState(rlbox_sandbox_ogg* aSandbox) + : mSandbox(aSandbox) { + if (mSandbox) { + tainted_ogg state = + mSandbox->malloc_in_sandbox(); + MOZ_RELEASE_ASSERT(state != nullptr); + mState = state.to_opaque(); + sandbox_invoke(*mSandbox, ogg_sync_init, mState); + } +} +OggDemuxer::nsAutoOggSyncState::~nsAutoOggSyncState() { + if (mSandbox) { + sandbox_invoke(*mSandbox, ogg_sync_clear, mState); + mSandbox->free_in_sandbox(rlbox::from_opaque(mState)); + tainted_ogg null = nullptr; + mState = null.to_opaque(); + } +} + +/* static */ +rlbox_sandbox_ogg* OggDemuxer::CreateSandbox() { + rlbox_sandbox_ogg* sandbox = new rlbox_sandbox_ogg(); +#ifdef MOZ_WASM_SANDBOXING_OGG + bool success = sandbox->create_sandbox(false /* infallible */); +#else + bool success = sandbox->create_sandbox(); +#endif + if (!success) { + delete sandbox; + sandbox = nullptr; + } + return sandbox; +} + +void OggDemuxer::SandboxDestroy::operator()(rlbox_sandbox_ogg* sandbox) { + if (sandbox) { + sandbox->destroy_sandbox(); + delete sandbox; + } +} + +// Return the corresponding category in aKind based on the following specs. 
+// (https://www.whatwg.org/specs/web-apps/current- +// work/multipage/embedded-content.html#dom-audiotrack-kind) & +// (http://wiki.xiph.org/SkeletonHeaders) +const nsString OggDemuxer::GetKind(const nsCString& aRole) { + if (aRole.Find("audio/main") != -1 || aRole.Find("video/main") != -1) { + return u"main"_ns; + } else if (aRole.Find("audio/alternate") != -1 || + aRole.Find("video/alternate") != -1) { + return u"alternative"_ns; + } else if (aRole.Find("audio/audiodesc") != -1) { + return u"descriptions"_ns; + } else if (aRole.Find("audio/described") != -1) { + return u"main-desc"_ns; + } else if (aRole.Find("audio/dub") != -1) { + return u"translation"_ns; + } else if (aRole.Find("audio/commentary") != -1) { + return u"commentary"_ns; + } else if (aRole.Find("video/sign") != -1) { + return u"sign"_ns; + } else if (aRole.Find("video/captioned") != -1) { + return u"captions"_ns; + } else if (aRole.Find("video/subtitled") != -1) { + return u"subtitles"_ns; + } + return u""_ns; +} + +void OggDemuxer::InitTrack(MessageField* aMsgInfo, TrackInfo* aInfo, + bool aEnable) { + MOZ_ASSERT(aMsgInfo); + MOZ_ASSERT(aInfo); + + nsCString* sName = aMsgInfo->mValuesStore.Get(eName); + nsCString* sRole = aMsgInfo->mValuesStore.Get(eRole); + nsCString* sTitle = aMsgInfo->mValuesStore.Get(eTitle); + nsCString* sLanguage = aMsgInfo->mValuesStore.Get(eLanguage); + aInfo->Init(sName ? NS_ConvertUTF8toUTF16(*sName) : EmptyString(), + sRole ? GetKind(*sRole) : u""_ns, + sTitle ? NS_ConvertUTF8toUTF16(*sTitle) : EmptyString(), + sLanguage ? 
NS_ConvertUTF8toUTF16(*sLanguage) : EmptyString(), + aEnable); +} + +OggDemuxer::OggDemuxer(MediaResource* aResource) + : mSandbox(CreateSandbox()), + mTheoraState(nullptr), + mVorbisState(nullptr), + mOpusState(nullptr), + mFlacState(nullptr), + mOpusEnabled(MediaDecoder::IsOpusEnabled()), + mSkeletonState(nullptr), + mAudioOggState(aResource, mSandbox.get()), + mVideoOggState(aResource, mSandbox.get()), + mIsChained(false), + mTimedMetadataEvent(nullptr), + mOnSeekableEvent(nullptr) { + MOZ_COUNT_CTOR(OggDemuxer); + // aResource is referenced through inner m{Audio,Video}OggState members. + DDLINKCHILD("resource", aResource); +} + +OggDemuxer::~OggDemuxer() { + MOZ_COUNT_DTOR(OggDemuxer); + Reset(TrackInfo::kAudioTrack); + Reset(TrackInfo::kVideoTrack); +} + +void OggDemuxer::SetChainingEvents(TimedMetadataEventProducer* aMetadataEvent, + MediaEventProducer* aOnSeekableEvent) { + mTimedMetadataEvent = aMetadataEvent; + mOnSeekableEvent = aOnSeekableEvent; +} + +bool OggDemuxer::HasAudio() const { + return mVorbisState || mOpusState || mFlacState; +} + +bool OggDemuxer::HasVideo() const { return mTheoraState; } + +bool OggDemuxer::HaveStartTime() const { return mStartTime.isSome(); } + +int64_t OggDemuxer::StartTime() const { return mStartTime.refOr(0); } + +bool OggDemuxer::HaveStartTime(TrackInfo::TrackType aType) { + return OggState(aType).mStartTime.isSome(); +} + +int64_t OggDemuxer::StartTime(TrackInfo::TrackType aType) { + return OggState(aType).mStartTime.refOr(TimeUnit::Zero()).ToMicroseconds(); +} + +RefPtr OggDemuxer::Init() { + if (!mSandbox) { + return InitPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + const char RLBOX_OGG_RETURN_CODE_SAFE[] = + "Return codes only control whether to early exit. 
Incorrect return codes " + "will not lead to memory safety issues in the renderer."; + + int ret = sandbox_invoke(*mSandbox, ogg_sync_init, + OggSyncState(TrackInfo::kAudioTrack)) + .unverified_safe_because(RLBOX_OGG_RETURN_CODE_SAFE); + if (ret != 0) { + return InitPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + ret = sandbox_invoke(*mSandbox, ogg_sync_init, + OggSyncState(TrackInfo::kVideoTrack)) + .unverified_safe_because(RLBOX_OGG_RETURN_CODE_SAFE); + if (ret != 0) { + return InitPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + if (ReadMetadata() != NS_OK) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, + __func__); + } + + if (!GetNumberTracks(TrackInfo::kAudioTrack) && + !GetNumberTracks(TrackInfo::kVideoTrack)) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, + __func__); + } + + return InitPromise::CreateAndResolve(NS_OK, __func__); +} + +OggCodecState* OggDemuxer::GetTrackCodecState( + TrackInfo::TrackType aType) const { + switch (aType) { + case TrackInfo::kAudioTrack: + if (mVorbisState) { + return mVorbisState; + } else if (mOpusState) { + return mOpusState; + } else { + return mFlacState; + } + case TrackInfo::kVideoTrack: + return mTheoraState; + default: + return 0; + } +} + +TrackInfo::TrackType OggDemuxer::GetCodecStateType( + OggCodecState* aState) const { + switch (aState->GetType()) { + case OggCodecState::TYPE_THEORA: + return TrackInfo::kVideoTrack; + case OggCodecState::TYPE_OPUS: + case OggCodecState::TYPE_VORBIS: + case OggCodecState::TYPE_FLAC: + return TrackInfo::kAudioTrack; + default: + return TrackInfo::kUndefinedTrack; + } +} + +uint32_t OggDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const { + switch (aType) { + case TrackInfo::kAudioTrack: + return HasAudio() ? 1 : 0; + case TrackInfo::kVideoTrack: + return HasVideo() ? 
1 : 0; + default: + return 0; + } +} + +UniquePtr OggDemuxer::GetTrackInfo(TrackInfo::TrackType aType, + size_t aTrackNumber) const { + switch (aType) { + case TrackInfo::kAudioTrack: + return mInfo.mAudio.Clone(); + case TrackInfo::kVideoTrack: + return mInfo.mVideo.Clone(); + default: + return nullptr; + } +} + +already_AddRefed OggDemuxer::GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) { + if (GetNumberTracks(aType) <= aTrackNumber) { + return nullptr; + } + RefPtr e = new OggTrackDemuxer(this, aType, aTrackNumber); + DDLINKCHILD("track demuxer", e.get()); + mDemuxers.AppendElement(e); + + return e.forget(); +} + +nsresult OggDemuxer::Reset(TrackInfo::TrackType aType) { + // Discard any previously buffered packets/pages. + if (mSandbox) { + sandbox_invoke(*mSandbox, ogg_sync_reset, OggSyncState(aType)); + } + OggCodecState* trackState = GetTrackCodecState(aType); + if (trackState) { + return trackState->Reset(); + } + OggState(aType).mNeedKeyframe = true; + return NS_OK; +} + +bool OggDemuxer::ReadHeaders(TrackInfo::TrackType aType, + OggCodecState* aState) { + while (!aState->DoneReadingHeaders()) { + DemuxUntilPacketAvailable(aType, aState); + OggPacketPtr packet = aState->PacketOut(); + if (!packet) { + OGG_DEBUG("Ran out of header packets early; deactivating stream %" PRIu32, + aState->mSerial); + aState->Deactivate(); + return false; + } + + // Local OggCodecState needs to decode headers in order to process + // packet granulepos -> time mappings, etc. + if (!aState->DecodeHeader(std::move(packet))) { + OGG_DEBUG( + "Failed to decode ogg header packet; deactivating stream %" PRIu32, + aState->mSerial); + aState->Deactivate(); + return false; + } + } + + return aState->Init(); +} + +void OggDemuxer::BuildSerialList(nsTArray& aTracks) { + // Obtaining seek index information for currently active bitstreams. 
+ if (HasVideo()) { + aTracks.AppendElement(mTheoraState->mSerial); + } + if (HasAudio()) { + if (mVorbisState) { + aTracks.AppendElement(mVorbisState->mSerial); + } else if (mOpusState) { + aTracks.AppendElement(mOpusState->mSerial); + } + } +} + +void OggDemuxer::SetupTarget(OggCodecState** aSavedState, + OggCodecState* aNewState) { + if (*aSavedState) { + (*aSavedState)->Reset(); + } + + if (aNewState->GetInfo()->GetAsAudioInfo()) { + mInfo.mAudio = *aNewState->GetInfo()->GetAsAudioInfo(); + } else { + mInfo.mVideo = *aNewState->GetInfo()->GetAsVideoInfo(); + } + *aSavedState = aNewState; +} + +void OggDemuxer::SetupTargetSkeleton() { + // Setup skeleton related information after mVorbisState & mTheroState + // being set (if they exist). + if (mSkeletonState) { + if (!HasAudio() && !HasVideo()) { + // We have a skeleton track, but no audio or video, may as well disable + // the skeleton, we can't do anything useful with this media. + OGG_DEBUG("Deactivating skeleton stream %" PRIu32, + mSkeletonState->mSerial); + mSkeletonState->Deactivate(); + } else if (ReadHeaders(TrackInfo::kAudioTrack, mSkeletonState) && + mSkeletonState->HasIndex()) { + // We don't particularly care about which track we are currently using + // as both MediaResource points to the same content. + // Extract the duration info out of the index, so we don't need to seek to + // the end of resource to get it. + nsTArray tracks; + BuildSerialList(tracks); + int64_t duration = 0; + if (NS_SUCCEEDED(mSkeletonState->GetDuration(tracks, duration))) { + OGG_DEBUG("Got duration from Skeleton index %" PRId64, duration); + mInfo.mMetadataDuration.emplace(TimeUnit::FromMicroseconds(duration)); + } + } + } +} + +void OggDemuxer::SetupMediaTracksInfo(const nsTArray& aSerials) { + // For each serial number + // 1. Retrieve a codecState from mCodecStore by this serial number. + // 2. Retrieve a message field from mMsgFieldStore by this serial number. + // 3. 
For now, skip if the serial number refers to a non-primary bitstream. + // 4. Setup track and other audio/video related information per different + // types. + for (size_t i = 0; i < aSerials.Length(); i++) { + uint32_t serial = aSerials[i]; + OggCodecState* codecState = mCodecStore.Get(serial); + + MessageField* msgInfo = nullptr; + if (mSkeletonState) { + mSkeletonState->mMsgFieldStore.Get(serial, &msgInfo); + } + + OggCodecState* primeState = nullptr; + switch (codecState->GetType()) { + case OggCodecState::TYPE_THEORA: + primeState = mTheoraState; + break; + case OggCodecState::TYPE_VORBIS: + primeState = mVorbisState; + break; + case OggCodecState::TYPE_OPUS: + primeState = mOpusState; + break; + case OggCodecState::TYPE_FLAC: + primeState = mFlacState; + break; + default: + break; + } + if (primeState && primeState == codecState) { + bool isAudio = primeState->GetInfo()->GetAsAudioInfo(); + if (msgInfo) { + InitTrack( + msgInfo, + isAudio ? static_cast(&mInfo.mAudio) : &mInfo.mVideo, + true); + } + FillTags(isAudio ? static_cast(&mInfo.mAudio) : &mInfo.mVideo, + primeState->GetTags()); + } + } +} + +void OggDemuxer::FillTags(TrackInfo* aInfo, UniquePtr&& aTags) { + if (!aTags) { + return; + } + UniquePtr tags(std::move(aTags)); + for (const auto& entry : *tags) { + aInfo->mTags.AppendElement(MetadataTag(entry.GetKey(), entry.GetData())); + } +} + +nsresult OggDemuxer::ReadMetadata() { + OGG_DEBUG("OggDemuxer::ReadMetadata called!"); + + // We read packets until all bitstreams have read all their header packets. + // We record the offset of the first non-header page so that we know + // what page to seek to when seeking to the media start. 
+ + // @FIXME we have to read all the header packets on all the streams + // and THEN we can run SetupTarget* + // @fixme fixme + + TrackInfo::TrackType tracks[2] = {TrackInfo::kAudioTrack, + TrackInfo::kVideoTrack}; + + nsTArray bitstreams; + nsTArray serials; + + for (uint32_t i = 0; i < ArrayLength(tracks); i++) { + tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + + bool readAllBOS = false; + while (!readAllBOS) { + if (!ReadOggPage(tracks[i], page.to_opaque())) { + // Some kind of error... + OGG_DEBUG("OggDemuxer::ReadOggPage failed? leaving ReadMetadata..."); + return NS_ERROR_FAILURE; + } + + uint32_t serial = static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON)); + + if (!sandbox_invoke(*mSandbox, ogg_page_bos, page) + .unverified_safe_because( + "If this value is incorrect, it would mean not all " + "bitstreams are read. This does not affect the memory " + "safety of the renderer.")) { + // We've encountered a non Beginning Of Stream page. No more BOS pages + // can follow in this Ogg segment, so there will be no other bitstreams + // in the Ogg (unless it's invalid). + readAllBOS = true; + } else if (!mCodecStore.Contains(serial)) { + // We've not encountered a stream with this serial number before. Create + // an OggCodecState to demux it, and map that to the OggCodecState + // in mCodecStates. + OggCodecState* const codecState = mCodecStore.Add( + serial, + OggCodecState::Create(mSandbox.get(), page.to_opaque(), serial)); + bitstreams.AppendElement(codecState); + serials.AppendElement(serial); + } + if (NS_FAILED(DemuxOggPage(tracks[i], page.to_opaque()))) { + return NS_ERROR_FAILURE; + } + } + } + + // We've read all BOS pages, so we know the streams contained in the media. + // 1. 
Find the first encountered Theora/Vorbis/Opus bitstream, and configure + // it as the target A/V bitstream. + // 2. Deactivate the rest of bitstreams for now, until we have MediaInfo + // support multiple track infos. + for (uint32_t i = 0; i < bitstreams.Length(); ++i) { + OggCodecState* s = bitstreams[i]; + if (s) { + if (s->GetType() == OggCodecState::TYPE_THEORA && + ReadHeaders(TrackInfo::kVideoTrack, s)) { + if (!mTheoraState) { + SetupTarget(&mTheoraState, s); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_VORBIS && + ReadHeaders(TrackInfo::kAudioTrack, s)) { + if (!mVorbisState) { + SetupTarget(&mVorbisState, s); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_OPUS && + ReadHeaders(TrackInfo::kAudioTrack, s)) { + if (mOpusEnabled) { + if (!mOpusState) { + SetupTarget(&mOpusState, s); + } else { + s->Deactivate(); + } + } else { + NS_WARNING( + "Opus decoding disabled." + " See media.opus.enabled in about:config"); + } + } else if (s->GetType() == OggCodecState::TYPE_FLAC && + ReadHeaders(TrackInfo::kAudioTrack, s)) { + if (!mFlacState) { + SetupTarget(&mFlacState, s); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_SKELETON && + !mSkeletonState) { + mSkeletonState = static_cast(s); + } else { + // Deactivate any non-primary bitstreams. + s->Deactivate(); + } + } + } + + SetupTargetSkeleton(); + SetupMediaTracksInfo(serials); + + if (HasAudio() || HasVideo()) { + int64_t startTime = -1; + FindStartTime(startTime); + if (startTime >= 0) { + OGG_DEBUG("Detected stream start time %" PRId64, startTime); + mStartTime.emplace(startTime); + } + + if (mInfo.mMetadataDuration.isNothing() && + Resource(TrackInfo::kAudioTrack)->GetLength() >= 0) { + // We didn't get a duration from the index or a Content-Duration header. + // Seek to the end of file to find the end time. 
+ int64_t length = Resource(TrackInfo::kAudioTrack)->GetLength(); + + MOZ_ASSERT(length > 0, "Must have a content length to get end time"); + + int64_t endTime = RangeEndTime(TrackInfo::kAudioTrack, length); + + if (endTime != -1) { + mInfo.mUnadjustedMetadataEndTime.emplace( + TimeUnit::FromMicroseconds(endTime)); + mInfo.mMetadataDuration.emplace( + TimeUnit::FromMicroseconds(endTime - mStartTime.refOr(0))); + OGG_DEBUG("Got Ogg duration from seeking to end %" PRId64, endTime); + } + } + if (mInfo.mMetadataDuration.isNothing()) { + mInfo.mMetadataDuration.emplace(TimeUnit::FromInfinity()); + } + if (HasAudio()) { + mInfo.mAudio.mDuration = mInfo.mMetadataDuration.ref(); + } + if (HasVideo()) { + mInfo.mVideo.mDuration = mInfo.mMetadataDuration.ref(); + } + } else { + OGG_DEBUG("no audio or video tracks"); + return NS_ERROR_FAILURE; + } + + OGG_DEBUG("success?!"); + return NS_OK; +} + +void OggDemuxer::SetChained() { + { + if (mIsChained) { + return; + } + mIsChained = true; + } + if (mOnSeekableEvent) { + mOnSeekableEvent->Notify(); + } +} + +bool OggDemuxer::ReadOggChain(const media::TimeUnit& aLastEndTime) { + bool chained = false; + OpusState* newOpusState = nullptr; + VorbisState* newVorbisState = nullptr; + FlacState* newFlacState = nullptr; + UniquePtr tags; + + if (HasVideo() || HasSkeleton() || !HasAudio()) { + return false; + } + + tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return false; + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + if (!ReadOggPage(TrackInfo::kAudioTrack, page.to_opaque()) || + !sandbox_invoke(*mSandbox, ogg_page_bos, page) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) { + // Chaining is only supported for audio only ogg files. 
+ return false; + } + + uint32_t serial = static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page) + .unverified_safe_because( + "We are reading a new page with a serial number for the first " + "time and will check if we have seen it before prior to use.")); + if (mCodecStore.Contains(serial)) { + return false; + } + + UniquePtr codecState( + OggCodecState::Create(mSandbox.get(), page.to_opaque(), serial)); + if (!codecState) { + return false; + } + + if (mVorbisState && (codecState->GetType() == OggCodecState::TYPE_VORBIS)) { + newVorbisState = static_cast(codecState.get()); + } else if (mOpusState && + (codecState->GetType() == OggCodecState::TYPE_OPUS)) { + newOpusState = static_cast(codecState.get()); + } else if (mFlacState && + (codecState->GetType() == OggCodecState::TYPE_FLAC)) { + newFlacState = static_cast(codecState.get()); + } else { + return false; + } + + OggCodecState* state; + + mCodecStore.Add(serial, std::move(codecState)); + state = mCodecStore.Get(serial); + + NS_ENSURE_TRUE(state != nullptr, false); + + if (NS_FAILED(state->PageIn(page.to_opaque()))) { + return false; + } + + MessageField* msgInfo = nullptr; + if (mSkeletonState) { + mSkeletonState->mMsgFieldStore.Get(serial, &msgInfo); + } + + if ((newVorbisState && ReadHeaders(TrackInfo::kAudioTrack, newVorbisState)) && + (mVorbisState->GetInfo()->GetAsAudioInfo()->mRate == + newVorbisState->GetInfo()->GetAsAudioInfo()->mRate) && + (mVorbisState->GetInfo()->GetAsAudioInfo()->mChannels == + newVorbisState->GetInfo()->GetAsAudioInfo()->mChannels)) { + SetupTarget(&mVorbisState, newVorbisState); + OGG_DEBUG("New vorbis ogg link, serial=%d\n", mVorbisState->mSerial); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + + chained = true; + tags = newVorbisState->GetTags(); + } + + if ((newOpusState && ReadHeaders(TrackInfo::kAudioTrack, newOpusState)) && + (mOpusState->GetInfo()->GetAsAudioInfo()->mRate == + newOpusState->GetInfo()->GetAsAudioInfo()->mRate) && + 
(mOpusState->GetInfo()->GetAsAudioInfo()->mChannels == + newOpusState->GetInfo()->GetAsAudioInfo()->mChannels)) { + SetupTarget(&mOpusState, newOpusState); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + + chained = true; + tags = newOpusState->GetTags(); + } + + if ((newFlacState && ReadHeaders(TrackInfo::kAudioTrack, newFlacState)) && + (mFlacState->GetInfo()->GetAsAudioInfo()->mRate == + newFlacState->GetInfo()->GetAsAudioInfo()->mRate) && + (mFlacState->GetInfo()->GetAsAudioInfo()->mChannels == + newFlacState->GetInfo()->GetAsAudioInfo()->mChannels)) { + SetupTarget(&mFlacState, newFlacState); + OGG_DEBUG("New flac ogg link, serial=%d\n", mFlacState->mSerial); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + + chained = true; + tags = newFlacState->GetTags(); + } + + if (chained) { + SetChained(); + mInfo.mMediaSeekable = false; + mDecodedAudioDuration += aLastEndTime; + if (mTimedMetadataEvent) { + mTimedMetadataEvent->Notify( + TimedMetadata(mDecodedAudioDuration, std::move(tags), + UniquePtr(new MediaInfo(mInfo)))); + } + // Setup a new TrackInfo so that the MediaFormatReader will flush the + // current decoder. 
+ mSharedAudioTrackInfo = + new TrackInfoSharedPtr(mInfo.mAudio, ++sStreamSourceID); + return true; + } + + return false; +} + +OggDemuxer::OggStateContext& OggDemuxer::OggState(TrackInfo::TrackType aType) { + if (aType == TrackInfo::kVideoTrack) { + return mVideoOggState; + } + return mAudioOggState; +} + +tainted_opaque_ogg OggDemuxer::OggSyncState( + TrackInfo::TrackType aType) { + return OggState(aType).mOggState.mState; +} + +MediaResourceIndex* OggDemuxer::Resource(TrackInfo::TrackType aType) { + return &OggState(aType).mResource; +} + +MediaResourceIndex* OggDemuxer::CommonResource() { + return &mAudioOggState.mResource; +} + +bool OggDemuxer::ReadOggPage(TrackInfo::TrackType aType, + tainted_opaque_ogg aPage) { + int ret = 0; + while ((ret = sandbox_invoke(*mSandbox, ogg_sync_pageseek, + OggSyncState(aType), aPage) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) <= + 0) { + if (ret < 0) { + // Lost page sync, have to skip up to next page. + continue; + } + // Returns a buffer that can be written too + // with the given size. This buffer is stored + // in the ogg synchronisation structure. + const uint32_t MIN_BUFFER_SIZE = 4096; + tainted_ogg buffer_tainted = sandbox_invoke( + *mSandbox, ogg_sync_buffer, OggSyncState(aType), MIN_BUFFER_SIZE); + MOZ_ASSERT(buffer_tainted != nullptr, "ogg_sync_buffer failed"); + + // Read from the resource into the buffer + uint32_t bytesRead = 0; + + char* buffer = buffer_tainted.copy_and_verify_buffer_address( + [](uintptr_t val) { return reinterpret_cast(val); }, + MIN_BUFFER_SIZE); + + nsresult rv = Resource(aType)->Read(buffer, MIN_BUFFER_SIZE, &bytesRead); + if (NS_FAILED(rv) || !bytesRead) { + // End of file or error. 
+ return false; + } + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = sandbox_invoke(*mSandbox, ogg_sync_wrote, OggSyncState(aType), + bytesRead) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); + NS_ENSURE_TRUE(ret == 0, false); + } + + return true; +} + +nsresult OggDemuxer::DemuxOggPage(TrackInfo::TrackType aType, + tainted_opaque_ogg aPage) { + tainted_ogg serial = sandbox_invoke(*mSandbox, ogg_page_serialno, aPage); + OggCodecState* codecState = mCodecStore.Get(static_cast( + serial.unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON))); + if (codecState == nullptr) { + OGG_DEBUG("encountered packet for unrecognized codecState"); + return NS_ERROR_FAILURE; + } + if (GetCodecStateType(codecState) != aType && + codecState->GetType() != OggCodecState::TYPE_SKELETON) { + // Not a page we're interested in. + return NS_OK; + } + if (NS_FAILED(codecState->PageIn(aPage))) { + OGG_DEBUG("codecState->PageIn failed"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +bool OggDemuxer::IsSeekable() const { + if (mIsChained) { + return false; + } + return true; +} + +UniquePtr OggDemuxer::GetCrypto() { return nullptr; } + +ogg_packet* OggDemuxer::GetNextPacket(TrackInfo::TrackType aType) { + OggCodecState* state = GetTrackCodecState(aType); + ogg_packet* packet = nullptr; + OggStateContext& context = OggState(aType); + + while (true) { + if (packet) { + Unused << state->PacketOut(); + } + DemuxUntilPacketAvailable(aType, state); + + packet = state->PacketPeek(); + if (!packet) { + break; + } + if (state->IsHeader(packet)) { + continue; + } + if (context.mNeedKeyframe && !state->IsKeyframe(packet)) { + continue; + } + context.mNeedKeyframe = false; + break; + } + + return packet; +} + +void OggDemuxer::DemuxUntilPacketAvailable(TrackInfo::TrackType aType, + OggCodecState* aState) { + while (!aState->IsPacketReady()) { + OGG_DEBUG("no packet yet, reading some more"); + tainted_ogg page = 
mSandbox->malloc_in_sandbox(); + MOZ_RELEASE_ASSERT(page != nullptr); + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + if (!ReadOggPage(aType, page.to_opaque())) { + OGG_DEBUG("no more pages to read in resource?"); + return; + } + DemuxOggPage(aType, page.to_opaque()); + } +} + +TimeIntervals OggDemuxer::GetBuffered(TrackInfo::TrackType aType) { + if (!HaveStartTime(aType)) { + return TimeIntervals(); + } + if (mIsChained) { + return TimeIntervals::Invalid(); + } + TimeIntervals buffered; + // HasAudio and HasVideo are not used here as they take a lock and cause + // a deadlock. Accessing mInfo doesn't require a lock - it doesn't change + // after metadata is read. + if (!mInfo.HasValidMedia()) { + // No need to search through the file if there are no audio or video tracks + return buffered; + } + + AutoPinned resource(Resource(aType)->GetResource()); + MediaByteRangeSet ranges; + nsresult res = resource->GetCachedRanges(ranges); + NS_ENSURE_SUCCESS(res, TimeIntervals::Invalid()); + + const char time_interval_reason[] = + "Even if this computation is incorrect due to the reliance on tainted " + "values, only the search for the time interval or the time interval " + "returned will be affected. However this will not result in a memory " + "safety vulnerabilty in the Firefox renderer."; + + // Traverse across the buffered byte ranges, determining the time ranges + // they contain. MediaResource::GetNextCachedData(offset) returns -1 when + // offset is after the end of the media resource, or there's no more cached + // data after the offset. This loop will run until we've checked every + // buffered range in the media, in increasing order of offset. + nsAutoOggSyncState sync(mSandbox.get()); + for (uint32_t index = 0; index < ranges.Length(); index++) { + // Ensure the offsets are after the header pages. 
+ int64_t startOffset = ranges[index].mStart; + int64_t endOffset = ranges[index].mEnd; + + // Because the granulepos time is actually the end time of the page, + // we special-case (startOffset == 0) so that the first + // buffered range always appears to be buffered from the media start + // time, rather than from the end-time of the first page. + int64_t startTime = (startOffset == 0) ? StartTime() : -1; + + // Find the start time of the range. Read pages until we find one with a + // granulepos which we can convert into a timestamp to use as the time of + // the start of the buffered range. + sandbox_invoke(*mSandbox, ogg_sync_reset, sync.mState); + tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return TimeIntervals::Invalid(); + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + + while (startTime == -1) { + int32_t discard; + PageSyncResult pageSyncResult = + PageSync(mSandbox.get(), Resource(aType), sync.mState, true, + startOffset, endOffset, page, discard); + if (pageSyncResult == PAGE_SYNC_ERROR) { + return TimeIntervals::Invalid(); + } else if (pageSyncResult == PAGE_SYNC_END_OF_RANGE) { + // Hit the end of range without reading a page, give up trying to + // find a start time for this buffered range, skip onto the next one. + break; + } + + int64_t granulepos = sandbox_invoke(*mSandbox, ogg_page_granulepos, page) + .unverified_safe_because(time_interval_reason); + if (granulepos == -1) { + // Page doesn't have an end time, advance to the next page + // until we find one. 
+ + bool failedPageLenVerify = false; + // Page length should be under 64Kb according to + // https://xiph.org/ogg/doc/libogg/ogg_page.html + long pageLength = + CopyAndVerifyOrFail(page->header_len + page->body_len, + val <= 64 * 1024, &failedPageLenVerify); + if (failedPageLenVerify) { + return TimeIntervals::Invalid(); + } + + startOffset += pageLength; + continue; + } + + tainted_ogg serial = rlbox::sandbox_static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page)); + if (aType == TrackInfo::kAudioTrack && mVorbisState && + (serial == mVorbisState->mSerial) + .unverified_safe_because(time_interval_reason)) { + startTime = mVorbisState->Time(granulepos); + MOZ_ASSERT(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kAudioTrack && mOpusState && + (serial == mOpusState->mSerial) + .unverified_safe_because(time_interval_reason)) { + startTime = mOpusState->Time(granulepos); + MOZ_ASSERT(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kAudioTrack && mFlacState && + (serial == mFlacState->mSerial) + .unverified_safe_because(time_interval_reason)) { + startTime = mFlacState->Time(granulepos); + MOZ_ASSERT(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kVideoTrack && mTheoraState && + (serial == mTheoraState->mSerial) + .unverified_safe_because(time_interval_reason)) { + startTime = mTheoraState->Time(granulepos); + MOZ_ASSERT(startTime > 0, "Must have positive start time"); + } else if (mCodecStore.Contains( + serial.unverified_safe_because(time_interval_reason))) { + // Stream is not the theora or vorbis stream we're playing, + // but is one that we have header data for. 
+ + bool failedPageLenVerify = false; + // Page length should be under 64Kb according to + // https://xiph.org/ogg/doc/libogg/ogg_page.html + long pageLength = + CopyAndVerifyOrFail(page->header_len + page->body_len, + val <= 64 * 1024, &failedPageLenVerify); + if (failedPageLenVerify) { + return TimeIntervals::Invalid(); + } + + startOffset += pageLength; + continue; + } else { + // Page is for a stream we don't know about (possibly a chained + // ogg), return OK to abort the finding any further ranges. This + // prevents us searching through the rest of the media when we + // may not be able to extract timestamps from it. + SetChained(); + return buffered; + } + } + + if (startTime != -1) { + // We were able to find a start time for that range, see if we can + // find an end time. + int64_t endTime = RangeEndTime(aType, startOffset, endOffset, true); + if (endTime > startTime) { + buffered += + TimeInterval(TimeUnit::FromMicroseconds(startTime - StartTime()), + TimeUnit::FromMicroseconds(endTime - StartTime())); + } + } + } + + return buffered; +} + +void OggDemuxer::FindStartTime(int64_t& aOutStartTime) { + // Extract the start times of the bitstreams in order to calculate + // the duration. 
+ int64_t videoStartTime = INT64_MAX; + int64_t audioStartTime = INT64_MAX; + + if (HasVideo()) { + FindStartTime(TrackInfo::kVideoTrack, videoStartTime); + if (videoStartTime != INT64_MAX) { + OGG_DEBUG("OggDemuxer::FindStartTime() video=%" PRId64, videoStartTime); + mVideoOggState.mStartTime = + Some(TimeUnit::FromMicroseconds(videoStartTime)); + } + } + if (HasAudio()) { + FindStartTime(TrackInfo::kAudioTrack, audioStartTime); + if (audioStartTime != INT64_MAX) { + OGG_DEBUG("OggDemuxer::FindStartTime() audio=%" PRId64, audioStartTime); + mAudioOggState.mStartTime = + Some(TimeUnit::FromMicroseconds(audioStartTime)); + } + } + + int64_t startTime = std::min(videoStartTime, audioStartTime); + if (startTime != INT64_MAX) { + aOutStartTime = startTime; + } +} + +void OggDemuxer::FindStartTime(TrackInfo::TrackType aType, + int64_t& aOutStartTime) { + int64_t startTime = INT64_MAX; + + OggCodecState* state = GetTrackCodecState(aType); + ogg_packet* pkt = GetNextPacket(aType); + if (pkt) { + startTime = state->PacketStartTime(pkt); + } + + if (startTime != INT64_MAX) { + aOutStartTime = startTime; + } +} + +nsresult OggDemuxer::SeekInternal(TrackInfo::TrackType aType, + const TimeUnit& aTarget) { + int64_t target = aTarget.ToMicroseconds(); + OGG_DEBUG("About to seek to %" PRId64, target); + nsresult res; + int64_t adjustedTarget = target; + int64_t startTime = StartTime(aType); + int64_t endTime = mInfo.mMetadataDuration->ToMicroseconds() + startTime; + if (aType == TrackInfo::kAudioTrack && mOpusState) { + adjustedTarget = + std::max(startTime, target - OGG_SEEK_OPUS_PREROLL.ToMicroseconds()); + } + + if (!HaveStartTime(aType) || adjustedTarget == startTime) { + // We've seeked to the media start or we can't seek. + // Just seek to the offset of the first content page. 
+ res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(res, res); + + res = Reset(aType); + NS_ENSURE_SUCCESS(res, res); + } else { + // TODO: This may seek back unnecessarily far in the video, but we don't + // have a way of asking Skeleton to seek to a different target for each + // stream yet. Using adjustedTarget here is at least correct, if slow. + IndexedSeekResult sres = SeekToKeyframeUsingIndex(aType, adjustedTarget); + NS_ENSURE_TRUE(sres != SEEK_FATAL_ERROR, NS_ERROR_FAILURE); + if (sres == SEEK_INDEX_FAIL) { + // No index or other non-fatal index-related failure. Try to seek + // using a bisection search. Determine the already downloaded data + // in the media cache, so we can try to seek in the cached data first. + AutoTArray ranges; + res = GetSeekRanges(aType, ranges); + NS_ENSURE_SUCCESS(res, res); + + // Figure out if the seek target lies in a buffered range. + SeekRange r = + SelectSeekRange(aType, ranges, target, startTime, endTime, true); + + if (!r.IsNull()) { + // We know the buffered range in which the seek target lies, do a + // bisection search in that buffered range. + res = SeekInBufferedRange(aType, target, adjustedTarget, startTime, + endTime, ranges, r); + NS_ENSURE_SUCCESS(res, res); + } else { + // The target doesn't lie in a buffered range. Perform a bisection + // search over the whole media, using the known buffered ranges to + // reduce the search space. + res = SeekInUnbuffered(aType, target, startTime, endTime, ranges); + NS_ENSURE_SUCCESS(res, res); + } + } + } + + // Demux forwards until we find the first keyframe prior the target. + // there may be non-keyframes in the page before the keyframe. + // Additionally, we may have seeked to the first page referenced by the + // page index which may be quite far off the target. 
+ // When doing fastSeek we display the first frame after the seek, so + // we need to advance the decode to the keyframe otherwise we'll get + // visual artifacts in the first frame output after the seek. + OggCodecState* state = GetTrackCodecState(aType); + OggPacketQueue tempPackets; + bool foundKeyframe = false; + while (true) { + DemuxUntilPacketAvailable(aType, state); + ogg_packet* packet = state->PacketPeek(); + if (packet == nullptr) { + OGG_DEBUG("End of stream reached before keyframe found in indexed seek"); + break; + } + int64_t startTstamp = state->PacketStartTime(packet); + if (foundKeyframe && startTstamp > adjustedTarget) { + break; + } + if (state->IsKeyframe(packet)) { + OGG_DEBUG("keyframe found after seeking at %" PRId64, startTstamp); + tempPackets.Erase(); + foundKeyframe = true; + } + if (foundKeyframe && startTstamp == adjustedTarget) { + break; + } + if (foundKeyframe) { + tempPackets.Append(state->PacketOut()); + } else { + // Discard video packets before the first keyframe. + Unused << state->PacketOut(); + } + } + // Re-add all packet into the codec state in order. + state->PushFront(std::move(tempPackets)); + + return NS_OK; +} + +OggDemuxer::IndexedSeekResult OggDemuxer::RollbackIndexedSeek( + TrackInfo::TrackType aType, int64_t aOffset) { + if (mSkeletonState) { + mSkeletonState->Deactivate(); + } + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, aOffset); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + return SEEK_INDEX_FAIL; +} + +OggDemuxer::IndexedSeekResult OggDemuxer::SeekToKeyframeUsingIndex( + TrackInfo::TrackType aType, int64_t aTarget) { + if (!HasSkeleton() || !mSkeletonState->HasIndex()) { + return SEEK_INDEX_FAIL; + } + // We have an index from the Skeleton track, try to use it to seek. 
+ AutoTArray tracks; + BuildSerialList(tracks); + SkeletonState::nsSeekTarget keyframe; + if (NS_FAILED(mSkeletonState->IndexedSeekTarget(aTarget, tracks, keyframe))) { + // Could not locate a keypoint for the target in the index. + return SEEK_INDEX_FAIL; + } + + // Remember original resource read cursor position so we can rollback on + // failure. + int64_t tell = Resource(aType)->Tell(); + + // Seek to the keypoint returned by the index. + if (keyframe.mKeyPoint.mOffset > Resource(aType)->GetLength() || + keyframe.mKeyPoint.mOffset < 0) { + // Index must be invalid. + return RollbackIndexedSeek(aType, tell); + } + OGG_DEBUG("Seeking using index to keyframe at offset %" PRId64 "\n", + keyframe.mKeyPoint.mOffset); + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, + keyframe.mKeyPoint.mOffset); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + + // We've moved the read set, so reset decode. + res = Reset(aType); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + + // Check that the page the index thinks is exactly here is actually exactly + // here. If not, the index is invalid. + tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return SEEK_INDEX_FAIL; + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + int skippedBytes = 0; + PageSyncResult syncres = + PageSync(mSandbox.get(), Resource(aType), OggSyncState(aType), false, + keyframe.mKeyPoint.mOffset, Resource(aType)->GetLength(), page, + skippedBytes); + NS_ENSURE_TRUE(syncres != PAGE_SYNC_ERROR, SEEK_FATAL_ERROR); + if (syncres != PAGE_SYNC_OK || skippedBytes != 0) { + OGG_DEBUG( + "Indexed-seek failure: Ogg Skeleton Index is invalid " + "or sync error after seek"); + return RollbackIndexedSeek(aType, tell); + } + uint32_t serial = static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page) + .unverified_safe_because( + "Serial is only used to locate the correct page. 
If the serial " + "is incorrect the the renderer would just fail to seek with an " + "error code. This would not lead to any memory safety bugs.")); + if (serial != keyframe.mSerial) { + // Serialno of page at offset isn't what the index told us to expect. + // Assume the index is invalid. + return RollbackIndexedSeek(aType, tell); + } + OggCodecState* codecState = mCodecStore.Get(serial); + if (codecState && codecState->mActive && + sandbox_invoke(*mSandbox, ogg_stream_pagein, codecState->mState, page) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0) { + // Couldn't insert page into the ogg resource, or somehow the resource + // is no longer active. + return RollbackIndexedSeek(aType, tell); + } + return SEEK_OK; +} + +// Reads a page from the media resource. +OggDemuxer::PageSyncResult OggDemuxer::PageSync( + rlbox_sandbox_ogg* aSandbox, MediaResourceIndex* aResource, + tainted_opaque_ogg aState, bool aCachedDataOnly, + int64_t aOffset, int64_t aEndOffset, tainted_ogg aPage, + int& aSkippedBytes) { + aSkippedBytes = 0; + // Sync to the next page. + tainted_ogg ret = 0; + uint32_t bytesRead = 0; + int64_t readHead = aOffset; + while (ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) <= 0) { + tainted_ogg seek_ret = + sandbox_invoke(*aSandbox, ogg_sync_pageseek, aState, aPage); + + // We aren't really verifying the value of seek_ret below. + // We are merely ensuring that it won't overflow an integer. + // However we are assigning the value to ret which is marked tainted, so + // this is fine. 
+ bool failedVerify = false; + CheckedInt checker; + ret = CopyAndVerifyOrFail( + seek_ret, (static_cast(checker = val), checker.isValid()), + &failedVerify); + if (failedVerify) { + return PAGE_SYNC_ERROR; + } + + if (ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 0) { + const int page_step_val = PAGE_STEP; + tainted_ogg buffer_tainted = + sandbox_invoke(*aSandbox, ogg_sync_buffer, aState, page_step_val); + MOZ_ASSERT(buffer_tainted != nullptr, "Must have a buffer"); + + // Read from the file into the buffer + int64_t bytesToRead = + std::min(static_cast(PAGE_STEP), aEndOffset - readHead); + MOZ_ASSERT(bytesToRead <= UINT32_MAX, "bytesToRead range check"); + if (bytesToRead <= 0) { + return PAGE_SYNC_END_OF_RANGE; + } + char* buffer = buffer_tainted.copy_and_verify_buffer_address( + [](uintptr_t val) { return reinterpret_cast(val); }, + static_cast(bytesToRead)); + + nsresult rv = NS_OK; + if (aCachedDataOnly) { + rv = aResource->GetResource()->ReadFromCache( + buffer, readHead, static_cast(bytesToRead)); + NS_ENSURE_SUCCESS(rv, PAGE_SYNC_ERROR); + bytesRead = static_cast(bytesToRead); + } else { + rv = aResource->Seek(nsISeekableStream::NS_SEEK_SET, readHead); + NS_ENSURE_SUCCESS(rv, PAGE_SYNC_ERROR); + rv = aResource->Read(buffer, static_cast(bytesToRead), + &bytesRead); + NS_ENSURE_SUCCESS(rv, PAGE_SYNC_ERROR); + } + if (bytesRead == 0 && NS_SUCCEEDED(rv)) { + // End of file. 
+ return PAGE_SYNC_END_OF_RANGE; + } + readHead += bytesRead; + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = sandbox_invoke(*aSandbox, ogg_sync_wrote, aState, bytesRead); + NS_ENSURE_TRUE( + ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 0, + PAGE_SYNC_ERROR); + continue; + } + + if (ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) < 0) { + MOZ_ASSERT(aSkippedBytes >= 0, "Offset >= 0"); + bool failedSkippedBytesVerify = false; + ret.copy_and_verify([&](int val) { + int64_t result = static_cast(aSkippedBytes) - val; + if (result > std::numeric_limits::max() || + result > (aEndOffset - aOffset) || result < 0) { + failedSkippedBytesVerify = true; + } else { + aSkippedBytes = result; + } + }); + if (failedSkippedBytesVerify) { + return PAGE_SYNC_ERROR; + } + continue; + } + } + + return PAGE_SYNC_OK; +} + +// OggTrackDemuxer +OggTrackDemuxer::OggTrackDemuxer(OggDemuxer* aParent, + TrackInfo::TrackType aType, + uint32_t aTrackNumber) + : mParent(aParent), mType(aType) { + mInfo = mParent->GetTrackInfo(aType, aTrackNumber); + MOZ_ASSERT(mInfo); +} + +OggTrackDemuxer::~OggTrackDemuxer() = default; + +UniquePtr OggTrackDemuxer::GetInfo() const { return mInfo->Clone(); } + +RefPtr OggTrackDemuxer::Seek( + const TimeUnit& aTime) { + // Seeks to aTime. Upon success, SeekPromise will be resolved with the + // actual time seeked to. Typically the random access point time + mQueuedSample = nullptr; + TimeUnit seekTime = aTime; + if (mParent->SeekInternal(mType, aTime) == NS_OK) { + RefPtr sample(NextSample()); + + // Check what time we actually seeked to. 
+ if (sample != nullptr) { + seekTime = sample->mTime; + OGG_DEBUG("%p seeked to time %" PRId64, this, seekTime.ToMicroseconds()); + } + mQueuedSample = sample; + + return SeekPromise::CreateAndResolve(seekTime, __func__); + } else { + return SeekPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } +} + +RefPtr OggTrackDemuxer::NextSample() { + if (mQueuedSample) { + RefPtr nextSample = mQueuedSample; + mQueuedSample = nullptr; + if (mType == TrackInfo::kAudioTrack) { + nextSample->mTrackInfo = mParent->mSharedAudioTrackInfo; + } + return nextSample; + } + ogg_packet* packet = mParent->GetNextPacket(mType); + if (!packet) { + return nullptr; + } + // Check the eos state in case we need to look for chained streams. + bool eos = packet->e_o_s; + OggCodecState* state = mParent->GetTrackCodecState(mType); + RefPtr data = state->PacketOutAsMediaRawData(); + // ogg allows 'nil' packets, that are EOS and of size 0. + if (!data || (data->mEOS && data->Size() == 0)) { + return nullptr; + } + if (mType == TrackInfo::kAudioTrack) { + data->mTrackInfo = mParent->mSharedAudioTrackInfo; + } + // mDecodedAudioDuration gets adjusted during ReadOggChain(). + TimeUnit totalDuration = mParent->mDecodedAudioDuration; + if (eos) { + // We've encountered an end of bitstream packet; check for a chained + // bitstream following this one. + // This will also update mSharedAudioTrackInfo. + mParent->ReadOggChain(data->GetEndTime()); + } + data->mOffset = mParent->Resource(mType)->Tell(); + // We adjust the start time of the sample to account for the potential ogg + // chaining. 
+ data->mTime += totalDuration; + if (!data->mTime.IsValid()) { + return nullptr; + } + + return data; +} + +RefPtr OggTrackDemuxer::GetSamples( + int32_t aNumSamples) { + RefPtr samples = new SamplesHolder; + if (!aNumSamples) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + + while (aNumSamples) { + RefPtr sample(NextSample()); + if (!sample) { + break; + } + if (!sample->HasValidTime()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + samples->AppendSample(sample); + aNumSamples--; + } + + if (samples->GetSamples().IsEmpty()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, + __func__); + } else { + return SamplesPromise::CreateAndResolve(samples, __func__); + } +} + +void OggTrackDemuxer::Reset() { + mParent->Reset(mType); + mQueuedSample = nullptr; +} + +RefPtr +OggTrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) { + uint32_t parsed = 0; + bool found = false; + RefPtr sample; + + OGG_DEBUG("TimeThreshold: %f", aTimeThreshold.ToSeconds()); + while (!found && (sample = NextSample())) { + parsed++; + if (sample->mKeyframe && sample->mTime >= aTimeThreshold) { + found = true; + mQueuedSample = sample; + } + } + if (found) { + OGG_DEBUG("next sample: %f (parsed: %d)", sample->mTime.ToSeconds(), + parsed); + return SkipAccessPointPromise::CreateAndResolve(parsed, __func__); + } else { + SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed); + return SkipAccessPointPromise::CreateAndReject(std::move(failure), + __func__); + } +} + +TimeIntervals OggTrackDemuxer::GetBuffered() { + return mParent->GetBuffered(mType); +} + +void OggTrackDemuxer::BreakCycles() { mParent = nullptr; } + +// Returns an ogg page's checksum. +tainted_opaque_ogg OggDemuxer::GetPageChecksum( + tainted_opaque_ogg aPage) { + tainted_ogg page = rlbox::from_opaque(aPage); + + const char hint_reason[] = + "Early bail out of checksum. 
Even if this is wrong, the renderer's " + "security is not compromised."; + if (page == nullptr || + (page->header == nullptr).unverified_safe_because(hint_reason) || + (page->header_len < 25).unverified_safe_because(hint_reason)) { + tainted_ogg ret = 0; + return ret.to_opaque(); + } + + const int CHECKSUM_BYTES_LENGTH = 4; + const unsigned char* p = + (page->header + 22u) + .copy_and_verify_buffer_address( + [](uintptr_t val) { + return reinterpret_cast(val); + }, + CHECKSUM_BYTES_LENGTH); + uint32_t c = + static_cast(p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24)); + tainted_ogg ret = c; + return ret.to_opaque(); +} + +int64_t OggDemuxer::RangeStartTime(TrackInfo::TrackType aType, + int64_t aOffset) { + int64_t position = Resource(aType)->Tell(); + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, aOffset); + NS_ENSURE_SUCCESS(res, 0); + int64_t startTime = 0; + FindStartTime(aType, startTime); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, position); + NS_ENSURE_SUCCESS(res, -1); + return startTime; +} + +struct nsDemuxerAutoOggSyncState { + explicit nsDemuxerAutoOggSyncState(rlbox_sandbox_ogg& aSandbox) + : mSandbox(aSandbox) { + mState = mSandbox.malloc_in_sandbox(); + MOZ_RELEASE_ASSERT(mState != nullptr); + sandbox_invoke(mSandbox, ogg_sync_init, mState); + } + ~nsDemuxerAutoOggSyncState() { + sandbox_invoke(mSandbox, ogg_sync_clear, mState); + mSandbox.free_in_sandbox(mState); + } + rlbox_sandbox_ogg& mSandbox; + tainted_ogg mState; +}; + +int64_t OggDemuxer::RangeEndTime(TrackInfo::TrackType aType, + int64_t aEndOffset) { + int64_t position = Resource(aType)->Tell(); + int64_t endTime = RangeEndTime(aType, 0, aEndOffset, false); + nsresult res = + Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, position); + NS_ENSURE_SUCCESS(res, -1); + return endTime; +} + +int64_t OggDemuxer::RangeEndTime(TrackInfo::TrackType aType, + int64_t aStartOffset, int64_t aEndOffset, + bool aCachedDataOnly) { + nsDemuxerAutoOggSyncState 
sync(*mSandbox); + + // We need to find the last page which ends before aEndOffset that + // has a granulepos that we can convert to a timestamp. We do this by + // backing off from aEndOffset until we encounter a page on which we can + // interpret the granulepos. If while backing off we encounter a page which + // we've previously encountered before, we'll either backoff again if we + // haven't found an end time yet, or return the last end time found. + const int step = 5000; + const int maxOggPageSize = 65306; + int64_t readStartOffset = aEndOffset; + int64_t readLimitOffset = aEndOffset; + int64_t readHead = aEndOffset; + int64_t endTime = -1; + uint32_t checksumAfterSeek = 0; + uint32_t prevChecksumAfterSeek = 0; + bool mustBackOff = false; + tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return -1; + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + while (true) { + tainted_ogg seek_ret = + sandbox_invoke(*mSandbox, ogg_sync_pageseek, sync.mState, page); + + // We aren't really verifying the value of seek_ret below. + // We are merely ensuring that it won't overflow an integer. + // However we are assigning the value to ret which is marked tainted, so + // this is fine. + bool failedVerify = false; + CheckedInt checker; + tainted_ogg ret = CopyAndVerifyOrFail( + seek_ret, (static_cast(checker = val), checker.isValid()), + &failedVerify); + if (failedVerify) { + return -1; + } + + if (ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 0) { + // We need more data if we've not encountered a page we've seen before, + // or we've read to the end of file. + if (mustBackOff || readHead == aEndOffset || readHead == aStartOffset) { + if (endTime != -1 || readStartOffset == 0) { + // We have encountered a page before, or we're at the end of file. 
+ break; + } + mustBackOff = false; + prevChecksumAfterSeek = checksumAfterSeek; + checksumAfterSeek = 0; + sandbox_invoke(*mSandbox, ogg_sync_reset, sync.mState); + readStartOffset = + std::max(static_cast(0), readStartOffset - step); + // There's no point reading more than the maximum size of + // an Ogg page into data we've previously scanned. Any data + // between readLimitOffset and aEndOffset must be garbage + // and we can ignore it thereafter. + readLimitOffset = + std::min(readLimitOffset, readStartOffset + maxOggPageSize); + readHead = std::max(aStartOffset, readStartOffset); + } + + int64_t limit = + std::min(static_cast(UINT32_MAX), aEndOffset - readHead); + limit = std::max(static_cast(0), limit); + limit = std::min(limit, static_cast(step)); + uint32_t bytesToRead = static_cast(limit); + uint32_t bytesRead = 0; + tainted_ogg buffer_tainted = + sandbox_invoke(*mSandbox, ogg_sync_buffer, sync.mState, bytesToRead); + char* buffer = buffer_tainted.copy_and_verify_buffer_address( + [](uintptr_t val) { return reinterpret_cast(val); }, + bytesToRead); + MOZ_ASSERT(buffer, "Must have buffer"); + nsresult res; + if (aCachedDataOnly) { + res = Resource(aType)->GetResource()->ReadFromCache(buffer, readHead, + bytesToRead); + NS_ENSURE_SUCCESS(res, -1); + bytesRead = bytesToRead; + } else { + MOZ_ASSERT(readHead < aEndOffset, + "resource pos must be before range end"); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, readHead); + NS_ENSURE_SUCCESS(res, -1); + res = Resource(aType)->Read(buffer, bytesToRead, &bytesRead); + NS_ENSURE_SUCCESS(res, -1); + } + readHead += bytesRead; + if (readHead > readLimitOffset) { + mustBackOff = true; + } + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = sandbox_invoke(*mSandbox, ogg_sync_wrote, sync.mState, bytesRead); + bool failedWroteVerify = false; + int wrote_success = + CopyAndVerifyOrFail(ret, val == 0 || val == -1, &failedWroteVerify); + if 
(failedWroteVerify) { + return -1; + } + + if (wrote_success != 0) { + endTime = -1; + break; + } + continue; + } + + if (ret.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) < 0 || + sandbox_invoke(*mSandbox, ogg_page_granulepos, page) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) < 0) { + continue; + } + + tainted_ogg checksum_tainted = + rlbox::from_opaque(GetPageChecksum(page.to_opaque())); + uint32_t checksum = checksum_tainted.unverified_safe_because( + "checksum is only being used as a hint as part of search for end time. " + "Incorrect values will not affect the memory safety of the renderer."); + if (checksumAfterSeek == 0) { + // This is the first page we've decoded after a backoff/seek. Remember + // the page checksum. If we backoff further and encounter this page + // again, we'll know that we won't find a page with an end time after + // this one, so we'll know to back off again. + checksumAfterSeek = checksum; + } + if (checksum == prevChecksumAfterSeek) { + // This page has the same checksum as the first page we encountered + // after the last backoff/seek. Since we've already scanned after this + // page and failed to find an end time, we may as well backoff again and + // try to find an end time from an earlier page. + mustBackOff = true; + continue; + } + + int64_t granulepos = + sandbox_invoke(*mSandbox, ogg_page_granulepos, page) + .unverified_safe_because( + "If this is incorrect it may lead to incorrect seeking " + "behavior in the stream, however will not affect the memory " + "safety of the Firefox renderer."); + uint32_t serial = static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON)); + + OggCodecState* codecState = nullptr; + codecState = mCodecStore.Get(serial); + if (!codecState) { + // This page is from a bitstream which we haven't encountered yet. + // It's probably from a new "link" in a "chained" ogg. 
Don't + // bother even trying to find a duration... + SetChained(); + endTime = -1; + break; + } + + int64_t t = codecState->Time(granulepos); + if (t != -1) { + endTime = t; + } + } + + return endTime; +} + +nsresult OggDemuxer::GetSeekRanges(TrackInfo::TrackType aType, + nsTArray& aRanges) { + AutoPinned resource(Resource(aType)->GetResource()); + MediaByteRangeSet cached; + nsresult res = resource->GetCachedRanges(cached); + NS_ENSURE_SUCCESS(res, res); + + for (uint32_t index = 0; index < cached.Length(); index++) { + auto& range = cached[index]; + int64_t startTime = -1; + int64_t endTime = -1; + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + int64_t startOffset = range.mStart; + int64_t endOffset = range.mEnd; + startTime = RangeStartTime(aType, startOffset); + if (startTime != -1 && ((endTime = RangeEndTime(aType, endOffset)) != -1)) { + NS_WARNING_ASSERTION(startTime < endTime, + "Start time must be before end time"); + aRanges.AppendElement( + SeekRange(startOffset, endOffset, startTime, endTime)); + } + } + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +OggDemuxer::SeekRange OggDemuxer::SelectSeekRange( + TrackInfo::TrackType aType, const nsTArray& ranges, + int64_t aTarget, int64_t aStartTime, int64_t aEndTime, bool aExact) { + int64_t so = 0; + int64_t eo = Resource(aType)->GetLength(); + int64_t st = aStartTime; + int64_t et = aEndTime; + for (uint32_t i = 0; i < ranges.Length(); i++) { + const SeekRange& r = ranges[i]; + if (r.mTimeStart < aTarget) { + so = r.mOffsetStart; + st = r.mTimeStart; + } + if (r.mTimeEnd >= aTarget && r.mTimeEnd < et) { + eo = r.mOffsetEnd; + et = r.mTimeEnd; + } + + if (r.mTimeStart < aTarget && aTarget <= r.mTimeEnd) { + // Target lies exactly in this range. 
+ return ranges[i]; + } + } + if (aExact || eo == -1) { + return SeekRange(); + } + return SeekRange(so, eo, st, et); +} + +nsresult OggDemuxer::SeekInBufferedRange(TrackInfo::TrackType aType, + int64_t aTarget, + int64_t aAdjustedTarget, + int64_t aStartTime, int64_t aEndTime, + const nsTArray& aRanges, + const SeekRange& aRange) { + OGG_DEBUG("Seeking in buffered data to %" PRId64 " using bisection search", + aTarget); + if (aType == TrackInfo::kVideoTrack || aAdjustedTarget >= aTarget) { + // We know the exact byte range in which the target must lie. It must + // be buffered in the media cache. Seek there. + nsresult res = SeekBisection(aType, aTarget, aRange, 0); + if (NS_FAILED(res) || aType != TrackInfo::kVideoTrack) { + return res; + } + + // We have an active Theora bitstream. Peek the next Theora frame, and + // extract its keyframe's time. + DemuxUntilPacketAvailable(aType, mTheoraState); + ogg_packet* packet = mTheoraState->PacketPeek(); + if (packet && !mTheoraState->IsKeyframe(packet)) { + // First post-seek frame isn't a keyframe, seek back to previous keyframe, + // otherwise we'll get visual artifacts. 
+ MOZ_ASSERT(packet->granulepos != -1, "Must have a granulepos"); + int shift = mTheoraState->KeyFrameGranuleJobs(); + int64_t keyframeGranulepos = (packet->granulepos >> shift) << shift; + int64_t keyframeTime = mTheoraState->StartTime(keyframeGranulepos); + SEEK_LOG(LogLevel::Debug, + ("Keyframe for %lld is at %lld, seeking back to it", frameTime, + keyframeTime)); + aAdjustedTarget = std::min(aAdjustedTarget, keyframeTime); + } + } + + nsresult res = NS_OK; + if (aAdjustedTarget < aTarget) { + SeekRange k = SelectSeekRange(aType, aRanges, aAdjustedTarget, aStartTime, + aEndTime, false); + res = SeekBisection(aType, aAdjustedTarget, k, OGG_SEEK_FUZZ_USECS); + } + return res; +} + +nsresult OggDemuxer::SeekInUnbuffered(TrackInfo::TrackType aType, + int64_t aTarget, int64_t aStartTime, + int64_t aEndTime, + const nsTArray& aRanges) { + OGG_DEBUG("Seeking in unbuffered data to %" PRId64 " using bisection search", + aTarget); + + // If we've got an active Theora bitstream, determine the maximum possible + // time in usecs which a keyframe could be before a given interframe. We + // subtract this from our seek target, seek to the new target, and then + // will decode forward to the original seek target. We should encounter a + // keyframe in that interval. This prevents us from needing to run two + // bisections; one for the seek target frame, and another to find its + // keyframe. It's usually faster to just download this extra data, rather + // than perform two bisections to find the seek target's keyframe. We + // don't do this offsetting when seeking in a buffered range, + // as the extra decoding causes a noticeable speed hit when all the data + // is buffered (compared to just doing a bisection to exactly find the + // keyframe). + int64_t keyframeOffsetMs = 0; + if (aType == TrackInfo::kVideoTrack && mTheoraState) { + keyframeOffsetMs = mTheoraState->MaxKeyframeOffset(); + } + // Add in the Opus pre-roll if necessary, as well. 
+ if (aType == TrackInfo::kAudioTrack && mOpusState) { + keyframeOffsetMs = + std::max(keyframeOffsetMs, OGG_SEEK_OPUS_PREROLL.ToMilliseconds()); + } + int64_t seekTarget = std::max(aStartTime, aTarget - keyframeOffsetMs); + // Minimize the bisection search space using the known timestamps from the + // buffered ranges. + SeekRange k = + SelectSeekRange(aType, aRanges, seekTarget, aStartTime, aEndTime, false); + return SeekBisection(aType, seekTarget, k, OGG_SEEK_FUZZ_USECS); +} + +nsresult OggDemuxer::SeekBisection(TrackInfo::TrackType aType, int64_t aTarget, + const SeekRange& aRange, uint32_t aFuzz) { + nsresult res; + + if (aTarget <= aRange.mTimeStart) { + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(res, res); + return NS_OK; + } + + // Bisection search, find start offset of last page with end time less than + // the seek target. + ogg_int64_t startOffset = aRange.mOffsetStart; + ogg_int64_t startTime = aRange.mTimeStart; + ogg_int64_t startLength = 0; // Length of the page at startOffset. + ogg_int64_t endOffset = aRange.mOffsetEnd; + ogg_int64_t endTime = aRange.mTimeEnd; + + ogg_int64_t seekTarget = aTarget; + int64_t seekLowerBound = std::max(static_cast(0), aTarget - aFuzz); + int hops = 0; + DebugOnly previousGuess = -1; + int backsteps = 0; + const int maxBackStep = 10; + MOZ_ASSERT( + static_cast(PAGE_STEP) * pow(2.0, maxBackStep) < INT32_MAX, + "Backstep calculation must not overflow"); + + // Seek via bisection search. Loop until we find the offset where the page + // before the offset is before the seek target, and the page after the offset + // is after the seek target. 
+ tainted_ogg page = mSandbox->malloc_in_sandbox(); + if (!page) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto clean_page = MakeScopeExit([&] { mSandbox->free_in_sandbox(page); }); + while (true) { + ogg_int64_t duration = 0; + double target = 0; + ogg_int64_t interval = 0; + ogg_int64_t guess = 0; + int skippedBytes = 0; + ogg_int64_t pageOffset = 0; + ogg_int64_t pageLength = 0; + ogg_int64_t granuleTime = -1; + bool mustBackoff = false; + + // Guess where we should bisect to, based on the bit rate and the time + // remaining in the interval. Loop until we can determine the time at + // the guess offset. + while (true) { + // Discard any previously buffered packets/pages. + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + + interval = endOffset - startOffset - startLength; + if (interval == 0) { + // Our interval is empty, we've found the optimal seek point, as the + // page at the start offset is before the seek target, and the page + // at the end offset is after the seek target. + SEEK_LOG(LogLevel::Debug, + ("Interval narrowed, terminating bisection.")); + break; + } + + // Guess bisection point. + duration = endTime - startTime; + target = (double)(seekTarget - startTime) / (double)duration; + guess = startOffset + startLength + + static_cast((double)interval * target); + guess = std::min(guess, endOffset - PAGE_STEP); + if (mustBackoff) { + // We previously failed to determine the time at the guess offset, + // probably because we ran out of data to decode. This usually happens + // when we guess very close to the end offset. So reduce the guess + // offset using an exponential backoff until we determine the time. + SEEK_LOG( + LogLevel::Debug, + ("Backing off %d bytes, backsteps=%d", + static_cast(PAGE_STEP * pow(2.0, backsteps)), backsteps)); + guess -= PAGE_STEP * static_cast(pow(2.0, backsteps)); + + if (guess <= startOffset) { + // We've tried to backoff to before the start offset of our seek + // range. 
This means we couldn't find a seek termination position + // near the end of the seek range, so just set the seek termination + // condition, and break out of the bisection loop. We'll begin + // decoding from the start of the seek range. + interval = 0; + break; + } + + backsteps = std::min(backsteps + 1, maxBackStep); + // We reset mustBackoff. If we still need to backoff further, it will + // be set to true again. + mustBackoff = false; + } else { + backsteps = 0; + } + guess = std::max(guess, startOffset + startLength); + + SEEK_LOG(LogLevel::Debug, + ("Seek loop start[o=%lld..%lld t=%lld] " + "end[o=%lld t=%lld] " + "interval=%lld target=%lf guess=%lld", + startOffset, (startOffset + startLength), startTime, endOffset, + endTime, interval, target, guess)); + + MOZ_ASSERT(guess >= startOffset + startLength, + "Guess must be after range start"); + MOZ_ASSERT(guess < endOffset, "Guess must be before range end"); + MOZ_ASSERT(guess != previousGuess, + "Guess should be different to previous"); + previousGuess = guess; + + hops++; + + // Locate the next page after our seek guess, and then figure out the + // granule time of the audio and video bitstreams there. We can then + // make a bisection decision based on our location in the media. + PageSyncResult pageSyncResult = + PageSync(mSandbox.get(), Resource(aType), OggSyncState(aType), false, + guess, endOffset, page, skippedBytes); + NS_ENSURE_TRUE(pageSyncResult != PAGE_SYNC_ERROR, NS_ERROR_FAILURE); + + if (pageSyncResult == PAGE_SYNC_END_OF_RANGE) { + // Our guess was too close to the end, we've ended up reading the end + // page. Backoff exponentially from the end point, in case the last + // page/frame/sample is huge. + mustBackoff = true; + SEEK_LOG(LogLevel::Debug, ("Hit the end of range, backing off")); + continue; + } + + // We've located a page of length |ret| at |guess + skippedBytes|. + // Remember where the page is located. 
+ pageOffset = guess + skippedBytes; + + bool failedPageLenVerify = false; + // Page length should be under 64Kb according to + // https://xiph.org/ogg/doc/libogg/ogg_page.html + pageLength = CopyAndVerifyOrFail(page->header_len + page->body_len, + val <= 64 * 1024, &failedPageLenVerify); + if (failedPageLenVerify) { + return NS_ERROR_FAILURE; + } + + // Read pages until we can determine the granule time of the audio and + // video bitstream. + ogg_int64_t audioTime = -1; + ogg_int64_t videoTime = -1; + do { + // Add the page to its codec state, determine its granule time. + uint32_t serial = static_cast( + sandbox_invoke(*mSandbox, ogg_page_serialno, page) + .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON)); + OggCodecState* codecState = mCodecStore.Get(serial); + if (codecState && GetCodecStateType(codecState) == aType) { + if (codecState->mActive) { + int ret = + sandbox_invoke(*mSandbox, ogg_stream_pagein, codecState->mState, + page) + .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); + NS_ENSURE_TRUE(ret == 0, NS_ERROR_FAILURE); + } + + ogg_int64_t granulepos = + sandbox_invoke(*mSandbox, ogg_page_granulepos, page) + .unverified_safe_because( + "If this is incorrect it may lead to incorrect seeking " + "behavior in the stream, however will not affect the " + "memory safety of the Firefox renderer."); + + if (aType == TrackInfo::kAudioTrack && granulepos > 0 && + audioTime == -1) { + if (mVorbisState && serial == mVorbisState->mSerial) { + audioTime = mVorbisState->Time(granulepos); + } else if (mOpusState && serial == mOpusState->mSerial) { + audioTime = mOpusState->Time(granulepos); + } else if (mFlacState && serial == mFlacState->mSerial) { + audioTime = mFlacState->Time(granulepos); + } + } + + if (aType == TrackInfo::kVideoTrack && granulepos > 0 && + serial == mTheoraState->mSerial && videoTime == -1) { + videoTime = mTheoraState->Time(granulepos); + } + + if (pageOffset + pageLength >= endOffset) { + // Hit end of readable data. 
+ break; + } + } + if (!ReadOggPage(aType, page.to_opaque())) { + break; + } + + } while ((aType == TrackInfo::kAudioTrack && audioTime == -1) || + (aType == TrackInfo::kVideoTrack && videoTime == -1)); + + if ((aType == TrackInfo::kAudioTrack && audioTime == -1) || + (aType == TrackInfo::kVideoTrack && videoTime == -1)) { + // We don't have timestamps for all active tracks... + if (pageOffset == startOffset + startLength && + pageOffset + pageLength >= endOffset) { + // We read the entire interval without finding timestamps for all + // active tracks. We know the interval start offset is before the seek + // target, and the interval end is after the seek target, and we can't + // terminate inside the interval, so we terminate the seek at the + // start of the interval. + interval = 0; + break; + } + + // We should backoff; cause the guess to back off from the end, so + // that we've got more room to capture. + mustBackoff = true; + continue; + } + + // We've found appropriate time stamps here. Proceed to bisect + // the search space. + granuleTime = aType == TrackInfo::kAudioTrack ? audioTime : videoTime; + MOZ_ASSERT(granuleTime > 0, "Must get a granuletime"); + break; + } // End of "until we determine time at guess offset" loop. + + if (interval == 0) { + // Seek termination condition; we've found the page boundary of the + // last page before the target, and the first page after the target. + SEEK_LOG(LogLevel::Debug, + ("Terminating seek at offset=%lld", startOffset)); + MOZ_ASSERT(startTime < aTarget, + "Start time must always be less than target"); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, startOffset); + NS_ENSURE_SUCCESS(res, res); + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + break; + } + + SEEK_LOG(LogLevel::Debug, + ("Time at offset %lld is %lld", guess, granuleTime)); + if (granuleTime < seekTarget && granuleTime > seekLowerBound) { + // We're within the fuzzy region in which we want to terminate the search. 
+ res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, pageOffset); + NS_ENSURE_SUCCESS(res, res); + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + SEEK_LOG(LogLevel::Debug, + ("Terminating seek at offset=%lld", pageOffset)); + break; + } + + if (granuleTime >= seekTarget) { + // We've landed after the seek target. + MOZ_ASSERT(pageOffset < endOffset, "offset_end must decrease"); + endOffset = pageOffset; + endTime = granuleTime; + } else if (granuleTime < seekTarget) { + // Landed before seek target. + MOZ_ASSERT(pageOffset >= startOffset + startLength, + "Bisection point should be at or after end of first page in " + "interval"); + startOffset = pageOffset; + startLength = pageLength; + startTime = granuleTime; + } + MOZ_ASSERT(startTime <= seekTarget, "Must be before seek target"); + MOZ_ASSERT(endTime >= seekTarget, "End must be after seek target"); + } + + (void)hops; + SEEK_LOG(LogLevel::Debug, ("Seek complete in %d bisections.", hops)); + + return NS_OK; +} + +#undef OGG_DEBUG +#undef SEEK_LOG +#undef CopyAndVerifyOrFail +} // namespace mozilla diff --git a/dom/media/ogg/OggDemuxer.h b/dom/media/ogg/OggDemuxer.h new file mode 100644 index 0000000000..8a65398cf9 --- /dev/null +++ b/dom/media/ogg/OggDemuxer.h @@ -0,0 +1,363 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#if !defined(OggDemuxer_h_) +# define OggDemuxer_h_ + +# include "nsTArray.h" +# include "MediaDataDemuxer.h" +# include "OggCodecState.h" +# include "OggCodecStore.h" +# include "OggRLBoxTypes.h" +# include "MediaMetadataManager.h" + +# include + +namespace mozilla { + +class OggTrackDemuxer; + +DDLoggedTypeDeclNameAndBase(OggDemuxer, MediaDataDemuxer); +DDLoggedTypeNameAndBase(OggTrackDemuxer, MediaTrackDemuxer); + +class OggDemuxer : public MediaDataDemuxer, + public DecoderDoctorLifeLogger { + public: + explicit OggDemuxer(MediaResource* aResource); + + RefPtr Init() override; + + uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override; + + already_AddRefed GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) override; + + bool IsSeekable() const override; + + UniquePtr GetCrypto() override; + + // Set the events to notify when chaining is encountered. + void SetChainingEvents(TimedMetadataEventProducer* aMetadataEvent, + MediaEventProducer* aOnSeekableEvent); + + private: + // helpers for friend OggTrackDemuxer + UniquePtr GetTrackInfo(TrackInfo::TrackType aType, + size_t aTrackNumber) const; + + struct nsAutoOggSyncState { + explicit nsAutoOggSyncState(rlbox_sandbox_ogg* aSandbox); + ~nsAutoOggSyncState(); + rlbox_sandbox_ogg* mSandbox; + tainted_opaque_ogg mState; + }; + media::TimeIntervals GetBuffered(TrackInfo::TrackType aType); + void FindStartTime(int64_t& aOutStartTime); + void FindStartTime(TrackInfo::TrackType, int64_t& aOutStartTime); + + nsresult SeekInternal(TrackInfo::TrackType aType, + const media::TimeUnit& aTarget); + + // Seeks to the keyframe preceding the target time using available + // keyframe indexes. + enum IndexedSeekResult { + SEEK_OK, // Success. + SEEK_INDEX_FAIL, // Failure due to no index, or invalid index. + SEEK_FATAL_ERROR // Error returned by a stream operation. 
+ }; + IndexedSeekResult SeekToKeyframeUsingIndex(TrackInfo::TrackType aType, + int64_t aTarget); + + // Rolls back a seek-using-index attempt, returning a failure error code. + IndexedSeekResult RollbackIndexedSeek(TrackInfo::TrackType aType, + int64_t aOffset); + + // Represents a section of contiguous media, with a start and end offset, + // and the timestamps of the start and end of that range, that is cached. + // Used to denote the extremities of a range in which we can seek quickly + // (because it's cached). + class SeekRange { + public: + SeekRange() : mOffsetStart(0), mOffsetEnd(0), mTimeStart(0), mTimeEnd(0) {} + + SeekRange(int64_t aOffsetStart, int64_t aOffsetEnd, int64_t aTimeStart, + int64_t aTimeEnd) + : mOffsetStart(aOffsetStart), + mOffsetEnd(aOffsetEnd), + mTimeStart(aTimeStart), + mTimeEnd(aTimeEnd) {} + + bool IsNull() const { + return mOffsetStart == 0 && mOffsetEnd == 0 && mTimeStart == 0 && + mTimeEnd == 0; + } + + int64_t mOffsetStart, mOffsetEnd; // in bytes. + int64_t mTimeStart, mTimeEnd; // in usecs. + }; + + nsresult GetSeekRanges(TrackInfo::TrackType aType, + nsTArray& aRanges); + SeekRange SelectSeekRange(TrackInfo::TrackType aType, + const nsTArray& ranges, int64_t aTarget, + int64_t aStartTime, int64_t aEndTime, bool aExact); + + // Seeks to aTarget usecs in the buffered range aRange using bisection search, + // or to the keyframe prior to aTarget if we have video. aAdjustedTarget is + // an adjusted version of the target used to account for Opus pre-roll, if + // necessary. aStartTime must be the presentation time at the start of media, + // and aEndTime the time at end of media. aRanges must be the time/byte ranges + // buffered in the media cache as per GetSeekRanges(). 
+ nsresult SeekInBufferedRange(TrackInfo::TrackType aType, int64_t aTarget, + int64_t aAdjustedTarget, int64_t aStartTime, + int64_t aEndTime, + const nsTArray& aRanges, + const SeekRange& aRange); + + // Seeks to before aTarget usecs in media using bisection search. If the media + // has video, this will seek to before the keyframe required to render the + // media at aTarget. Will use aRanges in order to narrow the bisection + // search space. aStartTime must be the presentation time at the start of + // media, and aEndTime the time at end of media. aRanges must be the time/byte + // ranges buffered in the media cache as per GetSeekRanges(). + nsresult SeekInUnbuffered(TrackInfo::TrackType aType, int64_t aTarget, + int64_t aStartTime, int64_t aEndTime, + const nsTArray& aRanges); + + // Performs a seek bisection to move the media stream's read cursor to the + // last ogg page boundary which has end time before aTarget usecs on both the + // Theora and Vorbis bitstreams. Limits its search to data inside aRange; + // i.e. it will only read inside of the aRange's start and end offsets. + // aFuzz is the number of usecs of leniency we'll allow; we'll terminate the + // seek when we land in the range (aTime - aFuzz, aTime) usecs. + nsresult SeekBisection(TrackInfo::TrackType aType, int64_t aTarget, + const SeekRange& aRange, uint32_t aFuzz); + + // Chunk size to read when reading Ogg files. Average Ogg page length + // is about 4300 bytes, so we read the file in chunks larger than that. 
+ static const int PAGE_STEP = 8192; + + enum PageSyncResult { + PAGE_SYNC_ERROR = 1, + PAGE_SYNC_END_OF_RANGE = 2, + PAGE_SYNC_OK = 3 + }; + static PageSyncResult PageSync(rlbox_sandbox_ogg* aSandbox, + MediaResourceIndex* aResource, + tainted_opaque_ogg aState, + bool aCachedDataOnly, int64_t aOffset, + int64_t aEndOffset, + tainted_ogg aPage, + int& aSkippedBytes); + + // Demux next Ogg packet + ogg_packet* GetNextPacket(TrackInfo::TrackType aType); + + nsresult Reset(TrackInfo::TrackType aType); + + static const nsString GetKind(const nsCString& aRole); + static void InitTrack(MessageField* aMsgInfo, TrackInfo* aInfo, bool aEnable); + + // Really private! + ~OggDemuxer(); + + // Read enough of the file to identify track information and header + // packets necessary for decoding to begin. + nsresult ReadMetadata(); + + // Read a page of data from the Ogg file. Returns true if a page has been + // read, false if the page read failed or end of file reached. + bool ReadOggPage(TrackInfo::TrackType aType, + tainted_opaque_ogg aPage); + + // Send a page off to the individual streams it belongs to. + // Reconstructed packets, if any are ready, will be available + // on the individual OggCodecStates. + nsresult DemuxOggPage(TrackInfo::TrackType aType, + tainted_opaque_ogg aPage); + + // Read data and demux until a packet is available on the given stream state + void DemuxUntilPacketAvailable(TrackInfo::TrackType aType, + OggCodecState* aState); + + // Reads and decodes header packets for aState, until either header decode + // fails, or is complete. Initializes the codec state before returning. + // Returns true if reading headers and initialization of the stream + // succeeds. + bool ReadHeaders(TrackInfo::TrackType aType, OggCodecState* aState); + + // Reads the next link in the chain. + bool ReadOggChain(const media::TimeUnit& aLastEndTime); + + // Set this media as being a chain and notifies the state machine that the + // media is no longer seekable. 
+ void SetChained(); + + // Fills aTracks with the serial numbers of each active stream, for use by + // various SkeletonState functions. + void BuildSerialList(nsTArray& aTracks); + + // Setup target bitstreams for decoding. + void SetupTarget(OggCodecState** aSavedState, OggCodecState* aNewState); + void SetupTargetSkeleton(); + void SetupMediaTracksInfo(const nsTArray& aSerials); + void FillTags(TrackInfo* aInfo, UniquePtr&& aTags); + + // Compute an ogg page's checksum + tainted_opaque_ogg GetPageChecksum( + tainted_opaque_ogg aPage); + + // Get the end time of aEndOffset. This is the playback position we'd reach + // after playback finished at aEndOffset. + int64_t RangeEndTime(TrackInfo::TrackType aType, int64_t aEndOffset); + + // Get the end time of aEndOffset, without reading before aStartOffset. + // This is the playback position we'd reach after playback finished at + // aEndOffset. If bool aCachedDataOnly is true, then we'll only read + // from data which is cached in the media cache, otherwise we'll do + // regular blocking reads from the media stream. If bool aCachedDataOnly + // is true, this can safely be called on the main thread, otherwise it + // must be called on the state machine thread. + int64_t RangeEndTime(TrackInfo::TrackType aType, int64_t aStartOffset, + int64_t aEndOffset, bool aCachedDataOnly); + + // Get the start time of the range beginning at aOffset. This is the start + // time of the first aType sample we'd be able to play if we + // started playback at aOffset. + int64_t RangeStartTime(TrackInfo::TrackType aType, int64_t aOffset); + + // All invocations of libogg functionality from the demuxer are sandboxed using + // wasm library sandboxes on supported platforms. These functions create + // and destroy the sandbox instance. + static rlbox_sandbox_ogg* CreateSandbox(); + struct SandboxDestroy { + void operator()(rlbox_sandbox_ogg* sandbox); + }; + + // The sandbox instance used to sandbox libogg functionality in the demuxer. 
+ // This must be declared before other members so that constructors/destructors + // run in the right order. + std::unique_ptr mSandbox; + + MediaInfo mInfo; + nsTArray> mDemuxers; + + // Map of codec-specific bitstream states. + OggCodecStore mCodecStore; + + // Decode state of the Theora bitstream we're decoding, if we have video. + OggCodecState* mTheoraState; + + // Decode state of the Vorbis bitstream we're decoding, if we have audio. + OggCodecState* mVorbisState; + + // Decode state of the Opus bitstream we're decoding, if we have one. + OggCodecState* mOpusState; + + // Decode state of the Flac bitstream we're decoding, if we have one. + OggCodecState* mFlacState; + + // Get the bitstream decode state for the given track type. + OggCodecState* GetTrackCodecState(TrackInfo::TrackType aType) const; + TrackInfo::TrackType GetCodecStateType(OggCodecState* aState) const; + + // Represents the user pref media.opus.enabled at the time our + // constructor was called. We can't check it dynamically because + // we're not on the main thread. + bool mOpusEnabled; + + // Decode state of the Skeleton bitstream. + SkeletonState* mSkeletonState; + + // Ogg decoding state. 
+ struct OggStateContext { + explicit OggStateContext(MediaResource* aResource, + rlbox_sandbox_ogg* aSandbox) + : mOggState(aSandbox), mResource(aResource), mNeedKeyframe(true) {} + nsAutoOggSyncState mOggState; + MediaResourceIndex mResource; + Maybe mStartTime; + bool mNeedKeyframe; + }; + + OggStateContext& OggState(TrackInfo::TrackType aType); + tainted_opaque_ogg OggSyncState(TrackInfo::TrackType aType); + MediaResourceIndex* Resource(TrackInfo::TrackType aType); + MediaResourceIndex* CommonResource(); + OggStateContext mAudioOggState; + OggStateContext mVideoOggState; + + Maybe mStartTime; + + // Booleans to indicate if we have audio and/or video data + bool HasVideo() const; + bool HasAudio() const; + bool HasSkeleton() const { + return mSkeletonState != 0 && mSkeletonState->mActive; + } + bool HaveStartTime() const; + bool HaveStartTime(TrackInfo::TrackType aType); + int64_t StartTime() const; + int64_t StartTime(TrackInfo::TrackType aType); + + // The picture region inside Theora frame to be displayed, if we have + // a Theora video track. + gfx::IntRect mPicture; + + // True if we are decoding a chained ogg. + bool mIsChained; + + // Total audio duration played so far. + media::TimeUnit mDecodedAudioDuration; + + // Events manager + TimedMetadataEventProducer* mTimedMetadataEvent; + MediaEventProducer* mOnSeekableEvent; + + // This will be populated only if a content change occurs, otherwise it + // will be left as null so the original metadata is used. + // It is updated once a chained ogg is encountered. + // As Ogg chaining is only supported for audio, we only need an audio track + // info. 
+ RefPtr mSharedAudioTrackInfo; + + friend class OggTrackDemuxer; +}; + +class OggTrackDemuxer : public MediaTrackDemuxer, + public DecoderDoctorLifeLogger { + public: + OggTrackDemuxer(OggDemuxer* aParent, TrackInfo::TrackType aType, + uint32_t aTrackNumber); + + UniquePtr GetInfo() const override; + + RefPtr Seek(const media::TimeUnit& aTime) override; + + RefPtr GetSamples(int32_t aNumSamples = 1) override; + + void Reset() override; + + RefPtr SkipToNextRandomAccessPoint( + const media::TimeUnit& aTimeThreshold) override; + + media::TimeIntervals GetBuffered() override; + + void BreakCycles() override; + + private: + ~OggTrackDemuxer(); + void SetNextKeyFrameTime(); + RefPtr NextSample(); + RefPtr mParent; + TrackInfo::TrackType mType; + UniquePtr mInfo; + + // Queued sample extracted by the demuxer, but not yet returned. + RefPtr mQueuedSample; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggRLBox.h b/dom/media/ogg/OggRLBox.h new file mode 100644 index 0000000000..0a451fa077 --- /dev/null +++ b/dom/media/ogg/OggRLBox.h @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef OGG_RLBOX +#define OGG_RLBOX + +#include "OggRLBoxTypes.h" + +// Load general firefox configuration of RLBox +#include "mozilla/rlbox/rlbox_config.h" + +#ifdef MOZ_WASM_SANDBOXING_OGG +// Include the generated header file so that we are able to resolve the symbols +// in the wasm binary +# include "rlbox.wasm.h" +# define RLBOX_USE_STATIC_CALLS() rlbox_wasm2c_sandbox_lookup_symbol +# include "mozilla/rlbox/rlbox_wasm2c_sandbox.hpp" +#else +# define RLBOX_USE_STATIC_CALLS() rlbox_noop_sandbox_lookup_symbol +# include "mozilla/rlbox/rlbox_noop_sandbox.hpp" +#endif + +#include "mozilla/rlbox/rlbox.hpp" + +#include "ogg/OggStructsForRLBox.h" +rlbox_load_structs_from_library(ogg); + +#endif diff --git a/dom/media/ogg/OggRLBoxTypes.h b/dom/media/ogg/OggRLBoxTypes.h new file mode 100644 index 0000000000..d2dfdd1dff --- /dev/null +++ b/dom/media/ogg/OggRLBoxTypes.h @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OGG_RLBOX_TYPES +#define OGG_RLBOX_TYPES + +#include "mozilla/rlbox/rlbox_types.hpp" + +#ifdef MOZ_WASM_SANDBOXING_OGG +RLBOX_DEFINE_BASE_TYPES_FOR(ogg, wasm2c) +#else +RLBOX_DEFINE_BASE_TYPES_FOR(ogg, noop) +#endif + +#endif diff --git a/dom/media/ogg/OggWriter.cpp b/dom/media/ogg/OggWriter.cpp new file mode 100644 index 0000000000..6f29d44124 --- /dev/null +++ b/dom/media/ogg/OggWriter.cpp @@ -0,0 +1,197 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "OggWriter.h" +#include "prtime.h" +#include "mozilla/ProfilerLabels.h" + +#define LOG(args, ...) + +namespace mozilla { + +OggWriter::OggWriter() + : ContainerWriter(), mOggStreamState(), mOggPage(), mPacket() { + if (NS_FAILED(Init())) { + LOG("ERROR! Fail to initialize the OggWriter."); + } +} + +OggWriter::~OggWriter() { + if (mInitialized) { + ogg_stream_clear(&mOggStreamState); + } + // mPacket's data was always owned by us, no need to ogg_packet_clear. +} + +nsresult OggWriter::Init() { + MOZ_ASSERT(!mInitialized); + + // The serial number (serialno) should be a random number, for the current + // implementation where the output file contains only a single stream, this + // serialno is used to differentiate between files. + srand(static_cast(PR_Now())); + int rc = ogg_stream_init(&mOggStreamState, rand()); + + mPacket.b_o_s = 1; + mPacket.e_o_s = 0; + mPacket.granulepos = 0; + mPacket.packet = nullptr; + mPacket.packetno = 0; + mPacket.bytes = 0; + + mInitialized = (rc == 0); + + return (rc == 0) ? NS_OK : NS_ERROR_NOT_INITIALIZED; +} + +nsresult OggWriter::WriteEncodedTrack( + const nsTArray>& aData, uint32_t aFlags) { + AUTO_PROFILER_LABEL("OggWriter::WriteEncodedTrack", OTHER); + + uint32_t len = aData.Length(); + for (uint32_t i = 0; i < len; i++) { + if (aData[i]->mFrameType != EncodedFrame::OPUS_AUDIO_FRAME) { + LOG("[OggWriter] wrong encoded data type!"); + return NS_ERROR_FAILURE; + } + + // only pass END_OF_STREAM on the last frame! + nsresult rv = WriteEncodedData( + *aData[i]->mFrameData, aData[i]->mDuration, + i < len - 1 ? 
(aFlags & ~ContainerWriter::END_OF_STREAM) : aFlags); + if (NS_FAILED(rv)) { + LOG("%p Failed to WriteEncodedTrack!", this); + return rv; + } + } + return NS_OK; +} + +nsresult OggWriter::WriteEncodedData(const nsTArray& aBuffer, + int aDuration, uint32_t aFlags) { + if (!mInitialized) { + LOG("[OggWriter] OggWriter has not initialized!"); + return NS_ERROR_FAILURE; + } + + MOZ_ASSERT(!ogg_stream_eos(&mOggStreamState), + "No data can be written after eos has marked."); + + // Set eos flag to true, and once the eos is written to a packet, there must + // not be anymore pages after a page has marked as eos. + if (aFlags & ContainerWriter::END_OF_STREAM) { + LOG("[OggWriter] Set e_o_s flag to true."); + mPacket.e_o_s = 1; + } + + mPacket.packet = const_cast(aBuffer.Elements()); + mPacket.bytes = aBuffer.Length(); + mPacket.granulepos += aDuration; + + // 0 returned on success. -1 returned in the event of internal error. + // The data in the packet is copied into the internal storage managed by the + // mOggStreamState, so we are free to alter the contents of mPacket after + // this call has returned. + int rc = ogg_stream_packetin(&mOggStreamState, &mPacket); + if (rc < 0) { + LOG("[OggWriter] Failed in ogg_stream_packetin! 
(%d).", rc); + return NS_ERROR_FAILURE; + } + + if (mPacket.b_o_s) { + mPacket.b_o_s = 0; + } + mPacket.packetno++; + mPacket.packet = nullptr; + + return NS_OK; +} + +void OggWriter::ProduceOggPage(nsTArray>* aOutputBufs) { + aOutputBufs->AppendElement(); + aOutputBufs->LastElement().SetLength(mOggPage.header_len + mOggPage.body_len); + memcpy(aOutputBufs->LastElement().Elements(), mOggPage.header, + mOggPage.header_len); + memcpy(aOutputBufs->LastElement().Elements() + mOggPage.header_len, + mOggPage.body, mOggPage.body_len); +} + +nsresult OggWriter::GetContainerData(nsTArray>* aOutputBufs, + uint32_t aFlags) { + int rc = -1; + AUTO_PROFILER_LABEL("OggWriter::GetContainerData", OTHER); + // Generate the oggOpus Header + if (aFlags & ContainerWriter::GET_HEADER) { + OpusMetadata* meta = static_cast(mMetadata.get()); + NS_ASSERTION(meta, "should have meta data"); + NS_ASSERTION(meta->GetKind() == TrackMetadataBase::METADATA_OPUS, + "should have Opus meta data"); + + nsresult rv = WriteEncodedData(meta->mIdHeader, 0); + NS_ENSURE_SUCCESS(rv, rv); + + rc = ogg_stream_flush(&mOggStreamState, &mOggPage); + NS_ENSURE_TRUE(rc > 0, NS_ERROR_FAILURE); + ProduceOggPage(aOutputBufs); + + rv = WriteEncodedData(meta->mCommentHeader, 0); + NS_ENSURE_SUCCESS(rv, rv); + + rc = ogg_stream_flush(&mOggStreamState, &mOggPage); + NS_ENSURE_TRUE(rc > 0, NS_ERROR_FAILURE); + + // Force generate a page even if the amount of packet data is not enough. + // Usually do so after a header packet. + + ProduceOggPage(aOutputBufs); + } + + // return value 0 means insufficient data has accumulated to fill a page, or + // an internal error has occurred. + while (ogg_stream_pageout(&mOggStreamState, &mOggPage) != 0) { + ProduceOggPage(aOutputBufs); + } + + if (aFlags & ContainerWriter::FLUSH_NEEDED) { + // return value 0 means no packet to put into a page, or an internal error. 
+ if (ogg_stream_flush(&mOggStreamState, &mOggPage) != 0) { + ProduceOggPage(aOutputBufs); + } + mIsWritingComplete = true; + } + + // We always return NS_OK here since it's OK to call this without having + // enough data to fill a page. It's the more common case compared to internal + // errors, and we cannot distinguish the two. + return NS_OK; +} + +nsresult OggWriter::SetMetadata( + const nsTArray>& aMetadata) { + MOZ_ASSERT(aMetadata.Length() == 1); + MOZ_ASSERT(aMetadata[0]); + + AUTO_PROFILER_LABEL("OggWriter::SetMetadata", OTHER); + + if (aMetadata[0]->GetKind() != TrackMetadataBase::METADATA_OPUS) { + LOG("wrong meta data type!"); + return NS_ERROR_FAILURE; + } + // Validate each field of METADATA + mMetadata = static_cast(aMetadata[0].get()); + if (mMetadata->mIdHeader.Length() == 0) { + LOG("miss mIdHeader!"); + return NS_ERROR_FAILURE; + } + if (mMetadata->mCommentHeader.Length() == 0) { + LOG("miss mCommentHeader!"); + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/ogg/OggWriter.h b/dom/media/ogg/OggWriter.h new file mode 100644 index 0000000000..73d5bd87e9 --- /dev/null +++ b/dom/media/ogg/OggWriter.h @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OggWriter_h_ +#define OggWriter_h_ + +#include "ContainerWriter.h" +#include "OpusTrackEncoder.h" +#include + +namespace mozilla { +/** + * WriteEncodedTrack inserts raw packets into Ogg stream (ogg_stream_state), and + * GetContainerData outputs an ogg_page when enough packets have been written + * to the Ogg stream. 
+ * For more details, please reference: + * http://www.xiph.org/ogg/doc/libogg/encoding.html + */ +class OggWriter : public ContainerWriter { + public: + OggWriter(); + ~OggWriter(); + + // Write frames into the ogg container. aFlags should be set to END_OF_STREAM + // for the final set of frames. + nsresult WriteEncodedTrack(const nsTArray>& aData, + uint32_t aFlags = 0) override; + + nsresult GetContainerData(nsTArray>* aOutputBufs, + uint32_t aFlags = 0) override; + + // Check metadata type integrity and reject unacceptable track encoder. + nsresult SetMetadata( + const nsTArray>& aMetadata) override; + + private: + nsresult Init(); + + nsresult WriteEncodedData(const nsTArray& aBuffer, int aDuration, + uint32_t aFlags = 0); + + void ProduceOggPage(nsTArray>* aOutputBufs); + // Store the Medatata from track encoder + RefPtr mMetadata; + + ogg_stream_state mOggStreamState; + ogg_page mOggPage; + ogg_packet mPacket; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OpusParser.cpp b/dom/media/ogg/OpusParser.cpp new file mode 100644 index 0000000000..918888ea8c --- /dev/null +++ b/dom/media/ogg/OpusParser.cpp @@ -0,0 +1,217 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include "mozilla/EndianUtils.h" + +#include "OpusParser.h" +#include "VideoUtils.h" + +#include "opus/opus.h" +extern "C" { +#include "opus/opus_multistream.h" +} + +#include + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; +#define OPUS_LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) + +OpusParser::OpusParser() + : mRate(0), + mNominalRate(0), + mChannels(0), + mPreSkip(0), +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain(1.0f), +#else + mGain_Q16(65536), +#endif + mChannelMapping(0), + mStreams(0), + mCoupledStreams(0), + mPrevPacketGranulepos(0) { +} + +bool OpusParser::DecodeHeader(unsigned char* aData, size_t aLength) { + if (aLength < 19 || memcmp(aData, "OpusHead", 8)) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: unrecognized header")); + return false; + } + + mRate = 48000; // The Opus decoder runs at 48 kHz regardless. + + int version = aData[8]; + // Accept file format versions 0.x. + if ((version & 0xf0) != 0) { + OPUS_LOG(LogLevel::Debug, + ("Rejecting unknown Opus file version %d", version)); + return false; + } + + mChannels = aData[9]; + if (mChannels < 1) { + OPUS_LOG(LogLevel::Debug, + ("Invalid Opus file: Number of channels %d", mChannels)); + return false; + } + + mPreSkip = LittleEndian::readUint16(aData + 10); + mNominalRate = LittleEndian::readUint32(aData + 12); + double gain_dB = LittleEndian::readInt16(aData + 16) / 256.0; +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain = static_cast(pow(10, 0.05 * gain_dB)); +#else + mGain_Q16 = static_cast(std::min( + 65536 * pow(10, 0.05 * gain_dB) + 0.5, static_cast(INT32_MAX))); +#endif + mChannelMapping = aData[18]; + + if (mChannelMapping == 0) { + // Mapping family 0 only allows two channels + if (mChannels > 2) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: too many channels (%d) for" + " mapping family 0.", + mChannels)); + return false; + } + mStreams = 1; + mCoupledStreams = mChannels - 1; + mMappingTable[0] = 0; + mMappingTable[1] = 1; + } else if (mChannelMapping 
== 1 || mChannelMapping == 2 || + mChannelMapping == 255) { + // Currently only up to 8 channels are defined for mapping family 1 + if (mChannelMapping == 1 && mChannels > 8) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: too many channels (%d) for" + " mapping family 1.", + mChannels)); + return false; + } + if (mChannelMapping == 2) { + if (!IsValidMapping2ChannelsCount(mChannels)) { + return false; + } + } + if (aLength > static_cast(20 + mChannels)) { + mStreams = aData[19]; + mCoupledStreams = aData[20]; + int i; + for (i = 0; i < mChannels; i++) { + mMappingTable[i] = aData[21 + i]; + } + } else { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: channel mapping %d," + " but no channel mapping table", + mChannelMapping)); + return false; + } + } else { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: unsupported channel mapping " + "family %d", + mChannelMapping)); + return false; + } + if (mStreams < 1) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: no streams")); + return false; + } + if (mCoupledStreams > mStreams) { + OPUS_LOG(LogLevel::Debug, + ("Invalid Opus file: more coupled streams (%d) than " + "total streams (%d)", + mCoupledStreams, mStreams)); + return false; + } + +#ifdef DEBUG + OPUS_LOG(LogLevel::Debug, ("Opus stream header:")); + OPUS_LOG(LogLevel::Debug, (" channels: %d", mChannels)); + OPUS_LOG(LogLevel::Debug, (" preskip: %d", mPreSkip)); + OPUS_LOG(LogLevel::Debug, (" original: %d Hz", mNominalRate)); + OPUS_LOG(LogLevel::Debug, (" gain: %.2f dB", gain_dB)); + OPUS_LOG(LogLevel::Debug, ("Channel Mapping:")); + OPUS_LOG(LogLevel::Debug, (" family: %d", mChannelMapping)); + OPUS_LOG(LogLevel::Debug, (" streams: %d", mStreams)); +#endif + return true; +} + +bool OpusParser::DecodeTags(unsigned char* aData, size_t aLength) { + if (aLength < 16 || memcmp(aData, "OpusTags", 8)) return false; + + // Copy out the raw comment lines, but only do basic validation + // checks against the string packing: too little data, too many + // comments, 
or comments that are too long. Rejecting these cases + // helps reduce the propagation of broken files. + // We do not ensure they are valid UTF-8 here, nor do we validate + // the required ASCII_TAG=value format of the user comments. + const unsigned char* buf = aData + 8; + uint32_t bytes = aLength - 8; + uint32_t len; + // Read the vendor string. + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) return false; + mVendorString = nsCString(reinterpret_cast(buf), len); + buf += len; + bytes -= len; + // Read the user comments. + if (bytes < 4) return false; + uint32_t ncomments = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + // If there are so many comments even their length fields + // won't fit in the packet, stop reading now. + if (ncomments > (bytes >> 2)) return false; + for (uint32_t i = 0; i < ncomments; i++) { + if (bytes < 4) return false; + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) return false; + mTags.AppendElement(nsCString(reinterpret_cast(buf), len)); + buf += len; + bytes -= len; + } + +#ifdef DEBUG + OPUS_LOG(LogLevel::Debug, ("Opus metadata header:")); + OPUS_LOG(LogLevel::Debug, (" vendor: %s", mVendorString.get())); + for (uint32_t i = 0; i < mTags.Length(); i++) { + OPUS_LOG(LogLevel::Debug, (" %s", mTags[i].get())); + } +#endif + return true; +} + +/* static */ +bool OpusParser::IsValidMapping2ChannelsCount(uint8_t aChannels) { + // https://tools.ietf.org/html/draft-ietf-codec-ambisonics-08#page-4 + // For both channel mapping family 2 and family 3, the allowed numbers + // of channels: (1 + n)^2 + 2j for n = 0, 1, ..., 14 and j = 0 or 1, + // where n denotes the (highest) ambisonic order and j denotes whether + // or not there is a separate non-diegetic stereo stream Explicitly the + // allowed number of channels are 1, 3, 4, 6, 9, 11, 16, 18, 25, 27, 36, + // 38, 49, 51, 64, 66, 81, 83, 100, 102, 121, 123, 144, 146, 169, 171, + // 196, 198, 225, and 227. 
+ + // We use the property that int(sqrt(n)) == int(sqrt(n+2)) for n != 3 + // which is handled by the test n^2 + 2 != channel + if (aChannels < 1 || aChannels > 227) { + return false; + } + double val = sqrt(aChannels); + int32_t valInt = int32_t(val); + return val == valInt || valInt * valInt + 2 == aChannels; +} + +#undef OPUS_LOG + +} // namespace mozilla diff --git a/dom/media/ogg/OpusParser.h b/dom/media/ogg/OpusParser.h new file mode 100644 index 0000000000..fc2fc5094d --- /dev/null +++ b/dom/media/ogg/OpusParser.h @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OpusParser_h_) +# define OpusParser_h_ + +# include "nsTArray.h" +# include "nsString.h" + +namespace mozilla { + +class OpusParser { + public: + OpusParser(); + + bool DecodeHeader(unsigned char* aData, size_t aLength); + bool DecodeTags(unsigned char* aData, size_t aLength); + static bool IsValidMapping2ChannelsCount(uint8_t aChannels); + + // Various fields from the Ogg Opus header. + int mRate; // Sample rate the decoder uses (always 48 kHz). + uint32_t mNominalRate; // Original sample rate of the data (informational). + int mChannels; // Number of channels the stream encodes. + uint16_t mPreSkip; // Number of samples to strip after decoder reset. +# ifdef MOZ_SAMPLE_TYPE_FLOAT32 + float mGain; // Gain to apply to decoder output. +# else + int32_t mGain_Q16; // Gain to apply to the decoder output. +# endif + int mChannelMapping; // Channel mapping family. + int mStreams; // Number of packed streams in each packet. + int mCoupledStreams; // Number of packed coupled streams in each packet. + unsigned char mMappingTable[255]; // Channel mapping table. 
+ + // Granule position (end sample) of the last decoded Opus packet. This is + // used to calculate the amount we should trim from the last packet. + int64_t mPrevPacketGranulepos; + + nsTArray mTags; // Unparsed comment strings from the header. + + nsCString mVendorString; // Encoder vendor string from the header. +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/moz.build b/dom/media/ogg/moz.build new file mode 100644 index 0000000000..863686686a --- /dev/null +++ b/dom/media/ogg/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "OggCodecState.h", + "OggCodecStore.h", + "OggDecoder.h", + "OggDemuxer.h", + "OggRLBox.h", + "OggRLBoxTypes.h", + "OggWriter.h", + "OpusParser.h", +] + +UNIFIED_SOURCES += [ + "OggCodecState.cpp", + "OggCodecStore.cpp", + "OggDecoder.cpp", + "OggDemuxer.cpp", + "OggWriter.cpp", + "OpusParser.cpp", +] + +LOCAL_INCLUDES += ["!/security/rlbox"] + +FINAL_LIBRARY = "xul" + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") -- cgit v1.2.3