/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "ContainerParser.h" #include "MP4Demuxer.h" #include "MediaInfo.h" #include "MediaSourceDemuxer.h" #include "MediaSourceUtils.h" #include "SourceBuffer.h" #include "SourceBufferResource.h" #include "SourceBufferTask.h" #include "TrackBuffersManager.h" #include "WebMDemuxer.h" #include "mozilla/ErrorResult.h" #include "mozilla/Preferences.h" #include "mozilla/ProfilerLabels.h" #include "mozilla/ProfilerMarkers.h" #include "mozilla/StaticPrefs_media.h" #include "nsMimeTypes.h" #include <limits> extern mozilla::LogModule* GetMediaSourceLog(); #define MSE_DEBUG(arg, ...) \ DDMOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Debug, "::%s: " arg, \ __func__, ##__VA_ARGS__) #define MSE_DEBUGV(arg, ...) \ DDMOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Verbose, "::%s: " arg, \ __func__, ##__VA_ARGS__) mozilla::LogModule* GetMediaSourceSamplesLog() { static mozilla::LazyLogModule sLogModule("MediaSourceSamples"); return sLogModule; } #define SAMPLE_DEBUG(arg, ...) \ DDMOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Debug, \ "::%s: " arg, __func__, ##__VA_ARGS__) #define SAMPLE_DEBUGV(arg, ...) 
\ DDMOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Verbose, \ "::%s: " arg, __func__, ##__VA_ARGS__) namespace mozilla { using dom::SourceBufferAppendMode; using media::TimeInterval; using media::TimeIntervals; using media::TimeUnit; using AppendBufferResult = SourceBufferTask::AppendBufferResult; using AppendState = SourceBufferAttributes::AppendState; static const char* AppendStateToStr(AppendState aState) { switch (aState) { case AppendState::WAITING_FOR_SEGMENT: return "WAITING_FOR_SEGMENT"; case AppendState::PARSING_INIT_SEGMENT: return "PARSING_INIT_SEGMENT"; case AppendState::PARSING_MEDIA_SEGMENT: return "PARSING_MEDIA_SEGMENT"; default: return "IMPOSSIBLE"; } } static Atomic<uint32_t> sStreamSourceID(0u); class DispatchKeyNeededEvent : public Runnable { public: DispatchKeyNeededEvent(MediaSourceDecoder* aDecoder, const nsTArray<uint8_t>& aInitData, const nsString& aInitDataType) : Runnable("DispatchKeyNeededEvent"), mDecoder(aDecoder), mInitData(aInitData.Clone()), mInitDataType(aInitDataType) {} NS_IMETHOD Run() override { // Note: Null check the owner, as the decoder could have been shutdown // since this event was dispatched. 
MediaDecoderOwner* owner = mDecoder->GetOwner(); if (owner) { owner->DispatchEncrypted(mInitData, mInitDataType); } mDecoder = nullptr; return NS_OK; } private: RefPtr<MediaSourceDecoder> mDecoder; nsTArray<uint8_t> mInitData; nsString mInitDataType; }; TrackBuffersManager::TrackBuffersManager(MediaSourceDecoder* aParentDecoder, const MediaContainerType& aType) : mBufferFull(false), mFirstInitializationSegmentReceived(false), mChangeTypeReceived(false), mNewMediaSegmentStarted(false), mActiveTrack(false), mType(aType), mParser(ContainerParser::CreateForMIMEType(aType)), mProcessedInput(0), mParentDecoder(new nsMainThreadPtrHolder<MediaSourceDecoder>( "TrackBuffersManager::mParentDecoder", aParentDecoder, false /* strict */)), mAbstractMainThread(aParentDecoder->AbstractMainThread()), mEnded(false), mVideoEvictionThreshold(Preferences::GetUint( "media.mediasource.eviction_threshold.video", 100 * 1024 * 1024)), mAudioEvictionThreshold(Preferences::GetUint( "media.mediasource.eviction_threshold.audio", 20 * 1024 * 1024)), mEvictionState(EvictionState::NO_EVICTION_NEEDED), mMutex("TrackBuffersManager"), mTaskQueue(aParentDecoder->GetDemuxer()->GetTaskQueue()), mTaskQueueCapability(Some(EventTargetCapability{mTaskQueue.get()})) { MOZ_ASSERT(NS_IsMainThread(), "Must be instanciated on the main thread"); DDLINKCHILD("parser", mParser.get()); } TrackBuffersManager::~TrackBuffersManager() { ShutdownDemuxers(); } RefPtr<TrackBuffersManager::AppendPromise> TrackBuffersManager::AppendData( already_AddRefed<MediaByteBuffer> aData, const SourceBufferAttributes& aAttributes) { MOZ_ASSERT(NS_IsMainThread()); RefPtr<MediaByteBuffer> data(aData); MSE_DEBUG("Appending %zu bytes", data->Length()); mEnded = false; return InvokeAsync(static_cast<AbstractThread*>(GetTaskQueueSafe().get()), this, __func__, &TrackBuffersManager::DoAppendData, data.forget(), aAttributes); } RefPtr<TrackBuffersManager::AppendPromise> TrackBuffersManager::DoAppendData( already_AddRefed<MediaByteBuffer> aData, 
const SourceBufferAttributes& aAttributes) { RefPtr<AppendBufferTask> task = new AppendBufferTask(std::move(aData), aAttributes); RefPtr<AppendPromise> p = task->mPromise.Ensure(__func__); QueueTask(task); return p; } void TrackBuffersManager::QueueTask(SourceBufferTask* aTask) { // The source buffer is a wrapped native, it would be unlinked twice and so // the TrackBuffersManager::Detach() would also be called twice. Since the // detach task has been done before, we could ignore this task. RefPtr<TaskQueue> taskQueue = GetTaskQueueSafe(); if (!taskQueue) { MOZ_ASSERT(aTask->GetType() == SourceBufferTask::Type::Detach, "only detach task could happen here!"); MSE_DEBUG("Could not queue the task '%s' without task queue", aTask->GetTypeName()); return; } if (!taskQueue->IsCurrentThreadIn()) { nsresult rv = taskQueue->Dispatch(NewRunnableMethod<RefPtr<SourceBufferTask>>( "TrackBuffersManager::QueueTask", this, &TrackBuffersManager::QueueTask, aTask)); MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); Unused << rv; return; } mQueue.Push(aTask); ProcessTasks(); } void TrackBuffersManager::ProcessTasks() { // ProcessTask is always called OnTaskQueue, however it is possible that it is // called once again after a first Detach task has run, in which case // mTaskQueue would be null. // This can happen under two conditions: // 1- Two Detach tasks were queued in a row due to a double cycle collection. // 2- An call to ProcessTasks() had queued another run of ProcessTasks while // a Detach task is pending. // We handle these two cases by aborting early. // A second Detach task was queued, prior the first one running, ignore it. 
if (!mTaskQueue) { RefPtr<SourceBufferTask> task = mQueue.Pop(); if (!task) { return; } MOZ_RELEASE_ASSERT(task->GetType() == SourceBufferTask::Type::Detach, "only detach task could happen here!"); MSE_DEBUG("Could not process the task '%s' after detached", task->GetTypeName()); return; } mTaskQueueCapability->AssertOnCurrentThread(); typedef SourceBufferTask::Type Type; if (mCurrentTask) { // Already have a task pending. ProcessTask will be scheduled once the // current task complete. return; } RefPtr<SourceBufferTask> task = mQueue.Pop(); if (!task) { // nothing to do. return; } MSE_DEBUG("Process task '%s'", task->GetTypeName()); switch (task->GetType()) { case Type::AppendBuffer: mCurrentTask = task; if (!mInputBuffer || mInputBuffer->IsEmpty()) { // Note: we reset mInputBuffer here to ensure it doesn't grow unbounded. mInputBuffer.reset(); mInputBuffer = Some(MediaSpan(task->As<AppendBufferTask>()->mBuffer)); } else { // mInputBuffer wasn't empty, so we can't just reset it, but we move // the data into a new buffer to clear out data no longer in the span. MSE_DEBUG( "mInputBuffer not empty during append -- data will be copied to " "new buffer. mInputBuffer->Length()=%zu " "mInputBuffer->Buffer()->Length()=%zu", mInputBuffer->Length(), mInputBuffer->Buffer()->Length()); const RefPtr<MediaByteBuffer> newBuffer{new MediaByteBuffer()}; // Set capacity outside of ctor to let us explicitly handle OOM. const size_t newCapacity = mInputBuffer->Length() + task->As<AppendBufferTask>()->mBuffer->Length(); if (!newBuffer->SetCapacity(newCapacity, fallible)) { RejectAppend(NS_ERROR_OUT_OF_MEMORY, __func__); return; } // Use infallible appends as we've already set capacity above. 
newBuffer->AppendElements(mInputBuffer->Elements(), mInputBuffer->Length()); newBuffer->AppendElements(*task->As<AppendBufferTask>()->mBuffer); mInputBuffer = Some(MediaSpan(newBuffer)); } mSourceBufferAttributes = MakeUnique<SourceBufferAttributes>( task->As<AppendBufferTask>()->mAttributes); mAppendWindow = TimeInterval( TimeUnit::FromSeconds( mSourceBufferAttributes->GetAppendWindowStart()), TimeUnit::FromSeconds(mSourceBufferAttributes->GetAppendWindowEnd())); ScheduleSegmentParserLoop(); break; case Type::RangeRemoval: { bool rv = CodedFrameRemoval(task->As<RangeRemovalTask>()->mRange); task->As<RangeRemovalTask>()->mPromise.Resolve(rv, __func__); break; } case Type::EvictData: DoEvictData(task->As<EvictDataTask>()->mPlaybackTime, task->As<EvictDataTask>()->mSizeToEvict); break; case Type::Abort: // not handled yet, and probably never. break; case Type::Reset: CompleteResetParserState(); break; case Type::Detach: mCurrentInputBuffer = nullptr; MOZ_DIAGNOSTIC_ASSERT(mQueue.Length() == 0, "Detach task must be the last"); mVideoTracks.Reset(); mAudioTracks.Reset(); ShutdownDemuxers(); ResetTaskQueue(); return; case Type::ChangeType: MOZ_RELEASE_ASSERT(!mCurrentTask); MSE_DEBUG("Processing type change from %s -> %s", mType.OriginalString().get(), task->As<ChangeTypeTask>()->mType.OriginalString().get()); mType = task->As<ChangeTypeTask>()->mType; mChangeTypeReceived = true; mInitData = nullptr; // A new input buffer will be created once we receive a new init segment. // The first segment received after a changeType call must be an init // segment. mCurrentInputBuffer = nullptr; CompleteResetParserState(); break; default: NS_WARNING("Invalid Task"); } TaskQueueFromTaskQueue()->Dispatch( NewRunnableMethod("TrackBuffersManager::ProcessTasks", this, &TrackBuffersManager::ProcessTasks)); } // The MSE spec requires that we abort the current SegmentParserLoop // which is then followed by a call to ResetParserState. 
// However due to our asynchronous design this causes inherent difficulties. // As the spec behaviour is non deterministic anyway, we instead process all // pending frames found in the input buffer. void TrackBuffersManager::AbortAppendData() { MOZ_ASSERT(NS_IsMainThread()); MSE_DEBUG(""); QueueTask(new AbortTask()); } void TrackBuffersManager::ResetParserState( SourceBufferAttributes& aAttributes) { MOZ_ASSERT(NS_IsMainThread()); MSE_DEBUG(""); // Spec states: // 1. If the append state equals PARSING_MEDIA_SEGMENT and the input buffer // contains some complete coded frames, then run the coded frame processing // algorithm until all of these complete coded frames have been processed. // However, we will wait until all coded frames have been processed regardless // of the value of append state. QueueTask(new ResetTask()); // ResetParserState has some synchronous steps that much be performed now. // The remaining steps will be performed once the ResetTask gets executed. // 6. If the mode attribute equals "sequence", then set the group start // timestamp to the group end timestamp if (aAttributes.GetAppendMode() == SourceBufferAppendMode::Sequence) { aAttributes.SetGroupStartTimestamp(aAttributes.GetGroupEndTimestamp()); } // 8. Set append state to WAITING_FOR_SEGMENT. 
aAttributes.SetAppendState(AppendState::WAITING_FOR_SEGMENT); } RefPtr<TrackBuffersManager::RangeRemovalPromise> TrackBuffersManager::RangeRemoval(TimeUnit aStart, TimeUnit aEnd) { MOZ_ASSERT(NS_IsMainThread()); MSE_DEBUG("From %.2f to %.2f", aStart.ToSeconds(), aEnd.ToSeconds()); mEnded = false; return InvokeAsync(static_cast<AbstractThread*>(GetTaskQueueSafe().get()), this, __func__, &TrackBuffersManager::CodedFrameRemovalWithPromise, TimeInterval(aStart, aEnd)); } TrackBuffersManager::EvictDataResult TrackBuffersManager::EvictData( const TimeUnit& aPlaybackTime, int64_t aSize, TrackType aType) { MOZ_ASSERT(NS_IsMainThread()); if (aSize > EvictionThreshold(aType)) { // We're adding more data than we can hold. return EvictDataResult::BUFFER_FULL; } const int64_t toEvict = GetSize() + aSize - EvictionThreshold(aType); const uint32_t canEvict = Evictable(HasVideo() ? TrackInfo::kVideoTrack : TrackInfo::kAudioTrack); MSE_DEBUG("currentTime=%" PRId64 " buffered=%" PRId64 "kB, eviction threshold=%" PRId64 "kB, " "evict=%" PRId64 "kB canevict=%" PRIu32 "kB", aPlaybackTime.ToMicroseconds(), GetSize() / 1024, EvictionThreshold(aType) / 1024, toEvict / 1024, canEvict / 1024); if (toEvict <= 0) { mEvictionState = EvictionState::NO_EVICTION_NEEDED; return EvictDataResult::NO_DATA_EVICTED; } EvictDataResult result; if (mBufferFull && mEvictionState == EvictionState::EVICTION_COMPLETED && canEvict < uint32_t(toEvict)) { // Our buffer is currently full. We will make another eviction attempt. // However, the current appendBuffer will fail as we can't know ahead of // time if the eviction will later succeed. result = EvictDataResult::BUFFER_FULL; } else { mEvictionState = EvictionState::EVICTION_NEEDED; result = EvictDataResult::NO_DATA_EVICTED; } MSE_DEBUG("Reached our size limit, schedule eviction of %" PRId64 " bytes (%s)", toEvict, result == EvictDataResult::BUFFER_FULL ? 
"buffer full" : "no data evicted"); QueueTask(new EvictDataTask(aPlaybackTime, toEvict)); return result; } void TrackBuffersManager::ChangeType(const MediaContainerType& aType) { MOZ_ASSERT(NS_IsMainThread()); QueueTask(new ChangeTypeTask(aType)); } TimeIntervals TrackBuffersManager::Buffered() const { MSE_DEBUG(""); // http://w3c.github.io/media-source/index.html#widl-SourceBuffer-buffered MutexAutoLock mut(mMutex); nsTArray<const TimeIntervals*> tracks; if (HasVideo()) { tracks.AppendElement(&mVideoBufferedRanges); } if (HasAudio()) { tracks.AppendElement(&mAudioBufferedRanges); } // 2. Let highest end time be the largest track buffer ranges end time across // all the track buffers managed by this SourceBuffer object. TimeUnit highestEndTime = HighestEndTime(tracks); // 3. Let intersection ranges equal a TimeRange object containing a single // range from 0 to highest end time. TimeIntervals intersection{ TimeInterval(TimeUnit::FromSeconds(0), highestEndTime)}; // 4. For each track buffer managed by this SourceBuffer, run the following // steps: // 1. Let track ranges equal the track buffer ranges for the current track // buffer. for (const TimeIntervals* trackRanges : tracks) { // 2. If readyState is "ended", then set the end time on the last range in // track ranges to highest end time. // 3. Let new intersection ranges equal the intersection between the // intersection ranges and the track ranges. 
if (mEnded) { TimeIntervals tR = *trackRanges; tR.Add(TimeInterval(tR.GetEnd(), highestEndTime)); intersection.Intersection(tR); } else { intersection.Intersection(*trackRanges); } } return intersection; } int64_t TrackBuffersManager::GetSize() const { return mSizeSourceBuffer; } void TrackBuffersManager::Ended() { mEnded = true; } void TrackBuffersManager::Detach() { MOZ_ASSERT(NS_IsMainThread()); MSE_DEBUG(""); QueueTask(new DetachTask()); } void TrackBuffersManager::CompleteResetParserState() { mTaskQueueCapability->AssertOnCurrentThread(); AUTO_PROFILER_LABEL("TrackBuffersManager::CompleteResetParserState", MEDIA_PLAYBACK); MSE_DEBUG(""); // We shouldn't change mInputDemuxer while a demuxer init/reset request is // being processed. See bug 1239983. MOZ_DIAGNOSTIC_ASSERT(!mDemuxerInitRequest.Exists(), "Previous AppendBuffer didn't complete"); for (auto& track : GetTracksList()) { // 2. Unset the last decode timestamp on all track buffers. // 3. Unset the last frame duration on all track buffers. // 4. Unset the highest end timestamp on all track buffers. // 5. Set the need random access point flag on all track buffers to true. track->ResetAppendState(); // if we have been aborted, we may have pending frames that we are going // to discard now. track->mQueuedSamples.Clear(); } // 7. Remove all bytes from the input buffer. mPendingInputBuffer.reset(); mInputBuffer.reset(); if (mCurrentInputBuffer) { mCurrentInputBuffer->EvictAll(); // The demuxer will be recreated during the next run of SegmentParserLoop. // As such we don't need to notify it that data has been removed. mCurrentInputBuffer = new SourceBufferResource(); } // We could be left with a demuxer in an unusable state. It needs to be // recreated. Unless we have a pending changeType operation, we store in the // InputBuffer an init segment which will be parsed during the next Segment // Parser Loop and a new demuxer will be created and initialized. 
// If we are in the middle of a changeType operation, then we do not have an // init segment yet. The next appendBuffer operation will need to provide such // init segment. if (mFirstInitializationSegmentReceived && !mChangeTypeReceived) { MOZ_ASSERT(mInitData && mInitData->Length(), "we must have an init segment"); // The aim here is really to destroy our current demuxer. CreateDemuxerforMIMEType(); // Recreate our input buffer. We can't directly assign the initData buffer // to mInputBuffer as it will get modified in the Segment Parser Loop. mInputBuffer = Some(MediaSpan::WithCopyOf(mInitData)); RecreateParser(true); } else { RecreateParser(false); } } int64_t TrackBuffersManager::EvictionThreshold( TrackInfo::TrackType aType) const { MOZ_ASSERT(aType != TrackInfo::kTextTrack); if (aType == TrackInfo::kVideoTrack || (aType == TrackInfo::kUndefinedTrack && HasVideo())) { return mVideoEvictionThreshold; } return mAudioEvictionThreshold; } void TrackBuffersManager::DoEvictData(const TimeUnit& aPlaybackTime, int64_t aSizeToEvict) { mTaskQueueCapability->AssertOnCurrentThread(); AUTO_PROFILER_LABEL("TrackBuffersManager::DoEvictData", MEDIA_PLAYBACK); mEvictionState = EvictionState::EVICTION_COMPLETED; // Video is what takes the most space, only evict there if we have video. auto& track = HasVideo() ? mVideoTracks : mAudioTracks; const auto& buffer = track.GetTrackBuffer(); if (buffer.IsEmpty()) { // Buffer has been emptied while the eviction was queued, nothing to do. return; } if (track.mBufferedRanges.IsEmpty()) { MSE_DEBUG( "DoEvictData running with no buffered ranges. 0 duration data likely " "present in our buffer(s). Evicting all data!"); // We have no buffered ranges, but may still have data. This happens if the // buffer is full of 0 duration data. Normal removal procedures don't clear // 0 duration data, so blow away all our data. 
RemoveAllCodedFrames(); return; } // Remove any data we've already played, or before the next sample to be // demuxed whichever is lowest. TimeUnit lowerLimit = std::min(track.mNextSampleTime, aPlaybackTime); uint32_t lastKeyFrameIndex = 0; int64_t toEvict = aSizeToEvict; int64_t partialEvict = 0; for (uint32_t i = 0; i < buffer.Length(); i++) { const auto& frame = buffer[i]; if (frame->mKeyframe) { lastKeyFrameIndex = i; toEvict -= partialEvict; if (toEvict < 0) { break; } partialEvict = 0; } if (frame->GetEndTime() >= lowerLimit) { break; } partialEvict += AssertedCast<int64_t>(frame->ComputedSizeOfIncludingThis()); } const int64_t finalSize = mSizeSourceBuffer - aSizeToEvict; if (lastKeyFrameIndex > 0) { MSE_DEBUG("Step1. Evicting %" PRId64 " bytes prior currentTime", aSizeToEvict - toEvict); TimeUnit start = track.mBufferedRanges[0].mStart; TimeUnit end = buffer[lastKeyFrameIndex]->mTime - TimeUnit::FromMicroseconds(1); if (end > start) { CodedFrameRemoval(TimeInterval(start, end)); } } if (mSizeSourceBuffer <= finalSize) { return; } toEvict = mSizeSourceBuffer - finalSize; // See if we can evict data into the future. // We do not evict data from the currently used buffered interval. TimeUnit currentPosition = std::max(aPlaybackTime, track.mNextSampleTime); TimeIntervals futureBuffered( TimeInterval(currentPosition, TimeUnit::FromInfinity())); futureBuffered.Intersection(track.mBufferedRanges); futureBuffered.SetFuzz(MediaSourceDemuxer::EOS_FUZZ / 2); if (futureBuffered.Length() <= 1) { // We have one continuous segment ahead of us: // nothing further can be evicted. return; } // Don't evict before the end of the current segment TimeUnit upperLimit = futureBuffered[0].mEnd; uint32_t evictedFramesStartIndex = buffer.Length(); for (uint32_t i = buffer.Length() - 1; i-- > 0;) { const auto& frame = buffer[i]; if (frame->mTime <= upperLimit || toEvict < 0) { // We've reached a frame that shouldn't be evicted -> Evict after it -> // i+1. 
Or the previous loop reached the eviction threshold -> Evict from // it -> i+1. evictedFramesStartIndex = i + 1; break; } toEvict -= AssertedCast<int64_t>(frame->ComputedSizeOfIncludingThis()); } if (evictedFramesStartIndex < buffer.Length()) { MSE_DEBUG("Step2. Evicting %" PRId64 " bytes from trailing data", mSizeSourceBuffer - finalSize - toEvict); CodedFrameRemoval(TimeInterval(buffer[evictedFramesStartIndex]->mTime, TimeUnit::FromInfinity())); } } RefPtr<TrackBuffersManager::RangeRemovalPromise> TrackBuffersManager::CodedFrameRemovalWithPromise( const TimeInterval& aInterval) { mTaskQueueCapability->AssertOnCurrentThread(); RefPtr<RangeRemovalTask> task = new RangeRemovalTask(aInterval); RefPtr<RangeRemovalPromise> p = task->mPromise.Ensure(__func__); QueueTask(task); return p; } bool TrackBuffersManager::CodedFrameRemoval(const TimeInterval& aInterval) { MOZ_ASSERT(OnTaskQueue()); AUTO_PROFILER_LABEL("TrackBuffersManager::CodedFrameRemoval", MEDIA_PLAYBACK); MSE_DEBUG("From %.2fs to %.2f", aInterval.mStart.ToSeconds(), aInterval.mEnd.ToSeconds()); #if DEBUG if (HasVideo()) { MSE_DEBUG("before video ranges=%s", DumpTimeRangesRaw(mVideoTracks.mBufferedRanges).get()); } if (HasAudio()) { MSE_DEBUG("before audio ranges=%s", DumpTimeRangesRaw(mAudioTracks.mBufferedRanges).get()); } #endif // 1. Let start be the starting presentation timestamp for the removal range. TimeUnit start = aInterval.mStart; // 2. Let end be the end presentation timestamp for the removal range. TimeUnit end = aInterval.mEnd; bool dataRemoved = false; // 3. For each track buffer in this source buffer, run the following steps: for (auto* track : GetTracksList()) { MSE_DEBUGV("Processing %s track", track->mInfo->mMimeType.get()); // 1. 
Let remove end timestamp be the current value of duration // See bug: https://www.w3.org/Bugs/Public/show_bug.cgi?id=28727 // At worse we will remove all frames until the end, unless a key frame is // found between the current interval's end and the trackbuffer's end. TimeUnit removeEndTimestamp = track->mBufferedRanges.GetEnd(); if (start > removeEndTimestamp) { // Nothing to remove. continue; } // 2. If this track buffer has a random access point timestamp that is // greater than or equal to end, then update remove end timestamp to that // random access point timestamp. if (end < track->mBufferedRanges.GetEnd()) { for (auto& frame : track->GetTrackBuffer()) { if (frame->mKeyframe && frame->mTime >= end) { removeEndTimestamp = frame->mTime; break; } } } // 3. Remove all media data, from this track buffer, that contain starting // timestamps greater than or equal to start and less than the remove end // timestamp. // 4. Remove decoding dependencies of the coded frames removed in the // previous step: Remove all coded frames between the coded frames removed // in the previous step and the next random access point after those removed // frames. TimeIntervals removedInterval{TimeInterval(start, removeEndTimestamp)}; RemoveFrames(removedInterval, *track, 0, RemovalMode::kRemoveFrame); // 5. If this object is in activeSourceBuffers, the current playback // position is greater than or equal to start and less than the remove end // timestamp, and HTMLMediaElement.readyState is greater than HAVE_METADATA, // then set the HTMLMediaElement.readyState attribute to HAVE_METADATA and // stall playback. This will be done by the MDSM during playback. // TODO properly, so it works even if paused. } UpdateBufferedRanges(); // Update our reported total size. mSizeSourceBuffer = mVideoTracks.mSizeBuffer + mAudioTracks.mSizeBuffer; // 4. If buffer full flag equals true and this object is ready to accept more // bytes, then set the buffer full flag to false. 
if (mBufferFull && mSizeSourceBuffer < EvictionThreshold()) { mBufferFull = false; } return dataRemoved; } void TrackBuffersManager::RemoveAllCodedFrames() { // This is similar to RemoveCodedFrames, but will attempt to remove ALL // the frames. This is not to spec, as explained below at step 3.1. Steps // below coincide with Remove Coded Frames algorithm from the spec. MSE_DEBUG("RemoveAllCodedFrames called."); MOZ_ASSERT(OnTaskQueue()); AUTO_PROFILER_LABEL("TrackBuffersManager::RemoveAllCodedFrames", MEDIA_PLAYBACK); // 1. Let start be the starting presentation timestamp for the removal range. TimeUnit start{}; // 2. Let end be the end presentation timestamp for the removal range. TimeUnit end = TimeUnit::FromMicroseconds(1); // Find an end time such that our range will include every frame in every // track. We do this by setting the end of our interval to the largest end // time seen + 1 microsecond. for (TrackData* track : GetTracksList()) { for (auto& frame : track->GetTrackBuffer()) { MOZ_ASSERT(frame->mTime >= start, "Shouldn't have frame at negative time!"); TimeUnit frameEnd = frame->mTime + frame->mDuration; if (frameEnd > end) { end = frameEnd + TimeUnit::FromMicroseconds(1); } } } // 3. For each track buffer in this source buffer, run the following steps: TimeIntervals removedInterval{TimeInterval(start, end)}; for (TrackData* track : GetTracksList()) { // 1. Let remove end timestamp be the current value of duration // ^ It's off spec, but we ignore this in order to clear 0 duration frames. // If we don't ignore this rule and our buffer is full of 0 duration frames // at timestamp n, we get an eviction range of [0, n). When we get to step // 3.3 below, the 0 duration frames will not be evicted because their // timestamp is not less than remove end timestamp -- it will in fact be // equal to remove end timestamp. // // 2. 
If this track buffer has a random access point timestamp that is // greater than or equal to end, then update remove end timestamp to that // random access point timestamp. // ^ We've made sure end > any sample's timestamp, so can skip this. // // 3. Remove all media data, from this track buffer, that contain starting // timestamps greater than or equal to start and less than the remove end // timestamp. // 4. Remove decoding dependencies of the coded frames removed in the // previous step: Remove all coded frames between the coded frames removed // in the previous step and the next random access point after those removed // frames. // This should remove every frame in the track because removedInterval was // constructed such that every frame in any track falls into that interval. RemoveFrames(removedInterval, *track, 0, RemovalMode::kRemoveFrame); // 5. If this object is in activeSourceBuffers, the current playback // position is greater than or equal to start and less than the remove end // timestamp, and HTMLMediaElement.readyState is greater than HAVE_METADATA, // then set the HTMLMediaElement.readyState attribute to HAVE_METADATA and // stall playback. This will be done by the MDSM during playback. // TODO properly, so it works even if paused. 
} UpdateBufferedRanges(); #ifdef DEBUG { MutexAutoLock lock(mMutex); MOZ_ASSERT( mAudioBufferedRanges.IsEmpty(), "Should have no buffered video ranges after evicting everything."); MOZ_ASSERT( mVideoBufferedRanges.IsEmpty(), "Should have no buffered video ranges after evicting everything."); } #endif mSizeSourceBuffer = mVideoTracks.mSizeBuffer + mAudioTracks.mSizeBuffer; MOZ_ASSERT(mSizeSourceBuffer == 0, "Buffer should be empty after evicting everything!"); if (mBufferFull && mSizeSourceBuffer < EvictionThreshold()) { mBufferFull = false; } } void TrackBuffersManager::UpdateBufferedRanges() { MutexAutoLock mut(mMutex); mVideoBufferedRanges = mVideoTracks.mSanitizedBufferedRanges; mAudioBufferedRanges = mAudioTracks.mSanitizedBufferedRanges; #if DEBUG if (HasVideo()) { MSE_DEBUG("after video ranges=%s", DumpTimeRangesRaw(mVideoTracks.mBufferedRanges).get()); } if (HasAudio()) { MSE_DEBUG("after audio ranges=%s", DumpTimeRangesRaw(mAudioTracks.mBufferedRanges).get()); } #endif } void TrackBuffersManager::SegmentParserLoop() { MOZ_ASSERT(OnTaskQueue()); AUTO_PROFILER_LABEL("TrackBuffersManager::SegmentParserLoop", MEDIA_PLAYBACK); while (true) { // 1. If the input buffer is empty, then jump to the need more data step // below. if (!mInputBuffer || mInputBuffer->IsEmpty()) { NeedMoreData(); return; } // 2. If the input buffer contains bytes that violate the SourceBuffer // byte stream format specification, then run the append error algorithm // with the decode error parameter set to true and abort this algorithm. // TODO // 3. Remove any bytes that the byte stream format specifications say must // be ignored from the start of the input buffer. We do not remove bytes // from our input buffer. Instead we enforce that our ContainerParser is // able to skip over all data that is supposed to be ignored. // 4. 
If the append state equals WAITING_FOR_SEGMENT, then run the following // steps: if (mSourceBufferAttributes->GetAppendState() == AppendState::WAITING_FOR_SEGMENT) { MediaResult haveInitSegment = mParser->IsInitSegmentPresent(*mInputBuffer); if (NS_SUCCEEDED(haveInitSegment)) { SetAppendState(AppendState::PARSING_INIT_SEGMENT); if (mFirstInitializationSegmentReceived && !mChangeTypeReceived) { // This is a new initialization segment. Obsolete the old one. RecreateParser(false); } continue; } MediaResult haveMediaSegment = mParser->IsMediaSegmentPresent(*mInputBuffer); if (NS_SUCCEEDED(haveMediaSegment)) { SetAppendState(AppendState::PARSING_MEDIA_SEGMENT); mNewMediaSegmentStarted = true; continue; } // We have neither an init segment nor a media segment. // Check if it was invalid data. if (haveInitSegment != NS_ERROR_NOT_AVAILABLE) { MSE_DEBUG("Found invalid data."); RejectAppend(haveInitSegment, __func__); return; } if (haveMediaSegment != NS_ERROR_NOT_AVAILABLE) { MSE_DEBUG("Found invalid data."); RejectAppend(haveMediaSegment, __func__); return; } MSE_DEBUG("Found incomplete data."); NeedMoreData(); return; } MOZ_ASSERT(mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_INIT_SEGMENT || mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_MEDIA_SEGMENT); TimeUnit start, end; MediaResult newData = NS_ERROR_NOT_AVAILABLE; if (mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_INIT_SEGMENT || (mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_MEDIA_SEGMENT && mFirstInitializationSegmentReceived && !mChangeTypeReceived)) { newData = mParser->ParseStartAndEndTimestamps(*mInputBuffer, start, end); if (NS_FAILED(newData) && newData.Code() != NS_ERROR_NOT_AVAILABLE) { RejectAppend(newData, __func__); return; } mProcessedInput += mInputBuffer->Length(); } // 5. 
If the append state equals PARSING_INIT_SEGMENT, then run the // following steps: if (mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_INIT_SEGMENT) { if (mParser->InitSegmentRange().IsEmpty()) { mInputBuffer.reset(); NeedMoreData(); return; } InitializationSegmentReceived(); return; } if (mSourceBufferAttributes->GetAppendState() == AppendState::PARSING_MEDIA_SEGMENT) { // 1. If the first initialization segment received flag is false, then run // the append error algorithm with the decode error parameter set to // true and abort this algorithm. // Or we are in the process of changeType, in which case we must first // get an init segment before getting a media segment. if (!mFirstInitializationSegmentReceived || mChangeTypeReceived) { RejectAppend(NS_ERROR_FAILURE, __func__); return; } // We can't feed some demuxers (WebMDemuxer) with data that do not have // monotonizally increasing timestamps. So we check if we have a // discontinuity from the previous segment parsed. // If so, recreate a new demuxer to ensure that the demuxer is only fed // monotonically increasing data. if (mNewMediaSegmentStarted) { if (NS_SUCCEEDED(newData) && mLastParsedEndTime.isSome() && start < mLastParsedEndTime.ref()) { MSE_DEBUG("Re-creating demuxer"); ResetDemuxingState(); return; } if (NS_SUCCEEDED(newData) || !mParser->MediaSegmentRange().IsEmpty()) { if (mPendingInputBuffer) { // We now have a complete media segment header. We can resume // parsing the data. AppendDataToCurrentInputBuffer(*mPendingInputBuffer); mPendingInputBuffer.reset(); } mNewMediaSegmentStarted = false; } else { // We don't have any data to demux yet, stash aside the data. // This also handles the case: // 2. If the input buffer does not contain a complete media segment // header yet, then jump to the need more data step below. 
if (!mPendingInputBuffer) { mPendingInputBuffer = Some(MediaSpan(*mInputBuffer)); } else { // Note we reset mInputBuffer below, so this won't end up appending // the contents of mInputBuffer to itself. mPendingInputBuffer->Append(*mInputBuffer); } mInputBuffer.reset(); NeedMoreData(); return; } } // 3. If the input buffer contains one or more complete coded frames, then // run the coded frame processing algorithm. RefPtr<TrackBuffersManager> self = this; CodedFrameProcessing() ->Then( TaskQueueFromTaskQueue(), __func__, [self](bool aNeedMoreData) { self->mTaskQueueCapability->AssertOnCurrentThread(); self->mProcessingRequest.Complete(); if (aNeedMoreData) { self->NeedMoreData(); } else { self->ScheduleSegmentParserLoop(); } }, [self](const MediaResult& aRejectValue) { self->mTaskQueueCapability->AssertOnCurrentThread(); self->mProcessingRequest.Complete(); self->RejectAppend(aRejectValue, __func__); }) ->Track(mProcessingRequest); return; } } }

// Resolves the promise of the current AppendBuffer task with the active-track
// flag and a copy of the source buffer attributes, clears the per-task state
// and then calls ProcessTasks().
void TrackBuffersManager::NeedMoreData() {
  MSE_DEBUG("");
  MOZ_DIAGNOSTIC_ASSERT(mCurrentTask && mCurrentTask->GetType() ==
                                            SourceBufferTask::Type::AppendBuffer);
  MOZ_DIAGNOSTIC_ASSERT(mSourceBufferAttributes);

  AppendBufferTask* appendTask = mCurrentTask->As<AppendBufferTask>();
  appendTask->mPromise.Resolve(
      AppendBufferResult(mActiveTrack, *mSourceBufferAttributes), __func__);

  // The append is over; forget everything that belonged to it.
  mSourceBufferAttributes = nullptr;
  mCurrentTask = nullptr;
  ProcessTasks();
}

// Rejects the promise of the current AppendBuffer task with aRejectValue,
// clears the per-task state and then calls ProcessTasks().
// Note that aName is not forwarded: the promise is rejected with this
// function's own name as the call site.
void TrackBuffersManager::RejectAppend(const MediaResult& aRejectValue,
                                       const char* aName) {
  const uint32_t rejectCode = static_cast<uint32_t>(aRejectValue.Code());
  MSE_DEBUG("rv=%" PRIu32, rejectCode);
  MOZ_DIAGNOSTIC_ASSERT(mCurrentTask && mCurrentTask->GetType() ==
                                            SourceBufferTask::Type::AppendBuffer);

  AppendBufferTask* appendTask = mCurrentTask->As<AppendBufferTask>();
  appendTask->mPromise.Reject(aRejectValue, __func__);

  // The append is over; forget everything that belonged to it.
  mSourceBufferAttributes = nullptr;
  mCurrentTask = nullptr;
  ProcessTasks();
}

// Queues another run of SegmentParserLoop() on the task queue instead of
// calling it synchronously.
void TrackBuffersManager::ScheduleSegmentParserLoop() {
  MOZ_ASSERT(OnTaskQueue());
  nsCOMPtr<nsIRunnable> loopRunnable =
      NewRunnableMethod("TrackBuffersManager::SegmentParserLoop", this,
                        &TrackBuffersManager::SegmentParserLoop);
  TaskQueueFromTaskQueue()->Dispatch(loopRunnable.forget());
}

// Drops the video and audio track demuxers (breaking their cycles first), the
// input demuxer, and the remembered end time of the last parsed segment.
void TrackBuffersManager::ShutdownDemuxers() {
  auto releaseTrackDemuxer = [](TrackData& aTrack) {
    if (!aTrack.mDemuxer) {
      return;
    }
    aTrack.mDemuxer->BreakCycles();
    aTrack.mDemuxer = nullptr;
  };
  releaseTrackDemuxer(mVideoTracks);
  releaseTrackDemuxer(mAudioTracks);

  // We shouldn't change mInputDemuxer while a demuxer init/reset request is
  // being processed. See bug 1239983.
  MOZ_DIAGNOSTIC_ASSERT(!mDemuxerInitRequest.Exists());
  mInputDemuxer = nullptr;
  mLastParsedEndTime.reset();
}

// Replaces the current demuxers with a fresh input demuxer matching mType,
// reading from mCurrentInputBuffer. WebM and MP4 (audio or video) are
// handled; for any other type a warning is emitted and mInputDemuxer is left
// null, which callers must check.
void TrackBuffersManager::CreateDemuxerforMIMEType() {
  mTaskQueueCapability->AssertOnCurrentThread();
  MSE_DEBUG("mType.OriginalString=%s", mType.OriginalString().get());
  ShutdownDemuxers();

  const auto& mimeType = mType.Type();
  if (mimeType == MEDIAMIMETYPE(VIDEO_WEBM) ||
      mimeType == MEDIAMIMETYPE(AUDIO_WEBM)) {
    mInputDemuxer =
        new WebMDemuxer(mCurrentInputBuffer, true /* IsMediaSource*/);
  } else if (mimeType == MEDIAMIMETYPE(VIDEO_MP4) ||
             mimeType == MEDIAMIMETYPE(AUDIO_MP4)) {
    mInputDemuxer = new MP4Demuxer(mCurrentInputBuffer);
  } else {
    NS_WARNING("Not supported (yet)");
    return;
  }
  DDLINKCHILD("demuxer", mInputDemuxer.get());
}

// Resets the demuxer by building a brand new one on top of a new resource
// that only contains the init segment, then initializing it asynchronously.
// OnDemuxerResetDone() / OnDemuxerInitFailed() receive the outcome.
void TrackBuffersManager::ResetDemuxingState() {
  MOZ_ASSERT(OnTaskQueue());
  MOZ_ASSERT(mParser && mParser->HasInitData());
  AUTO_PROFILER_LABEL("TrackBuffersManager::ResetDemuxingState",
                      MEDIA_PLAYBACK);
  RecreateParser(true);

  // The new demuxer has not been initialized, so there is nobody to notify
  // about appended data yet: the init segment goes straight into the
  // resource.
  mCurrentInputBuffer = new SourceBufferResource();
  mCurrentInputBuffer->AppendData(mParser->InitData());

  CreateDemuxerforMIMEType();
  if (mInputDemuxer) {
    mInputDemuxer->Init()
        ->Then(TaskQueueFromTaskQueue(), __func__, this,
               &TrackBuffersManager::OnDemuxerResetDone,
               &TrackBuffersManager::OnDemuxerInitFailed)
        ->Track(mDemuxerInitRequest);
    return;
  }
  RejectAppend(NS_ERROR_FAILURE, __func__);
}

// Resolution handler for the Init() started by ResetDemuxingState().
// Re-acquires the first video/audio track demuxers, re-parses any stashed
// partial media segment and re-enters the segment parser loop. A failing
// aResult is fatal only when warnings are treated as errors; otherwise it is
// reported to the decoder owner as a decode warning on the main thread.
void TrackBuffersManager::OnDemuxerResetDone(const MediaResult& aResult) {
  MOZ_ASSERT(OnTaskQueue());
  mDemuxerInitRequest.Complete();

  const bool initFailed = NS_FAILED(aResult);
  if (initFailed && StaticPrefs::media_playback_warnings_as_errors()) {
    RejectAppend(aResult, __func__);
    return;
  }

  // mInputDemuxer shouldn't have been destroyed while a demuxer init/reset
  // request was being processed. See bug 1239983.
  MOZ_DIAGNOSTIC_ASSERT(mInputDemuxer);

  if (aResult != NS_OK && mParentDecoder) {
    RefPtr<TrackBuffersManager> manager = this;
    mAbstractMainThread->Dispatch(NS_NewRunnableFunction(
        "TrackBuffersManager::OnDemuxerResetDone", [manager, aResult]() {
          // The decoder or its owner may be gone by the time this runs.
          if (!manager->mParentDecoder) {
            return;
          }
          if (auto* owner = manager->mParentDecoder->GetOwner()) {
            owner->DecodeWarning(aResult);
          }
        }));
  }

  // Recreate track demuxers. Only the first track of each kind is handled.
  const uint32_t videoTrackCount =
      mInputDemuxer->GetNumberTracks(TrackInfo::kVideoTrack);
  if (videoTrackCount > 0) {
    mVideoTracks.mDemuxer =
        mInputDemuxer->GetTrackDemuxer(TrackInfo::kVideoTrack, 0);
    MOZ_ASSERT(mVideoTracks.mDemuxer);
    DDLINKCHILD("video demuxer", mVideoTracks.mDemuxer.get());
  }

  const uint32_t audioTrackCount =
      mInputDemuxer->GetNumberTracks(TrackInfo::kAudioTrack);
  if (audioTrackCount > 0) {
    mAudioTracks.mDemuxer =
        mInputDemuxer->GetTrackDemuxer(TrackInfo::kAudioTrack, 0);
    MOZ_ASSERT(mAudioTracks.mDemuxer);
    DDLINKCHILD("audio demuxer", mAudioTracks.mDemuxer.get());
  }

  if (mPendingInputBuffer) {
    // A partial media segment header had been stashed aside. Run it through
    // the (recreated) parser again so that parsing of the current input
    // buffer can continue; the timestamps themselves are not needed here.
    TimeUnit ignoredStart, ignoredEnd;
    mParser->ParseStartAndEndTimestamps(*mPendingInputBuffer, ignoredStart,
                                        ignoredEnd);
    mProcessedInput += mPendingInputBuffer->Length();
  }

  SegmentParserLoop();
}

// Appends aData to the resource read by the demuxer and tells the input
// demuxer that new data has arrived.
void TrackBuffersManager::AppendDataToCurrentInputBuffer(
    const MediaSpan& aData) {
  MOZ_ASSERT(mCurrentInputBuffer);
  mCurrentInputBuffer->AppendData(aData);
  mInputDemuxer->NotifyDataArrived();
}

// Called once the parser holds a complete initialization segment. Moves the
// init segment into a new resource, strips its bytes from the input buffer,
// creates a demuxer for the current MIME type and initializes it
// asynchronously. OnDemuxerInitDone() / OnDemuxerInitFailed() receive the
// outcome.
void TrackBuffersManager::InitializationSegmentReceived() {
  MOZ_ASSERT(OnTaskQueue());
  MOZ_ASSERT(mParser->HasCompleteInitData());
  AUTO_PROFILER_LABEL("TrackBuffersManager::InitializationSegmentReceived",
                      MEDIA_PLAYBACK);

  const int64_t endInit = mParser->InitSegmentRange().mEnd;
  // Position of the first byte of the input buffer within everything that has
  // been processed so far. Only meaningful once the checks below passed.
  const auto inputStartOffset = mProcessedInput - mInputBuffer->Length();

  const bool bufferLargerThanProcessed =
      mInputBuffer->Length() > mProcessedInput;
  if (bufferLargerThanProcessed || int64_t(inputStartOffset) > endInit) {
    // Something is not quite right with the data appended. Refuse it.
    RejectAppend(MediaResult(NS_ERROR_FAILURE,
                             "Invalid state following initialization segment"),
                 __func__);
    return;
  }

  // The demuxer doesn't exist yet, so there is nobody to notify about
  // appended data: the init segment goes straight into the new resource.
  mCurrentInputBuffer = new SourceBufferResource();
  mCurrentInputBuffer->AppendData(mParser->InitData());

  // Drop the init segment bytes from the front of the input buffer.
  uint32_t length = endInit - inputStartOffset;
  MOZ_RELEASE_ASSERT(length <= mInputBuffer->Length());
  mInputBuffer->RemoveFront(length);

  CreateDemuxerforMIMEType();
  if (mInputDemuxer) {
    mInputDemuxer->Init()
        ->Then(TaskQueueFromTaskQueue(), __func__, this,
               &TrackBuffersManager::OnDemuxerInitDone,
               &TrackBuffersManager::OnDemuxerInitFailed)
        ->Track(mDemuxerInitRequest);
    return;
  }
  NS_WARNING("TODO type not supported");
  RejectAppend(NS_ERROR_DOM_NOT_SUPPORTED_ERR, __func__);
}

// Tells whether the init segment that was just parsed describes the same
// stream as the one already stored in mInitData.
bool TrackBuffersManager::IsRepeatInitData(
    const MediaInfo& aNewMediaInfo) const {
  MOZ_ASSERT(OnTaskQueue());
  if (!mInitData) {
    // There is no previous init data, so this cannot be a repeat.
    return false;
  }

  if (mChangeTypeReceived) {
    // If we've received change type we want to reprocess init data.
return false;
  }

  MOZ_DIAGNOSTIC_ASSERT(mInitData, "Init data should be non-null");
  if (*mInitData == *mParser->InitData()) {
    // We have previous init data, and it's the same binary data as we've just
    // parsed.
    return true;
  }

  // At this point the binary data doesn't match, but it's possible to have the
  // different binary representations for the same logical init data. These
  // checks can be revised as we encounter such cases in the wild.

  // Compare the new audio info against the last one we stored. When the new
  // init data is audio-only the audio comparison alone decides the result.
  bool audioInfoIsRepeat = false;
  if (aNewMediaInfo.HasAudio()) {
    if (!mAudioTracks.mLastInfo) {
      // There is no old audio info, so this can't be a repeat.
      return false;
    }
    audioInfoIsRepeat =
        *mAudioTracks.mLastInfo->GetAsAudioInfo() == aNewMediaInfo.mAudio;
    if (!aNewMediaInfo.HasVideo()) {
      // Only have audio.
      return audioInfoIsRepeat;
    }
  }

  // Same comparison for video; video-only init data is decided here.
  bool videoInfoIsRepeat = false;
  if (aNewMediaInfo.HasVideo()) {
    if (!mVideoTracks.mLastInfo) {
      // There is no old video info, so this can't be a repeat.
      return false;
    }
    videoInfoIsRepeat =
        *mVideoTracks.mLastInfo->GetAsVideoInfo() == aNewMediaInfo.mVideo;
    if (!aNewMediaInfo.HasAudio()) {
      // Only have video.
      return videoInfoIsRepeat;
    }
  }

  if (audioInfoIsRepeat && videoInfoIsRepeat) {
    MOZ_DIAGNOSTIC_ASSERT(
        aNewMediaInfo.HasVideo() && aNewMediaInfo.HasAudio(),
        "This should only be reachable if audio and video are present");
    // Video + audio are present and both have the same init data.
    return true;
  }

  return false;
}

// Resolution handler for the demuxer Init() started by
// InitializationSegmentReceived(). Implements the remainder of the
// "initialization segment received" steps: it collects the track info exposed
// by the new demuxer, validates it against what was received before, updates
// the per-track info and mInfo when the init data is new, then resets the
// parser, switches back to WAITING_FOR_SEGMENT and schedules the segment
// parser loop.
// A failing aResult rejects the append only when warnings are treated as
// errors; otherwise it is reported to the decoder owner as a decode warning.
void TrackBuffersManager::OnDemuxerInitDone(const MediaResult& aResult) {
  mTaskQueueCapability->AssertOnCurrentThread();
  MOZ_DIAGNOSTIC_ASSERT(mInputDemuxer, "mInputDemuxer has been destroyed");
  AUTO_PROFILER_LABEL("TrackBuffersManager::OnDemuxerInitDone", MEDIA_PLAYBACK);

  mDemuxerInitRequest.Complete();

  if (NS_FAILED(aResult) && StaticPrefs::media_playback_warnings_as_errors()) {
    RejectAppend(aResult, __func__);
    return;
  }

  // Gather the info of the first video and first audio track, if any. The
  // track ids are overwritten with fixed values (video: 2, audio: 1).
  MediaInfo info;

  uint32_t numVideos = mInputDemuxer->GetNumberTracks(TrackInfo::kVideoTrack);
  if (numVideos) {
    // We currently only handle the first video track.
    mVideoTracks.mDemuxer =
        mInputDemuxer->GetTrackDemuxer(TrackInfo::kVideoTrack, 0);
    MOZ_ASSERT(mVideoTracks.mDemuxer);
    DDLINKCHILD("video demuxer", mVideoTracks.mDemuxer.get());
    info.mVideo = *mVideoTracks.mDemuxer->GetInfo()->GetAsVideoInfo();
    info.mVideo.mTrackId = 2;
  }

  uint32_t numAudios = mInputDemuxer->GetNumberTracks(TrackInfo::kAudioTrack);
  if (numAudios) {
    // We currently only handle the first audio track.
    mAudioTracks.mDemuxer =
        mInputDemuxer->GetTrackDemuxer(TrackInfo::kAudioTrack, 0);
    MOZ_ASSERT(mAudioTracks.mDemuxer);
    DDLINKCHILD("audio demuxer", mAudioTracks.mDemuxer.get());
    info.mAudio = *mAudioTracks.mDemuxer->GetInfo()->GetAsAudioInfo();
    info.mAudio.mTrackId = 1;
  }

  // The initial duration is the longest of the track durations; a missing
  // track counts as zero.
  TimeUnit videoDuration = numVideos ? info.mVideo.mDuration : TimeUnit::Zero();
  TimeUnit audioDuration = numAudios ? info.mAudio.mDuration : TimeUnit::Zero();

  TimeUnit duration = std::max(videoDuration, audioDuration);
  // 1. Update the duration attribute if it currently equals NaN.
  // Those steps are performed by the MediaSourceDecoder::SetInitialDuration
  // A zero duration is reported as infinity.
  mAbstractMainThread->Dispatch(NewRunnableMethod<TimeUnit>(
      "MediaSourceDecoder::SetInitialDuration", mParentDecoder.get(),
      &MediaSourceDecoder::SetInitialDuration,
      !duration.IsZero() ? duration : TimeUnit::FromInfinity()));

  // 2. If the initialization segment has no audio, video, or text tracks, then
  // run the append error algorithm with the decode error parameter set to true
  // and abort these steps.
  if (!numVideos && !numAudios) {
    RejectAppend(NS_ERROR_FAILURE, __func__);
    return;
  }

  // 3. If the first initialization segment received flag is true, then run the
  // following steps:
  if (mFirstInitializationSegmentReceived) {
    // The number of tracks of each kind must not change between
    // initialization segments.
    if (numVideos != mVideoTracks.mNumTracks ||
        numAudios != mAudioTracks.mNumTracks) {
      RejectAppend(NS_ERROR_FAILURE, __func__);
      return;
    }
    // 1. If more than one track for a single type are present (ie 2 audio
    // tracks), then the Track IDs match the ones in the first initialization
    // segment.
    // TODO
    // 2. Add the appropriate track descriptions from this initialization
    // segment to each of the track buffers.
    // TODO
    // 3. Set the need random access point flag on all track buffers to true.
    mVideoTracks.mNeedRandomAccessPoint = true;
    mAudioTracks.mNeedRandomAccessPoint = true;
  }

  // Check if we've received the same init data again. Some streams will
  // resend the same data. In these cases we don't need to change the stream
  // id as it's the same stream. Doing so would recreate decoders, possibly
  // leading to gaps in audio and/or video (see bug 1450952).
  bool isRepeatInitData = IsRepeatInitData(info);

  MOZ_ASSERT(mFirstInitializationSegmentReceived || !isRepeatInitData,
             "Should never detect repeat init data for first segment!");

  // If we have new init data we configure and set track info as needed. If we
  // have repeat init data we carry forward our existing track info.
  if (!isRepeatInitData) {
    // Increase our stream id.
    uint32_t streamID = sStreamSourceID++;

    // 4. Let active track flag equal false.
    bool activeTrack = false;

    // 5. If the first initialization segment received flag is false, then run
    // the following steps:
    if (!mFirstInitializationSegmentReceived) {
      MSE_DEBUG("Get first init data");
      mAudioTracks.mNumTracks = numAudios;
      // TODO:
      // 1. If the initialization segment contains tracks with codecs the user
      // agent does not support, then run the append error algorithm with the
      // decode error parameter set to true and abort these steps.

      // 2. For each audio track in the initialization segment, run following
      // steps:
      // NOTE(review): the per-track loop below is commented out, so the block
      // runs at most once, for the first audio track.
      // for (uint32_t i = 0; i < numAudios; i++) {
      if (numAudios) {
        // 1. Let audio byte stream track ID be the Track ID for the current
        // track being processed.
        // 2. Let audio language be a BCP 47 language tag for the language
        // specified in the initialization segment for this track or an empty
        // string if no language info is present.
        // 3. If audio language equals an empty string or the 'und' BCP 47
        // value, then run the default track language algorithm with
        // byteStreamTrackID set to audio byte stream track ID and type set to
        // "audio" and assign the value returned by the algorithm to audio
        // language.
        // 4. Let audio label be a label specified in the initialization segment
        // for this track or an empty string if no label info is present.
        // 5. If audio label equals an empty string, then run the default track
        // label algorithm with byteStreamTrackID set to audio byte stream track
        // ID and type set to "audio" and assign the value returned by the
        // algorithm to audio label.
        // 6. Let audio kinds be an array of kind strings specified in the
        // initialization segment for this track or an empty array if no kind
        // information is provided.
        // 7. If audio kinds equals an empty array, then run the default track
        // kinds algorithm with byteStreamTrackID set to audio byte stream track
        // ID and type set to "audio" and assign the value returned by the
        // algorithm to audio kinds.
        // 8. For each value in audio kinds, run the following steps:
        //   1. Let current audio kind equal the value from audio kinds for this
        //   iteration of the loop.
        //   2. Let new audio track be a new AudioTrack object.
        //   3. Generate a unique ID and assign it to the id property on new
        //   audio track.
        //   4. Assign audio language to the language property on new audio
        //   track.
        //   5. Assign audio label to the label property on new audio track.
        //   6. Assign current audio kind to the kind property on new audio
        //   track.
        //   7. If audioTracks.length equals 0, then run the following steps:
        //     1. Set the enabled property on new audio track to true.
        //     2. Set active track flag to true.
        activeTrack = true;
        //   8. Add new audio track to the audioTracks attribute on this
        //   SourceBuffer object.
        //   9. Queue a task to fire a trusted event named addtrack, that does
        //   not bubble and is not cancelable, and that uses the TrackEvent
        //   interface, at the AudioTrackList object referenced by the
        //   audioTracks attribute on this SourceBuffer object.
        //   10. Add new audio track to the audioTracks attribute on the
        //   HTMLMediaElement.
        //   11. Queue a task to fire a trusted event named addtrack, that does
        //   not bubble and is not cancelable, and that uses the TrackEvent
        //   interface, at the AudioTrackList object referenced by the
        //   audioTracks attribute on the HTMLMediaElement.
        mAudioTracks.mBuffers.AppendElement(TrackBuffer());
        // 10. Add the track description for this track to the track buffer.
        mAudioTracks.mInfo = new TrackInfoSharedPtr(info.mAudio, streamID);
        mAudioTracks.mLastInfo = mAudioTracks.mInfo;
      }

      mVideoTracks.mNumTracks = numVideos;
      // 3. For each video track in the initialization segment, run following
      // steps:
      // NOTE(review): as for audio, the per-track loop is commented out and
      // only the first video track is set up.
      // for (uint32_t i = 0; i < numVideos; i++) {
      if (numVideos) {
        // 1. Let video byte stream track ID be the Track ID for the current
        // track being processed.
        // 2. Let video language be a BCP 47 language tag for the language
        // specified in the initialization segment for this track or an empty
        // string if no language info is present.
        // 3. If video language equals an empty string or the 'und' BCP 47
        // value, then run the default track language algorithm with
        // byteStreamTrackID set to video byte stream track ID and type set to
        // "video" and assign the value returned by the algorithm to video
        // language.
        // 4. Let video label be a label specified in the initialization segment
        // for this track or an empty string if no label info is present.
        // 5. If video label equals an empty string, then run the default track
        // label algorithm with byteStreamTrackID set to video byte stream track
        // ID and type set to "video" and assign the value returned by the
        // algorithm to video label.
        // 6. Let video kinds be an array of kind strings specified in the
        // initialization segment for this track or an empty array if no kind
        // information is provided.
        // 7. If video kinds equals an empty array, then run the default track
        // kinds algorithm with byteStreamTrackID set to video byte stream track
        // ID and type set to "video" and assign the value returned by the
        // algorithm to video kinds.
        // 8. For each value in video kinds, run the following steps:
        //   1. Let current video kind equal the value from video kinds for this
        //   iteration of the loop.
        //   2. Let new video track be a new VideoTrack object.
        //   3. Generate a unique ID and assign it to the id property on new
        //   video track.
        //   4. Assign video language to the language property on new video
        //   track.
        //   5. Assign video label to the label property on new video track.
        //   6. Assign current video kind to the kind property on new video
        //   track.
        //   7. If videoTracks.length equals 0, then run the following steps:
        //     1. Set the selected property on new video track to true.
        //     2. Set active track flag to true.
        activeTrack = true;
        //   8. Add new video track to the videoTracks attribute on this
        //   SourceBuffer object.
        //   9. Queue a task to fire a trusted event named addtrack, that does
        //   not bubble and is not cancelable, and that uses the TrackEvent
        //   interface, at the VideoTrackList object referenced by the
        //   videoTracks attribute on this SourceBuffer object.
        //   10. Add new video track to the videoTracks attribute on the
        //   HTMLMediaElement.
        //   11. Queue a task to fire a trusted event named addtrack, that does
        //   not bubble and is not cancelable, and that uses the TrackEvent
        //   interface, at the VideoTrackList object referenced by the
        //   videoTracks attribute on the HTMLMediaElement.
        mVideoTracks.mBuffers.AppendElement(TrackBuffer());
        // 10. Add the track description for this track to the track buffer.
        mVideoTracks.mInfo = new TrackInfoSharedPtr(info.mVideo, streamID);
        mVideoTracks.mLastInfo = mVideoTracks.mInfo;
      }
      // 4. For each text track in the initialization segment, run following
      // steps:
      // 5. If active track flag equals true, then run the following steps:
      // This is handled by SourceBuffer once the promise is resolved.
      if (activeTrack) {
        mActiveTrack = true;
      }

      // 6. Set first initialization segment received flag to true.
      mFirstInitializationSegmentReceived = true;
    } else {
      // A later, different init segment: only the "last info" of each track
      // is replaced, tagged with the new stream id.
      MSE_DEBUG("Get new init data");
      mAudioTracks.mLastInfo = new TrackInfoSharedPtr(info.mAudio, streamID);
      mVideoTracks.mLastInfo = new TrackInfoSharedPtr(info.mVideo, streamID);
    }

    UniquePtr<EncryptionInfo> crypto = mInputDemuxer->GetCrypto();
    if (crypto && crypto->IsEncrypted()) {
      // Try and dispatch 'encrypted'. Won't go if ready state still
      // HAVE_NOTHING.
      for (uint32_t i = 0; i < crypto->mInitDatas.Length(); i++) {
        nsCOMPtr<nsIRunnable> r = new DispatchKeyNeededEvent(
            mParentDecoder, crypto->mInitDatas[i].mInitData,
            crypto->mInitDatas[i].mType);
        mAbstractMainThread->Dispatch(r.forget());
      }
      info.mCrypto = *crypto;
      // We clear our crypto init data array, so the MediaFormatReader will
      // not emit an encrypted event for the same init data again.
      info.mCrypto.mInitDatas.Clear();
    }

    {
      // mInfo is written while holding mMutex.
      MutexAutoLock mut(mMutex);
      mInfo = info;
    }
  }
  // We now have a valid init data ; we can store it for later use.
  mInitData = mParser->InitData();

  // We have now completed the changeType operation.
  mChangeTypeReceived = false;

  // 3. Remove the initialization segment bytes from the beginning of the input
  // buffer. This step has already been done in InitializationSegmentReceived
  // when we transferred the content into mCurrentInputBuffer.
  mCurrentInputBuffer->EvictAll();
  mInputDemuxer->NotifyDataRemoved();
  RecreateParser(true);

  // 4. Set append state to WAITING_FOR_SEGMENT.
  SetAppendState(AppendState::WAITING_FOR_SEGMENT);
  // 5. Jump to the loop top step above.
  ScheduleSegmentParserLoop();

  // Init() resolved with a non-OK result that was not treated as an error
  // above: surface it to the decoder owner as a warning, on the main thread.
  if (aResult != NS_OK && mParentDecoder) {
    RefPtr<TrackBuffersManager> self = this;
    mAbstractMainThread->Dispatch(NS_NewRunnableFunction(
        "TrackBuffersManager::OnDemuxerInitDone", [self, aResult]() {
          if (self->mParentDecoder && self->mParentDecoder->GetOwner()) {
            self->mParentDecoder->GetOwner()->DecodeWarning(aResult);
          }
        }));
  }
}

// Rejection handler shared by the demuxer Init() requests started in
// InitializationSegmentReceived() and ResetDemuxingState(): completes the
// tracked request and rejects the current append with aError.
void TrackBuffersManager::OnDemuxerInitFailed(const MediaResult& aError) {
  mTaskQueueCapability->AssertOnCurrentThread();
  MSE_DEBUG("");
  MOZ_ASSERT(aError != NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA);
  mDemuxerInitRequest.Complete();

  RejectAppend(aError, __func__);
}

// Hands the bytes of the current media segment found in mInputBuffer over to
// the demuxer's resource and starts demuxing (video first).
// Returns the promise held by mProcessingPromise, or an already rejected
// promise when the byte accounting shows that the appended data is
// inconsistent with the media segment range reported by the parser.
RefPtr<TrackBuffersManager::CodedFrameProcessingPromise>
TrackBuffersManager::CodedFrameProcessing() {
  MOZ_ASSERT(OnTaskQueue());
  MOZ_ASSERT(mProcessingPromise.IsEmpty());
  AUTO_PROFILER_LABEL("TrackBuffersManager::CodedFrameProcessing",
                      MEDIA_PLAYBACK);

  MediaByteRange mediaRange = mParser->MediaSegmentRange();
  if (mediaRange.IsEmpty()) {
    // No complete media segment range is known yet: forward everything we
    // have.
    AppendDataToCurrentInputBuffer(*mInputBuffer);
    mInputBuffer.reset();
  } else {
    MOZ_ASSERT(mProcessedInput >= mInputBuffer->Length());
    if (int64_t(mProcessedInput - mInputBuffer->Length()) > mediaRange.mEnd) {
      // Something is not quite right with the data appended. Refuse it.
      // This would typically happen if the previous media segment was partial
      // yet a new complete media segment was added.
      return CodedFrameProcessingPromise::CreateAndReject(NS_ERROR_FAILURE,
                                                          __func__);
    }
    // The mediaRange is offset by the init segment position previously added.
    uint32_t length =
        mediaRange.mEnd - (mProcessedInput - mInputBuffer->Length());
    if (!length) {
      // We've completed our earlier media segment and no new data is to be
      // processed. This happens with some containers that can't detect that a
      // media segment is ending until a new one starts.
RefPtr<CodedFrameProcessingPromise> p = mProcessingPromise.Ensure(__func__); CompleteCodedFrameProcessing(); return p; } AppendDataToCurrentInputBuffer(mInputBuffer->To(length)); mInputBuffer->RemoveFront(length); } RefPtr<CodedFrameProcessingPromise> p = mProcessingPromise.Ensure(__func__); DoDemuxVideo(); return p; } void TrackBuffersManager::OnDemuxFailed(TrackType aTrack, const MediaResult& aError) { MOZ_ASSERT(OnTaskQueue()); MSE_DEBUG("Failed to demux %s, failure:%s", aTrack == TrackType::kVideoTrack ? "video" : "audio", aError.ErrorName().get()); switch (aError.Code()) { case NS_ERROR_DOM_MEDIA_END_OF_STREAM: case NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA: if (aTrack == TrackType::kVideoTrack) { DoDemuxAudio(); } else { CompleteCodedFrameProcessing(); } break; default: RejectProcessing(aError, __func__); break; } } void TrackBuffersManager::DoDemuxVideo() { MOZ_ASSERT(OnTaskQueue()); if (!HasVideo()) { DoDemuxAudio(); return; } mVideoTracks.mDemuxer->GetSamples(-1) ->Then(TaskQueueFromTaskQueue(), __func__, this, &TrackBuffersManager::OnVideoDemuxCompleted, &TrackBuffersManager::OnVideoDemuxFailed) ->Track(mVideoTracks.mDemuxRequest); } void TrackBuffersManager::MaybeDispatchEncryptedEvent( const nsTArray<RefPtr<MediaRawData>>& aSamples) { // Try and dispatch 'encrypted'. Won't go if ready state still HAVE_NOTHING. 
for (const RefPtr<MediaRawData>& sample : aSamples) { for (const nsTArray<uint8_t>& initData : sample->mCrypto.mInitDatas) { nsCOMPtr<nsIRunnable> r = new DispatchKeyNeededEvent( mParentDecoder, initData, sample->mCrypto.mInitDataType); mAbstractMainThread->Dispatch(r.forget()); } } } void TrackBuffersManager::OnVideoDemuxCompleted( const RefPtr<MediaTrackDemuxer::SamplesHolder>& aSamples) { mTaskQueueCapability->AssertOnCurrentThread(); MSE_DEBUG("%zu video samples demuxed", aSamples->GetSamples().Length()); mVideoTracks.mDemuxRequest.Complete(); mVideoTracks.mQueuedSamples.AppendElements(aSamples->GetSamples()); MaybeDispatchEncryptedEvent(aSamples->GetSamples()); DoDemuxAudio(); } void TrackBuffersManager::DoDemuxAudio() { MOZ_ASSERT(OnTaskQueue()); if (!HasAudio()) { CompleteCodedFrameProcessing(); return; } mAudioTracks.mDemuxer->GetSamples(-1) ->Then(TaskQueueFromTaskQueue(), __func__, this, &TrackBuffersManager::OnAudioDemuxCompleted, &TrackBuffersManager::OnAudioDemuxFailed) ->Track(mAudioTracks.mDemuxRequest); } void TrackBuffersManager::OnAudioDemuxCompleted( const RefPtr<MediaTrackDemuxer::SamplesHolder>& aSamples) { mTaskQueueCapability->AssertOnCurrentThread(); MSE_DEBUG("%zu audio samples demuxed", aSamples->GetSamples().Length()); // When using MSE, it's possible for each fragments to have their own // duration, with a duration that is incorrectly rounded. Ignore the trimming // information set by the demuxer to ensure a continous playback. for (const auto& sample : aSamples->GetSamples()) { sample->mOriginalPresentationWindow = Nothing(); } mAudioTracks.mDemuxRequest.Complete(); mAudioTracks.mQueuedSamples.AppendElements(aSamples->GetSamples()); CompleteCodedFrameProcessing(); MaybeDispatchEncryptedEvent(aSamples->GetSamples()); } void TrackBuffersManager::CompleteCodedFrameProcessing() { MOZ_ASSERT(OnTaskQueue()); AUTO_PROFILER_LABEL("TrackBuffersManager::CompleteCodedFrameProcessing", MEDIA_PLAYBACK); // 1. 
For each coded frame in the media segment run the following steps: // Coded Frame Processing steps 1.1 to 1.21. if (mSourceBufferAttributes->GetAppendMode() == SourceBufferAppendMode::Sequence && mVideoTracks.mQueuedSamples.Length() && mAudioTracks.mQueuedSamples.Length()) { // When we are in sequence mode, the order in which we process the frames is // important as it determines the future value of timestampOffset. // So we process the earliest sample first. See bug 1293576. TimeInterval videoInterval = PresentationInterval(mVideoTracks.mQueuedSamples); TimeInterval audioInterval = PresentationInterval(mAudioTracks.mQueuedSamples); if (audioInterval.mStart < videoInterval.mStart) { ProcessFrames(mAudioTracks.mQueuedSamples, mAudioTracks); ProcessFrames(mVideoTracks.mQueuedSamples, mVideoTracks); } else { ProcessFrames(mVideoTracks.mQueuedSamples, mVideoTracks); ProcessFrames(mAudioTracks.mQueuedSamples, mAudioTracks); } } else { ProcessFrames(mVideoTracks.mQueuedSamples, mVideoTracks); ProcessFrames(mAudioTracks.mQueuedSamples, mAudioTracks); } #if defined(DEBUG) if (HasVideo()) { const auto& track = mVideoTracks.GetTrackBuffer(); MOZ_ASSERT(track.IsEmpty() || track[0]->mKeyframe); for (uint32_t i = 1; i < track.Length(); i++) { MOZ_ASSERT( (track[i - 1]->mTrackInfo->GetID() == track[i]->mTrackInfo->GetID() && track[i - 1]->mTimecode <= track[i]->mTimecode) || track[i]->mKeyframe); } } if (HasAudio()) { const auto& track = mAudioTracks.GetTrackBuffer(); MOZ_ASSERT(track.IsEmpty() || track[0]->mKeyframe); for (uint32_t i = 1; i < track.Length(); i++) { MOZ_ASSERT( (track[i - 1]->mTrackInfo->GetID() == track[i]->mTrackInfo->GetID() && track[i - 1]->mTimecode <= track[i]->mTimecode) || track[i]->mKeyframe); } } #endif mVideoTracks.mQueuedSamples.Clear(); mAudioTracks.mQueuedSamples.Clear(); UpdateBufferedRanges(); // Update our reported total size. 
mSizeSourceBuffer = mVideoTracks.mSizeBuffer + mAudioTracks.mSizeBuffer; // Return to step 6.4 of Segment Parser Loop algorithm // 4. If this SourceBuffer is full and cannot accept more media data, then set // the buffer full flag to true. if (mSizeSourceBuffer >= EvictionThreshold()) { mBufferFull = true; } // 5. If the input buffer does not contain a complete media segment, then jump // to the need more data step below. if (mParser->MediaSegmentRange().IsEmpty()) { ResolveProcessing(true, __func__); return; } mLastParsedEndTime = Some(std::max(mAudioTracks.mLastParsedEndTime, mVideoTracks.mLastParsedEndTime)); // 6. Remove the media segment bytes from the beginning of the input buffer. // Clear our demuxer from any already processed data. int64_t safeToEvict = std::min(HasVideo() ? mVideoTracks.mDemuxer->GetEvictionOffset( mVideoTracks.mLastParsedEndTime) : INT64_MAX, HasAudio() ? mAudioTracks.mDemuxer->GetEvictionOffset( mAudioTracks.mLastParsedEndTime) : INT64_MAX); mCurrentInputBuffer->EvictBefore(safeToEvict); mInputDemuxer->NotifyDataRemoved(); RecreateParser(true); // 7. Set append state to WAITING_FOR_SEGMENT. SetAppendState(AppendState::WAITING_FOR_SEGMENT); // 8. Jump to the loop top step above. 
ResolveProcessing(false, __func__); } void TrackBuffersManager::RejectProcessing(const MediaResult& aRejectValue, const char* aName) { mProcessingPromise.RejectIfExists(aRejectValue, __func__); } void TrackBuffersManager::ResolveProcessing(bool aResolveValue, const char* aName) { mProcessingPromise.ResolveIfExists(aResolveValue, __func__); } void TrackBuffersManager::CheckSequenceDiscontinuity( const TimeUnit& aPresentationTime) { if (mSourceBufferAttributes->GetAppendMode() == SourceBufferAppendMode::Sequence && mSourceBufferAttributes->HaveGroupStartTimestamp()) { mSourceBufferAttributes->SetTimestampOffset( mSourceBufferAttributes->GetGroupStartTimestamp() - aPresentationTime); mSourceBufferAttributes->SetGroupEndTimestamp( mSourceBufferAttributes->GetGroupStartTimestamp()); mVideoTracks.mNeedRandomAccessPoint = true; mAudioTracks.mNeedRandomAccessPoint = true; mSourceBufferAttributes->ResetGroupStartTimestamp(); } } TimeInterval TrackBuffersManager::PresentationInterval( const TrackBuffer& aSamples) const { TimeInterval presentationInterval = TimeInterval(aSamples[0]->mTime, aSamples[0]->GetEndTime()); for (uint32_t i = 1; i < aSamples.Length(); i++) { const auto& sample = aSamples[i]; presentationInterval = presentationInterval.Span( TimeInterval(sample->mTime, sample->GetEndTime())); } return presentationInterval; } void TrackBuffersManager::ProcessFrames(TrackBuffer& aSamples, TrackData& aTrackData) { AUTO_PROFILER_LABEL("TrackBuffersManager::ProcessFrames", MEDIA_PLAYBACK); if (!aSamples.Length()) { return; } // 1. If generate timestamps flag equals true // Let presentation timestamp equal 0. // Otherwise // Let presentation timestamp be a double precision floating point // representation of the coded frame's presentation timestamp in seconds. TimeUnit presentationTimestamp = mSourceBufferAttributes->mGenerateTimestamps ? TimeUnit::Zero() : aSamples[0]->mTime; // 3. 
If mode equals "sequence" and group start timestamp is set, then run the // following steps: CheckSequenceDiscontinuity(presentationTimestamp); // 5. Let track buffer equal the track buffer that the coded frame will be // added to. auto& trackBuffer = aTrackData; TimeIntervals samplesRange; uint32_t sizeNewSamples = 0; TrackBuffer samples; // array that will contain the frames to be added // to our track buffer. // We assume that no frames are contiguous within a media segment and as such // don't need to check for discontinuity except for the first frame and should // a frame be ignored due to the target window. bool needDiscontinuityCheck = true; // Highest presentation time seen in samples block. TimeUnit highestSampleTime; if (aSamples.Length()) { aTrackData.mLastParsedEndTime = TimeUnit(); } auto addToSamples = [&](MediaRawData* aSample, const TimeInterval& aInterval) { aSample->mTime = aInterval.mStart; aSample->mDuration = aInterval.Length(); aSample->mTrackInfo = trackBuffer.mLastInfo; SAMPLE_DEBUGV( "Add sample [%" PRId64 "%s,%" PRId64 "%s] by interval %s", aSample->mTime.ToMicroseconds(), aSample->mTime.ToString().get(), aSample->GetEndTime().ToMicroseconds(), aSample->GetEndTime().ToString().get(), aInterval.ToString().get()); MOZ_DIAGNOSTIC_ASSERT(aSample->HasValidTime()); MOZ_DIAGNOSTIC_ASSERT(TimeInterval(aSample->mTime, aSample->GetEndTime()) == aInterval); #ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED auto oldRangeEnd = samplesRange.GetEnd(); #endif samplesRange += aInterval; // For debug purpose, if the sample range grows, it should match the // sample's end time. MOZ_DIAGNOSTIC_ASSERT_IF(samplesRange.GetEnd() > oldRangeEnd, samplesRange.GetEnd() == aSample->GetEndTime()); sizeNewSamples += aSample->ComputedSizeOfIncludingThis(); samples.AppendElement(aSample); }; // Will be set to the last frame dropped due to being outside mAppendWindow. // It will be added prior the first following frame which can be added to the // track buffer. 
// This sample will be set with a duration of only 1us which will cause it to // be dropped once returned by the decoder. // This sample is required to "prime" the decoder so that the following frame // can be fully decoded. RefPtr<MediaRawData> previouslyDroppedSample; for (auto& sample : aSamples) { const TimeUnit sampleEndTime = sample->GetEndTime(); if (sampleEndTime > aTrackData.mLastParsedEndTime) { aTrackData.mLastParsedEndTime = sampleEndTime; } // We perform step 10 right away as we can't do anything should a keyframe // be needed until we have one. // 10. If the need random access point flag on track buffer equals true, // then run the following steps: if (trackBuffer.mNeedRandomAccessPoint) { // 1. If the coded frame is not a random access point, then drop the coded // frame and jump to the top of the loop to start processing the next // coded frame. if (!sample->mKeyframe) { previouslyDroppedSample = nullptr; continue; } // 2. Set the need random access point flag on track buffer to false. trackBuffer.mNeedRandomAccessPoint = false; } // We perform step 1,2 and 4 at once: // 1. If generate timestamps flag equals true: // Let presentation timestamp equal 0. // Let decode timestamp equal 0. // Otherwise: // Let presentation timestamp be a double precision floating point // representation of the coded frame's presentation timestamp in seconds. // Let decode timestamp be a double precision floating point // representation of the coded frame's decode timestamp in seconds. // 2. Let frame duration be a double precision floating point representation // of the coded frame's duration in seconds. Step 3 is performed earlier or // when a discontinuity has been detected. // 4. If timestampOffset is not 0, then run the following steps: TimeUnit sampleTime = sample->mTime; TimeUnit sampleTimecode = sample->mTimecode; TimeUnit sampleDuration = sample->mDuration; // Keep the timestamp, set by js, in the time base of the container. 
TimeUnit timestampOffset = mSourceBufferAttributes->GetTimestampOffset().ToBase(sample->mTime); TimeInterval sampleInterval = mSourceBufferAttributes->mGenerateTimestamps ? TimeInterval(timestampOffset, timestampOffset + sampleDuration) : TimeInterval(timestampOffset + sampleTime, timestampOffset + sampleTime + sampleDuration); TimeUnit decodeTimestamp = mSourceBufferAttributes->mGenerateTimestamps ? timestampOffset : timestampOffset + sampleTimecode; SAMPLE_DEBUG( "Processing %s frame [%" PRId64 "%s,%" PRId64 "%s] (adjusted:[%" PRId64 "%s,%" PRId64 "%s]), dts:%" PRId64 ", duration:%" PRId64 ", kf:%d)", aTrackData.mInfo->mMimeType.get(), sample->mTime.ToMicroseconds(), sample->mTime.ToString().get(), sample->GetEndTime().ToMicroseconds(), sample->GetEndTime().ToString().get(), sampleInterval.mStart.ToMicroseconds(), sampleInterval.mStart.ToString().get(), sampleInterval.mEnd.ToMicroseconds(), sampleInterval.mEnd.ToString().get(), sample->mTimecode.ToMicroseconds(), sample->mDuration.ToMicroseconds(), sample->mKeyframe); // 6. If last decode timestamp for track buffer is set and decode timestamp // is less than last decode timestamp: OR If last decode timestamp for track // buffer is set and the difference between decode timestamp and last decode // timestamp is greater than 2 times last frame duration: if (needDiscontinuityCheck && trackBuffer.mLastDecodeTimestamp.isSome() && (decodeTimestamp < trackBuffer.mLastDecodeTimestamp.ref() || (decodeTimestamp - trackBuffer.mLastDecodeTimestamp.ref() > trackBuffer.mLongestFrameDuration * 2))) { MSE_DEBUG("Discontinuity detected."); SourceBufferAppendMode appendMode = mSourceBufferAttributes->GetAppendMode(); // 1a. If mode equals "segments": if (appendMode == SourceBufferAppendMode::Segments) { // Set group end timestamp to presentation timestamp. mSourceBufferAttributes->SetGroupEndTimestamp(sampleInterval.mStart); } // 1b. 
If mode equals "sequence": if (appendMode == SourceBufferAppendMode::Sequence) { // Set group start timestamp equal to the group end timestamp. mSourceBufferAttributes->SetGroupStartTimestamp( mSourceBufferAttributes->GetGroupEndTimestamp()); } for (auto& track : GetTracksList()) { // 2. Unset the last decode timestamp on all track buffers. // 3. Unset the last frame duration on all track buffers. // 4. Unset the highest end timestamp on all track buffers. // 5. Set the need random access point flag on all track buffers to // true. track->ResetAppendState(); } // 6. Jump to the Loop Top step above to restart processing of the current // coded frame. Rather that restarting the process for the frame, we run // the first steps again instead. // 3. If mode equals "sequence" and group start timestamp is set, then run // the following steps: TimeUnit presentationTimestamp = mSourceBufferAttributes->mGenerateTimestamps ? TimeUnit() : sampleTime; CheckSequenceDiscontinuity(presentationTimestamp); if (!sample->mKeyframe) { previouslyDroppedSample = nullptr; continue; } if (appendMode == SourceBufferAppendMode::Sequence) { // mSourceBufferAttributes->GetTimestampOffset() was modified during // CheckSequenceDiscontinuity. We need to update our variables. timestampOffset = mSourceBufferAttributes->GetTimestampOffset().ToBase(sample->mTime); sampleInterval = mSourceBufferAttributes->mGenerateTimestamps ? TimeInterval(timestampOffset, timestampOffset + sampleDuration) : TimeInterval(timestampOffset + sampleTime, timestampOffset + sampleTime + sampleDuration); decodeTimestamp = mSourceBufferAttributes->mGenerateTimestamps ? timestampOffset : timestampOffset + sampleTimecode; } trackBuffer.mNeedRandomAccessPoint = false; needDiscontinuityCheck = false; } // 7. Let frame end timestamp equal the sum of presentation timestamp and // frame duration. This is sampleInterval.mEnd // 8. 
If presentation timestamp is less than appendWindowStart, then set the // need random access point flag to true, drop the coded frame, and jump to // the top of the loop to start processing the next coded frame. // 9. If frame end timestamp is greater than appendWindowEnd, then set the // need random access point flag to true, drop the coded frame, and jump to // the top of the loop to start processing the next coded frame. if (!mAppendWindow.ContainsStrict(sampleInterval)) { if (mAppendWindow.IntersectsStrict(sampleInterval)) { // 8. Note: Some implementations MAY choose to collect some of these // coded frames with presentation timestamp less than // appendWindowStart and use them to generate a splice at the first // coded frame that has a presentation timestamp greater than or // equal to appendWindowStart even if that frame is not a random // access point. Supporting this requires multiple decoders or faster // than real-time decoding so for now this behavior will not be a // normative requirement. // 9. Note: Some implementations MAY choose to collect coded frames with // presentation timestamp less than appendWindowEnd and frame end // timestamp greater than appendWindowEnd and use them to generate a // splice across the portion of the collected coded frames within the // append window at time of collection, and the beginning portion of // later processed frames which only partially overlap the end of the // collected coded frames. Supporting this requires multiple decoders // or faster than real-time decoding so for now this behavior will // not be a normative requirement. In conjunction with collecting // coded frames that span appendWindowStart, implementations MAY thus // support gapless audio splicing. 
TimeInterval intersection = mAppendWindow.Intersection(sampleInterval); intersection.mStart = intersection.mStart.ToBase(sample->mTime); intersection.mEnd = intersection.mEnd.ToBase(sample->mTime); sample->mOriginalPresentationWindow = Some(sampleInterval); MSE_DEBUGV("will truncate frame from [%" PRId64 "%s,%" PRId64 "%s] to [%" PRId64 "%s,%" PRId64 "%s]", sampleInterval.mStart.ToMicroseconds(), sampleInterval.mStart.ToString().get(), sampleInterval.mEnd.ToMicroseconds(), sampleInterval.mEnd.ToString().get(), intersection.mStart.ToMicroseconds(), intersection.mStart.ToString().get(), intersection.mEnd.ToMicroseconds(), intersection.mEnd.ToString().get()); sampleInterval = intersection; } else { sample->mOriginalPresentationWindow = Some(sampleInterval); sample->mTimecode = decodeTimestamp; previouslyDroppedSample = sample; MSE_DEBUGV("frame [%" PRId64 "%s,%" PRId64 "%s] outside appendWindow [%" PRId64 "%s,%" PRId64 "%s] dropping", sampleInterval.mStart.ToMicroseconds(), sampleInterval.mStart.ToString().get(), sampleInterval.mEnd.ToMicroseconds(), sampleInterval.mEnd.ToString().get(), mAppendWindow.mStart.ToMicroseconds(), mAppendWindow.mStart.ToString().get(), mAppendWindow.mEnd.ToMicroseconds(), mAppendWindow.mEnd.ToString().get()); if (samples.Length()) { // We are creating a discontinuity in the samples. // Insert the samples processed so far. InsertFrames(samples, samplesRange, trackBuffer); samples.Clear(); samplesRange = TimeIntervals(); trackBuffer.mSizeBuffer += sizeNewSamples; sizeNewSamples = 0; UpdateHighestTimestamp(trackBuffer, highestSampleTime); } trackBuffer.mNeedRandomAccessPoint = true; needDiscontinuityCheck = true; continue; } } if (previouslyDroppedSample) { MSE_DEBUGV("Adding silent frame"); // This "silent" sample will be added so that it starts exactly before the // first usable one. The duration of the actual sample will be adjusted so // that the total duration stay the same. 
This sample will be dropped // after decoding by the AudioTrimmer (if audio). TimeInterval previouslyDroppedSampleInterval = TimeInterval(sampleInterval.mStart, sampleInterval.mStart); addToSamples(previouslyDroppedSample, previouslyDroppedSampleInterval); previouslyDroppedSample = nullptr; sampleInterval.mStart += previouslyDroppedSampleInterval.Length(); } sample->mTimecode = decodeTimestamp; addToSamples(sample, sampleInterval); // Steps 11,12,13,14, 15 and 16 will be done in one block in InsertFrames. trackBuffer.mLongestFrameDuration = trackBuffer.mLastFrameDuration.isSome() ? sample->mKeyframe ? sampleDuration : std::max(sampleDuration, trackBuffer.mLongestFrameDuration) : sampleDuration; // 17. Set last decode timestamp for track buffer to decode timestamp. trackBuffer.mLastDecodeTimestamp = Some(decodeTimestamp); // 18. Set last frame duration for track buffer to frame duration. trackBuffer.mLastFrameDuration = Some(sampleDuration); // 19. If highest end timestamp for track buffer is unset or frame end // timestamp is greater than highest end timestamp, then set highest end // timestamp for track buffer to frame end timestamp. if (trackBuffer.mHighestEndTimestamp.isNothing() || sampleInterval.mEnd > trackBuffer.mHighestEndTimestamp.ref()) { trackBuffer.mHighestEndTimestamp = Some(sampleInterval.mEnd); } if (sampleInterval.mStart > highestSampleTime) { highestSampleTime = sampleInterval.mStart; } // 20. If frame end timestamp is greater than group end timestamp, then set // group end timestamp equal to frame end timestamp. if (sampleInterval.mEnd > mSourceBufferAttributes->GetGroupEndTimestamp()) { mSourceBufferAttributes->SetGroupEndTimestamp(sampleInterval.mEnd); } // 21. If generate timestamps flag equals true, then set timestampOffset // equal to frame end timestamp. 
if (mSourceBufferAttributes->mGenerateTimestamps) { mSourceBufferAttributes->SetTimestampOffset(sampleInterval.mEnd); } } if (samples.Length()) { InsertFrames(samples, samplesRange, trackBuffer); trackBuffer.mSizeBuffer += sizeNewSamples; UpdateHighestTimestamp(trackBuffer, highestSampleTime); } } bool TrackBuffersManager::CheckNextInsertionIndex(TrackData& aTrackData, const TimeUnit& aSampleTime) { if (aTrackData.mNextInsertionIndex.isSome()) { return true; } const TrackBuffer& data = aTrackData.GetTrackBuffer(); if (data.IsEmpty() || aSampleTime < aTrackData.mBufferedRanges.GetStart()) { aTrackData.mNextInsertionIndex = Some(0u); return true; } // Find which discontinuity we should insert the frame before. TimeInterval target; for (const auto& interval : aTrackData.mBufferedRanges) { if (aSampleTime < interval.mStart) { target = interval; break; } } if (target.IsEmpty()) { // No target found, it will be added at the end of the track buffer. aTrackData.mNextInsertionIndex = Some(uint32_t(data.Length())); return true; } // We now need to find the first frame of the searched interval. // We will insert our new frames right before. for (uint32_t i = 0; i < data.Length(); i++) { const RefPtr<MediaRawData>& sample = data[i]; if (sample->mTime >= target.mStart || sample->GetEndTime() > target.mStart) { aTrackData.mNextInsertionIndex = Some(i); return true; } } NS_ASSERTION(false, "Insertion Index Not Found"); return false; } void TrackBuffersManager::InsertFrames(TrackBuffer& aSamples, const TimeIntervals& aIntervals, TrackData& aTrackData) { AUTO_PROFILER_LABEL("TrackBuffersManager::InsertFrames", MEDIA_PLAYBACK); // 5. Let track buffer equal the track buffer that the coded frame will be // added to. 
auto& trackBuffer = aTrackData; MSE_DEBUGV("Processing %zu %s frames(start:%" PRId64 " end:%" PRId64 ")", aSamples.Length(), aTrackData.mInfo->mMimeType.get(), aIntervals.GetStart().ToMicroseconds(), aIntervals.GetEnd().ToMicroseconds()); if (profiler_thread_is_being_profiled_for_markers()) { nsPrintfCString markerString( "Processing %zu %s frames(start:%" PRId64 " end:%" PRId64 ")", aSamples.Length(), aTrackData.mInfo->mMimeType.get(), aIntervals.GetStart().ToMicroseconds(), aIntervals.GetEnd().ToMicroseconds()); PROFILER_MARKER_TEXT("InsertFrames", MEDIA_PLAYBACK, {}, markerString); } // 11. Let spliced audio frame be an unset variable for holding audio splice // information // 12. Let spliced timed text frame be an unset variable for holding timed // text splice information // 13. If last decode timestamp for track buffer is unset and presentation // timestamp falls within the presentation interval of a coded frame in track // buffer,then run the following steps: For now we only handle replacing // existing frames with the new ones. So we skip this step. // 14. Remove existing coded frames in track buffer: // a) If highest end timestamp for track buffer is not set: // Remove all coded frames from track buffer that have a presentation // timestamp greater than or equal to presentation timestamp and less // than frame end timestamp. // b) If highest end timestamp for track buffer is set and less than or // equal to presentation timestamp: // Remove all coded frames from track buffer that have a presentation // timestamp greater than or equal to highest end timestamp and less than // frame end timestamp // There is an ambiguity on how to remove frames, which was lodged with: // https://www.w3.org/Bugs/Public/show_bug.cgi?id=28710, implementing as per // bug description. // 15. 
Remove decoding dependencies of the coded frames removed in the // previous step: Remove all coded frames between the coded frames removed in // the previous step and the next random access point after those removed // frames. if (trackBuffer.mBufferedRanges.IntersectsStrict(aIntervals)) { if (aSamples[0]->mKeyframe && (mType.Type() == MEDIAMIMETYPE("video/webm") || mType.Type() == MEDIAMIMETYPE("audio/webm"))) { // We are starting a new GOP, we do not have to worry about breaking an // existing current coded frame group. Reset the next insertion index // so the search for when to start our frames removal can be exhaustive. // This is a workaround for bug 1276184 and only until either bug 1277733 // or bug 1209386 is fixed. // With the webm container, we can't always properly determine the // duration of the last frame, which may cause the last frame of a cluster // to overlap the following frame. trackBuffer.mNextInsertionIndex.reset(); } uint32_t index = RemoveFrames(aIntervals, trackBuffer, trackBuffer.mNextInsertionIndex.refOr(0), RemovalMode::kTruncateFrame); if (index) { trackBuffer.mNextInsertionIndex = Some(index); } } // 16. Add the coded frame with the presentation timestamp, decode timestamp, // and frame duration to the track buffer. if (!CheckNextInsertionIndex(aTrackData, aSamples[0]->mTime)) { RejectProcessing(NS_ERROR_FAILURE, __func__); return; } // Adjust our demuxing index if necessary. 
if (trackBuffer.mNextGetSampleIndex.isSome()) { if (trackBuffer.mNextInsertionIndex.ref() == trackBuffer.mNextGetSampleIndex.ref() && aIntervals.GetEnd() >= trackBuffer.mNextSampleTime) { MSE_DEBUG("Next sample to be played got overwritten"); trackBuffer.mNextGetSampleIndex.reset(); ResetEvictionIndex(trackBuffer); } else if (trackBuffer.mNextInsertionIndex.ref() <= trackBuffer.mNextGetSampleIndex.ref()) { trackBuffer.mNextGetSampleIndex.ref() += aSamples.Length(); // We could adjust the eviction index so that the new data gets added to // the evictable amount (as it is prior currentTime). However, considering // new data is being added prior the current playback, it's likely that // this data will be played next, and as such we probably don't want to // have it evicted too early. So instead reset the eviction index instead. ResetEvictionIndex(trackBuffer); } } TrackBuffer& data = trackBuffer.GetTrackBuffer(); data.InsertElementsAt(trackBuffer.mNextInsertionIndex.ref(), aSamples); trackBuffer.mNextInsertionIndex.ref() += aSamples.Length(); // Update our buffered range with new sample interval. trackBuffer.mBufferedRanges += aIntervals; MSE_DEBUG("Inserted %s frame:%s, buffered-range:%s, mHighestEndTimestamp=%s", aTrackData.mInfo->mMimeType.get(), DumpTimeRanges(aIntervals).get(), DumpTimeRanges(trackBuffer.mBufferedRanges).get(), trackBuffer.mHighestEndTimestamp ? trackBuffer.mHighestEndTimestamp->ToString().get() : "none"); // We allow a fuzz factor in our interval of half a frame length, // as fuzz is +/- value, giving an effective leeway of a full frame // length. 
if (!aIntervals.IsEmpty()) { TimeIntervals range(aIntervals); range.SetFuzz(trackBuffer.mLongestFrameDuration / 2); trackBuffer.mSanitizedBufferedRanges += range; } } void TrackBuffersManager::UpdateHighestTimestamp( TrackData& aTrackData, const media::TimeUnit& aHighestTime) { if (aHighestTime > aTrackData.mHighestStartTimestamp) { MutexAutoLock mut(mMutex); aTrackData.mHighestStartTimestamp = aHighestTime; } } uint32_t TrackBuffersManager::RemoveFrames(const TimeIntervals& aIntervals, TrackData& aTrackData, uint32_t aStartIndex, RemovalMode aMode) { AUTO_PROFILER_LABEL("TrackBuffersManager::RemoveFrames", MEDIA_PLAYBACK); TrackBuffer& data = aTrackData.GetTrackBuffer(); Maybe<uint32_t> firstRemovedIndex; uint32_t lastRemovedIndex = 0; TimeIntervals intervals = aIntervals.ToBase(aTrackData.mHighestStartTimestamp); // We loop from aStartIndex to avoid removing frames that we inserted earlier // and part of the current coded frame group. This is allows to handle step // 14 of the coded frame processing algorithm without having to check the // value of highest end timestamp: "Remove existing coded frames in track // buffer: // If highest end timestamp for track buffer is not set: // Remove all coded frames from track buffer that have a presentation // timestamp greater than or equal to presentation timestamp and less than // frame end timestamp. // If highest end timestamp for track buffer is set and less than or equal to // presentation timestamp: // Remove all coded frames from track buffer that have a presentation // timestamp greater than or equal to highest end timestamp and less than // frame end timestamp. TimeUnit intervalsEnd = intervals.GetEnd(); for (uint32_t i = aStartIndex; i < data.Length(); i++) { RefPtr<MediaRawData>& sample = data[i]; if (intervals.ContainsStrict(sample->mTime)) { // The start of this existing frame will be overwritten, we drop that // entire frame. 
MSE_DEBUGV("overridding start of frame [%" PRId64 ",%" PRId64 "] with [%" PRId64 ",%" PRId64 "] dropping", sample->mTime.ToMicroseconds(), sample->GetEndTime().ToMicroseconds(), intervals.GetStart().ToMicroseconds(), intervals.GetEnd().ToMicroseconds()); if (firstRemovedIndex.isNothing()) { firstRemovedIndex = Some(i); } lastRemovedIndex = i; continue; } TimeInterval sampleInterval(sample->mTime, sample->GetEndTime()); if (aMode == RemovalMode::kTruncateFrame && intervals.IntersectsStrict(sampleInterval)) { // The sample to be overwritten is only partially covered. TimeIntervals intersection = Intersection(intervals, TimeIntervals(sampleInterval)); bool found = false; TimeUnit startTime = intersection.GetStart(&found); MOZ_DIAGNOSTIC_ASSERT(found, "Must intersect with added coded frames"); Unused << found; // Signal that this frame should be truncated when decoded. if (!sample->mOriginalPresentationWindow) { sample->mOriginalPresentationWindow = Some(sampleInterval); } MOZ_ASSERT(startTime > sample->mTime); sample->mDuration = startTime - sample->mTime; MOZ_DIAGNOSTIC_ASSERT(sample->mDuration.IsValid()); MSE_DEBUGV("partial overwrite of frame [%" PRId64 ",%" PRId64 "] with [%" PRId64 ",%" PRId64 "] trim to " "[%" PRId64 ",%" PRId64 "]", sampleInterval.mStart.ToMicroseconds(), sampleInterval.mEnd.ToMicroseconds(), intervals.GetStart().ToMicroseconds(), intervals.GetEnd().ToMicroseconds(), sample->mTime.ToMicroseconds(), sample->GetEndTime().ToMicroseconds()); continue; } if (sample->mTime >= intervalsEnd) { // We can break the loop now. All frames up to the next keyframe will be // removed during the next step. break; } } if (firstRemovedIndex.isNothing()) { return 0; } // Remove decoding dependencies of the coded frames removed in the previous // step: Remove all coded frames between the coded frames removed in the // previous step and the next random access point after those removed frames. 
for (uint32_t i = lastRemovedIndex + 1; i < data.Length(); i++) { const RefPtr<MediaRawData>& sample = data[i]; if (sample->mKeyframe) { break; } lastRemovedIndex = i; } TimeUnit maxSampleDuration; uint32_t sizeRemoved = 0; TimeIntervals removedIntervals; for (uint32_t i = firstRemovedIndex.ref(); i <= lastRemovedIndex; i++) { const RefPtr<MediaRawData> sample = data[i]; TimeInterval sampleInterval = TimeInterval(sample->mTime, sample->GetEndTime()); removedIntervals += sampleInterval; if (sample->mDuration > maxSampleDuration) { maxSampleDuration = sample->mDuration; } sizeRemoved += sample->ComputedSizeOfIncludingThis(); } aTrackData.mSizeBuffer -= sizeRemoved; nsPrintfCString msg("Removing frames from:%u for %s (frames:%u) ([%f, %f))", firstRemovedIndex.ref(), aTrackData.mInfo->mMimeType.get(), lastRemovedIndex - firstRemovedIndex.ref() + 1, removedIntervals.GetStart().ToSeconds(), removedIntervals.GetEnd().ToSeconds()); MSE_DEBUG("%s", msg.get()); if (profiler_thread_is_being_profiled_for_markers()) { PROFILER_MARKER_TEXT("RemoveFrames", MEDIA_PLAYBACK, {}, msg); } if (aTrackData.mNextGetSampleIndex.isSome()) { if (aTrackData.mNextGetSampleIndex.ref() >= firstRemovedIndex.ref() && aTrackData.mNextGetSampleIndex.ref() <= lastRemovedIndex) { MSE_DEBUG("Next sample to be played got evicted"); aTrackData.mNextGetSampleIndex.reset(); ResetEvictionIndex(aTrackData); } else if (aTrackData.mNextGetSampleIndex.ref() > lastRemovedIndex) { uint32_t samplesRemoved = lastRemovedIndex - firstRemovedIndex.ref() + 1; aTrackData.mNextGetSampleIndex.ref() -= samplesRemoved; if (aTrackData.mEvictionIndex.mLastIndex > lastRemovedIndex) { MOZ_DIAGNOSTIC_ASSERT( aTrackData.mEvictionIndex.mLastIndex >= samplesRemoved && aTrackData.mEvictionIndex.mEvictable >= sizeRemoved, "Invalid eviction index"); MutexAutoLock mut(mMutex); aTrackData.mEvictionIndex.mLastIndex -= samplesRemoved; aTrackData.mEvictionIndex.mEvictable -= sizeRemoved; } else { ResetEvictionIndex(aTrackData); } } } if 
(aTrackData.mNextInsertionIndex.isSome()) { if (aTrackData.mNextInsertionIndex.ref() > firstRemovedIndex.ref() && aTrackData.mNextInsertionIndex.ref() <= lastRemovedIndex + 1) { aTrackData.ResetAppendState(); MSE_DEBUG("NextInsertionIndex got reset."); } else if (aTrackData.mNextInsertionIndex.ref() > lastRemovedIndex + 1) { aTrackData.mNextInsertionIndex.ref() -= lastRemovedIndex - firstRemovedIndex.ref() + 1; } } // Update our buffered range to exclude the range just removed. MSE_DEBUG("Removing %s from bufferedRange %s", DumpTimeRanges(removedIntervals).get(), DumpTimeRanges(aTrackData.mBufferedRanges).get()); aTrackData.mBufferedRanges -= removedIntervals; // Recalculate sanitized buffered ranges. aTrackData.mSanitizedBufferedRanges = aTrackData.mBufferedRanges; aTrackData.mSanitizedBufferedRanges.SetFuzz(maxSampleDuration / 2); data.RemoveElementsAt(firstRemovedIndex.ref(), lastRemovedIndex - firstRemovedIndex.ref() + 1); if (removedIntervals.GetEnd() >= aTrackData.mHighestStartTimestamp && removedIntervals.GetStart() <= aTrackData.mHighestStartTimestamp) { // The sample with the highest presentation time got removed. // Rescan the trackbuffer to determine the new one. TimeUnit highestStartTime; for (const auto& sample : data) { if (sample->mTime > highestStartTime) { highestStartTime = sample->mTime; } } MutexAutoLock mut(mMutex); aTrackData.mHighestStartTimestamp = highestStartTime; } MSE_DEBUG( "After removing frames, %s data sz=%zu, highestStartTimestamp=% " PRId64 ", bufferedRange=%s, sanitizedBufferedRanges=%s", aTrackData.mInfo->mMimeType.get(), data.Length(), aTrackData.mHighestStartTimestamp.ToMicroseconds(), DumpTimeRanges(aTrackData.mBufferedRanges).get(), DumpTimeRanges(aTrackData.mSanitizedBufferedRanges).get()); // If all frames are removed, both buffer and buffered range should be empty. 
if (data.IsEmpty()) { MOZ_ASSERT(aTrackData.mBufferedRanges.IsEmpty()); // We still can't figure out why above assertion would fail, so we keep it // on debug build, and do a workaround for other builds to ensure that // buffered range should match the data. if (!aTrackData.mBufferedRanges.IsEmpty()) { NS_WARNING( nsPrintfCString("Empty data but has non-empty buffered range %s ?!", DumpTimeRanges(aTrackData.mBufferedRanges).get()) .get()); aTrackData.mBufferedRanges.Clear(); } } if (aTrackData.mBufferedRanges.IsEmpty()) { TimeIntervals sampleIntervals; for (const auto& sample : data) { sampleIntervals += TimeInterval(sample->mTime, sample->GetEndTime()); } MOZ_ASSERT(sampleIntervals.IsEmpty()); // We still can't figure out why above assertion would fail, so we keep it // on debug build, and do a workaround for other builds to ensure that // buffered range should match the data. if (!sampleIntervals.IsEmpty()) { NS_WARNING( nsPrintfCString( "Empty buffer range but has non-empty sample intervals %s ?!", DumpTimeRanges(sampleIntervals).get()) .get()); aTrackData.mBufferedRanges += sampleIntervals; TimeIntervals range(sampleIntervals); range.SetFuzz(aTrackData.mLongestFrameDuration / 2); aTrackData.mSanitizedBufferedRanges += range; } } return firstRemovedIndex.ref(); } void TrackBuffersManager::RecreateParser(bool aReuseInitData) { MOZ_ASSERT(OnTaskQueue()); // Recreate our parser for only the data remaining. This is required // as it has parsed the entire InputBuffer provided. // Once the old TrackBuffer/MediaSource implementation is removed // we can optimize this part. 
TODO if (mParser) { DDUNLINKCHILD(mParser.get()); } mParser = ContainerParser::CreateForMIMEType(mType); DDLINKCHILD("parser", mParser.get()); if (aReuseInitData && mInitData) { TimeUnit start, end; mParser->ParseStartAndEndTimestamps(MediaSpan(mInitData), start, end); mProcessedInput = mInitData->Length(); } else { mProcessedInput = 0; } } nsTArray<TrackBuffersManager::TrackData*> TrackBuffersManager::GetTracksList() { nsTArray<TrackData*> tracks; if (HasVideo()) { tracks.AppendElement(&mVideoTracks); } if (HasAudio()) { tracks.AppendElement(&mAudioTracks); } return tracks; } nsTArray<const TrackBuffersManager::TrackData*> TrackBuffersManager::GetTracksList() const { nsTArray<const TrackData*> tracks; if (HasVideo()) { tracks.AppendElement(&mVideoTracks); } if (HasAudio()) { tracks.AppendElement(&mAudioTracks); } return tracks; } void TrackBuffersManager::SetAppendState(AppendState aAppendState) { MSE_DEBUG("AppendState changed from %s to %s", AppendStateToStr(mSourceBufferAttributes->GetAppendState()), AppendStateToStr(aAppendState)); mSourceBufferAttributes->SetAppendState(aAppendState); } MediaInfo TrackBuffersManager::GetMetadata() const { MutexAutoLock mut(mMutex); return mInfo; } const TimeIntervals& TrackBuffersManager::Buffered( TrackInfo::TrackType aTrack) const { MOZ_ASSERT(OnTaskQueue()); return GetTracksData(aTrack).mBufferedRanges; } const media::TimeUnit& TrackBuffersManager::HighestStartTime( TrackInfo::TrackType aTrack) const { MOZ_ASSERT(OnTaskQueue()); return GetTracksData(aTrack).mHighestStartTimestamp; } TimeIntervals TrackBuffersManager::SafeBuffered( TrackInfo::TrackType aTrack) const { MutexAutoLock mut(mMutex); return aTrack == TrackInfo::kVideoTrack ? 
mVideoBufferedRanges : mAudioBufferedRanges; } TimeUnit TrackBuffersManager::HighestStartTime() const { MutexAutoLock mut(mMutex); TimeUnit highestStartTime; for (auto& track : GetTracksList()) { highestStartTime = std::max(track->mHighestStartTimestamp, highestStartTime); } return highestStartTime; } TimeUnit TrackBuffersManager::HighestEndTime() const { MutexAutoLock mut(mMutex); nsTArray<const TimeIntervals*> tracks; if (HasVideo()) { tracks.AppendElement(&mVideoBufferedRanges); } if (HasAudio()) { tracks.AppendElement(&mAudioBufferedRanges); } return HighestEndTime(tracks); } TimeUnit TrackBuffersManager::HighestEndTime( nsTArray<const TimeIntervals*>& aTracks) const { mMutex.AssertCurrentThreadOwns(); TimeUnit highestEndTime; for (const auto& trackRanges : aTracks) { highestEndTime = std::max(trackRanges->GetEnd(), highestEndTime); } return highestEndTime; } void TrackBuffersManager::ResetEvictionIndex(TrackData& aTrackData) { MutexAutoLock mut(mMutex); aTrackData.mEvictionIndex.Reset(); } void TrackBuffersManager::UpdateEvictionIndex(TrackData& aTrackData, uint32_t currentIndex) { uint32_t evictable = 0; TrackBuffer& data = aTrackData.GetTrackBuffer(); MOZ_DIAGNOSTIC_ASSERT(currentIndex >= aTrackData.mEvictionIndex.mLastIndex, "Invalid call"); MOZ_DIAGNOSTIC_ASSERT( currentIndex == data.Length() || data[currentIndex]->mKeyframe, "Must stop at keyframe"); for (uint32_t i = aTrackData.mEvictionIndex.mLastIndex; i < currentIndex; i++) { evictable += data[i]->ComputedSizeOfIncludingThis(); } aTrackData.mEvictionIndex.mLastIndex = currentIndex; MutexAutoLock mut(mMutex); aTrackData.mEvictionIndex.mEvictable += evictable; } const TrackBuffersManager::TrackBuffer& TrackBuffersManager::GetTrackBuffer( TrackInfo::TrackType aTrack) const { MOZ_ASSERT(OnTaskQueue()); return GetTracksData(aTrack).GetTrackBuffer(); } uint32_t TrackBuffersManager::FindSampleIndex(const TrackBuffer& aTrackBuffer, const TimeInterval& aInterval) { TimeUnit target = aInterval.mStart - 
aInterval.mFuzz; for (uint32_t i = 0; i < aTrackBuffer.Length(); i++) { const RefPtr<MediaRawData>& sample = aTrackBuffer[i]; if (sample->mTime >= target || sample->GetEndTime() > target) { return i; } } MOZ_ASSERT(false, "FindSampleIndex called with invalid arguments"); return 0; } TimeUnit TrackBuffersManager::Seek(TrackInfo::TrackType aTrack, const TimeUnit& aTime, const TimeUnit& aFuzz) { MOZ_ASSERT(OnTaskQueue()); AUTO_PROFILER_LABEL("TrackBuffersManager::Seek", MEDIA_PLAYBACK); auto& trackBuffer = GetTracksData(aTrack); const TrackBuffersManager::TrackBuffer& track = GetTrackBuffer(aTrack); MSE_DEBUG("Seek, track=%s, target=%" PRId64, TrackTypeToStr(aTrack), aTime.ToMicroseconds()); if (!track.Length()) { // This a reset. It will be followed by another valid seek. trackBuffer.mNextGetSampleIndex = Some(uint32_t(0)); trackBuffer.mNextSampleTimecode = TimeUnit(); trackBuffer.mNextSampleTime = TimeUnit(); ResetEvictionIndex(trackBuffer); return TimeUnit(); } uint32_t i = 0; if (aTime != TimeUnit()) { // Determine the interval of samples we're attempting to seek to. TimeIntervals buffered = trackBuffer.mBufferedRanges; // Fuzz factor is +/- aFuzz; as we want to only eliminate gaps // that are less than aFuzz wide, we set a fuzz factor aFuzz/2. 
buffered.SetFuzz(aFuzz / 2); TimeIntervals::IndexType index = buffered.Find(aTime); MOZ_ASSERT(index != TimeIntervals::NoIndex, "We shouldn't be called if aTime isn't buffered"); TimeInterval target = buffered[index]; target.mFuzz = aFuzz; i = FindSampleIndex(track, target); } Maybe<TimeUnit> lastKeyFrameTime; TimeUnit lastKeyFrameTimecode; uint32_t lastKeyFrameIndex = 0; for (; i < track.Length(); i++) { const RefPtr<MediaRawData>& sample = track[i]; TimeUnit sampleTime = sample->mTime; if (sampleTime > aTime && lastKeyFrameTime.isSome()) { break; } if (sample->mKeyframe) { lastKeyFrameTimecode = sample->mTimecode; lastKeyFrameTime = Some(sampleTime); lastKeyFrameIndex = i; } if (sampleTime == aTime || (sampleTime > aTime && lastKeyFrameTime.isSome())) { break; } } MSE_DEBUG("Keyframe %s found at %" PRId64 " @ %u", lastKeyFrameTime.isSome() ? "" : "not", lastKeyFrameTime.refOr(TimeUnit()).ToMicroseconds(), lastKeyFrameIndex); trackBuffer.mNextGetSampleIndex = Some(lastKeyFrameIndex); trackBuffer.mNextSampleTimecode = lastKeyFrameTimecode; trackBuffer.mNextSampleTime = lastKeyFrameTime.refOr(TimeUnit()); ResetEvictionIndex(trackBuffer); UpdateEvictionIndex(trackBuffer, lastKeyFrameIndex); return lastKeyFrameTime.refOr(TimeUnit()); } uint32_t TrackBuffersManager::SkipToNextRandomAccessPoint( TrackInfo::TrackType aTrack, const TimeUnit& aTimeThreadshold, const media::TimeUnit& aFuzz, bool& aFound) { mTaskQueueCapability->AssertOnCurrentThread(); AUTO_PROFILER_LABEL("TrackBuffersManager::SkipToNextRandomAccessPoint", MEDIA_PLAYBACK); uint32_t parsed = 0; auto& trackData = GetTracksData(aTrack); const TrackBuffer& track = GetTrackBuffer(aTrack); aFound = false; // SkipToNextRandomAccessPoint can only be called if aTimeThreadshold is known // to be buffered. 
if (NS_FAILED(SetNextGetSampleIndexIfNeeded(aTrack, aFuzz))) { return 0; } TimeUnit nextSampleTimecode = trackData.mNextSampleTimecode; TimeUnit nextSampleTime = trackData.mNextSampleTime; uint32_t i = trackData.mNextGetSampleIndex.ref(); uint32_t originalPos = i; for (; i < track.Length(); i++) { const MediaRawData* sample = GetSample(aTrack, i, nextSampleTimecode, nextSampleTime, aFuzz); if (!sample) { break; } if (sample->mKeyframe && sample->mTime >= aTimeThreadshold) { aFound = true; break; } nextSampleTimecode = sample->GetEndTimecode(); nextSampleTime = sample->GetEndTime(); parsed++; } // Adjust the next demux time and index so that the next call to // SkipToNextRandomAccessPoint will not count again the parsed sample as // skipped. if (aFound) { trackData.mNextSampleTimecode = track[i]->mTimecode; trackData.mNextSampleTime = track[i]->mTime; trackData.mNextGetSampleIndex = Some(i); } else if (i > 0) { // Go back to the previous keyframe or the original position so the next // demux can succeed and be decoded. for (uint32_t j = i - 1; j-- > originalPos;) { const RefPtr<MediaRawData>& sample = track[j]; if (sample->mKeyframe) { trackData.mNextSampleTimecode = sample->mTimecode; trackData.mNextSampleTime = sample->mTime; trackData.mNextGetSampleIndex = Some(uint32_t(j)); // We are unable to skip to a keyframe past aTimeThreshold, however // we are speeding up decoding by dropping the unplayable frames. // So we can mark aFound as true. aFound = true; break; } parsed--; } } if (aFound) { UpdateEvictionIndex(trackData, trackData.mNextGetSampleIndex.ref()); } return parsed; } const MediaRawData* TrackBuffersManager::GetSample(TrackInfo::TrackType aTrack, uint32_t aIndex, const TimeUnit& aExpectedDts, const TimeUnit& aExpectedPts, const TimeUnit& aFuzz) { MOZ_ASSERT(OnTaskQueue()); const TrackBuffer& track = GetTrackBuffer(aTrack); if (aIndex >= track.Length()) { // reached the end. 
return nullptr; } if (!(aExpectedDts + aFuzz).IsValid() || !(aExpectedPts + aFuzz).IsValid()) { // Time overflow, it seems like we also reached the end. return nullptr; } const RefPtr<MediaRawData>& sample = track[aIndex]; if (!aIndex || sample->mTimecode <= aExpectedDts + aFuzz || sample->mTime <= aExpectedPts + aFuzz) { MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime()); return sample; } // Gap is too big. End of Stream or Waiting for Data. // TODO, check that we have continuous data based on the sanitized buffered // range instead. return nullptr; } already_AddRefed<MediaRawData> TrackBuffersManager::GetSample( TrackInfo::TrackType aTrack, const TimeUnit& aFuzz, MediaResult& aResult) { mTaskQueueCapability->AssertOnCurrentThread(); AUTO_PROFILER_LABEL("TrackBuffersManager::GetSample", MEDIA_PLAYBACK); auto& trackData = GetTracksData(aTrack); const TrackBuffer& track = GetTrackBuffer(aTrack); aResult = NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA; if (trackData.mNextGetSampleIndex.isSome()) { if (trackData.mNextGetSampleIndex.ref() >= track.Length()) { aResult = NS_ERROR_DOM_MEDIA_END_OF_STREAM; return nullptr; } const MediaRawData* sample = GetSample( aTrack, trackData.mNextGetSampleIndex.ref(), trackData.mNextSampleTimecode, trackData.mNextSampleTime, aFuzz); if (!sample) { return nullptr; } RefPtr<MediaRawData> p = sample->Clone(); if (!p) { aResult = MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__); return nullptr; } if (p->mKeyframe) { UpdateEvictionIndex(trackData, trackData.mNextGetSampleIndex.ref()); } trackData.mNextGetSampleIndex.ref()++; // Estimate decode timestamp and timestamp of the next sample. TimeUnit nextSampleTimecode = sample->GetEndTimecode(); TimeUnit nextSampleTime = sample->GetEndTime(); const MediaRawData* nextSample = GetSample(aTrack, trackData.mNextGetSampleIndex.ref(), nextSampleTimecode, nextSampleTime, aFuzz); if (nextSample) { // We have a valid next sample, can use exact values. 
trackData.mNextSampleTimecode = nextSample->mTimecode; trackData.mNextSampleTime = nextSample->mTime; } else { // Next sample isn't available yet. Use estimates. trackData.mNextSampleTimecode = nextSampleTimecode; trackData.mNextSampleTime = nextSampleTime; } aResult = NS_OK; return p.forget(); } aResult = SetNextGetSampleIndexIfNeeded(aTrack, aFuzz); if (NS_FAILED(aResult)) { return nullptr; } MOZ_RELEASE_ASSERT(trackData.mNextGetSampleIndex.isSome() && trackData.mNextGetSampleIndex.ref() < track.Length()); const RefPtr<MediaRawData>& sample = track[trackData.mNextGetSampleIndex.ref()]; RefPtr<MediaRawData> p = sample->Clone(); if (!p) { // OOM aResult = MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__); return nullptr; } MOZ_DIAGNOSTIC_ASSERT(p->HasValidTime()); // Find the previous keyframe to calculate the evictable amount. uint32_t i = trackData.mNextGetSampleIndex.ref(); for (; !track[i]->mKeyframe; i--) { } UpdateEvictionIndex(trackData, i); trackData.mNextGetSampleIndex.ref()++; trackData.mNextSampleTimecode = sample->GetEndTimecode(); trackData.mNextSampleTime = sample->GetEndTime(); return p.forget(); } int32_t TrackBuffersManager::FindCurrentPosition(TrackInfo::TrackType aTrack, const TimeUnit& aFuzz) const { MOZ_ASSERT(OnTaskQueue()); const auto& trackData = GetTracksData(aTrack); const TrackBuffer& track = GetTrackBuffer(aTrack); int32_t trackLength = AssertedCast<int32_t>(track.Length()); // Perform an exact search first. for (int32_t i = 0; i < trackLength; i++) { const RefPtr<MediaRawData>& sample = track[i]; TimeInterval sampleInterval{sample->mTimecode, sample->GetEndTimecode()}; if (sampleInterval.ContainsStrict(trackData.mNextSampleTimecode)) { return i; } if (sampleInterval.mStart > trackData.mNextSampleTimecode) { // Samples are ordered by timecode. There's no need to search // any further. 
break; } } for (int32_t i = 0; i < trackLength; i++) { const RefPtr<MediaRawData>& sample = track[i]; TimeInterval sampleInterval{sample->mTimecode, sample->GetEndTimecode(), aFuzz}; if (sampleInterval.ContainsWithStrictEnd(trackData.mNextSampleTimecode)) { return i; } if (sampleInterval.mStart - aFuzz > trackData.mNextSampleTimecode) { // Samples are ordered by timecode. There's no need to search // any further. break; } } // We couldn't find our sample by decode timestamp. Attempt to find it using // presentation timestamp. There will likely be small jerkiness. for (int32_t i = 0; i < trackLength; i++) { const RefPtr<MediaRawData>& sample = track[i]; TimeInterval sampleInterval{sample->mTime, sample->GetEndTime(), aFuzz}; if (sampleInterval.ContainsWithStrictEnd(trackData.mNextSampleTimecode)) { return i; } } // Still not found. return -1; } uint32_t TrackBuffersManager::Evictable(TrackInfo::TrackType aTrack) const { MutexAutoLock mut(mMutex); return GetTracksData(aTrack).mEvictionIndex.mEvictable; } TimeUnit TrackBuffersManager::GetNextRandomAccessPoint( TrackInfo::TrackType aTrack, const TimeUnit& aFuzz) { mTaskQueueCapability->AssertOnCurrentThread(); // So first determine the current position in the track buffer if necessary. 
if (NS_FAILED(SetNextGetSampleIndexIfNeeded(aTrack, aFuzz))) { return TimeUnit::FromInfinity(); } auto& trackData = GetTracksData(aTrack); const TrackBuffersManager::TrackBuffer& track = GetTrackBuffer(aTrack); uint32_t i = trackData.mNextGetSampleIndex.ref(); TimeUnit nextSampleTimecode = trackData.mNextSampleTimecode; TimeUnit nextSampleTime = trackData.mNextSampleTime; for (; i < track.Length(); i++) { const MediaRawData* sample = GetSample(aTrack, i, nextSampleTimecode, nextSampleTime, aFuzz); if (!sample) { break; } if (sample->mKeyframe) { return sample->mTime; } nextSampleTimecode = sample->GetEndTimecode(); nextSampleTime = sample->GetEndTime(); } return TimeUnit::FromInfinity(); } nsresult TrackBuffersManager::SetNextGetSampleIndexIfNeeded( TrackInfo::TrackType aTrack, const TimeUnit& aFuzz) { MOZ_ASSERT(OnTaskQueue()); auto& trackData = GetTracksData(aTrack); const TrackBuffer& track = GetTrackBuffer(aTrack); if (trackData.mNextGetSampleIndex.isSome()) { // We already know the next GetSample index. return NS_OK; } if (!track.Length()) { // There's nothing to find yet. return NS_ERROR_DOM_MEDIA_END_OF_STREAM; } if (trackData.mNextSampleTimecode == TimeUnit()) { // First demux, get first sample. trackData.mNextGetSampleIndex = Some(0u); return NS_OK; } if (trackData.mNextSampleTimecode > track.LastElement()->GetEndTimecode()) { // The next element is past our last sample. We're done. trackData.mNextGetSampleIndex = Some(uint32_t(track.Length())); return NS_ERROR_DOM_MEDIA_END_OF_STREAM; } int32_t pos = FindCurrentPosition(aTrack, aFuzz); if (pos < 0) { // Not found, must wait for more data. 
MSE_DEBUG("Couldn't find sample (pts:%" PRId64 " dts:%" PRId64 ")", trackData.mNextSampleTime.ToMicroseconds(), trackData.mNextSampleTimecode.ToMicroseconds()); return NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA; } trackData.mNextGetSampleIndex = Some(uint32_t(pos)); return NS_OK; } void TrackBuffersManager::TrackData::AddSizeOfResources( MediaSourceDecoder::ResourceSizes* aSizes) const { for (const TrackBuffer& buffer : mBuffers) { for (const MediaRawData* data : buffer) { aSizes->mByteSize += data->SizeOfIncludingThis(aSizes->mMallocSizeOf); } } } RefPtr<GenericPromise> TrackBuffersManager::RequestDebugInfo( dom::TrackBuffersManagerDebugInfo& aInfo) const { const RefPtr<TaskQueue> taskQueue = GetTaskQueueSafe(); if (!taskQueue) { return GenericPromise::CreateAndResolve(true, __func__); } if (!taskQueue->IsCurrentThreadIn()) { // Run the request on the task queue if it's not already. return InvokeAsync(taskQueue.get(), __func__, [this, self = RefPtr{this}, &aInfo] { return RequestDebugInfo(aInfo); }); } mTaskQueueCapability->AssertOnCurrentThread(); GetDebugInfo(aInfo); return GenericPromise::CreateAndResolve(true, __func__); } void TrackBuffersManager::GetDebugInfo( dom::TrackBuffersManagerDebugInfo& aInfo) const { MOZ_ASSERT(OnTaskQueue(), "This shouldn't be called off the task queue because we're about " "to touch a lot of data that is used on the task queue"); CopyUTF8toUTF16(mType.Type().AsString(), aInfo.mType); if (HasAudio()) { aInfo.mNextSampleTime = mAudioTracks.mNextSampleTime.ToSeconds(); aInfo.mNumSamples = AssertedCast<int32_t>(mAudioTracks.mBuffers[0].Length()); aInfo.mBufferSize = AssertedCast<int32_t>(mAudioTracks.mSizeBuffer); aInfo.mEvictable = AssertedCast<int32_t>(Evictable(TrackInfo::kAudioTrack)); aInfo.mNextGetSampleIndex = AssertedCast<int32_t>(mAudioTracks.mNextGetSampleIndex.valueOr(-1)); aInfo.mNextInsertionIndex = AssertedCast<int32_t>(mAudioTracks.mNextInsertionIndex.valueOr(-1)); media::TimeIntervals ranges = 
SafeBuffered(TrackInfo::kAudioTrack); dom::Sequence<dom::BufferRange> items; for (uint32_t i = 0; i < ranges.Length(); ++i) { // dom::Sequence is a FallibleTArray dom::BufferRange* range = items.AppendElement(fallible); if (!range) { break; } range->mStart = ranges.Start(i).ToSeconds(); range->mEnd = ranges.End(i).ToSeconds(); } aInfo.mRanges = std::move(items); } else if (HasVideo()) { aInfo.mNextSampleTime = mVideoTracks.mNextSampleTime.ToSeconds(); aInfo.mNumSamples = AssertedCast<int32_t>(mVideoTracks.mBuffers[0].Length()); aInfo.mBufferSize = AssertedCast<int32_t>(mVideoTracks.mSizeBuffer); aInfo.mEvictable = AssertedCast<int32_t>(Evictable(TrackInfo::kVideoTrack)); aInfo.mNextGetSampleIndex = AssertedCast<int32_t>(mVideoTracks.mNextGetSampleIndex.valueOr(-1)); aInfo.mNextInsertionIndex = AssertedCast<int32_t>(mVideoTracks.mNextInsertionIndex.valueOr(-1)); media::TimeIntervals ranges = SafeBuffered(TrackInfo::kVideoTrack); dom::Sequence<dom::BufferRange> items; for (uint32_t i = 0; i < ranges.Length(); ++i) { // dom::Sequence is a FallibleTArray dom::BufferRange* range = items.AppendElement(fallible); if (!range) { break; } range->mStart = ranges.Start(i).ToSeconds(); range->mEnd = ranges.End(i).ToSeconds(); } aInfo.mRanges = std::move(items); } } void TrackBuffersManager::AddSizeOfResources( MediaSourceDecoder::ResourceSizes* aSizes) const { mTaskQueueCapability->AssertOnCurrentThread(); if (mInputBuffer.isSome() && mInputBuffer->Buffer()) { // mInputBuffer should be the sole owner of the underlying buffer, so this // won't double count. aSizes->mByteSize += mInputBuffer->Buffer()->ShallowSizeOfIncludingThis( aSizes->mMallocSizeOf); } if (mInitData) { aSizes->mByteSize += mInitData->ShallowSizeOfIncludingThis(aSizes->mMallocSizeOf); } if (mPendingInputBuffer.isSome() && mPendingInputBuffer->Buffer()) { // mPendingInputBuffer should be the sole owner of the underlying buffer, so // this won't double count. 
aSizes->mByteSize += mPendingInputBuffer->Buffer()->ShallowSizeOfIncludingThis( aSizes->mMallocSizeOf); } mVideoTracks.AddSizeOfResources(aSizes); mAudioTracks.AddSizeOfResources(aSizes); } } // namespace mozilla #undef MSE_DEBUG #undef MSE_DEBUGV #undef SAMPLE_DEBUG #undef SAMPLE_DEBUGV