diff options
Diffstat (limited to '')
-rw-r--r-- | dom/media/MediaDecoderStateMachine.h | 586 |
1 files changed, 586 insertions, 0 deletions
diff --git a/dom/media/MediaDecoderStateMachine.h b/dom/media/MediaDecoderStateMachine.h new file mode 100644 index 0000000000..758feb7539 --- /dev/null +++ b/dom/media/MediaDecoderStateMachine.h @@ -0,0 +1,586 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(MediaDecoderStateMachine_h__) +# define MediaDecoderStateMachine_h__ + +# include "AudioDeviceInfo.h" +# include "ImageContainer.h" +# include "MediaDecoder.h" +# include "MediaDecoderOwner.h" +# include "MediaDecoderStateMachineBase.h" +# include "MediaFormatReader.h" +# include "MediaQueue.h" +# include "MediaSink.h" +# include "MediaStatistics.h" +# include "MediaTimer.h" +# include "SeekJob.h" +# include "mozilla/Attributes.h" +# include "mozilla/ReentrantMonitor.h" +# include "mozilla/StateMirroring.h" +# include "nsThreadUtils.h" + +namespace mozilla { + +class AbstractThread; +class AudioSegment; +class DecodedStream; +class DOMMediaStream; +class ReaderProxy; +class TaskQueue; + +extern LazyLogModule gMediaDecoderLog; + +DDLoggedTypeDeclName(MediaDecoderStateMachine); + +/* + +Each media element for a media file has one thread called the "audio thread". + +The audio thread writes the decoded audio data to the audio +hardware. This is done in a separate thread to ensure that the +audio hardware gets a constant stream of data without +interruption due to decoding or display. At some point +AudioStream will be refactored to have a callback interface +where it asks for data and this thread will no longer be +needed. + +The element/state machine also has a TaskQueue which runs in a +SharedThreadPool that is shared with all other elements/decoders. 
The state +machine dispatches tasks to this to call into the MediaDecoderReader to +request decoded audio or video data. The Reader will call back with decoded +samples when it has them available, and the state machine places the decoded +samples into its queues for the consuming threads to pull from. + +The MediaDecoderReader can choose to decode asynchronously, or synchronously +and return requested samples synchronously inside its Request*Data() +functions via callback. Asynchronous decoding is preferred, and should be +used for any new readers. + +Synchronisation of state between the threads is done via a monitor owned +by MediaDecoder. + +The lifetime of the audio thread is controlled by the state machine when +it runs on the shared state machine thread. When playback needs to occur +the audio thread is created and an event dispatched to run it. The audio +thread exits when audio playback is completed or no longer required. + +A/V synchronisation is handled by the state machine. It examines the audio +playback time and compares this to the next frame in the queue of video +frames. If it is time to play the video frame it is then displayed, otherwise +it schedules the state machine to run again at the time of the next frame. + +Frame skipping is done in the following ways: + + 1) The state machine will skip all frames in the video queue whose + display time is less than the current audio time. This ensures + the correct frame for the current time is always displayed. + + 2) The decode tasks will stop decoding interframes and read to the + next keyframe if it determines that decoding the remaining + interframes will cause playback issues. It detects this by: + a) If the amount of audio data in the audio queue drops + below a threshold whereby audio may start to skip. + b) If the video queue drops below a threshold where it + will be decoding video data that won't be displayed due + to the decode thread dropping the frame immediately. 
+ TODO: In future we should only do this when the Reader is decoding + synchronously. + +When hardware accelerated graphics is not available, YCbCr conversion +is done on the decode task queue when video frames are decoded. + +The decode task queue pushes decoded audio and video frames into two +separate queues - one for audio and one for video. These are kept +separate to make it easy to constantly feed audio data to the audio +hardware while allowing frame skipping of video data. These queues are +threadsafe, and none of the decode, audio, or state machine threads should +be able to monopolize them and cause starvation of the other threads. + +Both queues are bounded by a maximum size. When this size is reached +the decode tasks will no longer request video or audio depending on the +queue that has reached the threshold. If both queues are full, no more +decode tasks will be dispatched to the decode task queue, so other +decoders will have an opportunity to run. + +During playback the audio thread will be idle (via a Wait() on the +monitor) if the audio queue is empty. Otherwise it constantly pops +audio data off the queue and plays it with a blocking write to the audio +hardware (via AudioStream). 
+ +*/ +class MediaDecoderStateMachine + : public MediaDecoderStateMachineBase, + public DecoderDoctorLifeLogger<MediaDecoderStateMachine> { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaDecoderStateMachine, override) + + using TrackSet = MediaFormatReader::TrackSet; + + public: + using FrameID = mozilla::layers::ImageContainer::FrameID; + MediaDecoderStateMachine(MediaDecoder* aDecoder, MediaFormatReader* aReader); + + nsresult Init(MediaDecoder* aDecoder) override; + + // Enumeration for the valid decoding states + enum State { + DECODER_STATE_DECODING_METADATA, + DECODER_STATE_DORMANT, + DECODER_STATE_DECODING_FIRSTFRAME, + DECODER_STATE_DECODING, + DECODER_STATE_LOOPING_DECODING, + DECODER_STATE_SEEKING_ACCURATE, + DECODER_STATE_SEEKING_FROMDORMANT, + DECODER_STATE_SEEKING_NEXTFRAMESEEKING, + DECODER_STATE_SEEKING_VIDEOONLY, + DECODER_STATE_BUFFERING, + DECODER_STATE_COMPLETED, + DECODER_STATE_SHUTDOWN + }; + + RefPtr<GenericPromise> RequestDebugInfo( + dom::MediaDecoderStateMachineDebugInfo& aInfo) override; + + size_t SizeOfVideoQueue() const override; + + size_t SizeOfAudioQueue() const override; + + // Sets the video decode mode. Used by the suspend-video-decoder feature. 
+ void SetVideoDecodeMode(VideoDecodeMode aMode) override; + + RefPtr<GenericPromise> InvokeSetSink( + const RefPtr<AudioDeviceInfo>& aSink) override; + + void InvokeSuspendMediaSink() override; + void InvokeResumeMediaSink() override; + + bool IsCDMProxySupported(CDMProxy* aProxy) override; + + private: + class StateObject; + class DecodeMetadataState; + class DormantState; + class DecodingFirstFrameState; + class DecodingState; + class LoopingDecodingState; + class SeekingState; + class AccurateSeekingState; + class NextFrameSeekingState; + class NextFrameSeekingFromDormantState; + class VideoOnlySeekingState; + class BufferingState; + class CompletedState; + class ShutdownState; + + static const char* ToStateStr(State aState); + const char* ToStateStr(); + + void GetDebugInfo(dom::MediaDecoderStateMachineDebugInfo& aInfo); + + // Initialization that needs to happen on the task queue. This is the first + // task that gets run on the task queue, and is dispatched from the MDSM + // constructor immediately after the task queue is created. + void InitializationTask(MediaDecoder* aDecoder) override; + + RefPtr<MediaDecoder::SeekPromise> Seek(const SeekTarget& aTarget) override; + + RefPtr<ShutdownPromise> Shutdown() override; + + RefPtr<ShutdownPromise> FinishShutdown(); + + // Update the playback position. This can result in a timeupdate event + // and an invalidate of the frame being dispatched asynchronously if + // there is no such event currently queued. + // Only called on the decoder thread. Must be called with + // the decode monitor held. + void UpdatePlaybackPosition(const media::TimeUnit& aTime); + + // Schedules the shared state machine thread to run the state machine. + void ScheduleStateMachine(); + + // Invokes ScheduleStateMachine to run in |aTime|, + // unless it's already scheduled to run earlier, in which case the + // request is discarded. 
+ void ScheduleStateMachineIn(const media::TimeUnit& aTime); + + bool HaveEnoughDecodedAudio() const; + bool HaveEnoughDecodedVideo() const; + + // The check is used to store more video frames than usual when playing 4K+ + // video. + bool IsVideoDataEnoughComparedWithAudio() const; + + // Returns true if we're currently playing. The decoder monitor must + // be held. + bool IsPlaying() const; + + // Sets mMediaSeekable to false. + void SetMediaNotSeekable(); + + // Resets all states related to decoding and aborts all pending requests + // to the decoders. + void ResetDecode(const TrackSet& aTracks = TrackSet(TrackInfo::kAudioTrack, + TrackInfo::kVideoTrack)); + + void SetVideoDecodeModeInternal(VideoDecodeMode aMode); + + // Set new sink device and restart MediaSink if playback is started. + // Returned promise will be resolved with true if the playback is + // started and false if playback is stopped after setting the new sink. + // Returned promise will be rejected with value NS_ERROR_ABORT + // if the action fails or it is not supported. + // If there are multiple pending requests only the last one will be + // executed, for all previous requests the promise will be resolved + // with true or false similar to above. + RefPtr<GenericPromise> SetSink(const RefPtr<AudioDeviceInfo>& aDevice); + + // Shutdown MediaSink on suspend to clean up resources. + void SuspendMediaSink(); + // Create a new MediaSink, it must have been stopped first. + void ResumeMediaSink(); + + protected: + virtual ~MediaDecoderStateMachine(); + + void BufferedRangeUpdated() override; + void VolumeChanged() override; + void PreservesPitchChanged() override; + void PlayStateChanged() override; + void LoopingChanged() override; + void UpdateSecondaryVideoContainer() override; + + void ReaderSuspendedChanged(); + + // Inserts a sample into the Audio/Video queue. + // aSample must not be null. 
+ void PushAudio(AudioData* aSample); + void PushVideo(VideoData* aSample); + + void OnAudioPopped(const RefPtr<AudioData>& aSample); + void OnVideoPopped(const RefPtr<VideoData>& aSample); + + void AudioAudibleChanged(bool aAudible); + + void SetPlaybackRate(double aPlaybackRate) override; + void SetIsLiveStream(bool aIsLiveStream) override { + mIsLiveStream = aIsLiveStream; + } + void SetCanPlayThrough(bool aCanPlayThrough) override { + mCanPlayThrough = aCanPlayThrough; + } + void SetFragmentEndTime(const media::TimeUnit& aEndTime) override { + // A negative number means we don't have a fragment end time at all. + mFragmentEndTime = aEndTime >= media::TimeUnit::Zero() + ? aEndTime + : media::TimeUnit::Invalid(); + } + + void StreamNameChanged(); + void UpdateOutputCaptured(); + void OutputTracksChanged(); + void OutputPrincipalChanged(); + + MediaQueue<AudioData>& AudioQueue() { return mAudioQueue; } + MediaQueue<VideoData>& VideoQueue() { return mVideoQueue; } + + const MediaQueue<AudioData>& AudioQueue() const { return mAudioQueue; } + const MediaQueue<VideoData>& VideoQueue() const { return mVideoQueue; } + + // True if we are low in decoded audio/video data. + // May not be invoked when mReader->UseBufferingHeuristics() is false. + bool HasLowDecodedData(); + + bool HasLowDecodedAudio(); + + bool HasLowDecodedVideo(); + + bool OutOfDecodedAudio(); + + // True when video is still being decoded and the video queue holds at most + // one frame. Asserts that it is called on the task queue. + bool OutOfDecodedVideo() { + MOZ_ASSERT(OnTaskQueue()); + return IsVideoDecoding() && VideoQueue().GetSize() <= 1; + } + + // Returns true if we're running low on buffered data. + bool HasLowBufferedData(); + + // Returns true if we have less than aThreshold of buffered data available. + bool HasLowBufferedData(const media::TimeUnit& aThreshold); + + // Return the current time, either the audio clock if available (if the media + // has audio, and the playback is possible), or a clock for the video. + // Called on the state machine thread. 
+ // If aTimeStamp is non-null, set *aTimeStamp to the TimeStamp corresponding + // to the returned stream time. + media::TimeUnit GetClock(TimeStamp* aTimeStamp = nullptr) const; + + // Update only the state machine's current playback position (and duration, + // if unknown). Does not update the playback position on the decoder or + // media element -- use UpdatePlaybackPosition for that. Called on the state + // machine thread, caller must hold the decoder lock. + void UpdatePlaybackPositionInternal(const media::TimeUnit& aTime); + + // Update playback position and trigger next update by default time period. + // Called on the state machine thread. + void UpdatePlaybackPositionPeriodically(); + + MediaSink* CreateAudioSink(); + + // Always create mediasink which contains an AudioSink or DecodedStream + // inside. + already_AddRefed<MediaSink> CreateMediaSink(); + + // Stops the media sink and shut it down. + // The decoder monitor must be held with exactly one lock count. + // Called on the state machine thread. + void StopMediaSink(); + + // Create and start the media sink. + // The decoder monitor must be held with exactly one lock count. + // Called on the state machine thread. + // If start fails an NS_ERROR_FAILURE is returned. + nsresult StartMediaSink(); + + // Notification method invoked when mIsVisible changes. + void VisibilityChanged(); + + // Sets internal state which causes playback of media to pause. + // The decoder monitor must be held. + void StopPlayback(); + + // If the conditions are right, sets internal state which causes playback + // of media to begin or resume. + // Must be called with the decode monitor held. + void MaybeStartPlayback(); + + void EnqueueFirstFrameLoadedEvent(); + + // Start a task to decode audio. + void RequestAudioData(); + + // Start a task to decode video. + // @param aRequestNextKeyFrame + // If aRequestNextKeyFrame is true, will request data for the next keyframe + // after aCurrentTime. 
+ void RequestVideoData(const media::TimeUnit& aCurrentTime, + bool aRequestNextKeyFrame = false); + + void WaitForData(MediaData::Type aType); + + // Returns the "media time". This is the absolute time which the media + // playback has reached. i.e. this returns values in the range + // [mStartTime, mEndTime], and mStartTime will not be 0 if the media does + // not start at 0. Note this is different than the "current playback + // position", which is in the range [0,duration]. + media::TimeUnit GetMediaTime() const { + MOZ_ASSERT(OnTaskQueue()); + return mCurrentPosition; + } + + // Returns an upper bound on the number of microseconds of audio that is + // decoded and playable. This is the sum of the number of usecs of audio which + // is decoded and in the reader's audio queue, and the usecs of unplayed audio + // which has been pushed to the audio hardware for playback. Note that after + // calling this, the audio hardware may play some of the audio pushed to + // hardware, so this can only be used as an upper bound. The decoder monitor + // must be held when calling this. Called on the decode thread. + media::TimeUnit GetDecodedAudioDuration() const; + + void FinishDecodeFirstFrame(); + + // Performs one "cycle" of the state machine. + void RunStateMachine(); + + bool IsStateMachineScheduled() const; + + // These return true if the respective stream's decode has not yet reached + // the end of stream. + bool IsAudioDecoding(); + bool IsVideoDecoding(); + + private: + // Resolved by the MediaSink to signal that all audio/video outstanding + // work is complete and identify which part (a/v) of the sink is shutting down. + void OnMediaSinkAudioComplete(); + void OnMediaSinkVideoComplete(); + + // Rejected by the MediaSink to signal errors for audio/video. + void OnMediaSinkAudioError(nsresult aResult); + void OnMediaSinkVideoError(); + + // State-watching manager. 
+ WatchManager<MediaDecoderStateMachine> mWatchManager; + + // True if we've dispatched a task to run the state machine but the task has + // yet to run. + bool mDispatchedStateMachine; + + // Used to dispatch another round schedule with specific target time. + DelayedScheduler mDelayedScheduler; + + // Queue of audio frames. This queue is threadsafe, and is accessed from + // the audio, decoder, state machine, and main threads. + MediaQueue<AudioData> mAudioQueue; + // Queue of video frames. This queue is threadsafe, and is accessed from + // the decoder, state machine, and main threads. + MediaQueue<VideoData> mVideoQueue; + + UniquePtr<StateObject> mStateObj; + + // Returns the value currently held in mDuration. Asserts that it is called + // on the task queue. + // NOTE(review): mDuration is declared outside this header; presumably the + // trailing .ref() requires the duration to be known already -- confirm. + media::TimeUnit Duration() const { + MOZ_ASSERT(OnTaskQueue()); + return mDuration.Ref().ref(); + } + + // FrameID which increments every time a frame is pushed to our queue. + FrameID mCurrentFrameID; + + // Media Fragment end time. + media::TimeUnit mFragmentEndTime = media::TimeUnit::Invalid(); + + // The media sink resource. Used on the state machine thread. + RefPtr<MediaSink> mMediaSink; + + // The end time of the last audio frame that's been pushed onto the media sink + // in microseconds. This will approximately be the end time + // of the audio stream, unless another frame is pushed to the hardware. + media::TimeUnit AudioEndTime() const; + + // The end time of the last rendered video frame that's been sent to + // compositor. + media::TimeUnit VideoEndTime() const; + + // The end time of the last decoded audio frame. This signifies the end of + // decoded audio data. Used to check if we are low in decoded data. + media::TimeUnit mDecodedAudioEndTime; + + // The end time of the last decoded video frame. Used to check if we are low + // on decoded video data. 
+ media::TimeUnit mDecodedVideoEndTime; + + // If we've got more than this number of decoded video frames waiting in + // the video queue, we will not decode any more video frames until some have + // been consumed by the play state machine thread. + // Must hold monitor. + uint32_t GetAmpleVideoFrames() const; + + // Our "ample" audio threshold. Once we've this much audio decoded, we + // pause decoding. + media::TimeUnit mAmpleAudioThreshold; + + const char* AudioRequestStatus() const; + const char* VideoRequestStatus() const; + + void OnSuspendTimerResolved(); + void CancelSuspendTimer(); + + bool IsInSeamlessLooping() const; + + bool mCanPlayThrough = false; + + bool mIsLiveStream = false; + + // True if all audio frames are already rendered. + bool mAudioCompleted = false; + + // True if all video frames are already rendered. + bool mVideoCompleted = false; + + // True if video decoding is suspended. + bool mVideoDecodeSuspended; + + // Track enabling video decode suspension via timer + DelayedScheduler mVideoDecodeSuspendTimer; + + // Track the current video decode mode. + VideoDecodeMode mVideoDecodeMode; + + // Track the complete & error for audio/video separately + MozPromiseRequestHolder<MediaSink::EndedPromise> mMediaSinkAudioEndedPromise; + MozPromiseRequestHolder<MediaSink::EndedPromise> mMediaSinkVideoEndedPromise; + + MediaEventListener mAudioQueueListener; + MediaEventListener mVideoQueueListener; + MediaEventListener mAudibleListener; + MediaEventListener mOnMediaNotSeekable; + + const bool mIsMSE; + + const bool mShouldResistFingerprinting; + + bool mSeamlessLoopingAllowed; + + // If media was in looping and had reached to the end before, then we need + // to adjust sample time from clock time to media time. + void AdjustByLooping(media::TimeUnit& aTime) const; + + // These are used for seamless looping. When looping has been enabled at least + // once, `mOriginalDecodedDuration` would be set to the larger duration + // between two tracks. 
+ media::TimeUnit mOriginalDecodedDuration; + Maybe<media::TimeUnit> mAudioTrackDecodedDuration; + Maybe<media::TimeUnit> mVideoTrackDecodedDuration; + + bool HasLastDecodedData(MediaData::Type aType); + + // Current playback position in the stream in bytes. + int64_t mPlaybackOffset = 0; + + // For seamless looping video, we don't want to trigger skip-to-next-keyframe + // after reaching video EOS. Because we've reset the demuxer to 0, and are + // going to request data from start. If playback hasn't looped back, the media + // time would still be too large, which makes the reader think the playback is + // way behind and performs unnecessary skipping. Eg. Media is 10s long, + // reaching EOS at 8s, requesting data at 9s. Assume media's keyframe interval + // is 3s, which means keyframes will appear on 0s, 3s, 6s and 9s. If we use + // current time as a threshold, the reader sees the next key frame is 3s but + // the threshold is 9s, which usually happens when the decoding is too slow. + // But that is not the case for us, we should bypass the skip-to-next-keyframe + // logic until the media loops back. + bool mBypassingSkipToNextKeyFrameCheck = false; + + private: + // Audio stream name + Mirror<nsAutoString> mStreamName; + + // The device used with SetSink, or nullptr if no explicit device has been + // set. + Mirror<RefPtr<AudioDeviceInfo>> mSinkDevice; + + // Whether all output should be captured into mOutputTracks, halted, or not + // captured. + Mirror<MediaDecoder::OutputCaptureState> mOutputCaptureState; + + // A dummy track used to access the right MediaTrackGraph instance. Needed + // since there's no guarantee that output tracks are present. + Mirror<nsMainThreadPtrHandle<SharedDummyTrack>> mOutputDummyTrack; + + // Tracks to capture data into. + Mirror<CopyableTArray<RefPtr<ProcessedMediaTrack>>> mOutputTracks; + + // PrincipalHandle to feed with data captured into mOutputTracks. 
+ Mirror<PrincipalHandle> mOutputPrincipal; + + Canonical<CopyableTArray<RefPtr<ProcessedMediaTrack>>> mCanonicalOutputTracks; + Canonical<PrincipalHandle> mCanonicalOutputPrincipal; + + // Track when MediaSink is suspended. When that happens some actions are + // restricted like starting the sink or changing sink id. The flag is valid + // after Initialization. TaskQueue thread only. + bool mIsMediaSinkSuspended = false; + + public: + // Accessors returning pointers to the canonical output-track list and the + // canonical output principal owned by this object. + AbstractCanonical<CopyableTArray<RefPtr<ProcessedMediaTrack>>>* + CanonicalOutputTracks() { + return &mCanonicalOutputTracks; + } + AbstractCanonical<PrincipalHandle>* CanonicalOutputPrincipal() { + return &mCanonicalOutputPrincipal; + } +}; + +} // namespace mozilla + +#endif |