summaryrefslogtreecommitdiffstats
path: root/dom/media/ogg/OggDemuxer.h
blob: 8a65398cf9dd8457e777b5ac7c00fbd291fe0512 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#if !defined(OggDemuxer_h_)
#  define OggDemuxer_h_

#  include "nsTArray.h"
#  include "MediaDataDemuxer.h"
#  include "OggCodecState.h"
#  include "OggCodecStore.h"
#  include "OggRLBoxTypes.h"
#  include "MediaMetadataManager.h"

#  include <memory>

namespace mozilla {

// Forward declaration: OggTrackDemuxer is defined after OggDemuxer in this
// header, but is named by the logging macro below and by OggDemuxer itself
// (its track-demuxer array and friend declaration).
class OggTrackDemuxer;

// DecoderDoctor logging hooks for the two classes in this header; each names
// the class and the base class it derives from.
// NOTE(review): the "Decl" variant presumably also forward-declares its first
// argument, which would explain why OggDemuxer needs no explicit forward
// declaration here — confirm against the DDLogged* macro definitions.
DDLoggedTypeDeclNameAndBase(OggDemuxer, MediaDataDemuxer);
DDLoggedTypeNameAndBase(OggTrackDemuxer, MediaTrackDemuxer);

// MediaDataDemuxer implementation for Ogg media. It tracks codec state for
// Theora, Vorbis, Opus, Flac and Skeleton bitstreams and hands out
// OggTrackDemuxer instances (declared a friend below) through
// GetTrackDemuxer(). libogg is invoked through an RLBox sandbox (mSandbox).
class OggDemuxer : public MediaDataDemuxer,
                   public DecoderDoctorLifeLogger<OggDemuxer> {
 public:
  explicit OggDemuxer(MediaResource* aResource);

  // MediaDataDemuxer overrides.
  RefPtr<InitPromise> Init() override;

  uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override;

  already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(
      TrackInfo::TrackType aType, uint32_t aTrackNumber) override;

  bool IsSeekable() const override;

  UniquePtr<EncryptionInfo> GetCrypto() override;

  // Set the events to notify when chaining is encountered.
  void SetChainingEvents(TimedMetadataEventProducer* aMetadataEvent,
                         MediaEventProducer<void>* aOnSeekableEvent);

 private:
  // helpers for friend OggTrackDemuxer
  UniquePtr<TrackInfo> GetTrackInfo(TrackInfo::TrackType aType,
                                    size_t aTrackNumber) const;

  // Bundles a sandboxed ogg_sync_state pointer with the sandbox it belongs to.
  // NOTE(review): presumably the constructor sets up the sync state inside
  // aSandbox and the destructor tears it down again — confirm in
  // OggDemuxer.cpp.
  struct nsAutoOggSyncState {
    explicit nsAutoOggSyncState(rlbox_sandbox_ogg* aSandbox);
    ~nsAutoOggSyncState();
    rlbox_sandbox_ogg* mSandbox;
    tainted_opaque_ogg<ogg_sync_state*> mState;
  };
  media::TimeIntervals GetBuffered(TrackInfo::TrackType aType);
  // Start-time discovery; the result is written to aOutStartTime. The second
  // overload is restricted to a single track type.
  void FindStartTime(int64_t& aOutStartTime);
  void FindStartTime(TrackInfo::TrackType, int64_t& aOutStartTime);

  nsresult SeekInternal(TrackInfo::TrackType aType,
                        const media::TimeUnit& aTarget);

  // Seeks to the keyframe preceding the target time using available
  // keyframe indexes.
  enum IndexedSeekResult {
    SEEK_OK,          // Success.
    SEEK_INDEX_FAIL,  // Failure due to no index, or invalid index.
    SEEK_FATAL_ERROR  // Error returned by a stream operation.
  };
  IndexedSeekResult SeekToKeyframeUsingIndex(TrackInfo::TrackType aType,
                                             int64_t aTarget);

  // Rolls back a seek-using-index attempt, returning a failure error code.
  IndexedSeekResult RollbackIndexedSeek(TrackInfo::TrackType aType,
                                        int64_t aOffset);

  // Represents a section of contiguous media, with a start and end offset,
  // and the timestamps of the start and end of that range, that is cached.
  // Used to denote the extremities of a range in which we can seek quickly
  // (because it's cached).
  class SeekRange {
   public:
    // Constructs the "null" range: every field is zero.
    SeekRange() : mOffsetStart(0), mOffsetEnd(0), mTimeStart(0), mTimeEnd(0) {}

    SeekRange(int64_t aOffsetStart, int64_t aOffsetEnd, int64_t aTimeStart,
              int64_t aTimeEnd)
        : mOffsetStart(aOffsetStart),
          mOffsetEnd(aOffsetEnd),
          mTimeStart(aTimeStart),
          mTimeEnd(aTimeEnd) {}

    // True only when all four fields are zero, i.e. the range is
    // indistinguishable from a default-constructed one.
    bool IsNull() const {
      return mOffsetStart == 0 && mOffsetEnd == 0 && mTimeStart == 0 &&
             mTimeEnd == 0;
    }

    int64_t mOffsetStart, mOffsetEnd;  // in bytes.
    int64_t mTimeStart, mTimeEnd;      // in usecs.
  };

  // Fills aRanges with the seekable ranges for aType.
  // NOTE(review): per the comments on SeekInBufferedRange()/SeekInUnbuffered()
  // these are the time/byte ranges buffered in the media cache.
  nsresult GetSeekRanges(TrackInfo::TrackType aType,
                         nsTArray<SeekRange>& aRanges);
  SeekRange SelectSeekRange(TrackInfo::TrackType aType,
                            const nsTArray<SeekRange>& ranges, int64_t aTarget,
                            int64_t aStartTime, int64_t aEndTime, bool aExact);

  // Seeks to aTarget usecs in the buffered range aRange using bisection search,
  // or to the keyframe prior to aTarget if we have video. aAdjustedTarget is
  // an adjusted version of the target used to account for Opus pre-roll, if
  // necessary. aStartTime must be the presentation time at the start of media,
  // and aEndTime the time at end of media. aRanges must be the time/byte ranges
  // buffered in the media cache as per GetSeekRanges().
  nsresult SeekInBufferedRange(TrackInfo::TrackType aType, int64_t aTarget,
                               int64_t aAdjustedTarget, int64_t aStartTime,
                               int64_t aEndTime,
                               const nsTArray<SeekRange>& aRanges,
                               const SeekRange& aRange);

  // Seeks to before aTarget usecs in media using bisection search. If the media
  // has video, this will seek to before the keyframe required to render the
  // media at aTarget. Will use aRanges in order to narrow the bisection
  // search space. aStartTime must be the presentation time at the start of
  // media, and aEndTime the time at end of media. aRanges must be the time/byte
  // ranges buffered in the media cache as per GetSeekRanges().
  nsresult SeekInUnbuffered(TrackInfo::TrackType aType, int64_t aTarget,
                            int64_t aStartTime, int64_t aEndTime,
                            const nsTArray<SeekRange>& aRanges);

  // Performs a seek bisection to move the media stream's read cursor to the
  // last ogg page boundary which has end time before aTarget usecs on both the
  // Theora and Vorbis bitstreams. Limits its search to data inside aRange;
  // i.e. it will only read inside of the aRange's start and end offsets.
  // aFuzz is the number of usecs of leniency we'll allow; we'll terminate the
  // seek when we land in the range (aTarget - aFuzz, aTarget) usecs.
  nsresult SeekBisection(TrackInfo::TrackType aType, int64_t aTarget,
                         const SeekRange& aRange, uint32_t aFuzz);

  // Chunk size to read when reading Ogg files. Average Ogg page length
  // is about 4300 bytes, so we read the file in chunks larger than that.
  static const int PAGE_STEP = 8192;

  // Outcome of a PageSync() call.
  enum PageSyncResult {
    PAGE_SYNC_ERROR = 1,
    PAGE_SYNC_END_OF_RANGE = 2,
    PAGE_SYNC_OK = 3
  };
  // Synchronises on an Ogg page within [aOffset, aEndOffset) of aResource,
  // using the sandboxed sync state aState; the page is delivered through aPage
  // and a byte count through aSkippedBytes.
  // NOTE(review): exact semantics of aCachedDataOnly and aSkippedBytes are
  // defined in OggDemuxer.cpp — this summary is inferred from the parameter
  // names and should be confirmed there.
  static PageSyncResult PageSync(rlbox_sandbox_ogg* aSandbox,
                                 MediaResourceIndex* aResource,
                                 tainted_opaque_ogg<ogg_sync_state*> aState,
                                 bool aCachedDataOnly, int64_t aOffset,
                                 int64_t aEndOffset,
                                 tainted_ogg<ogg_page*> aPage,
                                 int& aSkippedBytes);

  // Demux next Ogg packet
  ogg_packet* GetNextPacket(TrackInfo::TrackType aType);

  nsresult Reset(TrackInfo::TrackType aType);

  static const nsString GetKind(const nsCString& aRole);
  static void InitTrack(MessageField* aMsgInfo, TrackInfo* aInfo, bool aEnable);

  // Really private!
  ~OggDemuxer();

  // Read enough of the file to identify track information and header
  // packets necessary for decoding to begin.
  nsresult ReadMetadata();

  // Read a page of data from the Ogg file. Returns true if a page has been
  // read, false if the page read failed or end of file reached.
  bool ReadOggPage(TrackInfo::TrackType aType,
                   tainted_opaque_ogg<ogg_page*> aPage);

  // Send a page off to the individual streams it belongs to.
  // Reconstructed packets, if any are ready, will be available
  // on the individual OggCodecStates.
  nsresult DemuxOggPage(TrackInfo::TrackType aType,
                        tainted_opaque_ogg<ogg_page*> aPage);

  // Read data and demux until a packet is available on the given stream state
  void DemuxUntilPacketAvailable(TrackInfo::TrackType aType,
                                 OggCodecState* aState);

  // Reads and decodes header packets for aState, until either header decode
  // fails, or is complete. Initializes the codec state before returning.
  // Returns true if reading headers and initialization of the stream
  // succeeds.
  bool ReadHeaders(TrackInfo::TrackType aType, OggCodecState* aState);

  // Reads the next link in the chain.
  bool ReadOggChain(const media::TimeUnit& aLastEndTime);

  // Set this media as being a chain and notifies the state machine that the
  // media is no longer seekable.
  void SetChained();

  // Fills aTracks with the serial numbers of each active stream, for use by
  // various SkeletonState functions.
  void BuildSerialList(nsTArray<uint32_t>& aTracks);

  // Setup target bitstreams for decoding.
  void SetupTarget(OggCodecState** aSavedState, OggCodecState* aNewState);
  void SetupTargetSkeleton();
  void SetupMediaTracksInfo(const nsTArray<uint32_t>& aSerials);
  void FillTags(TrackInfo* aInfo, UniquePtr<MetadataTags>&& aTags);

  // Compute an ogg page's checksum
  tainted_opaque_ogg<ogg_uint32_t> GetPageChecksum(
      tainted_opaque_ogg<ogg_page*> aPage);

  // Get the end time of aEndOffset. This is the playback position we'd reach
  // after playback finished at aEndOffset.
  int64_t RangeEndTime(TrackInfo::TrackType aType, int64_t aEndOffset);

  // Get the end time of aEndOffset, without reading before aStartOffset.
  // This is the playback position we'd reach after playback finished at
  // aEndOffset. If bool aCachedDataOnly is true, then we'll only read
  // from data which is cached in the media cache, otherwise we'll do
  // regular blocking reads from the media stream. If bool aCachedDataOnly
  // is true, this can safely be called on the main thread, otherwise it
  // must be called on the state machine thread.
  int64_t RangeEndTime(TrackInfo::TrackType aType, int64_t aStartOffset,
                       int64_t aEndOffset, bool aCachedDataOnly);

  // Get the start time of the range beginning at aOffset. This is the start
  // time of the first aType sample we'd be able to play if we
  // started playback at aOffset.
  int64_t RangeStartTime(TrackInfo::TrackType aType, int64_t aOffset);

  // All invocations of libogg functionality from the demuxer are sandboxed
  // using wasm library sandboxes on supported platforms. These functions create
  // and destroy the sandbox instance.
  static rlbox_sandbox_ogg* CreateSandbox();
  // Deleter used by the mSandbox unique_ptr below.
  struct SandboxDestroy {
    void operator()(rlbox_sandbox_ogg* sandbox);
  };

  // The sandbox instance used to sandbox libogg functionality in the demuxer.
  // This must be declared before other members so that constructors/destructors
  // run in the right order.
  std::unique_ptr<rlbox_sandbox_ogg, SandboxDestroy> mSandbox;

  MediaInfo mInfo;
  // Track demuxers associated with this demuxer. Each OggTrackDemuxer also
  // holds a RefPtr back to its parent (see OggTrackDemuxer::mParent).
  nsTArray<RefPtr<OggTrackDemuxer>> mDemuxers;

  // Map of codec-specific bitstream states.
  OggCodecStore mCodecStore;

  // Decode state of the Theora bitstream we're decoding, if we have video.
  OggCodecState* mTheoraState;

  // Decode state of the Vorbis bitstream we're decoding, if we have audio.
  OggCodecState* mVorbisState;

  // Decode state of the Opus bitstream we're decoding, if we have one.
  OggCodecState* mOpusState;

  // Decode state of the Flac bitstream we're decoding, if we have one.
  OggCodecState* mFlacState;

  // Get the bitstream decode state for the given track type
  OggCodecState* GetTrackCodecState(TrackInfo::TrackType aType) const;
  // Inverse lookup: the track type that aState belongs to.
  TrackInfo::TrackType GetCodecStateType(OggCodecState* aState) const;

  // Represents the user pref media.opus.enabled at the time our
  // constructor was called. We can't check it dynamically because
  // we're not on the main thread.
  bool mOpusEnabled;

  // Decode state of the Skeleton bitstream.
  SkeletonState* mSkeletonState;

  // Ogg decoding state: a sync state, a read cursor on the media resource,
  // an optional start time, and a "need keyframe" flag that starts out true.
  struct OggStateContext {
    explicit OggStateContext(MediaResource* aResource,
                             rlbox_sandbox_ogg* aSandbox)
        : mOggState(aSandbox), mResource(aResource), mNeedKeyframe(true) {}
    nsAutoOggSyncState mOggState;
    MediaResourceIndex mResource;
    Maybe<media::TimeUnit> mStartTime;
    bool mNeedKeyframe;
  };

  // Accessors keyed by track type.
  // NOTE(review): presumably these select between mAudioOggState and
  // mVideoOggState below — confirm in OggDemuxer.cpp.
  OggStateContext& OggState(TrackInfo::TrackType aType);
  tainted_opaque_ogg<ogg_sync_state*> OggSyncState(TrackInfo::TrackType aType);
  MediaResourceIndex* Resource(TrackInfo::TrackType aType);
  MediaResourceIndex* CommonResource();
  OggStateContext mAudioOggState;
  OggStateContext mVideoOggState;

  Maybe<int64_t> mStartTime;

  // Booleans to indicate if we have audio and/or video data
  bool HasVideo() const;
  bool HasAudio() const;
  // True when a Skeleton state exists and is flagged active.
  bool HasSkeleton() const {
    return mSkeletonState != nullptr && mSkeletonState->mActive;
  }
  bool HaveStartTime() const;
  bool HaveStartTime(TrackInfo::TrackType aType);
  int64_t StartTime() const;
  int64_t StartTime(TrackInfo::TrackType aType);

  // The picture region inside Theora frame to be displayed, if we have
  // a Theora video track.
  gfx::IntRect mPicture;

  // True if we are decoding a chained ogg.
  bool mIsChained;

  // Total audio duration played so far.
  media::TimeUnit mDecodedAudioDuration;

  // Events manager
  TimedMetadataEventProducer* mTimedMetadataEvent;
  MediaEventProducer<void>* mOnSeekableEvent;

  // This will be populated only if a content change occurs, otherwise it
  // will be left as null so the original metadata is used.
  // It is updated once a chained ogg is encountered.
  // As Ogg chaining is only supported for audio, we only need an audio track
  // info.
  RefPtr<TrackInfoSharedPtr> mSharedAudioTrackInfo;

  friend class OggTrackDemuxer;
};

// MediaTrackDemuxer implementation for a single track of an Ogg stream.
// Each instance is tied to a parent OggDemuxer, which declares this class a
// friend so that it can reach the parent's private helpers.
class OggTrackDemuxer : public MediaTrackDemuxer,
                        public DecoderDoctorLifeLogger<OggTrackDemuxer> {
 public:
  // aParent is the owning demuxer; aType and aTrackNumber identify the track.
  OggTrackDemuxer(OggDemuxer* aParent, TrackInfo::TrackType aType,
                  uint32_t aTrackNumber);

  // MediaTrackDemuxer overrides.
  UniquePtr<TrackInfo> GetInfo() const override;

  RefPtr<SeekPromise> Seek(const media::TimeUnit& aTime) override;

  RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override;

  void Reset() override;

  RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint(
      const media::TimeUnit& aTimeThreshold) override;

  media::TimeIntervals GetBuffered() override;

  // NOTE(review): mParent is a strong reference and the parent holds strong
  // references to its track demuxers, so this presumably releases mParent to
  // break that cycle — confirm in OggDemuxer.cpp.
  void BreakCycles() override;

 private:
  ~OggTrackDemuxer();
  void SetNextKeyFrameTime();
  // Produces the next sample for this track.
  // NOTE(review): presumably consumes mQueuedSample when one is pending —
  // confirm in OggDemuxer.cpp.
  RefPtr<MediaRawData> NextSample();
  // Owning demuxer (strong reference).
  RefPtr<OggDemuxer> mParent;
  // Track type this demuxer serves.
  TrackInfo::TrackType mType;
  // Track description owned by this demuxer.
  UniquePtr<TrackInfo> mInfo;

  // Queued sample extracted by the demuxer, but not yet returned.
  RefPtr<MediaRawData> mQueuedSample;
};
}  // namespace mozilla

#endif