summaryrefslogtreecommitdiffstats
path: root/dom/media/platforms/PlatformEncoderModule.h
blob: 72dad430e6108a3c699fece0ce274a851fbe59b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#if !defined(PlatformEncoderModule_h_)
#  define PlatformEncoderModule_h_

#  include "MP4Decoder.h"
#  include "MediaData.h"
#  include "MediaInfo.h"
#  include "MediaResult.h"
#  include "VPXDecoder.h"
#  include "mozilla/Attributes.h"
#  include "mozilla/Maybe.h"
#  include "mozilla/MozPromise.h"
#  include "mozilla/RefPtr.h"
#  include "mozilla/TaskQueue.h"
#  include "mozilla/dom/ImageBitmapBinding.h"
#  include "nsISupportsImpl.h"
#  include "VideoUtils.h"

namespace mozilla {

class MediaDataEncoder;
class EncoderConfig;
struct EncoderConfigurationChangeList;

// Identifies the codec an encoder is configured for.
//
// Real video codecs sit strictly between _BeginVideo_ and _EndVideo_, and real
// audio codecs strictly between _BeginAudio_ and _EndAudio_. The
// underscore-delimited enumerators are range sentinels, not codecs: the
// IsVideo()/IsAudio() helpers in this header compare against them with `>` and
// `<`, so a new codec has to be inserted inside the matching range.
enum class CodecType {
  _BeginVideo_,  // Exclusive lower bound of the video range.
  H264,
  VP8,
  VP9,
  AV1,
  _EndVideo_,  // Exclusive upper bound of the video range.
  _BeginAudio_ = _EndVideo_,  // Same value as _EndVideo_; exclusive lower bound.
  Opus,
  G722,
  _EndAudio_,  // Exclusive upper bound of the audio range.
  Unknown,  // Declared after _EndAudio_, so it is neither video nor audio.
};

// Maps a CodecType to a C string. Only the declaration lives in this header;
// presumably the result is a human-readable codec name -- check the
// out-of-line definition for the exact strings and their lifetime.
// TODO: Automatically generate this (Bug 1865896)
const char* GetCodecTypeString(const CodecType& aCodecType);

// Bitstream packaging selected for H.264 (stored in H264Specific::mFormat).
// NOTE(review): presumably AVC means length-prefixed NAL units and ANNEXB means
// start-code-delimited NAL units -- confirm against the encoder
// implementations.
enum class H264BitStreamFormat { AVC, ANNEXB };

// H.264-specific encoder settings. Every field is const, so an instance keeps
// the values it was constructed with.
struct H264Specific final {
  // Stores the given profile, level and bitstream format unchanged.
  H264Specific(H264_PROFILE aProfile, H264_LEVEL aLevel,
               H264BitStreamFormat aFormat)
      : mProfile(aProfile), mLevel(aLevel), mFormat(aFormat) {}

  const H264_PROFILE mProfile;
  const H264_LEVEL mLevel;
  const H264BitStreamFormat mFormat;
};

// Opus-specific encoder settings. Both fields are const, so an instance keeps
// the values it was constructed with.
struct OpusSpecific final {
  // NOTE: "RestricedLowDelay" is misspelled, but the enumerator name is part
  // of the interface and is therefore kept as is.
  enum class Application { Voip, Audio, RestricedLowDelay };

  // aComplexity is expected to be in [0, 10]; the upper bound is checked with
  // MOZ_ASSERT.
  OpusSpecific(const Application aApplication, const uint8_t aComplexity)
      : mApplication(aApplication), mComplexity(aComplexity) {
    MOZ_ASSERT(mComplexity <= 10);
  }

  const Application mApplication;
  const uint8_t mComplexity;  // from 0-10
};

// Complexity setting stored in VP8Specific::mComplexity (and inherited by
// types deriving from VP8Specific).
enum class VPXComplexity { Normal, High, Higher, Max };

// VP8-specific encoder settings. Every field is const and carries an in-class
// default, so a default-constructed instance is usable as is.
struct VP8Specific {
  const VPXComplexity mComplexity{VPXComplexity::Normal};
  const bool mResilience{true};
  const uint8_t mNumTemporalLayers{1};
  const bool mDenoising{true};
  const bool mAutoResize{false};
  const bool mFrameDropping{false};

  // Keeps all of the defaults listed above.
  VP8Specific() = default;

  // Overrides every default.
  // Ignore webrtc::VideoCodecVP8::errorConcealmentOn,
  // for it's always false in the codebase (except libwebrtc test cases).
  VP8Specific(const VPXComplexity aComplexity, const bool aResilience,
              const uint8_t aNumTemporalLayers, const bool aDenoising,
              const bool aAutoResize, const bool aFrameDropping)
      : mComplexity(aComplexity),
        mResilience(aResilience),
        mNumTemporalLayers(aNumTemporalLayers),
        mDenoising(aDenoising),
        mAutoResize(aAutoResize),
        mFrameDropping(aFrameDropping) {}
};

// VP9-specific encoder settings: everything VP8Specific carries plus three
// VP9-only fields, each const with an in-class default.
struct VP9Specific : public VP8Specific {
  const bool mAdaptiveQp{true};
  const uint8_t mNumSpatialLayers{1};
  const bool mFlexible{false};

  // Keeps the VP8Specific defaults and the defaults listed above.
  VP9Specific() = default;

  // Forwards the first six arguments to VP8Specific and stores the remaining
  // three in the VP9-only fields.
  VP9Specific(const VPXComplexity aComplexity, const bool aResilience,
              const uint8_t aNumTemporalLayers, const bool aDenoising,
              const bool aAutoResize, const bool aFrameDropping,
              const bool aAdaptiveQp, const uint8_t aNumSpatialLayers,
              const bool aFlexible)
      : VP8Specific(aComplexity, aResilience, aNumTemporalLayers, aDenoising,
                    aAutoResize, aFrameDropping),
        mAdaptiveQp(aAdaptiveQp),
        mNumSpatialLayers(aNumSpatialLayers),
        mFlexible(aFlexible) {}
};

// Base class for platform encoder modules (PEM). A module reports which
// configurations and codecs it supports and creates MediaDataEncoder
// instances for them.
class PlatformEncoderModule {
 public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PlatformEncoderModule)

  // Promise type returned by AsyncCreateEncoder(): resolves with the created
  // encoder, rejects with a MediaResult.
  using CreateEncoderPromise = MozPromise<RefPtr<MediaDataEncoder>, MediaResult,
                                          /* IsExclusive = */ true>;

  // Creates a video encoder for aConfig. The base implementation creates
  // nothing and returns null.
  virtual already_AddRefed<MediaDataEncoder> CreateVideoEncoder(
      const EncoderConfig& aConfig, const RefPtr<TaskQueue>& aTaskQueue) const {
    return nullptr;
  }

  // Creates an audio encoder for aConfig. The base implementation creates
  // nothing and returns null.
  virtual already_AddRefed<MediaDataEncoder> CreateAudioEncoder(
      const EncoderConfig& aConfig, const RefPtr<TaskQueue>& aTaskQueue) const {
    return nullptr;
  }

  // Indicates if the PlatformEncoderModule supports encoding with the given
  // configuration.
  virtual bool Supports(const EncoderConfig& aConfig) const = 0;
  // Indicates if the PlatformEncoderModule supports encoding of a codec.
  virtual bool SupportsCodec(CodecType aCodecType) const = 0;

  // Returns a readable name for this Platform Encoder Module
  virtual const char* GetName() const = 0;

  // Asynchronously create an encoder
  RefPtr<PlatformEncoderModule::CreateEncoderPromise> AsyncCreateEncoder(
      const EncoderConfig& aEncoderConfig, const RefPtr<TaskQueue>& aTaskQueue);

 protected:
  PlatformEncoderModule() = default;
  virtual ~PlatformEncoderModule() = default;
};

// Asynchronous encoder interface implemented by each platform encoder.
// Operations report completion through MozPromise objects.
class MediaDataEncoder {
 public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaDataEncoder)

  enum class Usage {
    Realtime,  // Low latency preferred
    Record
  };
  using PixelFormat = dom::ImageBitmapFormat;
  enum class BitrateMode { Constant, Variable };
  // Scalable Video Coding (SVC) settings for WebCodecs:
  // https://www.w3.org/TR/webrtc-svc/
  enum class ScalabilityMode { None, L1T2, L1T3 };

  enum class HardwarePreference { RequireHardware, RequireSoftware, None };

  // True when aCodec lies strictly between the video range sentinels of
  // CodecType.
  static bool IsVideo(const CodecType aCodec) {
    return CodecType::_BeginVideo_ < aCodec && aCodec < CodecType::_EndVideo_;
  }
  // True when aCodec lies strictly between the audio range sentinels of
  // CodecType.
  static bool IsAudio(const CodecType aCodec) {
    return CodecType::_BeginAudio_ < aCodec && aCodec < CodecType::_EndAudio_;
  }

  using InitPromise =
      MozPromise<TrackInfo::TrackType, MediaResult, /* IsExclusive = */ true>;
  using EncodedData = nsTArray<RefPtr<MediaRawData>>;
  using EncodePromise =
      MozPromise<EncodedData, MediaResult, /* IsExclusive = */ true>;
  using ReconfigurationPromise =
      MozPromise<bool, MediaResult, /* IsExclusive = */ true>;

  // Initializes the encoder; it should be ready to encode once the returned
  // promise resolves. All initialization belongs here rather than in the
  // constructor or in PlatformEncoderModule::Create*Encoder(), so that a
  // client that needs to shut down during initialization can call Shutdown()
  // to cancel it. In particular, any initialization that requires blocking the
  // calling thread *must* be done here so that Shutdown() can cancel it!
  virtual RefPtr<InitPromise> Init() = 0;

  // Feeds one sample into the encode pipeline. The returned EncodePromise is
  // resolved with whatever MediaRawData has been encoded so far, or with an
  // empty array when nothing is available yet.
  virtual RefPtr<EncodePromise> Encode(const MediaData* aSample) = 0;

  // Attempts to reconfigure the encoder on the fly. This can fail if the
  // underlying PEM doesn't support this type of reconfiguration.
  virtual RefPtr<ReconfigurationPromise> Reconfigure(
      const RefPtr<const EncoderConfigurationChangeList>&
          aConfigurationChanges) = 0;

  // Asynchronously flushes out every complete sample in the pipeline that can
  // be encoded, signalling that no more input will be inserted.
  // The MediaDataEncoder shall resolve the pending EncodePromise with the
  // drained samples. Drain will be called repeatedly until the resolved
  // EncodePromise is empty, which indicates that there are no more samples to
  // drain.
  virtual RefPtr<EncodePromise> Drain() = 0;

  // Cancels all init/encode/drain operations and shuts down the encoder. The
  // platform encoder should clean up any resources it's using and release
  // memory etc. The shutdown promise will be resolved once the encoder has
  // completed shutdown. The client will delete the encoder once the promise is
  // resolved.
  // The ShutdownPromise must only ever be resolved.
  virtual RefPtr<ShutdownPromise> Shutdown() = 0;

  // Requests a new bitrate. The base implementation changes nothing and
  // returns a promise that is already resolved with true.
  virtual RefPtr<GenericPromise> SetBitrate(uint32_t aBitsPerSec) {
    return GenericPromise::CreateAndResolve(true, __func__);
  }

  // The encoder needs to decide whether or not hardware acceleration is
  // supported after creation. Init() does not need to have been called before
  // calling this function. The base implementation reports false and leaves
  // aFailureReason untouched.
  virtual bool IsHardwareAccelerated(nsACString& aFailureReason) const {
    return false;
  }

  // Return the name of the MediaDataEncoder, only used for encoding.
  // May be accessed in a non thread-safe fashion.
  virtual nsCString GetDescriptionName() const = 0;

  friend class PlatformEncoderModule;

 protected:
  virtual ~MediaDataEncoder() = default;
};

class EncoderConfig final {
 public:
  using CodecSpecific =
      Variant<H264Specific, OpusSpecific, VP8Specific, VP9Specific>;

  EncoderConfig(const EncoderConfig& aConfig)
      : mCodec(aConfig.mCodec),
        mSize(aConfig.mSize),
        mUsage(aConfig.mUsage),
        mHardwarePreference(aConfig.mHardwarePreference),
        mPixelFormat(aConfig.mPixelFormat),
        mSourcePixelFormat(aConfig.mSourcePixelFormat),
        mScalabilityMode(aConfig.mScalabilityMode),
        mFramerate(aConfig.mFramerate),
        mKeyframeInterval(aConfig.mKeyframeInterval),
        mBitrate(aConfig.mBitrate),
        mBitrateMode(aConfig.mBitrateMode),
        mCodecSpecific(aConfig.mCodecSpecific) {}

  template <typename... Ts>
  EncoderConfig(const CodecType aCodecType, gfx::IntSize aSize,
                const MediaDataEncoder::Usage aUsage,
                const MediaDataEncoder::PixelFormat aPixelFormat,
                const MediaDataEncoder::PixelFormat aSourcePixelFormat,
                const uint8_t aFramerate, const size_t aKeyframeInterval,
                const uint32_t aBitrate,
                const MediaDataEncoder::BitrateMode aBitrateMode,
                const MediaDataEncoder::HardwarePreference aHardwarePreference,
                const MediaDataEncoder::ScalabilityMode aScalabilityMode,
                const Maybe<CodecSpecific>& aCodecSpecific)
      : mCodec(aCodecType),
        mSize(aSize),
        mUsage(aUsage),
        mHardwarePreference(aHardwarePreference),
        mPixelFormat(aPixelFormat),
        mSourcePixelFormat(aSourcePixelFormat),
        mScalabilityMode(aScalabilityMode),
        mFramerate(aFramerate),
        mKeyframeInterval(aKeyframeInterval),
        mBitrate(aBitrate),
        mBitrateMode(aBitrateMode),
        mCodecSpecific(aCodecSpecific) {}

  static CodecType CodecTypeForMime(const nsACString& aMimeType) {
    if (MP4Decoder::IsH264(aMimeType)) {
      return CodecType::H264;
    }
    if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP8)) {
      return CodecType::VP8;
    }
    if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP9)) {
      return CodecType::VP9;
    }
    MOZ_ASSERT_UNREACHABLE("Unsupported Mimetype");
    return CodecType::Unknown;
  }

  bool IsVideo() const {
    return mCodec > CodecType::_BeginVideo_ && mCodec < CodecType::_EndVideo_;
  }

  bool IsAudio() const {
    return mCodec > CodecType::_BeginAudio_ && mCodec < CodecType::_EndAudio_;
  }

  CodecType mCodec;
  gfx::IntSize mSize;
  MediaDataEncoder::Usage mUsage;
  MediaDataEncoder::HardwarePreference mHardwarePreference;
  MediaDataEncoder::PixelFormat mPixelFormat;
  MediaDataEncoder::PixelFormat mSourcePixelFormat;
  MediaDataEncoder::ScalabilityMode mScalabilityMode;
  uint8_t mFramerate{};
  size_t mKeyframeInterval{};
  uint32_t mBitrate{};
  MediaDataEncoder::BitrateMode mBitrateMode{};
  Maybe<CodecSpecific> mCodecSpecific;
};

// Wraps a value of type T in a distinct type. A plain `using` alias does not
// create a new type in C++, so two aliases of the same underlying type would be
// indistinguishable, which makes them ambiguous as alternatives of the Variant
// below. Instantiating this template with different Phantom tags yields
// different types even when T is the same.
// T is the wrapped type. Phantom is never used for anything but telling
// instantiations apart and should be unique in the program.
template <typename T, typename Phantom>
class StrongTypedef {
 public:
  // Construction is explicit: a bare T never converts silently.
  explicit StrongTypedef(T const& aValue) : mValue(aValue) {}
  explicit StrongTypedef(T&& aValue) : mValue(std::move(aValue)) {}

  // Access to the wrapped value.
  T const& get() const { return mValue; }
  T& get() { return mValue; }

 private:
  T mValue;
};

// Each alias below wraps a value in StrongTypedef with its own tag type, so
// that every kind of change is a distinct alternative of
// EncoderConfigurationItem.

// Dimensions of the video frames.
using DimensionsChange =
    StrongTypedef<gfx::IntSize, struct DimensionsChangeType>;
// Expected display size of the encoded frames; can influence encoding.
using DisplayDimensionsChange =
    StrongTypedef<Maybe<gfx::IntSize>, struct DisplayDimensionsChangeType>;
// If present, the bitrate in kbps of the encoded stream. If absent, let the
// platform decide.
using BitrateChange = StrongTypedef<Maybe<uint32_t>, struct BitrateChangeType>;
// If present, the expected framerate of the output video stream. If absent,
// infer it from the timestamps of the input frames.
using FramerateChange =
    StrongTypedef<Maybe<double>, struct FramerateChangeType>;
// The bitrate mode (variable, constant) of the encoding.
using BitrateModeChange =
    StrongTypedef<MediaDataEncoder::BitrateMode, struct BitrateModeChangeType>;
// The usage for the encoded stream; this influences latency, ordering, etc.
using UsageChange =
    StrongTypedef<MediaDataEncoder::Usage, struct UsageChangeType>;
// If present, the expected content of the video frames (screen, movie, etc.).
// The values the string can take aren't decided just yet. When absent, the
// encoder uses generic settings.
using ContentHintChange =
    StrongTypedef<Maybe<nsString>, struct ContentHintTypeType>;

// A change to a single parameter of an encoder instance.
using EncoderConfigurationItem =
    Variant<DimensionsChange, DisplayDimensionsChange, BitrateModeChange,
            BitrateChange, FramerateChange, UsageChange, ContentHintChange>;

// A list of changes to an encoder configuration, that _might_ be able to change
// on the fly. Not all encoder modules can adjust their configuration on the
// fly.
struct EncoderConfigurationChangeList {
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncoderConfigurationChangeList)
  bool Empty() const { return mChanges.IsEmpty(); }
  template <typename T>
  void Push(const T& aItem) {
    mChanges.AppendElement(aItem);
  }
  nsString ToString() const;

  nsTArray<EncoderConfigurationItem> mChanges;

 private:
  ~EncoderConfigurationChangeList() = default;
};

// Just by inspecting the configuration, and before asking the PEM, it is
// sometimes possible to know that some media cannot be encoded. For example,
// VP8 encodes the frame size on 14 bits, so a resolution of more than
// 16383x16383 pixels cannot work.
// Only the declaration lives in this header; the exact set of checks is in
// the out-of-line definition.
bool CanLikelyEncode(const EncoderConfig& aConfig);

}  // namespace mozilla

#endif /* PlatformEncoderModule_h_ */