/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MediaEngineWebRTCAudio_h
#define MediaEngineWebRTCAudio_h

#include "AudioPacketizer.h"
#include "AudioSegment.h"
#include "AudioDeviceInfo.h"
#include "DeviceInputTrack.h"
#include "MediaEngineWebRTC.h"
#include "MediaEnginePrefs.h"
#include "MediaTrackListener.h"
#include "modules/audio_processing/include/audio_processing.h"

namespace mozilla {

class AudioInputProcessing;
class AudioProcessingTrack;

// This class is created and used exclusively on the MediaManager thread, with
// exactly two exceptions:
// - Pull is always called on the MTG thread. It only ever uses
//   mInputProcessing. mInputProcessing is set, and then a message is sent
//   first to the main thread and then to the MTG thread so that it can be
//   used as part of the graph processing. On destruction, similarly, a
//   message is sent to the graph so that it stops using it, and then it is
//   deleted.
// - mSettings is created on the MediaManager thread and is only ever accessed
//   on the main thread afterwards. It is const.
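//
// A rough sketch of the hand-off described above (hypothetical call sites;
// the real code sequences these steps with thread dispatches and graph
// messages):
//   mInputProcessing = new AudioInputProcessing(mDeviceMaxChannelCount);
//   mTrack->SetInputProcessing(mInputProcessing);  // forwarded to the graph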
class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCMicrophoneSource(const MediaDevice* aMediaDevice);

  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override;
  nsresult Deallocate() override;
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  /**
   * Assigns the current settings of the capture to aOutSettings.
   * Main thread only.
   */
  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

 protected:
  ~MediaEngineWebRTCMicrophoneSource() = default;

 private:
  /**
   * From a set of constraints and about:config preferences, output the correct
   * set of preferences that can be sent to AudioInputProcessing.
   *
   * This can fail if the number of channels requested is zero, negative, or
   * more than the device supports.
   */
  nsresult EvaluateSettings(const NormalizedConstraints& aConstraintsUpdate,
                            const MediaEnginePrefs& aInPrefs,
                            MediaEnginePrefs* aOutPrefs,
                            const char** aOutBadConstraint);
  /**
   * From settings output by EvaluateSettings, send those settings to the
   * AudioInputProcessing instance and the main thread (for use in GetSettings).
   */
  void ApplySettings(const MediaEnginePrefs& aPrefs);

  PrincipalHandle mPrincipal = PRINCIPAL_HANDLE_NONE;

  const RefPtr<AudioDeviceInfo> mDeviceInfo;

  // The maximum number of channels that this device supports.
  const uint32_t mDeviceMaxChannelCount;
  // The current settings for the underlying device.
  // Constructed on the MediaManager thread, and then only ever accessed on the
  // main thread.
  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>>
      mSettings;

  // Current state of the resource for this source.
  MediaEngineSourceState mState;

  // The current preferences that will be forwarded to mAudioProcessingConfig
  // below.
  MediaEnginePrefs mCurrentPrefs;

  // The AudioProcessingTrack used to interface with the MediaTrackGraph. Set
  // in SetTrack as part of the initialization, and nulled in ::Deallocate.
  RefPtr<AudioProcessingTrack> mTrack;

  // See note at the top of this class.
  RefPtr<AudioInputProcessing> mInputProcessing;

  // Copy of the config currently applied to AudioProcessing through
  // mInputProcessing.
  webrtc::AudioProcessing::Config mAudioProcessingConfig;
};

// This class is created on the MediaManager thread, and then used exclusively
// on the MTG thread.
// All communication is done via message passing using MTG ControlMessages.
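//
// For example, a state change reaches the graph thread roughly like this
// (a hypothetical message type; the real code defines its own ControlMessage
// subclasses):
//   class StartMessage : public ControlMessage {
//     void Run() override { mInputProcessing->Start(mTrack->GraphImpl()); }
//   };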
class AudioInputProcessing : public AudioDataListener {
 public:
  explicit AudioInputProcessing(uint32_t aMaxChannelCount);
  // Called on the graph thread to process input audio from the device:
  // aInput is passed through or processed into aOutput.
  void Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
               AudioSegment* aInput, AudioSegment* aOutput);

  // Feeds the graph's mixed audio output to the APM as the reverse stream,
  // i.e. the speaker data used by the echo canceller.
  void ProcessOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
                         size_t aFrames, TrackRate aRate, uint32_t aChannels);
  bool IsVoiceInput(MediaTrackGraphImpl* aGraph) const override {
    // If we're passing data through directly, without AEC or any other
    // processing, it means that all voice-processing has been disabled
    // intentionally. In this case, consider that the device is not used for
    // voice input.
    return !PassThrough(aGraph);
  }

  void Start(MediaTrackGraphImpl* aGraph);
  void Stop(MediaTrackGraphImpl* aGraph);

  void DeviceChanged(MediaTrackGraphImpl* aGraph) override;

  uint32_t RequestedInputChannelCount(MediaTrackGraphImpl*) override {
    return GetRequestedInputChannelCount();
  }

  void Disconnect(MediaTrackGraphImpl* aGraph) override;

  // Packetize the given segment into 10ms chunks and run them through the
  // APM. Called on the graph thread.
  void PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
                           const AudioSegment& aSegment);

  void SetPassThrough(MediaTrackGraphImpl* aGraph, bool aPassThrough);
  uint32_t GetRequestedInputChannelCount();
  void SetRequestedInputChannelCount(MediaTrackGraphImpl* aGraph,
                                     CubebUtils::AudioDeviceID aDeviceId,
                                     uint32_t aRequestedInputChannelCount);
  // This is true when all processing is disabled, in which case we can skip
  // packetization, resampling and other processing passes.
  bool PassThrough(MediaTrackGraphImpl* aGraphImpl) const;

  // This allows changing the APM options, enabling or disabling processing
  // steps. The config gets applied the next time we're about to process input
  // data.
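  //
  // For example (a hedged sketch: the field names come from webrtc.org's
  // AudioProcessing::Config, which varies across webrtc versions, and the
  // call site is hypothetical):
  //   webrtc::AudioProcessing::Config config;
  //   config.echo_canceller.enabled = true;
  //   inputProcessing->ApplyConfig(graphImpl, config);  // graph thread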
  void ApplyConfig(MediaTrackGraphImpl* aGraph,
                   const webrtc::AudioProcessing::Config& aConfig);

  void End();

  TrackTime NumBufferedFrames(MediaTrackGraphImpl* aGraph) const;

  // The packet size is the number of samples in 10ms of audio. The unit of
  // aRate is Hz.
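  // For example, GetPacketSize(48000) == 480 samples per 10ms packet.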
  constexpr static uint32_t GetPacketSize(TrackRate aRate) {
    return static_cast<uint32_t>(aRate) / 100u;
  }

  bool IsEnded() const { return mEnded; }

 private:
  ~AudioInputProcessing() = default;
  void EnsureAudioProcessing(MediaTrackGraphImpl* aGraph, uint32_t aChannels);
  void ResetAudioProcessing(MediaTrackGraphImpl* aGraph);
  PrincipalHandle GetCheckedPrincipal(const AudioSegment& aSegment);
  // This implements the processing algorithm to apply to the input (e.g. a
  // microphone). If all algorithms are disabled, this class is not used. This
  // class only accepts audio chunks of 10ms. It has two inputs and one output:
  // it is fed the speaker data and the microphone data. It outputs processed
  // input data.
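  //
  // A sketch of the data flow through the APM (the method names are from the
  // webrtc.org API; exact signatures vary by webrtc version):
  //   mAudioProcessing->ProcessReverseStream(...);  // speaker (far-end) data
  //   mAudioProcessing->ProcessStream(...);         // microphone data in,
  //                                                 // processed data out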
  const UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
  // Packetizer to be able to feed 10ms packets to the input side of
  // mAudioProcessing. Not used if the processing is bypassed.
  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
  // Packetizer to be able to feed 10ms packets to the output side of
  // mAudioProcessing. Not used if the processing is bypassed.
  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
  // The number of channels asked for by content, after clamping to the range
  // of legal channel counts for this particular device.
  uint32_t mRequestedInputChannelCount;
  // mSkipProcessing is true if none of the processing passes are enabled,
  // because of prefs or constraints. This allows simply copying the audio into
  // the MTG, skipping resampling and the whole webrtc.org code.
  bool mSkipProcessing;
  // Stores the mixed audio output for the reverse-stream of the AEC (the
  // speaker data).
  AlignedFloatBuffer mOutputBuffer;
  // Stores the input audio, to be processed by the APM.
  AlignedFloatBuffer mInputBuffer;
  // Stores the deinterleaved microphone audio
  AlignedFloatBuffer mDeinterleavedBuffer;
  // Stores the mixed down input audio
  AlignedFloatBuffer mInputDownmixBuffer;
  // Stores data waiting to be pulled.
  AudioSegment mSegment;
  // Whether or not this MediaEngine is enabled. If it's not enabled, it
  // operates in "pull" mode, and we append silence only, releasing the audio
  // input track.
  bool mEnabled;
  // Whether or not we've ended and removed the AudioProcessingTrack.
  bool mEnded;
  // When processing is enabled, the number of packets received by this
  // instance, to implement periodic logging.
  uint64_t mPacketCount;
  // Storage for the interleaved audio data converted from the AudioSegment.
  // This is used as an input parameter for PacketizeAndProcess. It should be
  // removed once bug 1729041 is done.
  AutoTArray<AudioDataValue,
             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
      mInterleavedBuffer;
  // Tracks the pending frames, with their paired principals, that have piled
  // up in the packetizer.
  std::deque<std::pair<TrackTime, PrincipalHandle>> mChunksInPacketizer;
};

// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
class AudioProcessingTrack : public DeviceInputConsumerTrack {
  // Only accessed on the graph thread.
  RefPtr<AudioInputProcessing> mInputProcessing;

  explicit AudioProcessingTrack(TrackRate aSampleRate)
      : DeviceInputConsumerTrack(aSampleRate) {}

  ~AudioProcessingTrack() = default;

 public:
  // Main Thread API
  void Destroy() override;
  void SetInputProcessing(RefPtr<AudioInputProcessing> aInputProcessing);
  static AudioProcessingTrack* Create(MediaTrackGraph* aGraph);

  // Graph Thread API
  void DestroyImpl() override;
  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
  uint32_t NumberOfChannels() const override {
    MOZ_DIAGNOSTIC_ASSERT(
        mInputProcessing,
        "Must set mInputProcessing before exposing to content");
    return mInputProcessing->GetRequestedInputChannelCount();
  }
  // Pass the graph's mixed audio output to mInputProcessing for processing as
  // the reverse stream.
  void NotifyOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
                        size_t aFrames, TrackRate aRate, uint32_t aChannels);

  // Any thread
  AudioProcessingTrack* AsAudioProcessingTrack() override { return this; }

 private:
  // Graph thread API
  void SetInputProcessingImpl(RefPtr<AudioInputProcessing> aInputProcessing);
};

class MediaEngineWebRTCAudioCaptureSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCAudioCaptureSource(const MediaDevice* aMediaDevice);
  static nsString GetUUID();
  static nsString GetGroupId();
  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override {
    // Nothing to do here; everything is managed in MediaManager.cpp.
    return NS_OK;
  }
  nsresult Deallocate() override {
    // Nothing to do here; everything is managed in MediaManager.cpp.
    return NS_OK;
  }
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

 protected:
  virtual ~MediaEngineWebRTCAudioCaptureSource() = default;
};

}  // end namespace mozilla

#endif  // MediaEngineWebRTCAudio_h