summaryrefslogtreecommitdiffstats
path: root/dom/media/mediasink/AudioDecoderInputTrack.h
blob: 8c82d7bed609e7b692513095ee08ea4620f8369c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef AudioDecoderInputTrack_h
#define AudioDecoderInputTrack_h

#include "AudioSegment.h"
#include "MediaEventSource.h"
#include "MediaTimer.h"
#include "MediaTrackGraph.h"
#include "MediaTrackGraphImpl.h"
#include "MediaSegment.h"
#include "mozilla/SPSCQueue.h"
#include "mozilla/StateMirroring.h"
#include "nsISerialEventTarget.h"

namespace soundtouch {
class MOZ_EXPORT SoundTouch;
}

namespace mozilla {

class AudioData;

/**
 * AudioDecoderInputTrack is used as a source for the audio decoder data, and
 * supports adjusting the playback rate and preserving pitch.
 * The owner of this track is responsible for pushing audio data via
 * `AppendData()` into an SPSC queue, which is a thread-safe queue between the
 * decoder thread (producer) and the graph thread (consumer). MediaTrackGraph
 * requests data via `ProcessInput()`; AudioDecoderInputTrack then converts the
 * data (based on sample rate and playback rate) and appends the needed amount
 * of audio frames onto the output segment that is consumed by MediaTrackGraph.
 */
class AudioDecoderInputTrack final : public ProcessedMediaTrack {
 public:
  static AudioDecoderInputTrack* Create(MediaTrackGraph* aGraph,
                                        nsISerialEventTarget* aDecoderThread,
                                        const AudioInfo& aInfo,
                                        float aPlaybackRate, float aVolume,
                                        bool aPreservesPitch);

  // SPSCData suppports filling different supported type variants, and is used
  // to achieve a thread-safe information exchange between the decoder thread
  // and the graph thread.
  struct SPSCData final {
    struct Empty {};
    struct ClearFutureData {};
    struct DecodedData {
      DecodedData()
          : mStartTime(media::TimeUnit::Invalid()),
            mEndTime(media::TimeUnit::Invalid()) {}
      DecodedData(DecodedData&& aDecodedData)
          : mSegment(std::move(aDecodedData.mSegment)) {
        mStartTime = aDecodedData.mStartTime;
        mEndTime = aDecodedData.mEndTime;
        aDecodedData.Clear();
      }
      DecodedData(media::TimeUnit aStartTime, media::TimeUnit aEndTime)
          : mStartTime(aStartTime), mEndTime(aEndTime) {}
      DecodedData(const DecodedData&) = delete;
      DecodedData& operator=(const DecodedData&) = delete;
      void Clear() {
        mSegment.Clear();
        mStartTime = media::TimeUnit::Invalid();
        mEndTime = media::TimeUnit::Invalid();
      }
      AudioSegment mSegment;
      media::TimeUnit mStartTime;
      media::TimeUnit mEndTime;
    };
    struct EOS {};

    SPSCData() : mData(Empty()){};
    explicit SPSCData(ClearFutureData&& aArg) : mData(std::move(aArg)){};
    explicit SPSCData(DecodedData&& aArg) : mData(std::move(aArg)){};
    explicit SPSCData(EOS&& aArg) : mData(std::move(aArg)){};

    bool HasData() const { return !mData.is<Empty>(); }
    bool IsClearFutureData() const { return mData.is<ClearFutureData>(); }
    bool IsDecodedData() const { return mData.is<DecodedData>(); }
    bool IsEOS() const { return mData.is<EOS>(); }

    DecodedData* AsDecodedData() {
      return IsDecodedData() ? &mData.as<DecodedData>() : nullptr;
    }

    Variant<Empty, ClearFutureData, DecodedData, EOS> mData;
  };

  // Decoder thread API
  void AppendData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
  void AppendData(nsTArray<RefPtr<AudioData>>& aAudioArray,
                  const PrincipalHandle& aPrincipalHandle);
  void NotifyEndOfStream();
  void ClearFutureData();
  void SetVolume(float aVolume);
  void SetPlaybackRate(float aPlaybackRate);
  void SetPreservesPitch(bool aPreservesPitch);
  // After calling this, the track are not expected to receive any new data.
  void Close();
  bool HasBatchedData() const;

  MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
  MediaEventSource<void>& OnEnd() { return mOnEnd; }

  // Graph Thread API
  void DestroyImpl() override;
  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
  uint32_t NumberOfChannels() const override;

  // The functions below are only used for testing.
  TrackTime WrittenFrames() const {
    AssertOnGraphThread();
    return mWrittenFrames;
  }
  float Volume() const {
    AssertOnGraphThread();
    return mVolume;
  }
  float PlaybackRate() const {
    AssertOnGraphThread();
    return mPlaybackRate;
  }

 protected:
  ~AudioDecoderInputTrack();

 private:
  AudioDecoderInputTrack(nsISerialEventTarget* aDecoderThread,
                         TrackRate aGraphRate, const AudioInfo& aInfo,
                         float aPlaybackRate, float aVolume,
                         bool aPreservesPitch);

  // Return false if the converted segment contains zero duration.
  bool ConvertAudioDataToSegment(AudioData* aAudio, AudioSegment& aSegment,
                                 const PrincipalHandle& aPrincipalHandle);

  void HandleSPSCData(SPSCData& aData);

  // These methods would return the total frames that we consumed from
  // `mBufferedData`.
  TrackTime AppendBufferedDataToOutput(TrackTime aExpectedDuration);
  TrackTime FillDataToTimeStretcher(TrackTime aExpectedDuration);
  TrackTime AppendTimeStretchedDataToSegment(TrackTime aExpectedDuration,
                                             AudioSegment& aOutput);
  TrackTime AppendUnstretchedDataToSegment(TrackTime aExpectedDuration,
                                           AudioSegment& aOutput);

  // Return the total frames that we retrieve from the time stretcher.
  TrackTime DrainStretchedDataIfNeeded(TrackTime aExpectedDuration,
                                       AudioSegment& aOutput);
  TrackTime GetDataFromTimeStretcher(TrackTime aExpectedDuration,
                                     AudioSegment& aOutput);
  void NotifyInTheEndOfProcessInput(TrackTime aFillDuration);

  bool HasSentAllData() const;

  bool ShouldBatchData() const;
  void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
  void DispatchPushBatchedDataIfNeeded();
  void PushBatchedDataIfNeeded();
  void PushDataToSPSCQueue(SPSCData& data);

  void SetVolumeImpl(float aVolume);
  void SetPlaybackRateImpl(float aPlaybackRate);
  void SetPreservesPitchImpl(bool aPreservesPitch);

  void EnsureTimeStretcher();
  void SetTempoAndRateForTimeStretcher();
  uint32_t GetChannelCountForTimeStretcher() const;

  inline void AssertOnDecoderThread() const {
    MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
  }
  inline void AssertOnGraphThread() const {
    MOZ_ASSERT(GraphImpl()->OnGraphThread());
  }
  inline void AssertOnGraphThreadOrNotRunning() const {
    MOZ_ASSERT(GraphImpl()->OnGraphThreadOrNotRunning());
  }

  const RefPtr<nsISerialEventTarget> mDecoderThread;

  // Notify the amount of audio frames which have been sent to the track.
  MediaEventProducer<int64_t> mOnOutput;
  // Notify when the track is ended.
  MediaEventProducer<void> mOnEnd;

  // These variables are ONLY used in the decoder thread.
  nsAutoRef<SpeexResamplerState> mResampler;
  uint32_t mResamplerChannelCount;
  const uint32_t mInitialInputChannels;
  TrackRate mInputSampleRate;
  DelayedScheduler mDelayedScheduler;
  bool mShutdownSPSCQueue = false;

  // These attributes are ONLY used in the graph thread.
  bool mReceivedEOS = false;
  TrackTime mWrittenFrames = 0;
  float mPlaybackRate;
  float mVolume;
  bool mPreservesPitch;

  // A thread-safe queue shared by the decoder thread and the graph thread.
  // The decoder thread is the producer side, and the graph thread is the
  // consumer side. This queue should NEVER get full. In order to achieve that,
  // we would batch input samples when SPSC queue doesn't have many available
  // capacity.
  // In addition, as the media track isn't guaranteed to be destroyed on the
  // graph thread (it could be destroyed on the main thread as well) so we might
  // not clear all data in SPSC queue when the track's `DestroyImpl()` gets
  // called. We leave to destroy the queue later when the track gets destroyed.
  SPSCQueue<SPSCData> mSPSCQueue{40};

  // When the graph requires the less amount of audio frames than the amount of
  // frames an audio data has, then the remaining part of frames would be stored
  // and used in next iteration.
  // This is ONLY used in the graph thread.
  AudioSegment mBufferedData;

  // In order to prevent SPSC queue from being full, we want to batch multiple
  // data into one to control the density of SPSC queue, the length of batched
  // data would be dynamically adjusted by queue's available capacity.
  // This is ONLY used in the decoder thread.
  SPSCData::DecodedData mBatchedData;

  // True if we've sent all data to the graph, then the track will be marked as
  // ended in the next iteration.
  bool mSentAllData = false;

  // This is used to adjust the playback rate and pitch.
  soundtouch::SoundTouch* mTimeStretcher = nullptr;

  // Buffers that would be used for the time stretching.
  AutoTArray<AudioDataValue, 2> mInterleavedBuffer;
};

}  // namespace mozilla

#endif  // AudioDecoderInputTrack_h