dom/media/driftcontrol/DynamicResampler.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
#define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_

#include "AudioRingBuffer.h"
#include "AudioSegment.h"
#include "TimeUnits.h"
#include "WavDumper.h"

#include <speex/speex_resampler.h>

namespace mozilla {

const uint32_t STEREO = 2;

/**
 * DynamicResampler allows updating on the fly the output sample rate and the
 * number of channels. In addition to that, it maintains an internal buffer for
 * the input data and allows pre-buffering as well. The Resample() method
 * strives to provide the requested number of output frames by using the input
 * data including any pre-buffering. If there are fewer frames in the internal
 * buffer than is requested, the internal buffer is padded with enough silence
 * to allow the requested to be resampled and returned.
 *
 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
 * buffer is initially 100ms of float audio and it is pre-allocated at the
 * constructor. Should the input data grow beyond that, the input buffer is
 * re-allocated on the fly. In addition to that, due to special feature of
 * AudioRingBuffer, no extra copies take place when the input data is fed to the
 * resampler.
 *
 * The sample format must be set before using any method. If the provided sample
 * format is of type short the pre-allocated capacity of the input buffer
 * becomes 200ms of short audio.
 *
 * The DynamicResampler is not thread-safe, so all the methods appart from the
 * constructor must be called on the same thread.
 */
class DynamicResampler final {
 public:
  /**
   * Provide the initial input and output rate and the amount of pre-buffering.
   * The channel count will be set to stereo. Memory allocation will take
   * place. The input buffer is non-interleaved.
   */
  DynamicResampler(
      uint32_t aInRate, uint32_t aOutRate,
      media::TimeUnit aPreBufferDuration = media::TimeUnit::Zero());
  ~DynamicResampler();

  /**
   * Set the sample format type to float or short.
   */
  void SetSampleFormat(AudioSampleFormat aFormat);
  uint32_t GetOutRate() const { return mOutRate; }
  uint32_t GetChannels() const { return mChannels; }

  /**
   * Append `aInFrames` number of frames from `aInBuffer` to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);
  void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);
  /**
   * Append `aInFrames` number of frames of silence to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInputSilence(const uint32_t aInFrames);
  /**
   * Return the number of frames the internal input buffer can store.
   */
  uint32_t InFramesBufferSize() const;
  /**
   * Return the number of frames stored in the internal input buffer.
   */
  uint32_t InFramesBuffered(uint32_t aChannelIndex) const;

  /**
   * Prepends existing input data with a silent pre-buffer if not already done.
   * Data will be prepended so that after resampling aOutFrames worth of output
   * data, the buffering level will be as close as possible to
   * mPreBufferDuration, which is the desired buffering level.
   */
  void EnsurePreBuffer(media::TimeUnit aDuration);

  /**
   * Set the duration that should be used for pre-buffering.
   */
  void SetPreBufferDuration(media::TimeUnit aDuration);

  /*
   * Resample as much frames as needed from the internal input buffer to the
   * `aOutBuffer` in order to provide all `aOutFrames`.
   *
   * On first call, prepends the input buffer with silence so that after
   * resampling aOutFrames frames of data, the input buffer holds data as close
   * as possible to the configured pre-buffer size.
   *
   * If there are not enough input frames to provide the requested output
   * frames, the input buffer is padded with enough silence to allow the
   * requested frames to be resampled, and the pre-buffer is reset so that the
   * next call will be treated as the first.
   *
   * Returns true if the internal input buffer underran and had to be padded
   * with silence, otherwise false.
   */
  bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);
  bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,
                uint32_t aChannelIndex);

  /**
   * Update the output rate or/and the channel count. If a value is not updated
   * compared to the current one nothing happens. Changing the `aOutRate`
   * results in recalculation in the resampler. Changing `aChannels` results in
   * the reallocation of the internal input buffer with the exception of
   * changes between mono to stereo and vice versa where no reallocation takes
   * place. A stereo internal input buffer is always maintained even if the
   * sound is mono.
   */
  void UpdateResampler(uint32_t aOutRate, uint32_t aChannels);

 private:
  template <typename T>
  void AppendInputInternal(Span<const T* const>& aInBuffer,
                           uint32_t aInFrames) {
    MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
    for (uint32_t i = 0; i < mChannels; ++i) {
      PushInFrames(aInBuffer[i], aInFrames, i);
    }
  }

  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
                        float* aOutBuffer, uint32_t* aOutFrames,
                        uint32_t aChannelIndex);
  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
                        int16_t* aOutBuffer, uint32_t* aOutFrames,
                        uint32_t aChannelIndex);

  template <typename T>
  bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,
                        uint32_t aChannelIndex) {
    MOZ_ASSERT(mInRate);
    MOZ_ASSERT(mOutRate);
    MOZ_ASSERT(mChannels);
    MOZ_ASSERT(aChannelIndex < mChannels);
    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
    MOZ_ASSERT(aOutFrames);

    if (mInRate == mOutRate) {
      bool underrun = false;
      if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();
          buffered < aOutFrames) {
        underrun = true;
        mIsPreBufferSet = false;
        mInternalInBuffer[aChannelIndex].WriteSilence(aOutFrames - buffered);
      }
      DebugOnly<uint32_t> numFramesRead =
          mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, aOutFrames));
      MOZ_ASSERT(numFramesRead == aOutFrames);
      // Workaround to avoid discontinuity when the speex resampler operates
      // again. Feed it with the last 20 frames to warm up the internal memory
      // of the resampler and then skip memory equals to resampler's input
      // latency.
      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);
      if (aChannelIndex == 0 && !mIsWarmingUp) {
        mInputStreamFile.Write(aOutBuffer, aOutFrames);
        mOutputStreamFile.Write(aOutBuffer, aOutFrames);
      }
      return underrun;
    }

    uint32_t totalOutFramesNeeded = aOutFrames;
    auto resample = [&] {
      mInternalInBuffer[aChannelIndex].ReadNoCopy(
          [&](const Span<const T>& aInBuffer) -> uint32_t {
            if (!totalOutFramesNeeded) {
              return 0;
            }
            uint32_t outFramesResampled = totalOutFramesNeeded;
            uint32_t inFrames = aInBuffer.Length();
            ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
                             &outFramesResampled, aChannelIndex);
            aOutBuffer += outFramesResampled;
            totalOutFramesNeeded -= outFramesResampled;
            mInputTail[aChannelIndex].StoreTail<T>(aInBuffer.To(inFrames));
            return inFrames;
          });
    };

    resample();

    if (totalOutFramesNeeded == 0) {
      return false;
    }

    while (totalOutFramesNeeded > 0) {
      MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);
      // Round up.
      uint32_t totalInFramesNeeded =
          ((CheckedUint32(totalOutFramesNeeded) * mInRate + mOutRate - 1) /
           mOutRate)
              .value();
      mInternalInBuffer[aChannelIndex].WriteSilence(totalInFramesNeeded);
      resample();
    }
    mIsPreBufferSet = false;
    return true;
  }

  template <typename T>
  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
                    uint32_t aChannelIndex) {
    MOZ_ASSERT(aInBuffer);
    MOZ_ASSERT(aInFrames);
    MOZ_ASSERT(mChannels);
    MOZ_ASSERT(aChannelIndex < mChannels);
    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
    EnsureInputBufferDuration(media::TimeUnit(
        CheckedInt64(mInternalInBuffer[aChannelIndex].AvailableRead()) +
            aInFrames,
        mInRate));
    mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
  }

  void WarmUpResampler(bool aSkipLatency);

  media::TimeUnit CalculateInputBufferDuration() const {
    // Pre-allocate something big, twice the pre-buffer, or at least 100ms.
    return std::max(mPreBufferDuration * 2, media::TimeUnit::FromSeconds(0.1));
  }

  bool EnsureInputBufferDuration(media::TimeUnit aDuration) {
    if (aDuration <= mSetBufferDuration) {
      // Buffer size is sufficient.
      return true;
    }

    // 5 second cap.
    const media::TimeUnit cap = media::TimeUnit::FromSeconds(5);
    if (mSetBufferDuration == cap) {
      // Already at the cap.
      return false;
    }

    uint32_t sampleSize = 0;
    if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {
      sampleSize = sizeof(float);
    } else if (mSampleFormat == AUDIO_FORMAT_S16) {
      sampleSize = sizeof(short);
    }

    if (sampleSize == 0) {
      // No sample format set, we wouldn't know how many bytes to allocate.
      return true;
    }

    // As a backoff strategy, at least double the previous size.
    media::TimeUnit duration = mSetBufferDuration * 2;

    if (aDuration > duration) {
      // A larger buffer than the normal backoff strategy provides is needed, or
      // this is the first time setting the buffer size. Round up to the nearest
      // 100ms, some jitter is expected.
      duration = aDuration.ToBase<media::TimeUnit::CeilingPolicy>(10);
    }

    duration = std::min(cap, duration);

    bool success = true;
    for (auto& b : mInternalInBuffer) {
      success = success &&
                b.SetLengthBytes(sampleSize * duration.ToTicksAtRate(mInRate));
    }

    if (success) {
      // All buffers have the new size.
      mSetBufferDuration = duration;
      return true;
    }

    const uint32_t sizeInFrames =
        static_cast<uint32_t>(mSetBufferDuration.ToTicksAtRate(mInRate));
    // Allocating an input buffer failed. We stick with the old buffer size.
    NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "
                               "frames). Expect glitches.",
                               sampleSize * sizeInFrames, sizeInFrames)
                   .get());
    for (auto& b : mInternalInBuffer) {
      MOZ_ALWAYS_TRUE(b.SetLengthBytes(sampleSize * sizeInFrames));
    }
    return false;
  }

 public:
  const uint32_t mInRate;

 private:
  bool mIsPreBufferSet = false;
  bool mIsWarmingUp = false;
  media::TimeUnit mPreBufferDuration;
  media::TimeUnit mSetBufferDuration = media::TimeUnit::Zero();
  uint32_t mChannels = 0;
  uint32_t mOutRate;

  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;

  SpeexResamplerState* mResampler = nullptr;
  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;

  class TailBuffer {
   public:
    template <typename T>
    T* Buffer() {
      return reinterpret_cast<T*>(mBuffer);
    }
    /* Store the MAXSIZE last elements of the buffer. */
    template <typename T>
    void StoreTail(const Span<const T>& aInBuffer) {
      StoreTail(aInBuffer.data(), aInBuffer.size());
    }
    template <typename T>
    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
      if (aInFrames >= MAXSIZE) {
        PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
        mSize = MAXSIZE;
      } else {
        PodCopy(Buffer<T>(), aInBuffer, aInFrames);
        mSize = aInFrames;
      }
    }
    uint32_t Length() { return mSize; }
    static const uint32_t MAXSIZE = 20;

   private:
    float mBuffer[MAXSIZE] = {};
    uint32_t mSize = 0;
  };
  AutoTArray<TailBuffer, STEREO> mInputTail;

  WavDumper mInputStreamFile;
  WavDumper mOutputStreamFile;
};

}  // namespace mozilla

#endif  // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_