dom/media/AudioDriftCorrection.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_AUDIO_DRIFT_CORRECTION_H_
#define MOZILLA_AUDIO_DRIFT_CORRECTION_H_

#include "DynamicResampler.h"

namespace mozilla {

// Log module used by ClockDrift for its verbose correction logging. It is only
// declared here; the definition lives in another translation unit.
extern LazyLogModule gMediaTrackGraphLog;

/**
 * ClockDrift calculates the divergence of the source clock from the nominal
 * (provided) rate compared to the target clock, which is considered the master
 * clock. In the case of different sampling rates, it is assumed that resampling
 * will take place so the returned correction is estimated after the resampling.
 * That means that resampling is taken into account in the calculations but it
 * does not appear in the correction. The correction must be applied on top of
 * the resampling.
 *
 * It works by measuring the incoming, the outgoing frames, and the amount of
 * buffered data and estimates the correction needed. The correction logic has
 * been created with two things in mind. First, not to run out of frames because
 * that means the audio will glitch. Second, not to change the correction very
 * often because this will result in a change in the resampling ratio. The
 * resampler recreates its internal memory when the ratio changes which has a
 * performance impact.
 *
 * The pref `media.clockdrift.buffering` can be used to configure the desired
 * internal buffering. Right now it is at 50ms. But it can be increased if there
 * are audio quality problems.
 */
class ClockDrift final {
 public:
  /**
   * Provide the nominal source and target sample rates, and the amount of
   * buffered frames that the correction should try to maintain.
   */
  ClockDrift(uint32_t aSourceRate, uint32_t aTargetRate,
             uint32_t aDesiredBuffering)
      : mSourceRate(aSourceRate),
        mTargetRate(aTargetRate),
        mDesiredBuffering(aDesiredBuffering) {}

  /**
   * The correction in the form of a ratio. A correction of 0.98 means that the
   * target is 2% slower compared to the source or 1.03 which means that the
   * target is 3% faster than the source.
   */
  float GetCorrection() const { return mCorrection; }

  /**
   * Update the available source frames, target frames, the currently buffered
   * frames and the remaining (still writable) frames, in every iteration.
   *
   * A new correction is only considered once at least 100ms worth of frames
   * has been accumulated on the source or the target clock since the last
   * correction. It is then calculated in the following cases:
   *   1. The buffered frames or the remaining frames are below 40% of the
   *      desired buffering. The new value is applied with a weight of 0.9.
   *   2. mAdjustmentIntervalMs milliseconds (1000ms) worth of frames has been
   *      accumulated on either clock. The new value is applied with a weight
   *      of 0.6.
   * In addition to that, the correction is clamped to 10% to avoid sound
   * distortion so the result will be in [0.9, 1.1].
   *
   * The frames of the current iteration are added to the clocks after the
   * checks above, so they only count for the next call.
   */
  void UpdateClock(uint32_t aSourceFrames, uint32_t aTargetFrames,
                   uint32_t aBufferedFrames, uint32_t aRemainingFrames) {
    if (mSourceClock >= mSourceRate / 10 || mTargetClock >= mTargetRate / 10) {
      // Only update the correction if 100ms has passed since last update.
      if (aBufferedFrames < mDesiredBuffering * 4 / 10 /*40%*/ ||
          aRemainingFrames < mDesiredBuffering * 4 / 10 /*40%*/) {
        // We are getting close to the lower or upper bound of the internal
        // buffer. Steer clear.
        CalculateCorrection(0.9, aBufferedFrames, aRemainingFrames);
      } else if ((mTargetClock * 1000 / mTargetRate) >= mAdjustmentIntervalMs ||
                 (mSourceClock * 1000 / mSourceRate) >= mAdjustmentIntervalMs) {
        // The adjustment interval has passed on one side. Recalculate.
        CalculateCorrection(0.6, aBufferedFrames, aRemainingFrames);
      }
    }
    mTargetClock += aTargetFrames;
    mSourceClock += aSourceFrames;
  }

 private:
  /**
   * aCalculationWeight is a percentage [0, 1] with which the calculated
   * correction will be weighted. The existing correction will be weighted with
   * 1 - aCalculationWeight. This gives some inertia to the speed at which the
   * correction changes, for smoother changes.
   *
   * aRemainingFrames is only used for logging. Both clocks are reset once the
   * new correction has been stored.
   */
  void CalculateCorrection(float aCalculationWeight, uint32_t aBufferedFrames,
                           uint32_t aRemainingFrames) {
    // We want to maintain the desired buffer, so the surplus (or deficit) of
    // buffered frames is counted as part of the source clock. The deficit can
    // be larger than the source clock itself, so the arithmetic must be
    // signed: with unsigned types the sum wraps around to a huge value and the
    // correction gets pushed towards 0.9 exactly when the buffer is running
    // dry and it should be pushed towards 1.1.
    const int64_t bufferedFramesDiff =
        static_cast<int64_t>(aBufferedFrames) - mDesiredBuffering;
    int64_t resampledSourceClock =
        static_cast<int64_t>(mSourceClock) + bufferedFramesDiff;
    if (mTargetRate != mSourceRate) {
      resampledSourceClock = static_cast<int64_t>(
          resampledSourceClock *
          (static_cast<float>(mTargetRate) / mSourceRate));
    }
    // Clamp after the rate conversion so that the divisor below is always at
    // least 1, even when a small value is scaled down by a ratio below 1.
    resampledSourceClock = std::max<int64_t>(1, resampledSourceClock);

    const float weightedCorrection =
        (1 - aCalculationWeight) * mCorrection +
        aCalculationWeight * mTargetClock / resampledSourceClock;

    MOZ_LOG(gMediaTrackGraphLog, LogLevel::Verbose,
            ("ClockDrift %p Calculated correction %.3f (with weight: %.1f -> "
             "%.3f) (buffer: %u, desired: %u, remaining: %u)",
             this, static_cast<float>(mTargetClock) / resampledSourceClock,
             aCalculationWeight, weightedCorrection, aBufferedFrames,
             mDesiredBuffering, aRemainingFrames));

    // Clamp to range [0.9, 1.1] to avoid distortion
    mCorrection = std::min(std::max(weightedCorrection, 0.9f), 1.1f);

    // Reset the counters to prepare for the next period.
    mTargetClock = 0;
    mSourceClock = 0;
  }

 public:
  // Nominal sample rate of the source.
  const uint32_t mSourceRate;
  // Nominal sample rate of the target.
  const uint32_t mTargetRate;
  // Amount of accumulated frames, expressed in milliseconds, after which the
  // correction is recalculated even if the buffer level is fine.
  const uint32_t mAdjustmentIntervalMs = 1000;
  // Amount of buffered frames that the correction tries to maintain.
  const uint32_t mDesiredBuffering;

 private:
  // Current correction ratio, always within [0.9, 1.1].
  float mCorrection = 1.0;

  // Frames accumulated on each clock since the last calculated correction.
  uint32_t mSourceClock = 0;
  uint32_t mTargetClock = 0;
};

/**
 * Correct the drift between two independent clocks, the source clock and the
 * target clock. The target clock is the master clock so the correction syncs
 * the drift of the source clock to the target. The nominal sampling rates of
 * source and target must be provided. If the source and the target operate at
 * different sample rates the drift correction will be performed on top of the
 * resampling from the source rate to the target rate.
 *
 * It works with AudioSegment in order to be able to be used from the
 * MediaTrackGraph/MediaTrack. The audio buffers are pre-allocated so that no
 * new allocation takes place during operation. The preallocation capacity is
 * 100ms for input and 100ms for output. The class consists of ClockDrift and
 * AudioResampler; check there for more details.
 *
 * The class is not thread-safe. The construction can happen in any thread but
 * the member methods must be used in a single thread that can be different than
 * the construction thread. Appropriate for being used in the high priority
 * audio thread.
 */
class AudioDriftCorrection final {
  // Lower bound, in milliseconds, applied to the buffering requested by the
  // caller.
  const uint32_t kMinBufferMs = 5;

 public:
  /**
   * aSourceRate and aTargetRate are the nominal sample rates of the two
   * clocks. aBufferMs is the desired buffering in milliseconds; it is raised
   * to kMinBufferMs when smaller and then converted to frames at the source
   * rate. aPrincipalHandle is handed over to the resampler.
   */
  AudioDriftCorrection(uint32_t aSourceRate, uint32_t aTargetRate,
                       uint32_t aBufferMs,
                       const PrincipalHandle& aPrincipalHandle)
      : mDesiredBuffering(std::max(aBufferMs, kMinBufferMs) * aSourceRate /
                          1000),
        mTargetRate(aTargetRate),
        mClockDrift(aSourceRate, aTargetRate, mDesiredBuffering),
        mResampler(aSourceRate, aTargetRate, mDesiredBuffering,
                   aPrincipalHandle) {}

  /**
   * Feed the source audio frames in aInput and request aOutputFrames target
   * audio frames. The duration of the input and the requested output are used
   * as the source clock and the target clock respectively. The input is
   * buffered internally so some latency exists. The returned AudioSegment
   * must be cleaned up because the internal buffer will be reused after
   * 100ms. When the resampler cannot produce anything, a warning is emitted
   * and aOutputFrames of null data (silence) is returned instead. Not
   * thread-safe.
   */
  AudioSegment RequestFrames(const AudioSegment& aInput,
                             uint32_t aOutputFrames) {
    // The input has to be appended before anything else because the resampler
    // picks up the sample format from the chunk. Non-empty input always goes
    // through the resampler since the clock might shift later.
    const auto inputFrames = aInput.GetDuration();
    if (inputFrames != 0) {
      mResampler.AppendInput(aInput);
    }

    // Let the drift estimation see this iteration and the state of the
    // resampler's input buffer.
    const uint32_t readableFrames = mResampler.InputReadableFrames();
    const uint32_t writableFrames = mResampler.InputWritableFrames();
    mClockDrift.UpdateClock(inputFrames, aOutputFrames, readableFrames,
                            writableFrames);

    // Push the (possibly new) corrected rate to the resampler.
    const TrackRate correctedRate = mTargetRate * mClockDrift.GetCorrection();
    mResampler.UpdateOutRate(correctedRate);

    // The resampler hands back an empty segment when it is short on frames.
    AudioSegment resampled = mResampler.Resample(aOutputFrames);
    if (!resampled.IsEmpty()) {
      return resampled;
    }

    NS_WARNING("Got nothing from the resampler");
    resampled.AppendNullData(aOutputFrames);
    return resampled;
  }

  // Number of frames currently readable from the resampler's input buffer.
  // Only accessible from the same thread that is driving RequestFrames().
  uint32_t CurrentBuffering() const { return mResampler.InputReadableFrames(); }

  // Desired buffering, in frames at the source rate.
  const uint32_t mDesiredBuffering;
  // Nominal sample rate of the target.
  const uint32_t mTargetRate;

 private:
  ClockDrift mClockDrift;
  AudioResampler mResampler;
};

};     // namespace mozilla
#endif /* MOZILLA_AUDIO_DRIFT_CORRECTION_H_ */