1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
/*
* Copyright (C) 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ReverbConvolver.h"
#include "ReverbConvolverStage.h"
using namespace mozilla;
namespace WebCore {
// Size handed to the m_inputBuffer constructor. process() writes one
// WEBAUDIO_BLOCK_SIZE block into that buffer per call.
// NOTE(review): presumably a capacity in sample frames -- confirm against the
// input buffer class.
const int InputBufferSize = 8 * 16384;
// We only process the leading portion of the impulse response in the real-time
// thread. When background threads are in use, the constructor hands every
// stage whose offset into the impulse response is greater than this limit to
// the background thread. It turns out then, that the background thread has
// about 278msec of scheduling slop. Empirically, this has been found to be a
// good compromise between giving enough time for scheduling slop, while still
// minimizing the amount of processing done in the primary (high-priority)
// thread. This was found to be a good value on Mac OS X, and may work well on
// other platforms as well, assuming the very rough scheduling latencies are
// similar on these time-scales. Of course, this code may need to be tuned for
// individual platforms if this assumption is found to be incorrect.
// NOTE(review): one render block (WEBAUDIO_BLOCK_SIZE) is subtracted from the
// 12288-frame budget; the reason is not documented here -- confirm.
const size_t RealtimeFrameLimit = 8192 + 4096 // 12288 frames, ~278msec @ 44.1KHz
- WEBAUDIO_BLOCK_SIZE;
// First stage will have FFT size MinFFTSize - successive stages will double
// in size each time until we hit the maximum size.
const size_t MinFFTSize = 256;
// If we are using background threads then don't exceed this FFT size for the
// stages which run in the real-time thread. This avoids having only one or
// two large stages (size 16384 or so) at the end which take a lot of time
// every several processing slices. This way we amortize the cost over more
// processing slices.
const size_t MaxRealtimeFFTSize = 4096;
// Builds the partitioned convolver for one impulse response.
//
// The response is cut into consecutive stages. The first stage uses an FFT of
// MinFFTSize and every following stage doubles the FFT size until a cap is
// reached: MaxRealtimeFFTSize for real-time stages when background threads are
// in use, maxFFTSize otherwise. With background threads enabled, stages whose
// offset lies beyond RealtimeFrameLimit are stored in m_backgroundStages and a
// worker task is posted to run them; all other stages go to m_stages.
//
//   impulseResponseData, impulseResponseLength
//       The response and its length; both are forwarded to every stage.
//   maxFFTSize
//       Upper bound on the FFT size of stages not capped by
//       MaxRealtimeFFTSize.
//   convolverRenderPhase
//       Base offset added to each stage's render phase.
//   useBackgroundThreads
//       Stored in m_useBackgroundThreads; also treated as the indicator that
//       a real-time constraint applies.
//   aAllocationFailure
//       Must be non-null. Set to true when the accumulation buffer cannot be
//       allocated, in which case construction stops before any stage is
//       created; set to false otherwise.
ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
                                 size_t impulseResponseLength,
                                 size_t maxFFTSize, size_t convolverRenderPhase,
                                 bool useBackgroundThreads,
                                 bool* aAllocationFailure)
    : m_impulseResponseLength(impulseResponseLength),
      m_accumulationBuffer(),
      m_inputBuffer(InputBufferSize),
      m_backgroundThread("ConvolverWorker"),
      m_backgroundThreadMonitor("ConvolverMonitor"),
      m_useBackgroundThreads(useBackgroundThreads),
      m_wantsToExit(false),
      m_moreInputBuffered(false) {
  // The accumulation buffer covers the whole response plus one render block.
  const bool allocated = m_accumulationBuffer.allocate(
      impulseResponseLength + WEBAUDIO_BLOCK_SIZE);
  *aAllocationFailure = !allocated;
  if (!allocated) {
    return;
  }

  // For now, "uses background threads" doubles as "has a real-time
  // constraint"; without them we assume a command-line style offline run.
  const bool hasRealtimeConstraint = useBackgroundThreads;

  // No added latency: the first FFT stage is small enough to produce output
  // in the first block.
  size_t reverbTotalLatency = 0;

  size_t stageStart = 0;  // Offset of the next stage within the response.
  size_t nextPhase = 0;   // Phase stagger applied to the next stage.
  size_t fftSize = MinFFTSize;

  while (stageStart < impulseResponseLength) {
    size_t stageLength = fftSize / 2;

    // The final stage may straddle the end of the response. Trim it, then
    // pick the smallest FFT that still covers the trimmed length.
    if (stageStart + stageLength > impulseResponseLength) {
      stageLength = impulseResponseLength - stageStart;
      fftSize = MinFFTSize;
      while (fftSize < stageLength * 2) {
        fftSize *= 2;
      }
    }

    // Staggering the phase keeps the stages' FFTs from all landing on the
    // same block.
    int renderPhase = convolverRenderPhase + nextPhase;

    UniquePtr<ReverbConvolverStage> stage(new ReverbConvolverStage(
        impulseResponseData, impulseResponseLength, reverbTotalLatency,
        stageStart, stageLength, fftSize, renderPhase, &m_accumulationBuffer));

    const bool isBackgroundStage =
        this->useBackgroundThreads() && stageStart > RealtimeFrameLimit;
    if (isBackgroundStage) {
      m_backgroundStages.AppendElement(std::move(stage));
    } else {
      m_stages.AppendElement(std::move(stage));
    }

    // Advance to the next stage and choose its FFT size and phase.
    stageStart += stageLength;
    fftSize *= 2;

    const bool hitRealtimeCap = hasRealtimeConstraint && !isBackgroundStage &&
                                fftSize > MaxRealtimeFFTSize;
    if (hitRealtimeCap) {
      fftSize = MaxRealtimeFFTSize;
      // Custom phase positions for all but the first of the largest
      // real-time stages, spreading out their work. No two FFTs of size
      // 1024, 2048 or 4096 are performed while processing the same block.
      // The first MaxRealtimeFFTSize = 4096 stage, at the end of the
      // doubling, performs its FFT at block 7; the size-2048 FFTs run in
      // blocks 3 + 8 * n and the size-1024 FFTs in blocks 1 + 4 * n.
      const uint32_t phaseLookup[] = {14, 0, 10, 4};
      const size_t slot = m_stages.Length() % ArrayLength(phaseLookup);
      nextPhase = WEBAUDIO_BLOCK_SIZE * phaseLookup[slot];
    } else if (fftSize > maxFFTSize) {
      fftSize = maxFFTSize;
      // A prime step spreads the FFTs so that every available phase
      // position gets used once there are enough stages.
      nextPhase += 5 * WEBAUDIO_BLOCK_SIZE;
    } else if (stageLength > WEBAUDIO_BLOCK_SIZE) {
      // While stages are still doubling, the next FFT falls mid-way between
      // the FFTs of the stage just created.
      nextPhase = stageLength - WEBAUDIO_BLOCK_SIZE;
    }
  }

  // Start the background thread only if there is work for it.
  // FIXME: would be better to up the thread priority here. It doesn't need to
  // be real-time, but higher than the default...
  if (!this->useBackgroundThreads() || m_backgroundStages.Length() == 0) {
    return;
  }
  if (!m_backgroundThread.Start()) {
    NS_WARNING("Cannot start convolver thread.");
    return;
  }
  m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod(
      "WebCore::ReverbConvolver::backgroundThreadEntry", this,
      &ReverbConvolver::backgroundThreadEntry));
}
// Shuts the worker down, if one is running: raises the exit flag, wakes the
// worker through the monitor so it can observe the flag, then stops the
// background thread.
ReverbConvolver::~ReverbConvolver() {
  // Nothing to tear down unless background threads are in use and the thread
  // is actually running.
  if (!useBackgroundThreads() || !m_backgroundThread.IsRunning()) {
    return;
  }

  m_wantsToExit = true;

  // Wake the worker so that it leaves its wait loop and can return.
  {
    MonitorAutoLock locker(m_backgroundThreadMonitor);
    m_moreInputBuffered = true;
    m_backgroundThreadMonitor.Notify();
  }

  m_backgroundThread.Stop();
}
// Memory reporting: returns the size of this object as measured by
// aMallocSizeOf, plus the storage of both stage arrays, every non-null stage
// they hold, the accumulation buffer and the input buffer.
size_t ReverbConvolver::sizeOfIncludingThis(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  size_t total = aMallocSizeOf(this);

  // Real-time stages: the array's own storage, then each stage in it.
  total += m_stages.ShallowSizeOfExcludingThis(aMallocSizeOf);
  const size_t realtimeCount = m_stages.Length();
  for (size_t idx = 0; idx < realtimeCount; ++idx) {
    if (!m_stages[idx]) {
      continue;
    }
    total += m_stages[idx]->sizeOfIncludingThis(aMallocSizeOf);
  }

  // Background stages, measured the same way.
  total += m_backgroundStages.ShallowSizeOfExcludingThis(aMallocSizeOf);
  const size_t backgroundCount = m_backgroundStages.Length();
  for (size_t idx = 0; idx < backgroundCount; ++idx) {
    if (!m_backgroundStages[idx]) {
      continue;
    }
    total += m_backgroundStages[idx]->sizeOfIncludingThis(aMallocSizeOf);
  }

  // NB: The buffer sizes are static, so even though the buffers might be
  // accessed in another thread it's safe to measure them.
  total += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf);
  total += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf);

  // Possible future measurements:
  // - m_backgroundThread
  // - m_backgroundThreadMonitor
  return total;
}
// Worker task that the constructor posts to m_backgroundThread when at least
// one background stage exists. Repeats until m_wantsToExit is set: sleeps on
// m_backgroundThreadMonitor until m_moreInputBuffered or m_wantsToExit becomes
// true, then runs all background stages repeatedly until the first stage's
// input read index equals the input buffer's write index sampled after waking.
void ReverbConvolver::backgroundThreadEntry() {
while (!m_wantsToExit) {
// Wait for realtime thread to give us more input
// NOTE(review): the flag is cleared here without holding the monitor, so a
// `true` stored by process() just before this line is overwritten and the
// worker then sleeps until process() sets the flag and notifies again on a
// later call -- confirm that this delay is acceptable.
m_moreInputBuffered = false;
{
MonitorAutoLock locker(m_backgroundThreadMonitor);
// Re-check the condition after every wake-up; the destructor sets
// m_wantsToExit and the flag before notifying, which also ends this wait.
while (!m_moreInputBuffered && !m_wantsToExit)
m_backgroundThreadMonitor.Wait();
}
// Process all of the stages until their read indices reach the input
// buffer's write index
// The write index is sampled once per wake-up; input written while the
// stages are running is picked up on a later pass of the outer loop.
int writeIndex = m_inputBuffer.writeIndex();
// Even though it doesn't seem like every stage needs to maintain its own
// version of readIndex we do this in case we want to run in more than one
// background thread.
// readIndex is only used for the comparison in the loop condition below.
// m_backgroundStages[0] relies on the array being non-empty; the constructor
// only posts this task when m_backgroundStages.Length() > 0.
int readIndex;
while ((readIndex = m_backgroundStages[0]->inputReadIndex()) !=
writeIndex) { // FIXME: do better to detect buffer overrun...
// Accumulate contributions from each stage
for (size_t i = 0; i < m_backgroundStages.Length(); ++i)
m_backgroundStages[i]->processInBackground(this);
}
}
}
// Processes one render block: writes WEBAUDIO_BLOCK_SIZE input samples into
// the shared input buffer, runs every real-time stage on the input, reads
// WEBAUDIO_BLOCK_SIZE output samples from the accumulation buffer into
// destinationChannelData, and finally tries to wake the background thread.
// Asserts, and returns without doing anything, if either pointer is null.
void ReverbConvolver::process(const float* sourceChannelData,
                              float* destinationChannelData) {
  bool isDataSafe = sourceChannelData && destinationChannelData;
  MOZ_ASSERT(isDataSafe);
  if (!isDataSafe) {
    return;
  }

  // Feed input buffer (read by all threads).
  m_inputBuffer.write(sourceChannelData, WEBAUDIO_BLOCK_SIZE);

  // Let each real-time stage add its contribution.
  for (size_t idx = 0; idx < m_stages.Length(); ++idx) {
    m_stages[idx]->process(sourceChannelData);
  }

  // The finished block is read out of the accumulation buffer, which clears
  // the region just read.
  m_accumulationBuffer.readAndClear(destinationChannelData,
                                    WEBAUDIO_BLOCK_SIZE);

  // More input is buffered now, so wake the background thread. TryLock() is
  // used rather than a MonitorAutoLock because this runs on the real-time
  // thread, where a contended lock is a disaster (it causes audio glitching).
  // Failing to signal now and then is fine: this method is called repeatedly
  // and frequently (around every 3ms), so the next call will get to it, and
  // the background thread is processing well into the future with a
  // considerable amount of leeway.
  if (!m_backgroundThreadMonitor.TryLock()) {
    return;
  }
  m_moreInputBuffered = true;
  m_backgroundThreadMonitor.Notify();
  m_backgroundThreadMonitor.Unlock();
}
} // namespace WebCore
|