summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm')
-rw-r--r--third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm488
1 files changed, 488 insertions, 0 deletions
diff --git a/third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm b/third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
new file mode 100644
index 0000000000..3905b6857a
--- /dev/null
+++ b/third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
@@ -0,0 +1,488 @@
+/*
+ * Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#import "voice_processing_audio_unit.h"
+
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/metrics.h"
+
+#import "base/RTCLogging.h"
+#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"
+
+#if !defined(NDEBUG)
+static void LogStreamDescription(AudioStreamBasicDescription description) {
+ char formatIdString[5];
+ UInt32 formatId = CFSwapInt32HostToBig(description.mFormatID);
+ bcopy(&formatId, formatIdString, 4);
+ formatIdString[4] = '\0';
+ RTCLog(@"AudioStreamBasicDescription: {\n"
+ " mSampleRate: %.2f\n"
+ " formatIDString: %s\n"
+ " mFormatFlags: 0x%X\n"
+ " mBytesPerPacket: %u\n"
+ " mFramesPerPacket: %u\n"
+ " mBytesPerFrame: %u\n"
+ " mChannelsPerFrame: %u\n"
+ " mBitsPerChannel: %u\n"
+ " mReserved: %u\n}",
+ description.mSampleRate, formatIdString,
+ static_cast<unsigned int>(description.mFormatFlags),
+ static_cast<unsigned int>(description.mBytesPerPacket),
+ static_cast<unsigned int>(description.mFramesPerPacket),
+ static_cast<unsigned int>(description.mBytesPerFrame),
+ static_cast<unsigned int>(description.mChannelsPerFrame),
+ static_cast<unsigned int>(description.mBitsPerChannel),
+ static_cast<unsigned int>(description.mReserved));
+}
+#endif
+
+namespace webrtc {
+namespace ios_adm {
+
+// Calls to AudioUnitInitialize() can fail if called back-to-back on different
+// ADM instances. A fall-back solution is to allow multiple sequential calls
+// with as small delay between each. This factor sets the max number of allowed
+// initialization attempts.
+static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
+// A VP I/O unit's bus 1 connects to input hardware (microphone).
+static const AudioUnitElement kInputBus = 1;
+// A VP I/O unit's bus 0 connects to output hardware (speaker).
+static const AudioUnitElement kOutputBus = 0;
+
+// Returns the automatic gain control (AGC) state on the processed microphone
+// signal. Should be on by default for Voice Processing audio units.
+static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
+ RTC_DCHECK(audio_unit);
+ UInt32 size = sizeof(*enabled);
+ OSStatus result = AudioUnitGetProperty(audio_unit,
+ kAUVoiceIOProperty_VoiceProcessingEnableAGC,
+ kAudioUnitScope_Global,
+ kInputBus,
+ enabled,
+ &size);
+ RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
+ return result;
+}
+
+VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
+ VoiceProcessingAudioUnitObserver* observer)
+ : bypass_voice_processing_(bypass_voice_processing),
+ observer_(observer),
+ vpio_unit_(nullptr),
+ state_(kInitRequired) {
+ RTC_DCHECK(observer);
+}
+
+VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
+ DisposeAudioUnit();
+}
+
+const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;
+
+bool VoiceProcessingAudioUnit::Init() {
+ RTC_DCHECK_EQ(state_, kInitRequired);
+
+ // Create an audio component description to identify the Voice Processing
+ // I/O audio unit.
+ AudioComponentDescription vpio_unit_description;
+ vpio_unit_description.componentType = kAudioUnitType_Output;
+ vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
+ vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
+ vpio_unit_description.componentFlags = 0;
+ vpio_unit_description.componentFlagsMask = 0;
+
+ // Obtain an audio unit instance given the description.
+ AudioComponent found_vpio_unit_ref =
+ AudioComponentFindNext(nullptr, &vpio_unit_description);
+
+ // Create a Voice Processing IO audio unit.
+ OSStatus result = noErr;
+ result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
+ if (result != noErr) {
+ vpio_unit_ = nullptr;
+ RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
+ return false;
+ }
+
+ // Enable input on the input scope of the input element.
+ UInt32 enable_input = 1;
+ result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
+ kAudioUnitScope_Input, kInputBus, &enable_input,
+ sizeof(enable_input));
+ if (result != noErr) {
+ DisposeAudioUnit();
+ RTCLogError(@"Failed to enable input on input scope of input element. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Enable output on the output scope of the output element.
+ UInt32 enable_output = 1;
+ result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
+ kAudioUnitScope_Output, kOutputBus,
+ &enable_output, sizeof(enable_output));
+ if (result != noErr) {
+ DisposeAudioUnit();
+ RTCLogError(@"Failed to enable output on output scope of output element. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Specify the callback function that provides audio samples to the audio
+ // unit.
+ AURenderCallbackStruct render_callback;
+ render_callback.inputProc = OnGetPlayoutData;
+ render_callback.inputProcRefCon = this;
+ result = AudioUnitSetProperty(
+ vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
+ kOutputBus, &render_callback, sizeof(render_callback));
+ if (result != noErr) {
+ DisposeAudioUnit();
+ RTCLogError(@"Failed to specify the render callback on the output bus. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Disable AU buffer allocation for the recorder, we allocate our own.
+ // TODO(henrika): not sure that it actually saves resource to make this call.
+ UInt32 flag = 0;
+ result = AudioUnitSetProperty(
+ vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
+ kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
+ if (result != noErr) {
+ DisposeAudioUnit();
+ RTCLogError(@"Failed to disable buffer allocation on the input bus. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Specify the callback to be called by the I/O thread to us when input audio
+ // is available. The recorded samples can then be obtained by calling the
+ // AudioUnitRender() method.
+ AURenderCallbackStruct input_callback;
+ input_callback.inputProc = OnDeliverRecordedData;
+ input_callback.inputProcRefCon = this;
+ result = AudioUnitSetProperty(vpio_unit_,
+ kAudioOutputUnitProperty_SetInputCallback,
+ kAudioUnitScope_Global, kInputBus,
+ &input_callback, sizeof(input_callback));
+ if (result != noErr) {
+ DisposeAudioUnit();
+ RTCLogError(@"Failed to specify the input callback on the input bus. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ state_ = kUninitialized;
+ return true;
+}
+
+VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
+ return state_;
+}
+
+bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
+ RTC_DCHECK_GE(state_, kUninitialized);
+ RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);
+
+ OSStatus result = noErr;
+ AudioStreamBasicDescription format = GetFormat(sample_rate);
+ UInt32 size = sizeof(format);
+#if !defined(NDEBUG)
+ LogStreamDescription(format);
+#endif
+
+ // Set the format on the output scope of the input element/bus.
+ result =
+ AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
+ kAudioUnitScope_Output, kInputBus, &format, size);
+ if (result != noErr) {
+ RTCLogError(@"Failed to set format on output scope of input bus. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Set the format on the input scope of the output element/bus.
+ result =
+ AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
+ kAudioUnitScope_Input, kOutputBus, &format, size);
+ if (result != noErr) {
+ RTCLogError(@"Failed to set format on input scope of output bus. "
+ "Error=%ld.",
+ (long)result);
+ return false;
+ }
+
+ // Initialize the Voice Processing I/O unit instance.
+ // Calls to AudioUnitInitialize() can fail if called back-to-back on
+ // different ADM instances. The error message in this case is -66635 which is
+ // undocumented. Tests have shown that calling AudioUnitInitialize a second
+ // time, after a short sleep, avoids this issue.
+ // See webrtc:5166 for details.
+ int failed_initalize_attempts = 0;
+ result = AudioUnitInitialize(vpio_unit_);
+ while (result != noErr) {
+ RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
+ "Error=%ld.",
+ (long)result);
+ ++failed_initalize_attempts;
+ if (failed_initalize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
+ // Max number of initialization attempts exceeded, hence abort.
+ RTCLogError(@"Too many initialization attempts.");
+ return false;
+ }
+ RTCLog(@"Pause 100ms and try audio unit initialization again...");
+ [NSThread sleepForTimeInterval:0.1f];
+ result = AudioUnitInitialize(vpio_unit_);
+ }
+ if (result == noErr) {
+ RTCLog(@"Voice Processing I/O unit is now initialized.");
+ }
+
+ if (bypass_voice_processing_) {
+ // Attempt to disable builtin voice processing.
+ UInt32 toggle = 1;
+ result = AudioUnitSetProperty(vpio_unit_,
+ kAUVoiceIOProperty_BypassVoiceProcessing,
+ kAudioUnitScope_Global,
+ kInputBus,
+ &toggle,
+ sizeof(toggle));
+ if (result == noErr) {
+ RTCLog(@"Successfully bypassed voice processing.");
+ } else {
+ RTCLogError(@"Failed to bypass voice processing. Error=%ld.", (long)result);
+ }
+ state_ = kInitialized;
+ return true;
+ }
+
+ // AGC should be enabled by default for Voice Processing I/O units but it is
+ // checked below and enabled explicitly if needed. This scheme is used
+ // to be absolutely sure that the AGC is enabled since we have seen cases
+ // where only zeros are recorded and a disabled AGC could be one of the
+ // reasons why it happens.
+ int agc_was_enabled_by_default = 0;
+ UInt32 agc_is_enabled = 0;
+ result = GetAGCState(vpio_unit_, &agc_is_enabled);
+ if (result != noErr) {
+ RTCLogError(@"Failed to get AGC state (1st attempt). "
+ "Error=%ld.",
+ (long)result);
+ // Example of error code: kAudioUnitErr_NoConnection (-10876).
+ // All error codes related to audio units are negative and are therefore
+ // converted into a postive value to match the UMA APIs.
+ RTC_HISTOGRAM_COUNTS_SPARSE_100000(
+ "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
+ } else if (agc_is_enabled) {
+ // Remember that the AGC was enabled by default. Will be used in UMA.
+ agc_was_enabled_by_default = 1;
+ } else {
+ // AGC was initially disabled => try to enable it explicitly.
+ UInt32 enable_agc = 1;
+ result =
+ AudioUnitSetProperty(vpio_unit_,
+ kAUVoiceIOProperty_VoiceProcessingEnableAGC,
+ kAudioUnitScope_Global, kInputBus, &enable_agc,
+ sizeof(enable_agc));
+ if (result != noErr) {
+ RTCLogError(@"Failed to enable the built-in AGC. "
+ "Error=%ld.",
+ (long)result);
+ RTC_HISTOGRAM_COUNTS_SPARSE_100000(
+ "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
+ }
+ result = GetAGCState(vpio_unit_, &agc_is_enabled);
+ if (result != noErr) {
+ RTCLogError(@"Failed to get AGC state (2nd attempt). "
+ "Error=%ld.",
+ (long)result);
+ RTC_HISTOGRAM_COUNTS_SPARSE_100000(
+ "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
+ }
+ }
+
+ // Track if the built-in AGC was enabled by default (as it should) or not.
+ RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
+ agc_was_enabled_by_default);
+ RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
+ agc_was_enabled_by_default);
+ // As a final step, add an UMA histogram for tracking the AGC state.
+ // At this stage, the AGC should be enabled, and if it is not, more work is
+ // needed to find out the root cause.
+ RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
+ RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
+ static_cast<unsigned int>(agc_is_enabled));
+
+ state_ = kInitialized;
+ return true;
+}
+
+OSStatus VoiceProcessingAudioUnit::Start() {
+ RTC_DCHECK_GE(state_, kUninitialized);
+ RTCLog(@"Starting audio unit.");
+
+ OSStatus result = AudioOutputUnitStart(vpio_unit_);
+ if (result != noErr) {
+ RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
+ return result;
+ } else {
+ RTCLog(@"Started audio unit");
+ }
+ state_ = kStarted;
+ return noErr;
+}
+
+bool VoiceProcessingAudioUnit::Stop() {
+ RTC_DCHECK_GE(state_, kUninitialized);
+ RTCLog(@"Stopping audio unit.");
+
+ OSStatus result = AudioOutputUnitStop(vpio_unit_);
+ if (result != noErr) {
+ RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
+ return false;
+ } else {
+ RTCLog(@"Stopped audio unit");
+ }
+
+ state_ = kInitialized;
+ return true;
+}
+
+bool VoiceProcessingAudioUnit::Uninitialize() {
+ RTC_DCHECK_GE(state_, kUninitialized);
+ RTCLog(@"Unintializing audio unit.");
+
+ OSStatus result = AudioUnitUninitialize(vpio_unit_);
+ if (result != noErr) {
+ RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
+ return false;
+ } else {
+ RTCLog(@"Uninitialized audio unit.");
+ }
+
+ state_ = kUninitialized;
+ return true;
+}
+
+OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
+ const AudioTimeStamp* time_stamp,
+ UInt32 output_bus_number,
+ UInt32 num_frames,
+ AudioBufferList* io_data) {
+ RTC_DCHECK(vpio_unit_) << "Init() not called.";
+
+ OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
+ output_bus_number, num_frames, io_data);
+ if (result != noErr) {
+ RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
+ }
+ return result;
+}
+
+OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
+ void* in_ref_con,
+ AudioUnitRenderActionFlags* flags,
+ const AudioTimeStamp* time_stamp,
+ UInt32 bus_number,
+ UInt32 num_frames,
+ AudioBufferList* io_data) {
+ VoiceProcessingAudioUnit* audio_unit =
+ static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
+ return audio_unit->NotifyGetPlayoutData(flags, time_stamp, bus_number,
+ num_frames, io_data);
+}
+
+OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
+ void* in_ref_con,
+ AudioUnitRenderActionFlags* flags,
+ const AudioTimeStamp* time_stamp,
+ UInt32 bus_number,
+ UInt32 num_frames,
+ AudioBufferList* io_data) {
+ VoiceProcessingAudioUnit* audio_unit =
+ static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
+ return audio_unit->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
+ num_frames, io_data);
+}
+
+OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
+ AudioUnitRenderActionFlags* flags,
+ const AudioTimeStamp* time_stamp,
+ UInt32 bus_number,
+ UInt32 num_frames,
+ AudioBufferList* io_data) {
+ return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
+ io_data);
+}
+
+OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
+ AudioUnitRenderActionFlags* flags,
+ const AudioTimeStamp* time_stamp,
+ UInt32 bus_number,
+ UInt32 num_frames,
+ AudioBufferList* io_data) {
+ return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
+ num_frames, io_data);
+}
+
+AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
+ Float64 sample_rate) const {
+ // Set the application formats for input and output:
+ // - use same format in both directions
+ // - avoid resampling in the I/O unit by using the hardware sample rate
+ // - linear PCM => noncompressed audio data format with one frame per packet
+ // - no need to specify interleaving since only mono is supported
+ AudioStreamBasicDescription format;
+ RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
+ format.mSampleRate = sample_rate;
+ format.mFormatID = kAudioFormatLinearPCM;
+ format.mFormatFlags =
+ kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
+ format.mBytesPerPacket = kBytesPerSample;
+ format.mFramesPerPacket = 1; // uncompressed.
+ format.mBytesPerFrame = kBytesPerSample;
+ format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
+ format.mBitsPerChannel = 8 * kBytesPerSample;
+ return format;
+}
+
+void VoiceProcessingAudioUnit::DisposeAudioUnit() {
+ if (vpio_unit_) {
+ switch (state_) {
+ case kStarted:
+ Stop();
+ [[fallthrough]];
+ case kInitialized:
+ Uninitialize();
+ break;
+ case kUninitialized:
+ case kInitRequired:
+ break;
+ }
+
+ RTCLog(@"Disposing audio unit.");
+ OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
+ if (result != noErr) {
+ RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
+ (long)result);
+ }
+ vpio_unit_ = nullptr;
+ }
+}
+
+} // namespace ios_adm
+} // namespace webrtc