diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm | |
parent | Initial commit. (diff) | |
download | thunderbird-upstream.tar.xz thunderbird-upstream.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm')
-rw-r--r-- | third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm | 488 |
1 files changed, 488 insertions, 0 deletions
// Reconstructed from the diff that adds
// third_party/libwebrtc/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
// (new file mode 100644, index 0000000000..3905b6857a).

/*
 * Copyright 2016 The WebRTC Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#import "voice_processing_audio_unit.h"

#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"

#import "base/RTCLogging.h"
#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"

#if !defined(NDEBUG)
// Debug-only helper that logs every field of `description`.
// The four-character format ID is byte-swapped to big-endian so that its
// bytes can be printed as a readable, NUL-terminated string.
static void LogStreamDescription(AudioStreamBasicDescription description) {
  char formatIdString[5];
  UInt32 formatId = CFSwapInt32HostToBig(description.mFormatID);
  bcopy(&formatId, formatIdString, 4);
  formatIdString[4] = '\0';
  RTCLog(@"AudioStreamBasicDescription: {\n"
          " mSampleRate: %.2f\n"
          " formatIDString: %s\n"
          " mFormatFlags: 0x%X\n"
          " mBytesPerPacket: %u\n"
          " mFramesPerPacket: %u\n"
          " mBytesPerFrame: %u\n"
          " mChannelsPerFrame: %u\n"
          " mBitsPerChannel: %u\n"
          " mReserved: %u\n}",
         description.mSampleRate, formatIdString,
         static_cast<unsigned int>(description.mFormatFlags),
         static_cast<unsigned int>(description.mBytesPerPacket),
         static_cast<unsigned int>(description.mFramesPerPacket),
         static_cast<unsigned int>(description.mBytesPerFrame),
         static_cast<unsigned int>(description.mChannelsPerFrame),
         static_cast<unsigned int>(description.mBitsPerChannel),
         static_cast<unsigned int>(description.mReserved));
}
#endif

namespace webrtc {
namespace ios_adm {

// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with as small delay between each. This factor sets the max number of allowed
// initialization attempts.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;

// Returns the automatic gain control (AGC) state on the processed microphone
// signal. Should be on by default for Voice Processing audio units.
//
// `enabled` receives the property value. The value is logged whether or not
// the query succeeded, so callers should initialize `*enabled` before calling.
// Returns the OSStatus reported by AudioUnitGetProperty().
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  UInt32 size = sizeof(*enabled);
  OSStatus result = AudioUnitGetProperty(audio_unit,
                                         kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                         kAudioUnitScope_Global,
                                         kInputBus,
                                         enabled,
                                         &size);
  RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  return result;
}

// Stores the configuration and the observer; no audio unit is created until
// Init() is called. `observer` must be non-null (checked in debug builds).
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
                                                   VoiceProcessingAudioUnitObserver* observer)
    : bypass_voice_processing_(bypass_voice_processing),
      observer_(observer),
      vpio_unit_(nullptr),
      state_(kInitRequired) {
  RTC_DCHECK(observer);
}

// Stops, uninitializes and disposes the audio unit if one exists.
VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
  DisposeAudioUnit();
}

// Size in bytes of one audio sample; GetFormat() derives the bits per channel
// from it (8 * kBytesPerSample).
const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;

// Creates the Voice Processing I/O audio unit and configures it: enables
// input and output, installs the render and input callbacks and disables
// buffer allocation on the input bus.
//
// Returns true and moves to kUninitialized on success. On any failure the
// audio unit (if created) is disposed, the error is logged and false is
// returned.
bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);
  if (found_vpio_unit_ref == nullptr) {
    // No matching component exists; do not hand a null component to
    // AudioComponentInstanceNew().
    RTCLogError(@"AudioComponentFindNext failed to find a Voice Processing "
                 "I/O audio unit.");
    return false;
  }

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder, we allocate our own.
  // TODO(henrika): not sure that it actually saves resource to make this call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback to be called by the I/O thread to us when input audio
  // is available. The recorded samples can then be obtained by calling the
  // AudioUnitRender() method.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}

// Returns the current lifecycle state of the audio unit.
VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
  return state_;
}

// Applies the stream format for `sample_rate` to both buses and initializes
// the audio unit, retrying AudioUnitInitialize() up to
// kMaxNumberOfAudioUnitInitializeAttempts times with a 100 ms pause between
// attempts. Afterwards either bypasses the built-in voice processing (when
// requested at construction) or verifies/enables the built-in AGC and reports
// the outcome through UMA histograms.
//
// Returns false if a format could not be set or initialization kept failing;
// otherwise sets the state to kInitialized and returns true. Failures in the
// bypass and AGC steps are logged but do not fail the call.
bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  OSStatus result = noErr;
  AudioStreamBasicDescription format = GetFormat(sample_rate);
  UInt32 size = sizeof(format);
#if !defined(NDEBUG)
  LogStreamDescription(format);
#endif

  // Set the format on the output scope of the input element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Output, kInputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Input, kOutputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error message in this case is -66635 which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_initialize_attempts = 0;
  result = AudioUnitInitialize(vpio_unit_);
  while (result != noErr) {
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)result);
    ++failed_initialize_attempts;
    if (failed_initialize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
    result = AudioUnitInitialize(vpio_unit_);
  }
  // The loop above only exits normally once AudioUnitInitialize() succeeded.
  RTCLog(@"Voice Processing I/O unit is now initialized.");

  if (bypass_voice_processing_) {
    // Attempt to disable builtin voice processing.
    UInt32 toggle = 1;
    result = AudioUnitSetProperty(vpio_unit_,
                                  kAUVoiceIOProperty_BypassVoiceProcessing,
                                  kAudioUnitScope_Global,
                                  kInputBus,
                                  &toggle,
                                  sizeof(toggle));
    if (result == noErr) {
      RTCLog(@"Successfully bypassed voice processing.");
    } else {
      RTCLogError(@"Failed to bypass voice processing. Error=%ld.", (long)result);
    }
    // A failed bypass is only logged; the unit is still reported initialized.
    state_ = kInitialized;
    return true;
  }

  // AGC should be enabled by default for Voice Processing I/O units but it is
  // checked below and enabled explicitly if needed. This scheme is used
  // to be absolutely sure that the AGC is enabled since we have seen cases
  // where only zeros are recorded and a disabled AGC could be one of the
  // reasons why it happens.
  int agc_was_enabled_by_default = 0;
  UInt32 agc_is_enabled = 0;
  result = GetAGCState(vpio_unit_, &agc_is_enabled);
  if (result != noErr) {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)result);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
  } else if (agc_is_enabled) {
    // Remember that the AGC was enabled by default. Will be used in UMA.
    agc_was_enabled_by_default = 1;
  } else {
    // AGC was initially disabled => try to enable it explicitly.
    UInt32 enable_agc = 1;
    result =
        AudioUnitSetProperty(vpio_unit_,
                             kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                             kAudioUnitScope_Global, kInputBus, &enable_agc,
                             sizeof(enable_agc));
    if (result != noErr) {
      RTCLogError(@"Failed to enable the built-in AGC. "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
    }
    // Read the state back so the histogram below reflects the actual value.
    result = GetAGCState(vpio_unit_, &agc_is_enabled);
    if (result != noErr) {
      RTCLogError(@"Failed to get AGC state (2nd attempt). "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
    }
  }

  // Track if the built-in AGC was enabled by default (as it should) or not.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_was_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_was_enabled_by_default);
  // As a final step, add an UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_is_enabled));

  state_ = kInitialized;
  return true;
}

// Starts the audio unit. Returns noErr and moves to kStarted on success;
// otherwise logs and returns the error from AudioOutputUnitStart() and leaves
// the state unchanged.
OSStatus VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  OSStatus result = AudioOutputUnitStart(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
    return result;
  } else {
    RTCLog(@"Started audio unit");
  }
  state_ = kStarted;
  return noErr;
}

// Stops the audio unit. Returns true and moves to kInitialized on success;
// on failure logs the error, leaves the state unchanged and returns false.
bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  OSStatus result = AudioOutputUnitStop(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Stopped audio unit");
  }

  state_ = kInitialized;
  return true;
}

// Uninitializes the audio unit. Returns true and moves to kUninitialized on
// success; on failure logs the error, leaves the state unchanged and returns
// false.
bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Unintializing audio unit.");

  OSStatus result = AudioUnitUninitialize(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Uninitialized audio unit.");
  }

  state_ = kUninitialized;
  return true;
}

// Forwards to AudioUnitRender() on the underlying unit and returns its
// status, logging any error. Requires that Init() has created the unit.
OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                    output_bus_number, num_frames, io_data);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
  }
  return result;
}

// Static render callback registered in Init(). `in_ref_con` is the
// VoiceProcessingAudioUnit instance that installed the callback; the call is
// forwarded to its NotifyGetPlayoutData().
OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyGetPlayoutData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

// Static input callback registered in Init(). `in_ref_con` is the
// VoiceProcessingAudioUnit instance that installed the callback; the call is
// forwarded to its NotifyDeliverRecordedData().
OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
                                               num_frames, io_data);
}

// Relays a playout-data request to the observer and returns its status.
OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                                     io_data);
}

// Relays a recorded-data notification to the observer and returns its status.
OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

// Builds the stream format used for both input and output at `sample_rate`:
// packed, signed-integer linear PCM with kBytesPerSample bytes per sample and
// kRTCAudioSessionPreferredNumberOfChannels channels.
AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  //
  // Value-initialize so that every field not assigned below (mReserved) is
  // zero rather than indeterminate; the struct is both logged and handed to
  // AudioUnitSetProperty().
  AudioStreamBasicDescription format = {};
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
  return format;
}

// Tears the audio unit down from whatever state it is in: stops it if
// started, uninitializes it if initialized, then disposes the component
// instance and clears `vpio_unit_`. Does nothing when no unit exists.
// Failures of Stop()/Uninitialize() are logged by those methods and
// otherwise ignored so that disposal is always attempted.
void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (vpio_unit_) {
    switch (state_) {
      case kStarted:
        Stop();
        [[fallthrough]];
      case kInitialized:
        Uninitialize();
        break;
      case kUninitialized:
      case kInitRequired:
        break;
    }

    RTCLog(@"Disposing audio unit.");
    OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
    if (result != noErr) {
      RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                  (long)result);
    }
    vpio_unit_ = nullptr;
  }
}

}  // namespace ios_adm
}  // namespace webrtc