diff options
Diffstat (limited to 'third_party/libwebrtc/common_audio/vad/include')
-rw-r--r-- | third_party/libwebrtc/common_audio/vad/include/vad.h | 50 | ||||
-rw-r--r-- | third_party/libwebrtc/common_audio/vad/include/webrtc_vad.h | 87 |
2 files changed, 137 insertions, 0 deletions
diff --git a/third_party/libwebrtc/common_audio/vad/include/vad.h b/third_party/libwebrtc/common_audio/vad/include/vad.h new file mode 100644 index 0000000000..b15275b166 --- /dev/null +++ b/third_party/libwebrtc/common_audio/vad/include/vad.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_VAD_INCLUDE_VAD_H_ +#define COMMON_AUDIO_VAD_INCLUDE_VAD_H_ + +#include <memory> + +#include "common_audio/vad/include/webrtc_vad.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class Vad { + public: + enum Aggressiveness { + kVadNormal = 0, + kVadLowBitrate = 1, + kVadAggressive = 2, + kVadVeryAggressive = 3 + }; + + enum Activity { kPassive = 0, kActive = 1, kError = -1 }; + + virtual ~Vad() = default; + + // Calculates a VAD decision for the given audio frame. Valid sample rates + // are 8000, 16000, and 32000 Hz; the number of samples must be such that the + // frame is 10, 20, or 30 ms long. + virtual Activity VoiceActivity(const int16_t* audio, + size_t num_samples, + int sample_rate_hz) = 0; + + // Resets VAD state. + virtual void Reset() = 0; +}; + +// Returns a Vad instance that's implemented on top of WebRtcVad. 
+std::unique_ptr<Vad> CreateVad(Vad::Aggressiveness aggressiveness); + +} // namespace webrtc + +#endif // COMMON_AUDIO_VAD_INCLUDE_VAD_H_ diff --git a/third_party/libwebrtc/common_audio/vad/include/webrtc_vad.h b/third_party/libwebrtc/common_audio/vad/include/webrtc_vad.h new file mode 100644 index 0000000000..31e628f058 --- /dev/null +++ b/third_party/libwebrtc/common_audio/vad/include/webrtc_vad.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the VAD API calls. Specific function calls are + * given below. + */ + +#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT +#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ + +#include <stddef.h> +#include <stdint.h> + +typedef struct WebRtcVadInst VadInst; + +#ifdef __cplusplus +extern "C" { +#endif + +// Creates an instance of the VAD structure. +VadInst* WebRtcVad_Create(void); + +// Frees the dynamic memory of a specified VAD instance. +// +// - handle [i] : Pointer to VAD instance that should be freed. +void WebRtcVad_Free(VadInst* handle); + +// Initializes a VAD instance. +// +// - handle [i/o] : Instance that should be initialized. +// +// returns : 0 - (OK), +// -1 - (null pointer or Default mode could not be set). +int WebRtcVad_Init(VadInst* handle); + +// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more +// restrictive in reporting speech. Put in other words, the probability of being +// speech when the VAD returns 1 is increased with increasing mode. As a +// consequence, the missed detection rate also goes up. +// +// - handle [i/o] : VAD instance. 
+// - mode [i] : Aggressiveness mode (0, 1, 2, or 3). +// +// returns : 0 - (OK), +// -1 - (null pointer, mode could not be set or the VAD instance +// has not been initialized). +int WebRtcVad_set_mode(VadInst* handle, int mode); + +// Calculates a VAD decision for the `audio_frame`. For valid sampling rates and +// frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength(). +// +// - handle [i/o] : VAD Instance. Needs to be initialized by +// WebRtcVad_Init() before call. +// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 +// - audio_frame [i] : Audio frame buffer. +// - frame_length [i] : Length of audio frame buffer in number of samples. +// +// returns : 1 - (Active Voice), +// 0 - (Non-active Voice), +// -1 - (Error) +int WebRtcVad_Process(VadInst* handle, + int fs, + const int16_t* audio_frame, + size_t frame_length); + +// Checks for valid combinations of `rate` and `frame_length`. We support 10, +// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. +// +// - rate [i] : Sampling frequency (Hz). +// - frame_length [i] : Speech frame buffer length in number of samples. +// +// returns : 0 - (valid combination), -1 - (invalid combination) +int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); + +#ifdef __cplusplus +} +#endif + +#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT |