third_party/libwebrtc/common_audio/vad/vad_filterbank_unittest.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "common_audio/vad/vad_unittest.h"
#include "test/gtest.h"

extern "C" {
#include "common_audio/vad/vad_core.h"
#include "common_audio/vad/vad_filterbank.h"
}

namespace webrtc {
namespace test {

const int kNumValidFrameLengths = 3;

TEST_F(VadTest, vad_filterbank) {
  VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
  static const int16_t kReference[kNumValidFrameLengths] = {48, 11, 11};
  static const int16_t kFeatures[kNumValidFrameLengths * kNumChannels] = {
      1213, 759,  587,  462,  434,  272,  1479, 1385, 1291,
      1200, 1103, 1099, 1732, 1692, 1681, 1629, 1436, 1436};
  static const int16_t kOffsetVector[kNumChannels] = {368, 368, 272,
                                                      176, 176, 176};
  int16_t features[kNumChannels];

  // Construct a speech signal that will trigger the VAD in all modes. It is
  // known that (i * i) will wrap around, but that doesn't matter in this case.
  int16_t speech[kMaxFrameLength];
  for (size_t i = 0; i < kMaxFrameLength; ++i) {
    speech[i] = static_cast<int16_t>(i * i);
  }

  int frame_length_index = 0;
  ASSERT_EQ(0, WebRtcVad_InitCore(self));
  for (size_t j = 0; j < kFrameLengthsSize; ++j) {
    if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
      EXPECT_EQ(kReference[frame_length_index],
                WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                            features));
      for (int k = 0; k < kNumChannels; ++k) {
        EXPECT_EQ(kFeatures[k + frame_length_index * kNumChannels],
                  features[k]);
      }
      frame_length_index++;
    }
  }
  EXPECT_EQ(kNumValidFrameLengths, frame_length_index);

  // Verify that all zeros in gives kOffsetVector out.
  memset(speech, 0, sizeof(speech));
  ASSERT_EQ(0, WebRtcVad_InitCore(self));
  for (size_t j = 0; j < kFrameLengthsSize; ++j) {
    if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
      EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                               features));
      for (int k = 0; k < kNumChannels; ++k) {
        EXPECT_EQ(kOffsetVector[k], features[k]);
      }
    }
  }

  // Verify that all ones in gives kOffsetVector out. Any other constant input
  // will have a small impact in the sub bands.
  for (size_t i = 0; i < kMaxFrameLength; ++i) {
    speech[i] = 1;
  }
  for (size_t j = 0; j < kFrameLengthsSize; ++j) {
    if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
      ASSERT_EQ(0, WebRtcVad_InitCore(self));
      EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                               features));
      for (int k = 0; k < kNumChannels; ++k) {
        EXPECT_EQ(kOffsetVector[k], features[k]);
      }
    }
  }

  free(self);
}
}  // namespace test
}  // namespace webrtc