1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
|
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_NETEQ_NETEQ_H_
#define API_NETEQ_NETEQ_H_
#include <stddef.h> // Provide access to size_t.
#include <map>
#include <string>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_codec_pair_id.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/audio_format.h"
#include "api/rtp_headers.h"
#include "api/scoped_refptr.h"
namespace webrtc {
// Forward declarations. AudioFrame is only used through a pointer in this
// header (see NetEq::GetAudio), so its full definition is not required here.
// NOTE(review): AudioDecoderFactory and Clock are not referenced by any
// declaration in this header; presumably they are kept for the benefit of
// files that include it -- confirm before removing.
class AudioFrame;
class AudioDecoderFactory;
class Clock;
// Network statistics reported by NetEq; see NetEq::NetworkStatistics() and
// NetEq::CurrentNetworkStatistics().
// Every member has a default initializer of zero, consistent with the other
// statistics structs in this file, so that a default-constructed instance
// never holds indeterminate values.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms = 0;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms = 0;  // Target buffer size in ms.
  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found = 0;
  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate = 0;
  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate = 0;
  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate = 0;
  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate = 0;
  // Fraction of data coming from FEC/RED decoding (in Q14).
  uint16_t secondary_decoded_rate = 0;
  // Fraction of discarded FEC/RED data (in Q14).
  uint16_t secondary_discarded_rate = 0;
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms = 0;
  int median_waiting_time_ms = 0;
  int min_waiting_time_ms = 0;
  int max_waiting_time_ms = 0;
};
// Statistics that are accumulated over the whole lifetime of the NetEq class.
// None of these metrics is ever reset.
struct NetEqLifetimeStatistics {
  // The members in this first group correspond to the similarly-named fields
  // of the WebRTC stats specification:
  // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats
  uint64_t total_samples_received{0};
  uint64_t concealed_samples{0};
  uint64_t concealment_events{0};
  uint64_t jitter_buffer_delay_ms{0};
  uint64_t jitter_buffer_emitted_count{0};
  uint64_t jitter_buffer_target_delay_ms{0};
  uint64_t jitter_buffer_minimum_delay_ms{0};
  uint64_t inserted_samples_for_deceleration{0};
  uint64_t removed_samples_for_acceleration{0};
  uint64_t silent_concealed_samples{0};
  uint64_t fec_packets_received{0};
  uint64_t fec_packets_discarded{0};
  uint64_t packets_discarded{0};

  // The members from here on are not part of the specification.
  uint64_t delayed_packet_outage_samples{0};
  uint64_t delayed_packet_outage_events{0};
  // Sum of the relative packet arrival delays of all packets received so far.
  // The end-to-end delay of a packet is difficult to measure and is not
  // necessarily useful for judging jitter buffer performance, so a relative
  // arrival delay is reported instead. For a given packet it is defined as the
  // arrival delay compared to the first packet received, with that first
  // packet taken to have zero delay. To avoid clock drift, the "first" packet
  // can be made dynamic.
  uint64_t relative_packet_arrival_delay_ms{0};
  uint64_t jitter_buffer_packets_received{0};
  // An interruption is a loss-concealment event lasting at least 150 ms. The
  // two members below hold the number of such events and their total
  // duration.
  int32_t interruption_count{0};
  int32_t total_interruption_duration_ms{0};
  // Total number of comfort noise samples generated during DTX.
  uint64_t generated_noise_samples{0};
};
// Metrics describing which operations NetEq has performed, together with a
// view of its internal state.
struct NetEqOperationsAndState {
  // Cumulative sample counters; they are never reset. For reference, the total
  // number of output samples is available as
  // NetEqLifetimeStatistics::total_samples_received.
  uint64_t preemptive_samples{0};
  uint64_t accelerate_samples{0};
  // Number of buffer flushes that have taken place.
  uint64_t packet_buffer_flushes{0};

  // Everything from here on is non-cumulative.
  // Waiting time of the most recently decoded packet.
  uint64_t last_waiting_time_ms{0};
  // Packet buffer size plus jitter buffer size, in ms.
  uint64_t current_buffer_size_ms{0};
  // Current frame size, in ms.
  uint64_t current_frame_size_ms{0};
  // Set when the next packet is available.
  bool next_packet_available{false};
};
// This is the interface class for NetEq. All operations are pure virtual; the
// implementation is provided elsewhere.
class NetEq {
 public:
  // Configuration parameters for NetEq. The special member functions are only
  // declared here; their definitions are not part of this header.
  struct Config {
    Config();
    Config(const Config&);
    Config(Config&&);
    ~Config();
    Config& operator=(const Config&);
    Config& operator=(Config&&);

    // Returns a std::string describing this Config. The exact format is
    // determined by the implementation.
    std::string ToString() const;

    int sample_rate_hz = 48000;  // Initial value. Will change with input data.
    bool enable_post_decode_vad = false;
    size_t max_packets_in_buffer = 200;
    int max_delay_ms = 0;  // See SetMaximumDelay().
    int min_delay_ms = 0;  // See SetBaseMinimumDelayMs().
    bool enable_fast_accelerate = false;
    bool enable_muted_state = false;  // See GetAudio().
    bool enable_rtx_handling = false;
    absl::optional<AudioCodecPairId> codec_pair_id;
    bool for_test_no_time_stretching = false;  // Use only for testing.
  };

  // Status codes used by the methods that document a kOK/kFail result.
  enum ReturnCodes { kOK = 0, kFail = -1 };

  // Actions that NetEq can take. A value of this type can be passed to
  // GetAudio() as `action_override` for testing purposes.
  enum class Operation {
    kNormal,
    kMerge,
    kExpand,
    kAccelerate,
    kFastAccelerate,
    kPreemptiveExpand,
    kRfc3389Cng,
    kRfc3389CngNoPacket,
    kCodecInternalCng,
    kDtmf,
    kUndefined,
  };

  // NOTE(review): Mode is not referenced by any other declaration in this
  // header; presumably it describes the outcome of the most recent operation
  // -- confirm against the implementation.
  enum class Mode {
    kNormal,
    kExpand,
    kMerge,
    kAccelerateSuccess,
    kAccelerateLowEnergy,
    kAccelerateFail,
    kPreemptiveExpandSuccess,
    kPreemptiveExpandLowEnergy,
    kPreemptiveExpandFail,
    kRfc3389Cng,
    kCodecInternalCng,
    kCodecPlc,
    kDtmf,
    kError,
    kUndefined,
  };

  // Return type for GetDecoderFormat.
  struct DecoderFormat {
    int sample_rate_hz;
    int num_channels;
    SdpAudioFormat sdp_format;
  };

  // Virtual so that implementations can be destroyed through a NetEq pointer.
  virtual ~NetEq() = default;

  // Inserts a new packet into NetEq.
  // Returns 0 on success, -1 on failure.
  virtual int InsertPacket(const RTPHeader& rtp_header,
                           rtc::ArrayView<const uint8_t> payload) = 0;

  // Lets NetEq know that a packet arrived with an empty payload. This typically
  // happens when empty packets are used for probing the network channel, and
  // these packets use RTP sequence numbers from the same series as the actual
  // audio packets.
  virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;

  // Instructs NetEq to deliver 10 ms of audio data. The data is written to
  // `audio_frame`. All data in `audio_frame` is wiped; `data_`, `speech_type_`,
  // `num_channels_`, `sample_rate_hz_`, `samples_per_channel_`, and
  // `vad_activity_` are updated upon success. If an error is returned, some
  // fields may not have been updated, or may contain inconsistent values.
  // If muted state is enabled (through Config::enable_muted_state), `muted`
  // may be set to true after a prolonged expand period. When this happens, the
  // `data_` in `audio_frame` is not written, but should be interpreted as being
  // all zeros. For testing purposes, an override can be supplied in the
  // `action_override` argument, which will cause NetEq to take this action
  // next, instead of the action it would normally choose. An optional output
  // argument for fetching the current sample rate can be provided, which
  // will return the same value as last_output_sample_rate_hz() but will avoid
  // additional synchronization.
  // Returns kOK on success, or kFail in case of an error.
  virtual int GetAudio(
      AudioFrame* audio_frame,
      bool* muted,
      int* current_sample_rate_hz = nullptr,
      absl::optional<Operation> action_override = absl::nullopt) = 0;

  // Replaces the current set of decoders with the given one.
  virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;

  // Associates `rtp_payload_type` with the given codec, which NetEq will
  // instantiate when it needs it. Returns true iff successful.
  virtual bool RegisterPayloadType(int rtp_payload_type,
                                   const SdpAudioFormat& audio_format) = 0;

  // Removes `rtp_payload_type` from the codec database. Returns 0 on success,
  // -1 on failure. Removing a payload type that is not registered is ok and
  // will not result in an error.
  virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;

  // Removes all payload types from the codec database.
  virtual void RemoveAllPayloadTypes() = 0;

  // Sets a minimum delay in milliseconds for the packet buffer. The minimum is
  // maintained unless a higher latency is dictated by channel conditions.
  // Returns true if the minimum is successfully applied, otherwise false is
  // returned.
  virtual bool SetMinimumDelay(int delay_ms) = 0;

  // Sets a maximum delay in milliseconds for the packet buffer. The latency
  // will not exceed the given value, even if the required delay (given the
  // channel conditions) is higher. Calling this method has the same effect as
  // setting the `max_delay_ms` value in the NetEq::Config struct.
  // NOTE(review): the meaning of the bool result is not documented here;
  // presumably true on success, as for SetMinimumDelay() -- confirm.
  virtual bool SetMaximumDelay(int delay_ms) = 0;

  // Sets a base minimum delay in milliseconds for the packet buffer. The
  // minimum delay which is set via `SetMinimumDelay` can't be lower than the
  // base minimum delay. Calling this method is similar to setting the
  // `min_delay_ms` value in the NetEq::Config struct. Returns true if the base
  // minimum is successfully applied, otherwise false is returned.
  virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0;

  // Returns current value of base minimum delay in milliseconds.
  virtual int GetBaseMinimumDelayMs() const = 0;

  // Returns the current target delay in ms. This includes any extra delay
  // requested through SetMinimumDelay.
  virtual int TargetDelayMs() const = 0;

  // Returns the current total delay (packet buffer and sync buffer) in ms,
  // with smoothing applied to even out short-time fluctuations due to jitter.
  // The packet buffer part of the delay is not updated during DTX/CNG periods.
  virtual int FilteredCurrentDelayMs() const = 0;

  // Writes the current network statistics to `stats`. The statistics are reset
  // after the call.
  virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;

  // Current values only, not resetting any state.
  virtual NetEqNetworkStatistics CurrentNetworkStatistics() const = 0;

  // Returns a copy of this class's lifetime statistics. These statistics are
  // never reset.
  virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;

  // Returns statistics about the performed operations and internal state. The
  // cumulative counters in the result are never reset; the remaining fields
  // are documented in NetEqOperationsAndState as not cumulative.
  virtual NetEqOperationsAndState GetOperationsAndState() const = 0;

  // Enables post-decode VAD. When enabled, GetAudio() will return
  // kOutputVADPassive when the signal contains no speech.
  virtual void EnableVad() = 0;

  // Disables post-decode VAD.
  virtual void DisableVad() = 0;

  // Returns the RTP timestamp for the last sample delivered by GetAudio().
  // The return value will be empty if no valid timestamp is available.
  virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;

  // Returns the sample rate in Hz of the audio produced in the last GetAudio
  // call. If GetAudio has not been called yet, the configured sample rate
  // (Config::sample_rate_hz) is returned.
  virtual int last_output_sample_rate_hz() const = 0;

  // Returns the decoder info for the given payload type. Returns empty if no
  // such payload type was registered.
  virtual absl::optional<DecoderFormat> GetDecoderFormat(
      int payload_type) const = 0;

  // Flushes both the packet buffer and the sync buffer.
  virtual void FlushBuffers() = 0;

  // Enables NACK and sets the maximum size of the NACK list, which should be
  // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
  // enabled then the maximum NACK list size is modified accordingly.
  virtual void EnableNack(size_t max_nack_list_size) = 0;

  // Disables NACK (counterpart of EnableNack()).
  virtual void DisableNack() = 0;

  // Returns a list of RTP sequence numbers corresponding to packets to be
  // retransmitted, given an estimate of the round-trip time in milliseconds.
  virtual std::vector<uint16_t> GetNackList(
      int64_t round_trip_time_ms) const = 0;

  // Returns the length of the audio yet to play in the sync buffer.
  // Mainly intended for testing.
  virtual int SyncBufferSizeMs() const = 0;
};
} // namespace webrtc
#endif // API_NETEQ_NETEQ_H_
|