summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/media/base/video_adapter.cc
blob: 149071d1534e2bea188d2d1ac7a8bdad89c9a9bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
/*
 *  Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "media/base/video_adapter.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <utility>

#include "absl/types/optional.h"
#include "media/base/video_common.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/time_utils.h"
#include "system_wrappers/include/field_trial.h"

namespace {

struct Fraction {
  int numerator;
  int denominator;

  void DivideByGcd() {
    int g = cricket::GreatestCommonDivisor(numerator, denominator);
    numerator /= g;
    denominator /= g;
  }

  // Determines number of output pixels if both width and height of an input of
  // `input_pixels` pixels is scaled with the fraction numerator / denominator.
  int scale_pixel_count(int input_pixels) {
    return (numerator * numerator * input_pixels) / (denominator * denominator);
  }
};

// Round `value_to_round` to a multiple of `multiple`. Prefer rounding upwards,
// but never more than `max_value`.
int roundUp(int value_to_round, int multiple, int max_value) {
  const int rounded_value =
      (value_to_round + multiple - 1) / multiple * multiple;
  return rounded_value <= max_value ? rounded_value
                                    : (max_value / multiple * multiple);
}

// Generates a scale factor that makes `input_pixels` close to `target_pixels`,
// but no higher than `max_pixels`.
Fraction FindScale(int input_width,
                   int input_height,
                   int target_pixels,
                   int max_pixels,
                   bool variable_start_scale_factor) {
  // This function only makes sense for a positive target.
  RTC_DCHECK_GT(target_pixels, 0);
  RTC_DCHECK_GT(max_pixels, 0);
  RTC_DCHECK_GE(max_pixels, target_pixels);

  const int input_pixels = input_width * input_height;

  // Don't scale up original.
  if (target_pixels >= input_pixels)
    return Fraction{1, 1};

  Fraction current_scale = Fraction{1, 1};
  Fraction best_scale = Fraction{1, 1};

  if (variable_start_scale_factor) {
    // Start scaling down by 2/3 depending on `input_width` and `input_height`.
    if (input_width % 3 == 0 && input_height % 3 == 0) {
      // 2/3 (then alternates 3/4, 2/3, 3/4,...).
      current_scale = Fraction{6, 6};
    }
    if (input_width % 9 == 0 && input_height % 9 == 0) {
      // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...).
      current_scale = Fraction{36, 36};
    }
  }

  // The minimum (absolute) difference between the number of output pixels and
  // the target pixel count.
  int min_pixel_diff = std::numeric_limits<int>::max();
  if (input_pixels <= max_pixels) {
    // Start condition for 1/1 case, if it is less than max.
    min_pixel_diff = std::abs(input_pixels - target_pixels);
  }

  // Alternately scale down by 3/4 and 2/3. This results in fractions which are
  // effectively scalable. For instance, starting at 1280x720 will result in
  // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270,
  // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90.
  while (current_scale.scale_pixel_count(input_pixels) > target_pixels) {
    if (current_scale.numerator % 3 == 0 &&
        current_scale.denominator % 2 == 0) {
      // Multiply by 2/3.
      current_scale.numerator /= 3;
      current_scale.denominator /= 2;
    } else {
      // Multiply by 3/4.
      current_scale.numerator *= 3;
      current_scale.denominator *= 4;
    }

    int output_pixels = current_scale.scale_pixel_count(input_pixels);
    if (output_pixels <= max_pixels) {
      int diff = std::abs(target_pixels - output_pixels);
      if (diff < min_pixel_diff) {
        min_pixel_diff = diff;
        best_scale = current_scale;
      }
    }
  }
  best_scale.DivideByGcd();

  return best_scale;
}

absl::optional<std::pair<int, int>> Swap(
    const absl::optional<std::pair<int, int>>& in) {
  if (!in) {
    return absl::nullopt;
  }
  return std::make_pair(in->second, in->first);
}

}  // namespace

namespace cricket {

VideoAdapter::VideoAdapter(int source_resolution_alignment)
    : frames_in_(0),
      frames_out_(0),
      frames_scaled_(0),
      adaption_changes_(0),
      previous_width_(0),
      previous_height_(0),
      variable_start_scale_factor_(!webrtc::field_trial::IsDisabled(
          "WebRTC-Video-VariableStartScaleFactor")),
      source_resolution_alignment_(source_resolution_alignment),
      resolution_alignment_(source_resolution_alignment),
      resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
      resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
      max_framerate_request_(std::numeric_limits<int>::max()) {}

VideoAdapter::VideoAdapter() : VideoAdapter(1) {}

VideoAdapter::~VideoAdapter() {}

bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) {
  int max_fps = max_framerate_request_;
  if (output_format_request_.max_fps)
    max_fps = std::min(max_fps, *output_format_request_.max_fps);

  framerate_controller_.SetMaxFramerate(max_fps);
  return framerate_controller_.ShouldDropFrame(in_timestamp_ns);
}

bool VideoAdapter::AdaptFrameResolution(int in_width,
                                        int in_height,
                                        int64_t in_timestamp_ns,
                                        int* cropped_width,
                                        int* cropped_height,
                                        int* out_width,
                                        int* out_height) {
  webrtc::MutexLock lock(&mutex_);
  ++frames_in_;

  // The max output pixel count is the minimum of the requests from
  // OnOutputFormatRequest and OnResolutionFramerateRequest.
  int max_pixel_count = resolution_request_max_pixel_count_;

  // Select target aspect ratio and max pixel count depending on input frame
  // orientation.
  absl::optional<std::pair<int, int>> target_aspect_ratio;
  if (in_width > in_height) {
    target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio;
    if (output_format_request_.max_landscape_pixel_count)
      max_pixel_count = std::min(
          max_pixel_count, *output_format_request_.max_landscape_pixel_count);
  } else {
    target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio;
    if (output_format_request_.max_portrait_pixel_count)
      max_pixel_count = std::min(
          max_pixel_count, *output_format_request_.max_portrait_pixel_count);
  }

  int target_pixel_count =
      std::min(resolution_request_target_pixel_count_, max_pixel_count);

  // Drop the input frame if necessary.
  if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) {
    // Show VAdapt log every 90 frames dropped. (3 seconds)
    if ((frames_in_ - frames_out_) % 90 == 0) {
      // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
      // in default calls.
      RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
                       << " / out " << frames_out_ << " / in " << frames_in_
                       << " Changes: " << adaption_changes_
                       << " Input: " << in_width << "x" << in_height
                       << " timestamp: " << in_timestamp_ns
                       << " Output fps: " << max_framerate_request_ << "/"
                       << output_format_request_.max_fps.value_or(-1)
                       << " alignment: " << resolution_alignment_;
    }

    // Drop frame.
    return false;
  }

  // Calculate how the input should be cropped.
  if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
      target_aspect_ratio->second <= 0) {
    *cropped_width = in_width;
    *cropped_height = in_height;
  } else {
    const float requested_aspect =
        target_aspect_ratio->first /
        static_cast<float>(target_aspect_ratio->second);
    *cropped_width =
        std::min(in_width, static_cast<int>(in_height * requested_aspect));
    *cropped_height =
        std::min(in_height, static_cast<int>(in_width / requested_aspect));
  }
  const Fraction scale =
      FindScale(*cropped_width, *cropped_height, target_pixel_count,
                max_pixel_count, variable_start_scale_factor_);
  // Adjust cropping slightly to get correctly aligned output size and a perfect
  // scale factor.
  *cropped_width = roundUp(*cropped_width,
                           scale.denominator * resolution_alignment_, in_width);
  *cropped_height = roundUp(
      *cropped_height, scale.denominator * resolution_alignment_, in_height);
  RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
  RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);

  // Calculate final output size.
  *out_width = *cropped_width / scale.denominator * scale.numerator;
  *out_height = *cropped_height / scale.denominator * scale.numerator;
  RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
  RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);

  ++frames_out_;
  if (scale.numerator != scale.denominator)
    ++frames_scaled_;

  if (previous_width_ &&
      (previous_width_ != *out_width || previous_height_ != *out_height)) {
    ++adaption_changes_;
    RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
                     << " / out " << frames_out_ << " / in " << frames_in_
                     << " Changes: " << adaption_changes_
                     << " Input: " << in_width << "x" << in_height
                     << " Scale: " << scale.numerator << "/"
                     << scale.denominator << " Output: " << *out_width << "x"
                     << *out_height << " fps: " << max_framerate_request_ << "/"
                     << output_format_request_.max_fps.value_or(-1)
                     << " alignment: " << resolution_alignment_;
  }

  previous_width_ = *out_width;
  previous_height_ = *out_height;

  return true;
}

void VideoAdapter::OnOutputFormatRequest(
    const absl::optional<VideoFormat>& format) {
  absl::optional<std::pair<int, int>> target_aspect_ratio;
  absl::optional<int> max_pixel_count;
  absl::optional<int> max_fps;
  if (format) {
    target_aspect_ratio = std::make_pair(format->width, format->height);
    max_pixel_count = format->width * format->height;
    if (format->interval > 0)
      max_fps = rtc::kNumNanosecsPerSec / format->interval;
  }
  OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
}

void VideoAdapter::OnOutputFormatRequest(
    const absl::optional<std::pair<int, int>>& target_aspect_ratio,
    const absl::optional<int>& max_pixel_count,
    const absl::optional<int>& max_fps) {
  absl::optional<std::pair<int, int>> target_landscape_aspect_ratio;
  absl::optional<std::pair<int, int>> target_portrait_aspect_ratio;
  if (target_aspect_ratio && target_aspect_ratio->first > 0 &&
      target_aspect_ratio->second > 0) {
    // Maintain input orientation.
    const int max_side =
        std::max(target_aspect_ratio->first, target_aspect_ratio->second);
    const int min_side =
        std::min(target_aspect_ratio->first, target_aspect_ratio->second);
    target_landscape_aspect_ratio = std::make_pair(max_side, min_side);
    target_portrait_aspect_ratio = std::make_pair(min_side, max_side);
  }
  OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count,
                        target_portrait_aspect_ratio, max_pixel_count, max_fps);
}

void VideoAdapter::OnOutputFormatRequest(
    const absl::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
    const absl::optional<int>& max_landscape_pixel_count,
    const absl::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
    const absl::optional<int>& max_portrait_pixel_count,
    const absl::optional<int>& max_fps) {
  webrtc::MutexLock lock(&mutex_);

  OutputFormatRequest request = {
      .target_landscape_aspect_ratio = target_landscape_aspect_ratio,
      .max_landscape_pixel_count = max_landscape_pixel_count,
      .target_portrait_aspect_ratio = target_portrait_aspect_ratio,
      .max_portrait_pixel_count = max_portrait_pixel_count,
      .max_fps = max_fps};

  if (stashed_output_format_request_) {
    // Save the output format request for later use in case the encoder making
    // this call would become active, because currently all active encoders use
    // requested_resolution instead.
    stashed_output_format_request_ = request;
    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
                     << stashed_output_format_request_->ToString();
  } else {
    output_format_request_ = request;
    RTC_LOG(LS_INFO) << "Setting output_format_request_: "
                     << output_format_request_.ToString();
  }

  framerate_controller_.Reset();
}

void VideoAdapter::OnSinkWants(const rtc::VideoSinkWants& sink_wants) {
  webrtc::MutexLock lock(&mutex_);
  resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
  resolution_request_target_pixel_count_ =
      sink_wants.target_pixel_count.value_or(
          resolution_request_max_pixel_count_);
  max_framerate_request_ = sink_wants.max_framerate_fps;
  resolution_alignment_ = cricket::LeastCommonMultiple(
      source_resolution_alignment_, sink_wants.resolution_alignment);

  if (!sink_wants.aggregates) {
    RTC_LOG(LS_WARNING)
        << "These should always be created by VideoBroadcaster!";
    return;
  }

  // If requested_resolution is used, and there are no active encoders
  // that are NOT using requested_resolution (aka newapi), then override
  // calls to OnOutputFormatRequest and use values from requested_resolution
  // instead (combined with qualityscaling based on pixel counts above).
  if (webrtc::field_trial::IsDisabled(
          "WebRTC-Video-RequestedResolutionOverrideOutputFormatRequest")) {
    // kill-switch...
    return;
  }

  if (!sink_wants.requested_resolution) {
    if (stashed_output_format_request_) {
      // because current active_output_format_request is based on
      // requested_resolution logic, while current encoder(s) doesn't want that,
      // we have to restore the stashed request.
      RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: "
                       << stashed_output_format_request_->ToString();
      output_format_request_ = *stashed_output_format_request_;
      stashed_output_format_request_.reset();
    }
    return;
  }

  if (sink_wants.aggregates->any_active_without_requested_resolution) {
    return;
  }

  if (!stashed_output_format_request_) {
    // The active output format request is about to be rewritten by
    // request_resolution. We need to save it for later use in case the encoder
    // which doesn't use request_resolution logic become active in the future.
    stashed_output_format_request_ = output_format_request_;
    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
                     << stashed_output_format_request_->ToString();
  }

  auto res = *sink_wants.requested_resolution;
  auto pixel_count = res.width * res.height;
  output_format_request_.target_landscape_aspect_ratio =
      std::make_pair(res.width, res.height);
  output_format_request_.max_landscape_pixel_count = pixel_count;
  output_format_request_.target_portrait_aspect_ratio =
      std::make_pair(res.height, res.width);
  output_format_request_.max_portrait_pixel_count = pixel_count;
  output_format_request_.max_fps = max_framerate_request_;
  RTC_LOG(LS_INFO) << "Setting output_format_request_ based on sink_wants: "
                   << output_format_request_.ToString();
}

int VideoAdapter::GetTargetPixels() const {
  webrtc::MutexLock lock(&mutex_);
  return resolution_request_target_pixel_count_;
}

float VideoAdapter::GetMaxFramerate() const {
  webrtc::MutexLock lock(&mutex_);
  // Minimum of `output_format_request_.max_fps` and `max_framerate_request_` is
  // used to throttle frame-rate.
  int framerate =
      std::min(max_framerate_request_,
               output_format_request_.max_fps.value_or(max_framerate_request_));
  if (framerate == std::numeric_limits<int>::max()) {
    return std::numeric_limits<float>::infinity();
  } else {
    return max_framerate_request_;
  }
}

std::string VideoAdapter::OutputFormatRequest::ToString() const {
  rtc::StringBuilder oss;
  oss << "[ ";
  if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) &&
      max_landscape_pixel_count == max_portrait_pixel_count) {
    if (target_landscape_aspect_ratio) {
      oss << target_landscape_aspect_ratio->first << "x"
          << target_landscape_aspect_ratio->second;
    } else {
      oss << "unset-resolution";
    }
    if (max_landscape_pixel_count) {
      oss << " max_pixel_count: " << *max_landscape_pixel_count;
    }
  } else {
    oss << "[ landscape: ";
    if (target_landscape_aspect_ratio) {
      oss << target_landscape_aspect_ratio->first << "x"
          << target_landscape_aspect_ratio->second;
    } else {
      oss << "unset";
    }
    if (max_landscape_pixel_count) {
      oss << " max_pixel_count: " << *max_landscape_pixel_count;
    }
    oss << " ] [ portrait: ";
    if (target_portrait_aspect_ratio) {
      oss << target_portrait_aspect_ratio->first << "x"
          << target_portrait_aspect_ratio->second;
    }
    if (max_portrait_pixel_count) {
      oss << " max_pixel_count: " << *max_portrait_pixel_count;
    }
    oss << " ]";
  }
  oss << " max_fps: ";
  if (max_fps) {
    oss << *max_fps;
  } else {
    oss << "unset";
  }
  oss << " ]";
  return oss.Release();
}

}  // namespace cricket