summary | refs | log | tree | commit | diff | stats
path: root/third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc
parent: Initial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc')
-rw-r--r--third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc367
1 files changed, 367 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc b/third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc
new file mode 100644
index 0000000000..175ed3464b
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_vp9_ref_finder.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+// Entry point for a newly received VP9 frame.
+//
+// Copies the temporal/spatial layer indices and the picture id (masked with
+// `kFrameIdLength - 1`) from the RTP VP9 header onto `frame`, then resolves
+// the frame's references through either the flexible-mode path or the
+// GOF (scalability structure) path.
+//
+// Returns the frames that are ready to be handed off: `frame` itself plus any
+// previously stashed frames that could be resolved as a consequence. The
+// returned vector is empty when `frame` was stashed or dropped.
+RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+      frame->GetRtpVideoHeader().video_type_header);
+
+  if (codec_header.temporal_idx != kNoTemporalIdx)
+    frame->SetTemporalIndex(codec_header.temporal_idx);
+  frame->SetSpatialIndex(codec_header.spatial_idx);
+  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
+
+  FrameDecision decision;
+  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
+      codec_header.spatial_idx >= kMaxSpatialLayers) {
+    // Layer indices outside the supported range cannot be tracked.
+    decision = kDrop;
+  } else if (codec_header.flexible_mode) {
+    decision = ManageFrameFlexible(frame.get(), codec_header);
+  } else if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
+    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
+                           "non-flexible mode.";
+    decision = kDrop;
+  } else {
+    const int64_t unwrapped_tl0 =
+        tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
+    decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
+
+    if (decision == kStash) {
+      // Make room before inserting so the stash never holds more than
+      // `kMaxStashedFrames` entries. New frames go to the front, so the
+      // entry at the back is the one that has been stashed the longest.
+      if (stashed_frames_.size() >= kMaxStashedFrames) {
+        stashed_frames_.pop_back();
+      }
+
+      stashed_frames_.push_front(
+          {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
+    }
+  }
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  if (decision == kHandOff) {
+    res.push_back(std::move(frame));
+    // A resolved frame may unblock frames that were waiting in the stash.
+    RetryStashedFrames(res);
+  }
+  // kStash and kDrop both yield an empty result.
+  return res;
+}
+
+// Resolves references for a frame sent in flexible mode, where the header
+// lists each reference as a picture id difference (`pid_diff`) relative to
+// the frame's own id.
+//
+// Returns kDrop when the header announces more references than
+// `EncodedFrame::kMaxFrameReferences`; otherwise fills in the references,
+// flattens ids and returns kHandOff.
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
+    RtpFrameObject* frame,
+    const RTPVideoHeaderVP9& codec_header) {
+  const bool too_many_references =
+      codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences;
+  if (too_many_references)
+    return kDrop;
+
+  frame->num_references = codec_header.num_ref_pics;
+
+  // Each reference is the frame id minus the signalled difference, computed
+  // with wrap-around in the `kFrameIdLength` id space.
+  size_t ref = 0;
+  while (ref < frame->num_references) {
+    const auto delta = codec_header.pid_diff[ref];
+    frame->references[ref] = Subtract<kFrameIdLength>(frame->Id(), delta);
+    ++ref;
+  }
+
+  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+  return kHandOff;
+}
+
+// Resolves references for a non-flexible-mode frame using the scalability
+// structure (group of frames, "GOF") associated with `unwrapped_tl0`.
+//
+// Returns:
+//   kHandOff - references were populated and the frame id/references were
+//              flattened; the frame can be passed on.
+//   kStash   - the GOF info needed for this frame is not known yet, or a
+//              frame it may depend on is still missing.
+//   kDrop    - the scalability structure or reference count is out of
+//              bounds, or a spatial-layer-0 keyframe arrived without a
+//              scalability structure.
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
+ RtpFrameObject* frame,
+ const RTPVideoHeaderVP9& codec_header,
+ int64_t unwrapped_tl0) {
+ GofInfo* info;
+ if (codec_header.ss_data_available) {
+ if (codec_header.temporal_idx != 0) {
+ RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
+ "layer frame. Scalability structure ignored.";
+ } else {
+ // Reject structures that exceed the fixed-size limits before storing.
+ if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
+ return kDrop;
+ }
+
+ for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
+ if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
+ return kDrop;
+ }
+ }
+
+ GofInfoVP9 gof = codec_header.gof;
+ if (gof.num_frames_in_gof == 0) {
+ RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
+ "that stream has only one temporal layer.";
+ gof.SetGofInfoVP9(kTemporalStructureMode1);
+ }
+
+ // Store the structure in the next slot of `scalability_structures_`
+ // (index advanced modulo kMaxGofSaved) and anchor it at this frame's id.
+ current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
+ scalability_structures_[current_ss_idx_] = gof;
+ scalability_structures_[current_ss_idx_].pid_start = frame->Id();
+ // NOTE(review): presumably `gof_info_` is a std::map, in which case
+ // emplace() leaves an already existing entry for this key untouched —
+ // confirm against the header.
+ gof_info_.emplace(
+ unwrapped_tl0,
+ GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
+ }
+
+ const auto gof_info_it = gof_info_.find(unwrapped_tl0);
+ if (gof_info_it == gof_info_.end())
+ return kStash;
+
+ info = &gof_info_it->second;
+
+ // Keyframes carry no references and can be handed off right away.
+ if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+ frame->num_references = 0;
+ FrameReceivedVp9(frame->Id(), info);
+ FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+ return kHandOff;
+ }
+ } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+ // Keyframe without a scalability structure: only accepted on spatial
+ // layers above 0, and only once GOF info for this TL0 index exists.
+ if (frame->SpatialIndex() == 0) {
+ RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
+ return kDrop;
+ }
+ const auto gof_info_it = gof_info_.find(unwrapped_tl0);
+ if (gof_info_it == gof_info_.end())
+ return kStash;
+
+ info = &gof_info_it->second;
+
+ frame->num_references = 0;
+ FrameReceivedVp9(frame->Id(), info);
+ FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+ return kHandOff;
+ } else {
+ // Delta frame without a scalability structure. A base layer frame
+ // (temporal_idx == 0) looks up the entry of the previous TL0 index;
+ // other frames look up the entry of their own TL0 index.
+ auto gof_info_it = gof_info_.find(
+ (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+ // Gof info for this frame is not available yet, stash this frame.
+ if (gof_info_it == gof_info_.end())
+ return kStash;
+
+ // A base layer frame creates the entry for its own TL0 index, reusing
+ // the scalability structure of the previous one.
+ if (codec_header.temporal_idx == 0) {
+ gof_info_it = gof_info_
+ .emplace(unwrapped_tl0,
+ GofInfo(gof_info_it->second.gof, frame->Id()))
+ .first;
+ }
+
+ info = &gof_info_it->second;
+ }
+
+ // Clean up info for base layers that are too old.
+ int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
+ auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
+ gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
+
+ FrameReceivedVp9(frame->Id(), info);
+
+ // Make sure we don't miss any frame that could potentially have the
+ // up switch flag set.
+ if (MissingRequiredFrameVp9(frame->Id(), *info))
+ return kStash;
+
+ if (codec_header.temporal_up_switch)
+ up_switch_.emplace(frame->Id(), codec_header.temporal_idx);
+
+ // Clean out old info about up switch frames.
+ uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
+ auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
+ up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
+
+ // Position of this frame inside the GOF, counted from the picture id at
+ // which the structure started.
+ size_t diff =
+ ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, frame->Id());
+ size_t gof_idx = diff % info->gof->num_frames_in_gof;
+
+ if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
+ return kDrop;
+ }
+ // Populate references according to the scalability structure.
+ frame->num_references = info->gof->num_ref_pics[gof_idx];
+ // NOTE(review): the decrement below shrinks the loop bound and discards the
+ // last slot rather than slot `i`. This looks correct only if the ignored
+ // references always form a suffix of `pid_diff[gof_idx]` — confirm.
+ for (size_t i = 0; i < frame->num_references; ++i) {
+ frame->references[i] =
+ Subtract<kFrameIdLength>(frame->Id(), info->gof->pid_diff[gof_idx][i]);
+
+ // If this is a reference to a frame earlier than the last up switch point,
+ // then ignore this reference.
+ if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
+ frame->references[i])) {
+ --frame->num_references;
+ }
+ }
+
+ // Override GOF references.
+ if (!codec_header.inter_pic_predicted) {
+ frame->num_references = 0;
+ }
+
+ FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+ return kHandOff;
+}
+
+// Reports whether the frame with `picture_id` still has to wait for another
+// frame. Returns true when, for any reference the scalability structure
+// assigns to this frame, a frame of a lower temporal layer is recorded as
+// missing in the interval (reference, `picture_id`). Also returns true when
+// the structure assigns the frame an unsupported temporal layer.
+bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
+                                              const GofInfo& info) {
+  const size_t pid_offset =
+      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
+  const size_t slot = pid_offset % info.gof->num_frames_in_gof;
+  const size_t layer = info.gof->temporal_idx[slot];
+
+  if (layer >= kMaxTemporalLayers) {
+    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                        << " temporal "
+                           "layers are supported.";
+    return true;
+  }
+
+  const uint8_t reference_count = info.gof->num_ref_pics[slot];
+  for (size_t ref = 0; ref < reference_count; ++ref) {
+    const uint16_t ref_pid =
+        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[slot][ref]);
+
+    // Only layers strictly below this frame's own layer are inspected.
+    for (size_t lower = 0; lower < layer; ++lower) {
+      const auto& missing = missing_frames_for_layer_[lower];
+      const auto candidate = missing.lower_bound(ref_pid);
+      if (candidate == missing.end())
+        continue;
+      if (AheadOf<uint16_t, kFrameIdLength>(picture_id, *candidate))
+        return true;
+    }
+  }
+  return false;
+}
+
+// Updates the per-temporal-layer sets of missing frames for a received
+// `picture_id`.
+//
+// When `picture_id` is ahead of `info->last_picture_id`, every picture id
+// strictly between the two is recorded as missing for the temporal layer the
+// scalability structure assigns to it, and `info->last_picture_id` advances
+// to `picture_id`. Otherwise `picture_id` is removed from the missing set of
+// its layer. The function returns early, after logging, if the structure
+// names a temporal layer that is not supported.
+void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
+  const size_t frames_per_gof =
+      std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
+  int newest_pid = info->last_picture_id;
+
+  if (!AheadOf<uint16_t, kFrameIdLength>(picture_id, newest_pid)) {
+    // Not newer than what has been seen: it is no longer missing.
+    const size_t offset =
+        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
+    const size_t slot = offset % frames_per_gof;
+    RTC_CHECK(slot < kMaxVp9FramesInGof);
+
+    const size_t layer = info->gof->temporal_idx[slot];
+    if (layer >= kMaxTemporalLayers) {
+      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                          << " temporal "
+                             "layers are supported.";
+      return;
+    }
+
+    missing_frames_for_layer_[layer].erase(picture_id);
+    return;
+  }
+
+  // Walk the gap between the last seen picture id and this one, advancing
+  // the GOF position in lock-step with the picture id.
+  const size_t start_offset =
+      ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, newest_pid);
+  size_t slot = start_offset % frames_per_gof;
+
+  for (newest_pid = Add<kFrameIdLength>(newest_pid, 1);
+       newest_pid != picture_id;
+       newest_pid = Add<kFrameIdLength>(newest_pid, 1)) {
+    slot = (slot + 1) % frames_per_gof;
+    RTC_CHECK(slot < kMaxVp9FramesInGof);
+
+    const size_t layer = info->gof->temporal_idx[slot];
+    if (layer >= kMaxTemporalLayers) {
+      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                          << " temporal "
+                             "layers are supported.";
+      return;
+    }
+
+    missing_frames_for_layer_[layer].insert(newest_pid);
+  }
+
+  info->last_picture_id = newest_pid;
+}
+
+// Returns true if `up_switch_` holds an entry positioned after `pid_ref` and
+// before `picture_id` whose recorded temporal layer is lower than
+// `temporal_idx`.
+bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
+                                            uint8_t temporal_idx,
+                                            uint16_t pid_ref) {
+  auto candidate = up_switch_.upper_bound(pid_ref);
+  while (candidate != up_switch_.end() &&
+         AheadOf<uint16_t, kFrameIdLength>(picture_id, candidate->first)) {
+    const bool from_lower_layer = candidate->second < temporal_idx;
+    if (from_lower_layer)
+      return true;
+    ++candidate;
+  }
+
+  return false;
+}
+
+// Re-runs the GOF reference resolution on every stashed frame. Frames that
+// resolve are appended to `res` and removed from the stash; frames that must
+// be dropped are removed as well. The whole stash is rescanned for as long
+// as a pass hands off at least one frame, since a resolved frame may unblock
+// others.
+void RtpVp9RefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool handed_off_any;
+  do {
+    handed_off_any = false;
+
+    auto stashed = stashed_frames_.begin();
+    while (stashed != stashed_frames_.end()) {
+      const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+          stashed->frame->GetRtpVideoHeader().video_type_header);
+      RTC_DCHECK(!codec_header.flexible_mode);
+
+      const FrameDecision decision = ManageFrameGof(
+          stashed->frame.get(), codec_header, stashed->unwrapped_tl0);
+
+      if (decision == kStash) {
+        // Still unresolved; leave it in place and look at the next one.
+        ++stashed;
+        continue;
+      }
+
+      if (decision == kHandOff) {
+        handed_off_any = true;
+        res.push_back(std::move(stashed->frame));
+      }
+
+      // Handed-off and dropped frames both leave the stash.
+      stashed = stashed_frames_.erase(stashed);
+    }
+  } while (handed_off_any);
+}
+
+// Converts the frame id and all of its references from wrapped picture ids
+// into flattened ids of the form
+// `unwrapped_picture_id * kMaxSpatialLayers + spatial_index`.
+// When `inter_layer_predicted` is set and there is room for one more
+// reference, the id directly below the frame's own flattened id is added as
+// an extra reference.
+void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
+                                            bool inter_layer_predicted) {
+  // References are unwrapped before the frame id; `unwrapper_` is shared, so
+  // the call order is kept as is.
+  for (size_t ref = 0; ref < frame->num_references; ++ref) {
+    const auto unwrapped_ref = unwrapper_.Unwrap(frame->references[ref]);
+    frame->references[ref] =
+        unwrapped_ref * kMaxSpatialLayers + *frame->SpatialIndex();
+  }
+
+  const auto unwrapped_id = unwrapper_.Unwrap(frame->Id());
+  frame->SetId(unwrapped_id * kMaxSpatialLayers + *frame->SpatialIndex());
+
+  if (!inter_layer_predicted)
+    return;
+  if (frame->num_references + 1 > EncodedFrame::kMaxFrameReferences)
+    return;
+
+  frame->references[frame->num_references] = frame->Id() - 1;
+  ++frame->num_references;
+}
+
+// Discards every stashed frame whose first sequence number `seq_num` is
+// ahead of.
+void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
+  const auto is_stale = [seq_num](const auto& stashed) {
+    return AheadOf<uint16_t>(seq_num, stashed.frame->first_seq_num());
+  };
+
+  stashed_frames_.erase(
+      std::remove_if(stashed_frames_.begin(), stashed_frames_.end(), is_stale),
+      stashed_frames_.end());
+}
+
+} // namespace webrtc