summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/call/rtp_payload_params.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/call/rtp_payload_params.cc
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/call/rtp_payload_params.cc')
-rw-r--r--third_party/libwebrtc/call/rtp_payload_params.cc790
1 files changed, 790 insertions, 0 deletions
diff --git a/third_party/libwebrtc/call/rtp_payload_params.cc b/third_party/libwebrtc/call/rtp_payload_params.cc
new file mode 100644
index 0000000000..4b63ebefb3
--- /dev/null
+++ b/third_party/libwebrtc/call/rtp_payload_params.cc
@@ -0,0 +1,790 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "call/rtp_payload_params.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/strings/match.h"
+#include "absl/types/variant.h"
+#include "api/video/video_timing.h"
+#include "modules/video_coding/codecs/h264/include/h264_globals.h"
+#include "modules/video_coding/codecs/interface/common_constants.h"
+#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
+#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "modules/video_coding/frame_dependencies_calculator.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/random.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMaxSimulatedSpatialLayers = 3;  // Spatial layer count used when simulating a VP9 dependency structure.
+
+// Copies the codec-specific parts of `info` into `rtp`, creating the matching
+// per-codec header inside `rtp->video_type_header` for VP8, VP9 and H264.
+// `spatial_index` is only consulted for VP9 when more than one spatial layer
+// is reported. Multiplex is reported as generic; every other codec only gets
+// the codec type and the end-of-picture flag.
+void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
+                                   absl::optional<int> spatial_index,
+                                   RTPVideoHeader* rtp) {
+  rtp->codec = info.codecType;
+  rtp->is_last_frame_in_picture = info.end_of_picture;
+
+  switch (info.codecType) {
+    case kVideoCodecVP8: {
+      const auto& src = info.codecSpecific.VP8;
+      auto& dst = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
+      dst.InitRTPVideoHeaderVP8();
+      dst.nonReference = src.nonReference;
+      dst.temporalIdx = src.temporalIdx;
+      dst.layerSync = src.layerSync;
+      dst.keyIdx = src.keyIdx;
+      break;
+    }
+    case kVideoCodecVP9: {
+      const auto& src = info.codecSpecific.VP9;
+      auto& dst = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
+      dst.InitRTPVideoHeaderVP9();
+      dst.inter_pic_predicted = src.inter_pic_predicted;
+      dst.flexible_mode = src.flexible_mode;
+      dst.ss_data_available = src.ss_data_available;
+      dst.non_ref_for_inter_layer_pred = src.non_ref_for_inter_layer_pred;
+      dst.temporal_idx = src.temporal_idx;
+      dst.temporal_up_switch = src.temporal_up_switch;
+      dst.inter_layer_predicted = src.inter_layer_predicted;
+      dst.gof_idx = src.gof_idx;
+      dst.num_spatial_layers = src.num_spatial_layers;
+      dst.first_active_layer = src.first_active_layer;
+
+      // The spatial index is only meaningful with several spatial layers.
+      dst.spatial_idx = kNoSpatialIdx;
+      if (dst.num_spatial_layers > 1) {
+        dst.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
+      }
+
+      // Scalability structure data is copied only when the encoder sent it.
+      if (src.ss_data_available) {
+        dst.spatial_layer_resolution_present =
+            src.spatial_layer_resolution_present;
+        if (src.spatial_layer_resolution_present) {
+          for (size_t layer = 0; layer < src.num_spatial_layers; ++layer) {
+            dst.width[layer] = src.width[layer];
+            dst.height[layer] = src.height[layer];
+          }
+        }
+        dst.gof.CopyGofInfoVP9(src.gof);
+      }
+
+      dst.num_ref_pics = src.num_ref_pics;
+      for (int ref = 0; ref < src.num_ref_pics; ++ref) {
+        dst.pid_diff[ref] = src.p_diff[ref];
+      }
+      dst.end_of_picture = info.end_of_picture;
+      break;
+    }
+    case kVideoCodecH264: {
+      auto& dst = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
+      dst.packetization_mode = info.codecSpecific.H264.packetization_mode;
+      break;
+    }
+    case kVideoCodecMultiplex:
+    case kVideoCodecGeneric:
+      rtp->codec = kVideoCodecGeneric;
+      break;
+    // TODO(bugs.webrtc.org/13485): Implement H265 codec specific info
+    default:
+      break;
+  }
+}
+
+// Fills `timing` from the timing data attached to `image`. When the image
+// carries no usable timing (flags are kInvalid or kNotTriggered) only the
+// flags are written, as kInvalid. Otherwise the encode start/finish deltas
+// are computed relative to the capture time and all later-stage deltas are
+// zeroed.
+void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
+  const auto source_flags = image.timing_.flags;
+  const bool has_no_timing =
+      source_flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
+      source_flags == VideoSendTiming::TimingFrameFlags::kNotTriggered;
+  if (has_no_timing) {
+    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
+    return;
+  }
+
+  const auto capture_time_ms = image.capture_time_ms_;
+  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
+      capture_time_ms, image.timing_.encode_start_ms);
+  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
+      capture_time_ms, image.timing_.encode_finish_ms);
+
+  // The remaining stages have not happened yet at this point.
+  timing->packetization_finish_delta_ms = 0;
+  timing->pacer_exit_delta_ms = 0;
+  timing->network_timestamp_delta_ms = 0;
+  timing->network2_timestamp_delta_ms = 0;
+
+  timing->flags = source_flags;
+}
+
+// Builds a dependency structure that matches the simulated generic info. The
+// templates allow producing a valid dependency descriptor for any stream where
+// `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
+// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
+// template_fdiffs()). The set of templates is not tuned for any particular
+// structure, so the dependency descriptor uses more bytes on the wire than it
+// would with tuned templates.
+FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
+                                               int num_temporal_layers) {
+  const int num_decode_targets = num_spatial_layers * num_temporal_layers;
+  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
+  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
+  RTC_DCHECK_LE(num_decode_targets, 32);
+
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = num_decode_targets;
+  structure.num_chains = num_spatial_layers;
+  structure.templates.reserve(num_decode_targets);
+
+  // One template (and one decode target) per (spatial, temporal) layer pair.
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      FrameDependencyTemplate frame_template;
+      frame_template.spatial_id = sid;
+      frame_template.temporal_id = tid;
+
+      // Prefer the kSwitch indication for frames that are part of the decode
+      // target: the dependency descriptor information generated in this file
+      // uses kSwitch more often than kRequired, which increases the chance
+      // of a good (or complete) template match.
+      for (int target_sid = 0; target_sid < num_spatial_layers; ++target_sid) {
+        for (int target_tid = 0; target_tid < num_temporal_layers;
+             ++target_tid) {
+          const bool part_of_target = sid <= target_sid && tid <= target_tid;
+          frame_template.decode_target_indications.push_back(
+              part_of_target ? DecodeTargetIndication::kSwitch
+                             : DecodeTargetIndication::kNotPresent);
+        }
+      }
+
+      const int frame_diff = tid == 0 ? num_decode_targets : num_spatial_layers;
+      frame_template.frame_diffs.push_back(frame_diff);
+      frame_template.chain_diffs.assign(structure.num_chains, 1);
+      structure.templates.push_back(frame_template);
+
+      // Each decode target is protected by the chain of its spatial layer.
+      structure.decode_target_protected_by_chain.push_back(sid);
+    }
+  }
+  return structure;
+}
+} // namespace
+
+// Creates payload params for the stream identified by `ssrc`. The two
+// experiment flags are read from `trials`. When `state` is provided its
+// picture id and tl0 picture index are resumed; otherwise both start from
+// random values (the picture id limited to 15 bits).
+RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
+                                   const RtpPayloadState* state,
+                                   const FieldTrialsView& trials)
+    : ssrc_(ssrc),
+      generic_picture_id_experiment_(
+          absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
+                           "Enabled")),
+      simulate_generic_structure_(absl::StartsWith(
+          trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
+          "Enabled")) {
+  // -1 marks "no frame seen yet" in every frame-id bookkeeping table.
+  for (auto& per_temporal_layer_ids : last_shared_frame_id_) {
+    per_temporal_layer_ids.fill(-1);
+  }
+  chain_last_frame_id_.fill(-1);
+  buffer_id_to_frame_id_.fill(-1);
+
+  Random random(rtc::TimeMicros());
+  if (state) {
+    state_.picture_id = state->picture_id;
+    state_.tl0_pic_idx = state->tl0_pic_idx;
+  } else {
+    state_.picture_id = random.Rand<int16_t>() & 0x7FFF;
+    state_.tl0_pic_idx = random.Rand<uint8_t>();
+  }
+}
+
+RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;  // Compiler-generated member-wise copy.
+
+// Nothing to release explicitly; members clean up after themselves.
+RtpPayloadParams::~RtpPayloadParams() = default;
+
+// Builds the RTP video header for `image`. Codec-specific fields come from
+// `codec_specific_info` when it is non-null; frame-level fields come from
+// `image`. Picture id / tl0 picture index state is advanced and the generic
+// descriptor is filled in using `shared_frame_id`.
+RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
+    const EncodedImage& image,
+    const CodecSpecificInfo* codec_specific_info,
+    int64_t shared_frame_id) {
+  RTPVideoHeader header;
+  if (codec_specific_info) {
+    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
+                                  &header);
+  }
+
+  // Frame-level properties copied straight from the encoded image.
+  header.simulcastIdx = image.SimulcastIndex().value_or(0);
+  header.frame_type = image._frameType;
+  header.rotation = image.rotation_;
+  header.content_type = image.content_type_;
+  header.playout_delay = image.PlayoutDelay();
+  header.width = image._encodedWidth;
+  header.height = image._encodedHeight;
+  const auto* image_color_space = image.ColorSpace();
+  if (image_color_space) {
+    header.color_space = absl::make_optional(*image_color_space);
+  } else {
+    header.color_space = absl::nullopt;
+  }
+  header.video_frame_tracking_id = image.VideoFrameTrackingId();
+  SetVideoTiming(image, &header.video_timing);
+
+  // Only VP9 can report that a frame is not the first one of its picture.
+  bool first_frame_in_picture = true;
+  if (codec_specific_info &&
+      codec_specific_info->codecType == kVideoCodecVP9) {
+    first_frame_in_picture =
+        codec_specific_info->codecSpecific.VP9.first_frame_in_picture;
+  }
+  SetCodecSpecific(&header, first_frame_in_picture);
+
+  const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
+  SetGeneric(codec_specific_info, shared_frame_id, is_keyframe, &header);
+
+  return header;
+}
+
+uint32_t RtpPayloadParams::ssrc() const {
+ return ssrc_;  // The SSRC passed to the constructor.
+}
+
+RtpPayloadState RtpPayloadParams::state() const {
+ return state_;  // Returned by value: a snapshot of the current picture id / tl0 picture index state.
+}
+
+// Advances the picture id / tl0 picture index state and writes the current
+// values into the codec-specific header of `rtp_video_header`. The picture id
+// advances once per picture, i.e. only when `first_frame_in_picture` is true.
+void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
+                                        bool first_frame_in_picture) {
+  // Always set picture id. Set tl0_pic_idx iff temporal index is set.
+  if (first_frame_in_picture) {
+    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
+  }
+
+  const auto codec = rtp_video_header->codec;
+  if (codec == kVideoCodecVP8) {
+    auto& vp8 =
+        absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
+    vp8.pictureId = state_.picture_id;
+
+    if (vp8.temporalIdx != kNoTemporalIdx) {
+      if (vp8.temporalIdx == 0) {
+        ++state_.tl0_pic_idx;
+      }
+      vp8.tl0PicIdx = state_.tl0_pic_idx;
+    }
+  } else if (codec == kVideoCodecVP9) {
+    auto& vp9 =
+        absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
+    vp9.picture_id = state_.picture_id;
+
+    // Note that in the case that we have no temporal layers but we do have
+    // spatial layers, packets will carry layering info with a temporal_idx of
+    // zero, and we then have to set and increment tl0_pic_idx.
+    const bool has_layering = vp9.temporal_idx != kNoTemporalIdx ||
+                              vp9.spatial_idx != kNoSpatialIdx;
+    if (has_layering) {
+      const bool in_base_temporal_layer =
+          vp9.temporal_idx == 0 || vp9.temporal_idx == kNoTemporalIdx;
+      if (first_frame_in_picture && in_base_temporal_layer) {
+        ++state_.tl0_pic_idx;
+      }
+      vp9.tl0_pic_idx = state_.tl0_pic_idx;
+    }
+  } else if (codec == kVideoCodecGeneric && generic_picture_id_experiment_) {
+    auto& legacy_generic =
+        rtp_video_header->video_type_header
+            .emplace<RTPVideoHeaderLegacyGeneric>();
+    legacy_generic.picture_id = state_.picture_id;
+  }
+}
+
+// Converts encoder-provided `frame_info` into a generic descriptor for the
+// frame identified by `frame_id`. Dependencies are derived from the encoder
+// buffer usage and chain diffs from the chains the frame is part of; both
+// calculators are stateful members, so the call order below is kept.
+RTPVideoHeader::GenericDescriptorInfo
+RtpPayloadParams::GenericDescriptorFromFrameInfo(
+    const GenericFrameInfo& frame_info,
+    int64_t frame_id) {
+  RTPVideoHeader::GenericDescriptorInfo descriptor;
+
+  // Values taken over unchanged.
+  descriptor.frame_id = frame_id;
+  descriptor.spatial_index = frame_info.spatial_id;
+  descriptor.temporal_index = frame_info.temporal_id;
+  descriptor.decode_target_indications = frame_info.decode_target_indications;
+  descriptor.active_decode_targets = frame_info.active_decode_targets;
+
+  // Values computed from the frame history.
+  descriptor.dependencies = dependencies_calculator_.FromBuffersUsage(
+      frame_id, frame_info.encoder_buffers);
+  descriptor.chain_diffs =
+      chains_calculator_.From(frame_id, frame_info.part_of_chain);
+
+  return descriptor;
+}
+
+// Fills `rtp_video_header->generic`. Encoder-provided generic frame info (with
+// a non-empty buffer list) takes precedence; otherwise the descriptor is
+// derived per codec. AV1, H265 and Multiplex get no derived descriptor, and
+// VP8/VP9/H264 get none when `codec_specific_info` is null.
+void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
+                                  int64_t frame_id,
+                                  bool is_keyframe,
+                                  RTPVideoHeader* rtp_video_header) {
+  const bool has_frame_info =
+      codec_specific_info && codec_specific_info->generic_frame_info &&
+      !codec_specific_info->generic_frame_info->encoder_buffers.empty();
+  if (has_frame_info) {
+    const auto& frame_info = *codec_specific_info->generic_frame_info;
+    if (is_keyframe) {
+      // Key frame resets all chains it is in.
+      chains_calculator_.Reset(frame_info.part_of_chain);
+    }
+    rtp_video_header->generic =
+        GenericDescriptorFromFrameInfo(frame_info, frame_id);
+    return;
+  }
+
+  switch (rtp_video_header->codec) {
+    case VideoCodecType::kVideoCodecGeneric:
+      GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
+      return;
+    case VideoCodecType::kVideoCodecVP8:
+      if (!codec_specific_info) {
+        return;
+      }
+      Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
+                   is_keyframe, rtp_video_header);
+      return;
+    case VideoCodecType::kVideoCodecVP9:
+      if (codec_specific_info == nullptr) {
+        return;
+      }
+      Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
+                   *rtp_video_header);
+      return;
+    case VideoCodecType::kVideoCodecH264:
+      if (!codec_specific_info) {
+        return;
+      }
+      H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
+                    is_keyframe, rtp_video_header);
+      return;
+    // TODO(philipel): Implement AV1 to generic descriptor.
+    case VideoCodecType::kVideoCodecAV1:
+    // TODO(bugs.webrtc.org/13485): Implement H265 to generic descriptor.
+    case VideoCodecType::kVideoCodecH265:
+    case VideoCodecType::kVideoCodecMultiplex:
+      return;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+}
+
+// Returns a simulated dependency structure for `codec_specific_info`, or
+// nullopt when none applies: null info, AV1/H264/H265/Multiplex, or the
+// generic codec while `simulate_generic_structure_` is off. For VP9 the
+// structure also carries per-layer resolutions when the encoder reported
+// them.
+absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
+    const CodecSpecificInfo* codec_specific_info) {
+  if (codec_specific_info == nullptr) {
+    return absl::nullopt;
+  }
+  // This helper shouldn't be used when template structure is specified
+  // explicitly.
+  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
+  switch (codec_specific_info->codecType) {
+    case VideoCodecType::kVideoCodecGeneric:
+      if (simulate_generic_structure_) {
+        return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                     /*num_temporal_layers=*/1);
+      }
+      return absl::nullopt;
+    case VideoCodecType::kVideoCodecVP8:
+      return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                   /*num_temporal_layers=*/kMaxTemporalStreams);
+    case VideoCodecType::kVideoCodecVP9: {
+      absl::optional<FrameDependencyStructure> structure =
+          MinimalisticStructure(
+              /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
+              /*num_temporal_layers=*/kMaxTemporalStreams);
+      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
+      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+        RenderResolution first_valid;
+        RenderResolution last_valid;
+        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
+          RenderResolution r(vp9.width[i], vp9.height[i]);
+          if (r.Valid()) {
+            if (!first_valid.Valid()) {
+              first_valid = r;
+            }
+            last_valid = r;
+          }
+          structure->resolutions.push_back(r);
+        }
+        if (!last_valid.Valid()) {
+          // No valid resolution found. Do not send resolutions.
+          structure->resolutions.clear();
+        } else {
+          // Pad up to the simulated layer count with the last valid
+          // resolution.
+          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
+          // VP9 encoder wrapper may disable first few spatial layers by
+          // setting invalid resolution (0,0). `structure->resolutions`
+          // doesn't support invalid resolution, so reset them to something
+          // valid.
+          for (RenderResolution& r : structure->resolutions) {
+            if (!r.Valid()) {
+              r = first_valid;
+            }
+          }
+        }
+      }
+      return structure;
+    }
+    case VideoCodecType::kVideoCodecAV1:
+    case VideoCodecType::kVideoCodecH264:
+    case VideoCodecType::kVideoCodecH265:
+    case VideoCodecType::kVideoCodecMultiplex:
+      return absl::nullopt;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+  // Reached only if `codecType` holds a value outside the enumeration and the
+  // check above does not abort. Return explicitly: flowing off the end of a
+  // value-returning function is undefined behaviour.
+  return absl::nullopt;
+}
+
+// Fills the generic descriptor for a generic-codec frame: one decode target,
+// one chain, and every delta frame depends on the frame sent just before it.
+void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
+                                        bool is_keyframe,
+                                        RTPVideoHeader* rtp_video_header) {
+  auto& descriptor = rtp_video_header->generic.emplace();
+  descriptor.frame_id = shared_frame_id;
+  descriptor.decode_target_indications.push_back(
+      DecodeTargetIndication::kSwitch);
+
+  // Only slot [0][0] of the frame-id table is used here.
+  auto& layer_frame_ids = last_shared_frame_id_[0];
+  if (!is_keyframe) {
+    const int64_t previous_frame_id = layer_frame_ids[0];
+    RTC_DCHECK_NE(previous_frame_id, -1);
+    RTC_DCHECK_LT(previous_frame_id, shared_frame_id);
+    descriptor.chain_diffs.push_back(shared_frame_id - previous_frame_id);
+    descriptor.dependencies.push_back(previous_frame_id);
+  } else {
+    // A key frame starts the chain and forgets all earlier frames.
+    descriptor.chain_diffs.push_back(0);
+    layer_frame_ids.fill(-1);
+  }
+
+  layer_frame_ids[0] = shared_frame_id;
+}
+
+// Derives the generic descriptor of an H264 frame from its temporal layer
+// information. H264 frames always use spatial index 0. No descriptor is
+// produced when the temporal index is too large.
+void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
+                                     int64_t shared_frame_id,
+                                     bool is_keyframe,
+                                     RTPVideoHeader* rtp_video_header) {
+  int temporal_index = 0;
+  if (h264_info.temporal_idx != kNoTemporalIdx) {
+    temporal_index = h264_info.temporal_idx;
+  }
+
+  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
+    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
+                           "used with generic frame descriptor.";
+    return;
+  }
+
+  auto& descriptor = rtp_video_header->generic.emplace();
+  descriptor.frame_id = shared_frame_id;
+  descriptor.temporal_index = temporal_index;
+
+  // Last frame id seen per temporal layer, for spatial index 0.
+  auto& last_ids = last_shared_frame_id_[/*spatial index*/ 0];
+
+  if (is_keyframe) {
+    // A key frame has no dependencies and invalidates all earlier frames.
+    RTC_DCHECK_EQ(temporal_index, 0);
+    last_ids.fill(-1);
+    last_ids[temporal_index] = shared_frame_id;
+    return;
+  }
+
+  if (!h264_info.base_layer_sync) {
+    // Depend on the latest frame of this and every lower temporal layer.
+    for (int tid = 0; tid <= temporal_index; ++tid) {
+      const int64_t frame_id = last_ids[tid];
+      if (frame_id == -1) {
+        continue;
+      }
+      RTC_DCHECK_LT(frame_id, shared_frame_id);
+      descriptor.dependencies.push_back(frame_id);
+    }
+  } else {
+    // A sync frame depends only on the latest base layer frame; forget
+    // higher layer frames that are older than it.
+    const int64_t tl0_frame_id = last_ids[0];
+    for (int tid = 1; tid < RtpGenericFrameDescriptor::kMaxTemporalLayers;
+         ++tid) {
+      if (last_ids[tid] < tl0_frame_id) {
+        last_ids[tid] = -1;
+      }
+    }
+
+    RTC_DCHECK_GE(tl0_frame_id, 0);
+    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
+    descriptor.dependencies.push_back(tl0_frame_id);
+  }
+
+  last_ids[temporal_index] = shared_frame_id;
+}
+
+// Derives the generic descriptor of a VP8 frame. VP8 always uses spatial
+// index 0, one decode target per temporal layer and a single chain made of
+// the temporal layer 0 frames. Expects the VP8 header to be already present
+// in `rtp_video_header->video_type_header`.
+void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
+                                    int64_t shared_frame_id,
+                                    bool is_keyframe,
+                                    RTPVideoHeader* rtp_video_header) {
+  const auto& vp8_header =
+      absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
+  const int spatial_index = 0;
+  int temporal_index = 0;
+  if (vp8_header.temporalIdx != kNoTemporalIdx) {
+    temporal_index = vp8_header.temporalIdx;
+  }
+
+  const bool index_too_high =
+      temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
+      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers;
+  if (index_too_high) {
+    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
+                           "used with generic frame descriptor.";
+    return;
+  }
+
+  auto& descriptor = rtp_video_header->generic.emplace();
+  descriptor.frame_id = shared_frame_id;
+  descriptor.spatial_index = spatial_index;
+  descriptor.temporal_index = temporal_index;
+
+  // Generate decode target indications: targets below the frame's temporal
+  // layer do not contain the frame, all others may switch on it.
+  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
+  descriptor.decode_target_indications.resize(kMaxTemporalStreams,
+                                              DecodeTargetIndication::kSwitch);
+  std::fill_n(descriptor.decode_target_indications.begin(), temporal_index,
+              DecodeTargetIndication::kNotPresent);
+
+  // Frame dependencies.
+  const bool layer_sync = vp8_header.layerSync;
+  if (!vp8_info.useExplicitDependencies) {
+    SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
+                                 spatial_index, temporal_index, layer_sync,
+                                 &descriptor);
+  } else {
+    SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe, layer_sync,
+                          &descriptor);
+  }
+
+  // Calculate chains.
+  int chain_diff = 0;
+  if (!is_keyframe && chain_last_frame_id_[0] >= 0) {
+    chain_diff = static_cast<int>(shared_frame_id - chain_last_frame_id_[0]);
+  }
+  descriptor.chain_diffs = {chain_diff};
+  if (temporal_index == 0) {
+    chain_last_frame_id_[0] = shared_frame_id;
+  }
+}
+
+void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
+ int64_t shared_frame_id,
+ RTPVideoHeader& rtp_video_header) {  // Derives `rtp_video_header.generic` from the VP9 header already stored in `rtp_video_header`; leaves it unset when the layer ids are inconsistent.
+ const auto& vp9_header =
+ absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
+ const int num_spatial_layers = kMaxSimulatedSpatialLayers;  // The simulated structure always uses the maximum layer counts.
+ const int first_active_spatial_id = vp9_header.first_active_layer;
+ const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
+ const int num_temporal_layers = kMaxTemporalStreams;
+ static_assert(num_spatial_layers <=
+ RtpGenericFrameDescriptor::kMaxSpatialLayers);
+ static_assert(num_temporal_layers <=
+ RtpGenericFrameDescriptor::kMaxTemporalLayers);
+ static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
+ static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
+
+ int spatial_index =
+ vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;  // A missing index is treated as layer 0.
+ int temporal_index =
+ vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;  // A missing index is treated as layer 0.
+
+ if (!(temporal_index < num_temporal_layers &&
+ first_active_spatial_id <= spatial_index &&
+ spatial_index <= last_active_spatial_id &&
+ last_active_spatial_id < num_spatial_layers)) {
+ // Prefer to generate no generic layering than an inconsistent one.
+ RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
+ << ",tid=" << temporal_index
+ << " in VP9 header. Active spatial ids: ["
+ << first_active_spatial_id << ","
+ << last_active_spatial_id << "]";
+ return;
+ }
+
+ RTPVideoHeader::GenericDescriptorInfo& result =
+ rtp_video_header.generic.emplace();
+
+ result.frame_id = shared_frame_id;
+ result.spatial_index = spatial_index;
+ result.temporal_index = temporal_index;
+
+ result.decode_target_indications.reserve(num_spatial_layers *
+ num_temporal_layers);
+ for (int sid = 0; sid < num_spatial_layers; ++sid) {  // One indication per (spatial, temporal) decode target, spatial-major order.
+ for (int tid = 0; tid < num_temporal_layers; ++tid) {
+ DecodeTargetIndication dti;
+ if (sid < spatial_index || tid < temporal_index) {  // Targets below this frame's layer do not include it.
+ dti = DecodeTargetIndication::kNotPresent;
+ } else if (spatial_index != sid &&
+ vp9_header.non_ref_for_inter_layer_pred) {  // Other spatial layers cannot reference this frame.
+ dti = DecodeTargetIndication::kNotPresent;
+ } else if (sid == spatial_index && tid == temporal_index) {
+ // Assume that if frame is decodable, all of its own layer is decodable.
+ dti = DecodeTargetIndication::kSwitch;
+ } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
+ dti = DecodeTargetIndication::kSwitch;
+ } else if (!vp9_header.inter_pic_predicted) {
+ // Key frame or spatial upswitch
+ dti = DecodeTargetIndication::kSwitch;
+ } else {
+ // Make no other assumptions. That should be safe, though suboptimal.
+ // To provide more accurate dti, encoder wrapper should fill in
+ // CodecSpecificInfo::generic_frame_info
+ dti = DecodeTargetIndication::kRequired;
+ }
+ result.decode_target_indications.push_back(dti);
+ }
+ }
+
+ // Calculate frame dependencies. `last_vp9_frame_id_` maps (picture id modulo kPictureDiffLimit, spatial index) to a shared frame id.
+ static constexpr int kPictureDiffLimit = 128;
+ if (last_vp9_frame_id_.empty()) {
+ // Create the array only if it is ever used.
+ last_vp9_frame_id_.resize(kPictureDiffLimit);
+ }
+
+ if (vp9_header.flexible_mode) {
+ if (vp9_header.inter_layer_predicted && spatial_index > 0) {
+ result.dependencies.push_back(
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+ [spatial_index - 1]);  // The lower spatial layer frame of the same picture.
+ }
+ if (vp9_header.inter_pic_predicted) {
+ for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
+ // picture_id is a 15-bit number that wraps around. Though underflow may
+ // produce a picture id that exceeds 2^15, it is ok because in this
+ // code block only the last 7 bits of the picture_id are used.
+ uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+ result.dependencies.push_back(
+ last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+ }
+ }
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+ [spatial_index] = shared_frame_id;
+ } else {
+ // Implementing general conversion logic for non-flexible mode requires some
+ // work and we will almost certainly never need it, so for now support only
+ // non-layered streams.
+ if (spatial_index > 0 || temporal_index > 0) {
+ // Prefer to generate no generic layering than an inconsistent one.
+ rtp_video_header.generic.reset();
+ return;
+ }
+
+ if (vp9_header.inter_pic_predicted) {
+ // Since we only support non-scalable streams we only need to save the
+ // last frame id.
+ result.dependencies.push_back(last_vp9_frame_id_[0][0]);
+ }
+ last_vp9_frame_id_[0][0] = shared_frame_id;
+ }
+
+ result.active_decode_targets =
+ ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
+ 1) ^
+ ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);  // Bitmask with the bits of all decode targets in spatial layers [first_active, last_active] set.
+
+ // Calculate chains, assuming chain includes all frames with temporal_id = 0
+ if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
+ // Assume frames without dependencies also reset chains.
+ for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
+ chain_last_frame_id_[sid] = -1;
+ }
+ }
+ result.chain_diffs.resize(num_spatial_layers, 0);
+ for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
+ ++sid) {
+ if (chain_last_frame_id_[sid] == -1) {  // -1 means the chain has no previous frame.
+ result.chain_diffs[sid] = 0;
+ continue;
+ }
+ int64_t chain_diff = shared_frame_id - chain_last_frame_id_[sid];
+ if (chain_diff >= 256) {  // NOTE(review): presumably the chain diff has to fit into 8 bits - confirm against the descriptor format.
+ RTC_LOG(LS_ERROR)
+ << "Too many frames since last VP9 T0 frame for spatial layer #"
+ << sid << " at frame#" << shared_frame_id;
+ chain_last_frame_id_[sid] = -1;
+ chain_diff = 0;
+ }
+ result.chain_diffs[sid] = chain_diff;
+ }
+
+ if (temporal_index == 0) {  // Only temporal layer 0 frames extend the chains.
+ chain_last_frame_id_[spatial_index] = shared_frame_id;
+ if (!vp9_header.non_ref_for_inter_layer_pred) {
+ for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
+ chain_last_frame_id_[sid] = shared_frame_id;
+ }
+ }
+ }
+}
+
+// Computes VP8 frame dependencies from the temporal layer pattern alone, for
+// encoders that do not report explicit buffer dependencies. Must not be mixed
+// with SetDependenciesVp8New() on the same object.
+void RtpPayloadParams::SetDependenciesVp8Deprecated(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    int spatial_index,
+    int temporal_index,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(!vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
+  new_version_used_ = false;
+
+  // Last frame id seen per temporal layer of this spatial layer.
+  auto& last_ids = last_shared_frame_id_[spatial_index];
+
+  if (is_keyframe) {
+    // A key frame has no dependencies and invalidates all earlier frames.
+    RTC_DCHECK_EQ(temporal_index, 0);
+    last_ids.fill(-1);
+    last_ids[temporal_index] = shared_frame_id;
+    return;
+  }
+
+  if (!layer_sync) {
+    // Depend on the latest frame of this and every lower temporal layer.
+    for (int tid = 0; tid <= temporal_index; ++tid) {
+      const int64_t frame_id = last_ids[tid];
+      if (frame_id == -1) {
+        continue;
+      }
+      RTC_DCHECK_LT(frame_id, shared_frame_id);
+      generic->dependencies.push_back(frame_id);
+    }
+  } else {
+    // A sync frame depends only on the latest base layer frame; forget
+    // higher layer frames that are older than it.
+    const int64_t tl0_frame_id = last_ids[0];
+    for (int tid = 1; tid < RtpGenericFrameDescriptor::kMaxTemporalLayers;
+         ++tid) {
+      if (last_ids[tid] < tl0_frame_id) {
+        last_ids[tid] = -1;
+      }
+    }
+
+    RTC_DCHECK_GE(tl0_frame_id, 0);
+    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
+    generic->dependencies.push_back(tl0_frame_id);
+  }
+
+  last_ids[temporal_index] = shared_frame_id;
+}
+
+// Computes VP8 frame dependencies from the buffers the encoder reports as
+// referenced, and records which buffers the frame updates. Must not be mixed
+// with SetDependenciesVp8Deprecated() on the same object. `layer_sync` is not
+// used by this variant.
+void RtpPayloadParams::SetDependenciesVp8New(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
+  new_version_used_ = true;
+
+  if (is_keyframe) {
+    // A key frame references nothing and refreshes every buffer.
+    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
+    buffer_id_to_frame_id_.fill(shared_frame_id);
+    return;
+  }
+
+  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
+
+  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
+  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
+                arraysize(vp8_info.referencedBuffers));
+
+  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
+    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
+    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
+    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
+
+    const int64_t dependency_frame_id =
+        buffer_id_to_frame_id_[referenced_buffer];
+    RTC_DCHECK_GE(dependency_frame_id, 0);
+    RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);
+
+    // Several buffers may hold the same frame; list each frame only once.
+    const bool is_new_dependency =
+        std::find(generic->dependencies.begin(), generic->dependencies.end(),
+                  dependency_frame_id) == generic->dependencies.end();
+    if (is_new_dependency) {
+      generic->dependencies.push_back(dependency_frame_id);
+    }
+  }
+
+  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
+  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
+    const size_t updated_id = vp8_info.updatedBuffers[i];
+    // Same bounds check as for referenced buffers: the id indexes the table.
+    RTC_DCHECK_LT(updated_id, buffer_id_to_frame_id_.size());
+    buffer_id_to_frame_id_[updated_id] = shared_frame_id;
+  }
+
+  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
+}
+
+} // namespace webrtc