Adding upstream version 124.0.1.upstream/124.0.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/call/rtp_payload_params.cc
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 790 insertions, 0 deletions
diff --git a/third_party/libwebrtc/call/rtp_payload_params.cc b/third_party/libwebrtc/call/rtp_payload_params.cc
new file mode 100644
index 0000000000..4b63ebefb3
--- /dev/null
+++ b/third_party/libwebrtc/call/rtp_payload_params.cc
@@ -0,0 +1,790 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "call/rtp_payload_params.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/strings/match.h"
+#include "absl/types/variant.h"
+#include "api/video/video_timing.h"
+#include "modules/video_coding/codecs/h264/include/h264_globals.h"
+#include "modules/video_coding/codecs/interface/common_constants.h"
+#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
+#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "modules/video_coding/frame_dependencies_calculator.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/random.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMaxSimulatedSpatialLayers = 3;
+
+void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
+                                   absl::optional<int> spatial_index,
+                                   RTPVideoHeader* rtp) {
+  rtp->codec = info.codecType;
+  rtp->is_last_frame_in_picture = info.end_of_picture;
+  switch (info.codecType) {
+    case kVideoCodecVP8: {
+      auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
+      vp8_header.InitRTPVideoHeaderVP8();
+      vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
+      vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
+      vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
+      vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
+      return;
+    }
+    case kVideoCodecVP9: {
+      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
+      vp9_header.InitRTPVideoHeaderVP9();
+      vp9_header.inter_pic_predicted =
+          info.codecSpecific.VP9.inter_pic_predicted;
+      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
+      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
+      vp9_header.non_ref_for_inter_layer_pred =
+          info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
+      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
+      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
+      vp9_header.inter_layer_predicted =
+          info.codecSpecific.VP9.inter_layer_predicted;
+      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
+      vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
+      vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
+      if (vp9_header.num_spatial_layers > 1) {
+        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
+      } else {
+        vp9_header.spatial_idx = kNoSpatialIdx;
+      }
+      if (info.codecSpecific.VP9.ss_data_available) {
+        vp9_header.spatial_layer_resolution_present =
+            info.codecSpecific.VP9.spatial_layer_resolution_present;
+        if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
+          for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
+               ++i) {
+            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
+            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
+          }
+        }
+        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
+      }
+
+      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
+      for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
+        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
+      }
+      vp9_header.end_of_picture = info.end_of_picture;
+      return;
+    }
+    case kVideoCodecH264: {
+      auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
+      h264_header.packetization_mode =
+          info.codecSpecific.H264.packetization_mode;
+      return;
+    }
+    case kVideoCodecMultiplex:
+    case kVideoCodecGeneric:
+      rtp->codec = kVideoCodecGeneric;
+      return;
+    // TODO(bugs.webrtc.org/13485): Implement H265 codec specific info
+    default:
+      return;
+  }
+}
+
+void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
+  if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
+      image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
+    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
+    return;
+  }
+
+  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
+      image.capture_time_ms_, image.timing_.encode_start_ms);
+  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
+      image.capture_time_ms_, image.timing_.encode_finish_ms);
+  timing->packetization_finish_delta_ms = 0;
+  timing->pacer_exit_delta_ms = 0;
+  timing->network_timestamp_delta_ms = 0;
+  timing->network2_timestamp_delta_ms = 0;
+  timing->flags = image.timing_.flags;
+}
+
+// Returns structure that aligns with simulated generic info. The templates
+// allow to produce valid dependency descriptor for any stream where
+// `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
+// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
+// template_fdiffs()). The set of the templates is not tuned for any paricular
+// structure thus dependency descriptor would use more bytes on the wire than
+// with tuned templates.
+FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
+                                               int num_temporal_layers) {
+  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
+  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
+  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
+  structure.num_chains = num_spatial_layers;
+  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      FrameDependencyTemplate a_template;
+      a_template.spatial_id = sid;
+      a_template.temporal_id = tid;
+      for (int s = 0; s < num_spatial_layers; ++s) {
+        for (int t = 0; t < num_temporal_layers; ++t) {
+          // Prefer kSwitch indication for frames that is part of the decode
+          // target because dependency descriptor information generated in this
+          // class use kSwitch indications more often that kRequired, increasing
+          // the chance of a good (or complete) template match.
+          a_template.decode_target_indications.push_back(
+              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
+                                   : DecodeTargetIndication::kNotPresent);
+        }
+      }
+      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
+                                                      num_temporal_layers
+                                                : num_spatial_layers);
+      a_template.chain_diffs.assign(structure.num_chains, 1);
+      structure.templates.push_back(a_template);
+
+      structure.decode_target_protected_by_chain.push_back(sid);
+    }
+  }
+  return structure;
+}
+}  // namespace
+
+RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
+                                   const RtpPayloadState* state,
+                                   const FieldTrialsView& trials)
+    : ssrc_(ssrc),
+      generic_picture_id_experiment_(
+          absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
+                           "Enabled")),
+      simulate_generic_structure_(absl::StartsWith(
+          trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
+          "Enabled")) {
+  for (auto& spatial_layer : last_shared_frame_id_)
+    spatial_layer.fill(-1);
+
+  chain_last_frame_id_.fill(-1);
+  buffer_id_to_frame_id_.fill(-1);
+
+  Random random(rtc::TimeMicros());
+  state_.picture_id =
+      state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
+  state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
+}
+
+RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;
+
+RtpPayloadParams::~RtpPayloadParams() {}
+
+RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
+    const EncodedImage& image,
+    const CodecSpecificInfo* codec_specific_info,
+    int64_t shared_frame_id) {
+  RTPVideoHeader rtp_video_header;
+  if (codec_specific_info) {
+    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
+                                  &rtp_video_header);
+  }
+  rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0);
+  rtp_video_header.frame_type = image._frameType;
+  rtp_video_header.rotation = image.rotation_;
+  rtp_video_header.content_type = image.content_type_;
+  rtp_video_header.playout_delay = image.PlayoutDelay();
+  rtp_video_header.width = image._encodedWidth;
+  rtp_video_header.height = image._encodedHeight;
+  rtp_video_header.color_space = image.ColorSpace()
+                                     ? absl::make_optional(*image.ColorSpace())
+                                     : absl::nullopt;
+  rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId();
+  SetVideoTiming(image, &rtp_video_header.video_timing);
+
+  const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
+  const bool first_frame_in_picture =
+      (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
+          ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
+          : true;
+
+  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
+
+  SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
+             &rtp_video_header);
+
+  return rtp_video_header;
+}
+
+uint32_t RtpPayloadParams::ssrc() const {
+  return ssrc_;
+}
+
+RtpPayloadState RtpPayloadParams::state() const {
+  return state_;
+}
+
+void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
+                                        bool first_frame_in_picture) {
+  // Always set picture id. Set tl0_pic_idx iff temporal index is set.
+  if (first_frame_in_picture) {
+    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
+  }
+  if (rtp_video_header->codec == kVideoCodecVP8) {
+    auto& vp8_header =
+        absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
+    vp8_header.pictureId = state_.picture_id;
+
+    if (vp8_header.temporalIdx != kNoTemporalIdx) {
+      if (vp8_header.temporalIdx == 0) {
+        ++state_.tl0_pic_idx;
+      }
+      vp8_header.tl0PicIdx = state_.tl0_pic_idx;
+    }
+  }
+  if (rtp_video_header->codec == kVideoCodecVP9) {
+    auto& vp9_header =
+        absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
+    vp9_header.picture_id = state_.picture_id;
+
+    // Note that in the case that we have no temporal layers but we do have
+    // spatial layers, packets will carry layering info with a temporal_idx of
+    // zero, and we then have to set and increment tl0_pic_idx.
+    if (vp9_header.temporal_idx != kNoTemporalIdx ||
+        vp9_header.spatial_idx != kNoSpatialIdx) {
+      if (first_frame_in_picture &&
+          (vp9_header.temporal_idx == 0 ||
+           vp9_header.temporal_idx == kNoTemporalIdx)) {
+        ++state_.tl0_pic_idx;
+      }
+      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
+    }
+  }
+  if (generic_picture_id_experiment_ &&
+      rtp_video_header->codec == kVideoCodecGeneric) {
+    rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+        .picture_id = state_.picture_id;
+  }
+}
+
+RTPVideoHeader::GenericDescriptorInfo
+RtpPayloadParams::GenericDescriptorFromFrameInfo(
+    const GenericFrameInfo& frame_info,
+    int64_t frame_id) {
+  RTPVideoHeader::GenericDescriptorInfo generic;
+  generic.frame_id = frame_id;
+  generic.dependencies = dependencies_calculator_.FromBuffersUsage(
+      frame_id, frame_info.encoder_buffers);
+  generic.chain_diffs =
+      chains_calculator_.From(frame_id, frame_info.part_of_chain);
+  generic.spatial_index = frame_info.spatial_id;
+  generic.temporal_index = frame_info.temporal_id;
+  generic.decode_target_indications = frame_info.decode_target_indications;
+  generic.active_decode_targets = frame_info.active_decode_targets;
+  return generic;
+}
+
+void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
+                                  int64_t frame_id,
+                                  bool is_keyframe,
+                                  RTPVideoHeader* rtp_video_header) {
+  if (codec_specific_info && codec_specific_info->generic_frame_info &&
+      !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
+    if (is_keyframe) {
+      // Key frame resets all chains it is in.
+      chains_calculator_.Reset(
+          codec_specific_info->generic_frame_info->part_of_chain);
+    }
+    rtp_video_header->generic = GenericDescriptorFromFrameInfo(
+        *codec_specific_info->generic_frame_info, frame_id);
+    return;
+  }
+
+  switch (rtp_video_header->codec) {
+    case VideoCodecType::kVideoCodecGeneric:
+      GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
+      return;
+    case VideoCodecType::kVideoCodecVP8:
+      if (codec_specific_info) {
+        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
+                     is_keyframe, rtp_video_header);
+      }
+      return;
+    case VideoCodecType::kVideoCodecVP9:
+      if (codec_specific_info != nullptr) {
+        Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
+                     *rtp_video_header);
+      }
+      return;
+    case VideoCodecType::kVideoCodecAV1:
+      // TODO(philipel): Implement AV1 to generic descriptor.
+      return;
+    case VideoCodecType::kVideoCodecH264:
+      if (codec_specific_info) {
+        H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
+                      is_keyframe, rtp_video_header);
+      }
+      return;
+    case VideoCodecType::kVideoCodecMultiplex:
+      return;
+    case VideoCodecType::kVideoCodecH265:
+      // TODO(bugs.webrtc.org/13485): Implement H265 to generic descriptor.
+      return;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+}
+
+absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
+    const CodecSpecificInfo* codec_specific_info) {
+  if (codec_specific_info == nullptr) {
+    return absl::nullopt;
+  }
+  // This helper shouldn't be used when template structure is specified
+  // explicetly.
+  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
+  switch (codec_specific_info->codecType) {
+    case VideoCodecType::kVideoCodecGeneric:
+      if (simulate_generic_structure_) {
+        return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                     /*num_temporal_layer=*/1);
+      }
+      return absl::nullopt;
+    case VideoCodecType::kVideoCodecVP8:
+      return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                   /*num_temporal_layer=*/kMaxTemporalStreams);
+    case VideoCodecType::kVideoCodecVP9: {
+      absl::optional<FrameDependencyStructure> structure =
+          MinimalisticStructure(
+              /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
+              /*num_temporal_layer=*/kMaxTemporalStreams);
+      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
+      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+        RenderResolution first_valid;
+        RenderResolution last_valid;
+        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
+          RenderResolution r(vp9.width[i], vp9.height[i]);
+          if (r.Valid()) {
+            if (!first_valid.Valid()) {
+              first_valid = r;
+            }
+            last_valid = r;
+          }
+          structure->resolutions.push_back(r);
+        }
+        if (!last_valid.Valid()) {
+          // No valid resolution found. Do not send resolutions.
+          structure->resolutions.clear();
+        } else {
+          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
+          // VP9 encoder wrapper may disable first few spatial layers by
+          // setting invalid resolution (0,0). `structure->resolutions`
+          // doesn't support invalid resolution, so reset them to something
+          // valid.
+          for (RenderResolution& r : structure->resolutions) {
+            if (!r.Valid()) {
+              r = first_valid;
+            }
+          }
+        }
+      }
+      return structure;
+    }
+    case VideoCodecType::kVideoCodecAV1:
+    case VideoCodecType::kVideoCodecH264:
+    case VideoCodecType::kVideoCodecH265:
+    case VideoCodecType::kVideoCodecMultiplex:
+      return absl::nullopt;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+}
+
+void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
+                                        bool is_keyframe,
+                                        RTPVideoHeader* rtp_video_header) {
+  RTPVideoHeader::GenericDescriptorInfo& generic =
+      rtp_video_header->generic.emplace();
+
+  generic.frame_id = shared_frame_id;
+  generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);
+
+  if (is_keyframe) {
+    generic.chain_diffs.push_back(0);
+    last_shared_frame_id_[0].fill(-1);
+  } else {
+    int64_t frame_id = last_shared_frame_id_[0][0];
+    RTC_DCHECK_NE(frame_id, -1);
+    RTC_DCHECK_LT(frame_id, shared_frame_id);
+    generic.chain_diffs.push_back(shared_frame_id - frame_id);
+    generic.dependencies.push_back(frame_id);
+  }
+
+  last_shared_frame_id_[0][0] = shared_frame_id;
+}
+
+void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
+                                     int64_t shared_frame_id,
+                                     bool is_keyframe,
+                                     RTPVideoHeader* rtp_video_header) {
+  const int temporal_index =
+      h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;
+
+  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
+    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
+                           "used with generic frame descriptor.";
+    return;
+  }
+
+  RTPVideoHeader::GenericDescriptorInfo& generic =
+      rtp_video_header->generic.emplace();
+
+  generic.frame_id = shared_frame_id;
+  generic.temporal_index = temporal_index;
+
+  if (is_keyframe) {
+    RTC_DCHECK_EQ(temporal_index, 0);
+    last_shared_frame_id_[/*spatial index*/ 0].fill(-1);
+    last_shared_frame_id_[/*spatial index*/ 0][temporal_index] =
+        shared_frame_id;
+    return;
+  }
+
+  if (h264_info.base_layer_sync) {
+    int64_t tl0_frame_id = last_shared_frame_id_[/*spatial index*/ 0][0];
+
+    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
+      if (last_shared_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
+        last_shared_frame_id_[/*spatial index*/ 0][i] = -1;
+      }
+    }
+
+    RTC_DCHECK_GE(tl0_frame_id, 0);
+    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
+    generic.dependencies.push_back(tl0_frame_id);
+  } else {
+    for (int i = 0; i <= temporal_index; ++i) {
+      int64_t frame_id = last_shared_frame_id_[/*spatial index*/ 0][i];
+
+      if (frame_id != -1) {
+        RTC_DCHECK_LT(frame_id, shared_frame_id);
+        generic.dependencies.push_back(frame_id);
+      }
+    }
+  }
+
+  last_shared_frame_id_[/*spatial_index*/ 0][temporal_index] = shared_frame_id;
+}
+
+void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
+                                    int64_t shared_frame_id,
+                                    bool is_keyframe,
+                                    RTPVideoHeader* rtp_video_header) {
+  const auto& vp8_header =
+      absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
+  const int spatial_index = 0;
+  const int temporal_index =
+      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;
+
+  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
+      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
+                           "used with generic frame descriptor.";
+    return;
+  }
+
+  RTPVideoHeader::GenericDescriptorInfo& generic =
+      rtp_video_header->generic.emplace();
+
+  generic.frame_id = shared_frame_id;
+  generic.spatial_index = spatial_index;
+  generic.temporal_index = temporal_index;
+
+  // Generate decode target indications.
+  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
+  generic.decode_target_indications.resize(kMaxTemporalStreams);
+  auto it = std::fill_n(generic.decode_target_indications.begin(),
+                        temporal_index, DecodeTargetIndication::kNotPresent);
+  std::fill(it, generic.decode_target_indications.end(),
+            DecodeTargetIndication::kSwitch);
+
+  // Frame dependencies.
+  if (vp8_info.useExplicitDependencies) {
+    SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
+                          vp8_header.layerSync, &generic);
+  } else {
+    SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
+                                 spatial_index, temporal_index,
+                                 vp8_header.layerSync, &generic);
+  }
+
+  // Calculate chains.
+  generic.chain_diffs = {
+      (is_keyframe || chain_last_frame_id_[0] < 0)
+          ? 0
+          : static_cast<int>(shared_frame_id - chain_last_frame_id_[0])};
+  if (temporal_index == 0) {
+    chain_last_frame_id_[0] = shared_frame_id;
+  }
+}
+
+void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
+                                    int64_t shared_frame_id,
+                                    RTPVideoHeader& rtp_video_header) {
+  const auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
+  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
+  const int first_active_spatial_id = vp9_header.first_active_layer;
+  const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
+  const int num_temporal_layers = kMaxTemporalStreams;
+  static_assert(num_spatial_layers <=
+                RtpGenericFrameDescriptor::kMaxSpatialLayers);
+  static_assert(num_temporal_layers <=
+                RtpGenericFrameDescriptor::kMaxTemporalLayers);
+  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
+  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
+
+  int spatial_index =
+      vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
+  int temporal_index =
+      vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
+
+  if (!(temporal_index < num_temporal_layers &&
+        first_active_spatial_id <= spatial_index &&
+        spatial_index <= last_active_spatial_id &&
+        last_active_spatial_id < num_spatial_layers)) {
+    // Prefer to generate no generic layering than an inconsistent one.
+    RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
+                      << ",tid=" << temporal_index
+                      << " in VP9 header. Active spatial ids: ["
+                      << first_active_spatial_id << ","
+                      << last_active_spatial_id << "]";
+    return;
+  }
+
+  RTPVideoHeader::GenericDescriptorInfo& result =
+      rtp_video_header.generic.emplace();
+
+  result.frame_id = shared_frame_id;
+  result.spatial_index = spatial_index;
+  result.temporal_index = temporal_index;
+
+  result.decode_target_indications.reserve(num_spatial_layers *
+                                           num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      DecodeTargetIndication dti;
+      if (sid < spatial_index || tid < temporal_index) {
+        dti = DecodeTargetIndication::kNotPresent;
+      } else if (spatial_index != sid &&
+                 vp9_header.non_ref_for_inter_layer_pred) {
+        dti = DecodeTargetIndication::kNotPresent;
+      } else if (sid == spatial_index && tid == temporal_index) {
+        // Assume that if frame is decodable, all of its own layer is decodable.
+        dti = DecodeTargetIndication::kSwitch;
+      } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
+        dti = DecodeTargetIndication::kSwitch;
+      } else if (!vp9_header.inter_pic_predicted) {
+        // Key frame or spatial upswitch
+        dti = DecodeTargetIndication::kSwitch;
+      } else {
+        // Make no other assumptions. That should be safe, though suboptimal.
+        // To provide more accurate dti, encoder wrapper should fill in
+        // CodecSpecificInfo::generic_frame_info
+        dti = DecodeTargetIndication::kRequired;
+      }
+      result.decode_target_indications.push_back(dti);
+    }
+  }
+
+  // Calculate frame dependencies.
+  static constexpr int kPictureDiffLimit = 128;
+  if (last_vp9_frame_id_.empty()) {
+    // Create the array only if it is ever used.
+    last_vp9_frame_id_.resize(kPictureDiffLimit);
+  }
+
+  if (vp9_header.flexible_mode) {
+    if (vp9_header.inter_layer_predicted && spatial_index > 0) {
+      result.dependencies.push_back(
+          last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+                            [spatial_index - 1]);
+    }
+    if (vp9_header.inter_pic_predicted) {
+      for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
+        // picture_id is 15 bit number that wraps around. Though undeflow may
+        // produce picture that exceeds 2^15, it is ok because in this
+        // code block only last 7 bits of the picture_id are used.
+        uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+        result.dependencies.push_back(
+            last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+      }
+    }
+    last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+                      [spatial_index] = shared_frame_id;
+  } else {
+    // Implementing general conversion logic for non-flexible mode requires some
+    // work and we will almost certainly never need it, so for now support only
+    // non-layerd streams.
+    if (spatial_index > 0 || temporal_index > 0) {
+      // Prefer to generate no generic layering than an inconsistent one.
+      rtp_video_header.generic.reset();
+      return;
+    }
+
+    if (vp9_header.inter_pic_predicted) {
+      // Since we only support non-scalable streams we only need to save the
+      // last frame id.
+      result.dependencies.push_back(last_vp9_frame_id_[0][0]);
+    }
+    last_vp9_frame_id_[0][0] = shared_frame_id;
+  }
+
+  result.active_decode_targets =
+      ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
+       1) ^
+      ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);
+
+  // Calculate chains, asuming chain includes all frames with temporal_id = 0
+  if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
+    // Assume frames without dependencies also reset chains.
+    for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
+      chain_last_frame_id_[sid] = -1;
+    }
+  }
+  result.chain_diffs.resize(num_spatial_layers, 0);
+  for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
+       ++sid) {
+    if (chain_last_frame_id_[sid] == -1) {
+      result.chain_diffs[sid] = 0;
+      continue;
+    }
+    int64_t chain_diff = shared_frame_id - chain_last_frame_id_[sid];
+    if (chain_diff >= 256) {
+      RTC_LOG(LS_ERROR)
+          << "Too many frames since last VP9 T0 frame for spatial layer #"
+          << sid << " at frame#" << shared_frame_id;
+      chain_last_frame_id_[sid] = -1;
+      chain_diff = 0;
+    }
+    result.chain_diffs[sid] = chain_diff;
+  }
+
+  if (temporal_index == 0) {
+    chain_last_frame_id_[spatial_index] = shared_frame_id;
+    if (!vp9_header.non_ref_for_inter_layer_pred) {
+      for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
+        chain_last_frame_id_[sid] = shared_frame_id;
+      }
+    }
+  }
+}
+
+void RtpPayloadParams::SetDependenciesVp8Deprecated(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    int spatial_index,
+    int temporal_index,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(!vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
+  new_version_used_ = false;
+
+  if (is_keyframe) {
+    RTC_DCHECK_EQ(temporal_index, 0);
+    last_shared_frame_id_[spatial_index].fill(-1);
+    last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
+    return;
+  }
+
+  if (layer_sync) {
+    int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];
+
+    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
+      if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
+        last_shared_frame_id_[spatial_index][i] = -1;
+      }
+    }
+
+    RTC_DCHECK_GE(tl0_frame_id, 0);
+    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
+    generic->dependencies.push_back(tl0_frame_id);
+  } else {
+    for (int i = 0; i <= temporal_index; ++i) {
+      int64_t frame_id = last_shared_frame_id_[spatial_index][i];
+
+      if (frame_id != -1) {
+        RTC_DCHECK_LT(frame_id, shared_frame_id);
+        generic->dependencies.push_back(frame_id);
+      }
+    }
+  }
+
+  last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
+}
+
+void RtpPayloadParams::SetDependenciesVp8New(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
+  new_version_used_ = true;
+
+  if (is_keyframe) {
+    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
+    buffer_id_to_frame_id_.fill(shared_frame_id);
+    return;
+  }
+
+  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
+
+  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
+  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
+                arraysize(vp8_info.referencedBuffers));
+
+  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
+    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
+    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
+    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
+
+    const int64_t dependency_frame_id =
+        buffer_id_to_frame_id_[referenced_buffer];
+    RTC_DCHECK_GE(dependency_frame_id, 0);
+    RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);
+
+    const bool is_new_dependency =
+        std::find(generic->dependencies.begin(), generic->dependencies.end(),
+                  dependency_frame_id) == generic->dependencies.end();
+    if (is_new_dependency) {
+      generic->dependencies.push_back(dependency_frame_id);
+    }
+  }
+
+  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
+  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
+    const size_t updated_id = vp8_info.updatedBuffers[i];
+    buffer_id_to_frame_id_[updated_id] = shared_frame_id;
+  }
+
+  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
+}
+
+}  // namespace webrtc
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/call/rtp_payload_params.cc
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip