summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc')
-rw-r--r--third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc340
1 files changed, 340 insertions, 0 deletions
diff --git a/third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc b/third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc
new file mode 100644
index 0000000000..b748534f3f
--- /dev/null
+++ b/third_party/libwebrtc/api/video/rtp_video_frame_assembler.cc
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/video/rtp_video_frame_assembler.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/types/optional.h"
+#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
+#include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h"
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/packet_buffer.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+
+namespace webrtc {
+namespace {
+std::unique_ptr<VideoRtpDepacketizer> CreateDepacketizer(
+ RtpVideoFrameAssembler::PayloadFormat payload_format) {
+ switch (payload_format) {
+ case RtpVideoFrameAssembler::kRaw:
+ return std::make_unique<VideoRtpDepacketizerRaw>();
+ case RtpVideoFrameAssembler::kH264:
+ return std::make_unique<VideoRtpDepacketizerH264>();
+ case RtpVideoFrameAssembler::kVp8:
+ return std::make_unique<VideoRtpDepacketizerVp8>();
+ case RtpVideoFrameAssembler::kVp9:
+ return std::make_unique<VideoRtpDepacketizerVp9>();
+ case RtpVideoFrameAssembler::kAv1:
+ return std::make_unique<VideoRtpDepacketizerAv1>();
+ case RtpVideoFrameAssembler::kGeneric:
+ return std::make_unique<VideoRtpDepacketizerGeneric>();
+ }
+ RTC_DCHECK_NOTREACHED();
+ return nullptr;
+}
+} // namespace
+
+class RtpVideoFrameAssembler::Impl {
+ public:
+ explicit Impl(std::unique_ptr<VideoRtpDepacketizer> depacketizer);
+ ~Impl() = default;
+
+ FrameVector InsertPacket(const RtpPacketReceived& packet);
+
+ private:
+ using RtpFrameVector =
+ absl::InlinedVector<std::unique_ptr<RtpFrameObject>, 3>;
+
+ RtpFrameVector AssembleFrames(
+ video_coding::PacketBuffer::InsertResult insert_result);
+ FrameVector FindReferences(RtpFrameVector frames);
+ FrameVector UpdateWithPadding(uint16_t seq_num);
+ bool ParseDependenciesDescriptorExtension(const RtpPacketReceived& rtp_packet,
+ RTPVideoHeader& video_header);
+ bool ParseGenericDescriptorExtension(const RtpPacketReceived& rtp_packet,
+ RTPVideoHeader& video_header);
+ void ClearOldData(uint16_t incoming_seq_num);
+
+ std::unique_ptr<FrameDependencyStructure> video_structure_;
+ SeqNumUnwrapper<uint16_t> frame_id_unwrapper_;
+ absl::optional<int64_t> video_structure_frame_id_;
+ std::unique_ptr<VideoRtpDepacketizer> depacketizer_;
+ video_coding::PacketBuffer packet_buffer_;
+ RtpFrameReferenceFinder reference_finder_;
+};
+
+RtpVideoFrameAssembler::Impl::Impl(
+ std::unique_ptr<VideoRtpDepacketizer> depacketizer)
+ : depacketizer_(std::move(depacketizer)),
+ packet_buffer_(/*start_buffer_size=*/2048, /*max_buffer_size=*/2048) {}
+
+RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::Impl::InsertPacket(
+ const RtpPacketReceived& rtp_packet) {
+ if (rtp_packet.payload_size() == 0) {
+ ClearOldData(rtp_packet.SequenceNumber());
+ return UpdateWithPadding(rtp_packet.SequenceNumber());
+ }
+
+ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload =
+ depacketizer_->Parse(rtp_packet.PayloadBuffer());
+
+ if (parsed_payload == absl::nullopt) {
+ return {};
+ }
+
+ if (rtp_packet.HasExtension<RtpDependencyDescriptorExtension>()) {
+ if (!ParseDependenciesDescriptorExtension(rtp_packet,
+ parsed_payload->video_header)) {
+ return {};
+ }
+ } else if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>()) {
+ if (!ParseGenericDescriptorExtension(rtp_packet,
+ parsed_payload->video_header)) {
+ return {};
+ }
+ }
+
+ parsed_payload->video_header.is_last_packet_in_frame |= rtp_packet.Marker();
+
+ auto packet = std::make_unique<video_coding::PacketBuffer::Packet>(
+ rtp_packet, parsed_payload->video_header);
+ packet->video_payload = std::move(parsed_payload->video_payload);
+
+ ClearOldData(rtp_packet.SequenceNumber());
+ return FindReferences(
+ AssembleFrames(packet_buffer_.InsertPacket(std::move(packet))));
+}
+
+void RtpVideoFrameAssembler::Impl::ClearOldData(uint16_t incoming_seq_num) {
+ constexpr uint16_t kOldSeqNumThreshold = 2000;
+ uint16_t old_seq_num = incoming_seq_num - kOldSeqNumThreshold;
+ packet_buffer_.ClearTo(old_seq_num);
+ reference_finder_.ClearTo(old_seq_num);
+}
+
+RtpVideoFrameAssembler::Impl::RtpFrameVector
+RtpVideoFrameAssembler::Impl::AssembleFrames(
+ video_coding::PacketBuffer::InsertResult insert_result) {
+ video_coding::PacketBuffer::Packet* first_packet = nullptr;
+ std::vector<rtc::ArrayView<const uint8_t>> payloads;
+ RtpFrameVector result;
+
+ for (auto& packet : insert_result.packets) {
+ if (packet->is_first_packet_in_frame()) {
+ first_packet = packet.get();
+ payloads.clear();
+ }
+ payloads.emplace_back(packet->video_payload);
+
+ if (packet->is_last_packet_in_frame()) {
+ rtc::scoped_refptr<EncodedImageBuffer> bitstream =
+ depacketizer_->AssembleFrame(payloads);
+
+ if (!bitstream) {
+ continue;
+ }
+
+ const video_coding::PacketBuffer::Packet& last_packet = *packet;
+ result.push_back(std::make_unique<RtpFrameObject>(
+ first_packet->seq_num, //
+ last_packet.seq_num, //
+ last_packet.marker_bit, //
+ /*times_nacked=*/0, //
+ /*first_packet_received_time=*/0, //
+ /*last_packet_received_time=*/0, //
+ first_packet->timestamp, //
+ /*ntp_time_ms=*/0, //
+ /*timing=*/VideoSendTiming(), //
+ first_packet->payload_type, //
+ first_packet->codec(), //
+ last_packet.video_header.rotation, //
+ last_packet.video_header.content_type, //
+ first_packet->video_header, //
+ last_packet.video_header.color_space, //
+ /*packet_infos=*/RtpPacketInfos(), //
+ std::move(bitstream)));
+ }
+ }
+
+ return result;
+}
+
+RtpVideoFrameAssembler::FrameVector
+RtpVideoFrameAssembler::Impl::FindReferences(RtpFrameVector frames) {
+ FrameVector res;
+ for (auto& frame : frames) {
+ auto complete_frames = reference_finder_.ManageFrame(std::move(frame));
+ for (std::unique_ptr<RtpFrameObject>& complete_frame : complete_frames) {
+ uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
+ uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
+ res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
+ std::move(complete_frame));
+ }
+ }
+ return res;
+}
+
+RtpVideoFrameAssembler::FrameVector
+RtpVideoFrameAssembler::Impl::UpdateWithPadding(uint16_t seq_num) {
+ auto res =
+ FindReferences(AssembleFrames(packet_buffer_.InsertPadding(seq_num)));
+ auto ref_finder_update = reference_finder_.PaddingReceived(seq_num);
+
+ for (std::unique_ptr<RtpFrameObject>& complete_frame : ref_finder_update) {
+ uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
+ uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
+ res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
+ std::move(complete_frame));
+ }
+
+ return res;
+}
+
+bool RtpVideoFrameAssembler::Impl::ParseDependenciesDescriptorExtension(
+ const RtpPacketReceived& rtp_packet,
+ RTPVideoHeader& video_header) {
+ webrtc::DependencyDescriptor dependency_descriptor;
+
+ if (!rtp_packet.GetExtension<RtpDependencyDescriptorExtension>(
+ video_structure_.get(), &dependency_descriptor)) {
+ // Descriptor is either malformed, or the template referenced is not in
+ // the `video_structure_` currently being held.
+ // TODO(bugs.webrtc.org/10342): Improve packet reordering behavior.
+ RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
+ << " Failed to parse dependency descriptor.";
+ return false;
+ }
+
+ if (dependency_descriptor.attached_structure != nullptr &&
+ !dependency_descriptor.first_packet_in_frame) {
+ RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
+ << "Invalid dependency descriptor: structure "
+ "attached to non first packet of a frame.";
+ return false;
+ }
+
+ video_header.is_first_packet_in_frame =
+ dependency_descriptor.first_packet_in_frame;
+ video_header.is_last_packet_in_frame =
+ dependency_descriptor.last_packet_in_frame;
+
+ int64_t frame_id =
+ frame_id_unwrapper_.Unwrap(dependency_descriptor.frame_number);
+ auto& generic_descriptor_info = video_header.generic.emplace();
+ generic_descriptor_info.frame_id = frame_id;
+ generic_descriptor_info.spatial_index =
+ dependency_descriptor.frame_dependencies.spatial_id;
+ generic_descriptor_info.temporal_index =
+ dependency_descriptor.frame_dependencies.temporal_id;
+
+ for (int fdiff : dependency_descriptor.frame_dependencies.frame_diffs) {
+ generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
+ }
+ for (int cdiff : dependency_descriptor.frame_dependencies.chain_diffs) {
+ generic_descriptor_info.chain_diffs.push_back(frame_id - cdiff);
+ }
+ generic_descriptor_info.decode_target_indications =
+ dependency_descriptor.frame_dependencies.decode_target_indications;
+ if (dependency_descriptor.resolution) {
+ video_header.width = dependency_descriptor.resolution->Width();
+ video_header.height = dependency_descriptor.resolution->Height();
+ }
+ if (dependency_descriptor.active_decode_targets_bitmask.has_value()) {
+ generic_descriptor_info.active_decode_targets =
+ *dependency_descriptor.active_decode_targets_bitmask;
+ }
+
+ // FrameDependencyStructure is sent in the dependency descriptor of the first
+ // packet of a key frame and is required to parse all subsequent packets until
+ // the next key frame.
+ if (dependency_descriptor.attached_structure) {
+ RTC_DCHECK(dependency_descriptor.first_packet_in_frame);
+ if (video_structure_frame_id_ > frame_id) {
+ RTC_LOG(LS_WARNING)
+ << "Arrived key frame with id " << frame_id << " and structure id "
+ << dependency_descriptor.attached_structure->structure_id
+ << " is older than the latest received key frame with id "
+ << *video_structure_frame_id_ << " and structure id "
+ << video_structure_->structure_id;
+ return false;
+ }
+ video_structure_ = std::move(dependency_descriptor.attached_structure);
+ video_structure_frame_id_ = frame_id;
+ video_header.frame_type = VideoFrameType::kVideoFrameKey;
+ } else {
+ video_header.frame_type = VideoFrameType::kVideoFrameDelta;
+ }
+ return true;
+}
+
+bool RtpVideoFrameAssembler::Impl::ParseGenericDescriptorExtension(
+ const RtpPacketReceived& rtp_packet,
+ RTPVideoHeader& video_header) {
+ RtpGenericFrameDescriptor generic_frame_descriptor;
+ if (!rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
+ &generic_frame_descriptor)) {
+ return false;
+ }
+
+ video_header.is_first_packet_in_frame =
+ generic_frame_descriptor.FirstPacketInSubFrame();
+ video_header.is_last_packet_in_frame =
+ generic_frame_descriptor.LastPacketInSubFrame();
+
+ if (generic_frame_descriptor.FirstPacketInSubFrame()) {
+ video_header.frame_type =
+ generic_frame_descriptor.FrameDependenciesDiffs().empty()
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+
+ auto& generic_descriptor_info = video_header.generic.emplace();
+ int64_t frame_id =
+ frame_id_unwrapper_.Unwrap(generic_frame_descriptor.FrameId());
+ generic_descriptor_info.frame_id = frame_id;
+ generic_descriptor_info.spatial_index =
+ generic_frame_descriptor.SpatialLayer();
+ generic_descriptor_info.temporal_index =
+ generic_frame_descriptor.TemporalLayer();
+ for (uint16_t fdiff : generic_frame_descriptor.FrameDependenciesDiffs()) {
+ generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
+ }
+ }
+ video_header.width = generic_frame_descriptor.Width();
+ video_header.height = generic_frame_descriptor.Height();
+ return true;
+}
+
+RtpVideoFrameAssembler::RtpVideoFrameAssembler(PayloadFormat payload_format)
+ : impl_(std::make_unique<Impl>(CreateDepacketizer(payload_format))) {}
+
+RtpVideoFrameAssembler::~RtpVideoFrameAssembler() = default;
+
+RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::InsertPacket(
+ const RtpPacketReceived& packet) {
+ return impl_->InsertPacket(packet);
+}
+
+} // namespace webrtc