diff --git a/BUILD.gn b/BUILD.gn index 3ad9c97c31..2d62b3e797 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -236,6 +236,10 @@ config("common_inherited_config") { if (is_ubsan) { cflags += [ "-fsanitize=float-cast-overflow" ] } + + if (rtc_use_h265) { + defines += [ "WEBRTC_USE_H265" ] + } } # TODO(bugs.webrtc.org/9693): Remove the possibility to suppress this warning @@ -293,6 +297,10 @@ config("common_config") { defines += [ "WEBRTC_USE_H264" ] } + if (rtc_use_h265) { + defines += [ "WEBRTC_USE_H265" ] + } + if (rtc_use_absl_mutex) { defines += [ "WEBRTC_ABSL_MUTEX" ] } diff --git a/api/video/video_codec_type.h b/api/video/video_codec_type.h index 74a4bc4258..aad835281d 100644 --- a/api/video/video_codec_type.h +++ b/api/video/video_codec_type.h @@ -21,6 +21,7 @@ enum VideoCodecType { kVideoCodecVP9, kVideoCodecAV1, kVideoCodecH264, + kVideoCodecH265, kVideoCodecMultiplex, }; diff --git a/api/video_codecs/video_codec.cc b/api/video_codecs/video_codec.cc index c6122d3f6a..65783689b2 100644 --- a/api/video_codecs/video_codec.cc +++ b/api/video_codecs/video_codec.cc @@ -26,6 +26,7 @@ constexpr char kPayloadNameAv1[] = "AV1"; // needed. 
constexpr char kPayloadNameAv1x[] = "AV1X"; constexpr char kPayloadNameH264[] = "H264"; +constexpr char kPayloadNameH265[] = "H265"; constexpr char kPayloadNameGeneric[] = "Generic"; constexpr char kPayloadNameMultiplex[] = "Multiplex"; } // namespace @@ -52,6 +53,15 @@ bool VideoCodecH264::operator==(const VideoCodecH264& other) const { numberOfTemporalLayers == other.numberOfTemporalLayers); } +bool VideoCodecH265::operator==(const VideoCodecH265& other) const { + return (frameDroppingOn == other.frameDroppingOn && + keyFrameInterval == other.keyFrameInterval && + vpsLen == other.vpsLen && spsLen == other.spsLen && + ppsLen == other.ppsLen && + (spsLen == 0 || memcmp(spsData, other.spsData, spsLen) == 0) && + (ppsLen == 0 || memcmp(ppsData, other.ppsData, ppsLen) == 0)); +} + VideoCodec::VideoCodec() : codecType(kVideoCodecGeneric), width(0), @@ -102,6 +112,16 @@ const VideoCodecH264& VideoCodec::H264() const { return codec_specific_.H264; } +VideoCodecH265* VideoCodec::H265() { + RTC_DCHECK_EQ(codecType, kVideoCodecH265); + return &codec_specific_.H265; +} + +const VideoCodecH265& VideoCodec::H265() const { + RTC_DCHECK_EQ(codecType, kVideoCodecH265); + return codec_specific_.H265; +} + const char* CodecTypeToPayloadString(VideoCodecType type) { switch (type) { case kVideoCodecVP8: @@ -112,9 +132,12 @@ const char* CodecTypeToPayloadString(VideoCodecType type) { return kPayloadNameAv1; case kVideoCodecH264: return kPayloadNameH264; + case kVideoCodecH265: + return kPayloadNameH265; case kVideoCodecMultiplex: return kPayloadNameMultiplex; case kVideoCodecGeneric: + default: return kPayloadNameGeneric; } RTC_CHECK_NOTREACHED(); @@ -132,6 +155,8 @@ VideoCodecType PayloadStringToCodecType(const std::string& name) { return kVideoCodecH264; if (absl::EqualsIgnoreCase(name, kPayloadNameMultiplex)) return kVideoCodecMultiplex; + if (absl::EqualsIgnoreCase(name, kPayloadNameH265)) + return kVideoCodecH265; return kVideoCodecGeneric; } diff --git 
a/api/video_codecs/video_codec.h b/api/video_codecs/video_codec.h index 068b09d27a..ecabe5c63b 100644 --- a/api/video_codecs/video_codec.h +++ b/api/video_codecs/video_codec.h @@ -82,6 +82,21 @@ struct VideoCodecH264 { uint8_t numberOfTemporalLayers; }; +struct VideoCodecH265 { + bool operator==(const VideoCodecH265& other) const; + bool operator!=(const VideoCodecH265& other) const { + return !(*this == other); + } + bool frameDroppingOn; + int keyFrameInterval; + const uint8_t* vpsData; + size_t vpsLen; + const uint8_t* spsData; + size_t spsLen; + const uint8_t* ppsData; + size_t ppsLen; +}; + // Translates from name of codec to codec type and vice versa. RTC_EXPORT const char* CodecTypeToPayloadString(VideoCodecType type); RTC_EXPORT VideoCodecType PayloadStringToCodecType(const std::string& name); @@ -90,6 +105,7 @@ union VideoCodecUnion { VideoCodecVP8 VP8; VideoCodecVP9 VP9; VideoCodecH264 H264; + VideoCodecH265 H265; }; enum class VideoCodecMode { kRealtimeVideo, kScreensharing }; @@ -169,6 +185,8 @@ class RTC_EXPORT VideoCodec { const VideoCodecVP9& VP9() const; VideoCodecH264* H264(); const VideoCodecH264& H264() const; + VideoCodecH265* H265(); + const VideoCodecH265& H265() const; private: // TODO(hta): Consider replacing the union with a pointer type. 
diff --git a/api/video_codecs/video_decoder_software_fallback_wrapper.cc b/api/video_codecs/video_decoder_software_fallback_wrapper.cc index cf6f823b92..e664965c55 100644 --- a/api/video_codecs/video_decoder_software_fallback_wrapper.cc +++ b/api/video_codecs/video_decoder_software_fallback_wrapper.cc @@ -167,6 +167,10 @@ void VideoDecoderSoftwareFallbackWrapper::UpdateFallbackDecoderHistograms() { RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "H264", hw_decoded_frames_since_last_fallback_); break; + case kVideoCodecH265: + RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "H265", + hw_decoded_frames_since_last_fallback_); + break; case kVideoCodecMultiplex: RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "Multiplex", hw_decoded_frames_since_last_fallback_); diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc index b85b9328cf..0d3b8d65a9 100644 --- a/api/video_codecs/video_encoder.cc +++ b/api/video_codecs/video_encoder.cc @@ -57,6 +57,23 @@ VideoCodecH264 VideoEncoder::GetDefaultH264Settings() { return h264_settings; } +#ifdef WEBRTC_USE_H265 +VideoCodecH265 VideoEncoder::GetDefaultH265Settings() { + VideoCodecH265 h265_settings; + memset(&h265_settings, 0, sizeof(h265_settings)); + + // h265_settings.profile = kProfileBase; + h265_settings.frameDroppingOn = true; + h265_settings.keyFrameInterval = 3000; + h265_settings.spsData = nullptr; + h265_settings.spsLen = 0; + h265_settings.ppsData = nullptr; + h265_settings.ppsLen = 0; + + return h265_settings; +} +#endif + VideoEncoder::ScalingSettings::ScalingSettings() = default; VideoEncoder::ScalingSettings::ScalingSettings(KOff) : ScalingSettings() {} diff --git a/api/video_codecs/video_encoder.h b/api/video_codecs/video_encoder.h index 94d7287f78..d122577ff8 100644 --- a/api/video_codecs/video_encoder.h +++ b/api/video_codecs/video_encoder.h @@ -333,6 +333,9 @@ class RTC_EXPORT VideoEncoder { static VideoCodecVP8 GetDefaultVp8Settings(); static 
VideoCodecVP9 GetDefaultVp9Settings(); static VideoCodecH264 GetDefaultH264Settings(); +#ifdef WEBRTC_USE_H265 + static VideoCodecH265 GetDefaultH265Settings(); +#endif virtual ~VideoEncoder() {} diff --git a/api/video_codecs/video_encoder_config.cc b/api/video_codecs/video_encoder_config.cc index fd4a68fc05..9e624ed58c 100644 --- a/api/video_codecs/video_encoder_config.cc +++ b/api/video_codecs/video_encoder_config.cc @@ -96,6 +96,10 @@ void VideoEncoderConfig::EncoderSpecificSettings::FillEncoderSpecificSettings( FillVideoCodecVp8(codec->VP8()); } else if (codec->codecType == kVideoCodecVP9) { FillVideoCodecVp9(codec->VP9()); +#ifdef WEBRTC_USE_H265 + } else if (codec->codecType == kVideoCodecH265) { + FillVideoCodecH265(codec->H265()); +#endif } else { RTC_DCHECK_NOTREACHED() << "Encoder specifics set/used for unknown codec type."; @@ -112,6 +116,22 @@ void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecVp9( RTC_DCHECK_NOTREACHED(); } +#ifdef WEBRTC_USE_H265 +void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecH265( + VideoCodecH265* h265_settings) const { + RTC_DCHECK_NOTREACHED(); +} + +VideoEncoderConfig::H265EncoderSpecificSettings::H265EncoderSpecificSettings( + const VideoCodecH265& specifics) + : specifics_(specifics) {} + +void VideoEncoderConfig::H265EncoderSpecificSettings::FillVideoCodecH265( + VideoCodecH265* h265_settings) const { + *h265_settings = specifics_; +} +#endif + VideoEncoderConfig::Vp8EncoderSpecificSettings::Vp8EncoderSpecificSettings( const VideoCodecVP8& specifics) : specifics_(specifics) {} diff --git a/api/video_codecs/video_encoder_config.h b/api/video_codecs/video_encoder_config.h index 4076208b56..6d7d39d44e 100644 --- a/api/video_codecs/video_encoder_config.h +++ b/api/video_codecs/video_encoder_config.h @@ -86,12 +86,26 @@ class VideoEncoderConfig { virtual void FillVideoCodecVp8(VideoCodecVP8* vp8_settings) const; virtual void FillVideoCodecVp9(VideoCodecVP9* vp9_settings) const; +#ifdef WEBRTC_USE_H265 
+ virtual void FillVideoCodecH265(VideoCodecH265* h265_settings) const; +#endif private: ~EncoderSpecificSettings() override {} friend class VideoEncoderConfig; }; +#ifdef WEBRTC_USE_H265 + class H265EncoderSpecificSettings : public EncoderSpecificSettings { + public: + explicit H265EncoderSpecificSettings(const VideoCodecH265& specifics); + void FillVideoCodecH265(VideoCodecH265* h265_settings) const override; + + private: + VideoCodecH265 specifics_; + }; +#endif + class Vp8EncoderSpecificSettings : public EncoderSpecificSettings { public: explicit Vp8EncoderSpecificSettings(const VideoCodecVP8& specifics); diff --git a/build_overrides/build.gni b/build_overrides/build.gni index 137b6a40b2..ba5dbaf4a4 100644 --- a/build_overrides/build.gni +++ b/build_overrides/build.gni @@ -32,6 +32,12 @@ ubsan_vptr_ignorelist_path = # so we just ignore that assert. See https://crbug.com/648948 for more info. ignore_elf32_limitations = true +if (is_win || is_ios || is_android) { + rtc_use_h265 = true +} else { + rtc_use_h265 = false +} + # Use bundled hermetic Xcode installation maintainted by Chromium, # except for local iOS builds where it's unsupported. # Allow for mac cross compile on linux machines. 
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index 6ff7549901..b62da911d7 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -98,6 +98,14 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, rtp->simulcastIdx = spatial_index.value_or(0); return; } +#ifdef WEBRTC_USE_H265 + case kVideoCodecH265: { + auto& h265_header = rtp->video_type_header.emplace(); + h265_header.packetization_mode = + info.codecSpecific.H265.packetization_mode; + } + return; +#endif case kVideoCodecMultiplex: case kVideoCodecGeneric: rtp->codec = kVideoCodecGeneric; @@ -341,6 +349,12 @@ void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info, is_keyframe, rtp_video_header); } return; + case VideoCodecType::kVideoCodecH265: + if (codec_specific_info) { + H265ToGeneric(codec_specific_info->codecSpecific.H265, frame_id, + is_keyframe, rtp_video_header); + } + return; case VideoCodecType::kVideoCodecMultiplex: return; } @@ -404,6 +418,7 @@ absl::optional RtpPayloadParams::GenericStructure( } case VideoCodecType::kVideoCodecAV1: case VideoCodecType::kVideoCodecH264: + case VideoCodecType::kVideoCodecH265: case VideoCodecType::kVideoCodecMultiplex: return absl::nullopt; } @@ -486,6 +501,26 @@ void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info, last_shared_frame_id_[/*spatial_index*/ 0][temporal_index] = shared_frame_id; } +void RtpPayloadParams::H265ToGeneric(const CodecSpecificInfoH265& h265_info, + int64_t shared_frame_id, + bool is_keyframe, + RTPVideoHeader* rtp_video_header) { + if (h265_info.picture_id <= 0) { + // picture_id is only used by cloud gaming. + return; + } + RTPVideoHeader::GenericDescriptorInfo& generic = + rtp_video_header->generic.emplace(); + generic.frame_id = h265_info.picture_id; + generic.spatial_index = 0; // Not enabled at present. + generic.temporal_index = 0; // Not enabled at present. 
+ for (int dep_idx = 0; dep_idx < 5; dep_idx++) { + if (h265_info.dependencies[dep_idx] <= 0) + break; + generic.dependencies[dep_idx] = h265_info.dependencies[dep_idx]; + } +} + void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info, int64_t shared_frame_id, bool is_keyframe, diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h index 5feee11ab0..fc85794f31 100644 --- a/call/rtp_payload_params.h +++ b/call/rtp_payload_params.h @@ -73,6 +73,10 @@ class RtpPayloadParams final { int64_t shared_frame_id, bool is_keyframe, RTPVideoHeader* rtp_video_header); + void H265ToGeneric(const CodecSpecificInfoH265& h265_info, + int64_t shared_frame_id, + bool is_keyframe, + RTPVideoHeader* rtp_video_header); void GenericToGeneric(int64_t shared_frame_id, bool is_keyframe, diff --git a/common_video/BUILD.gn b/common_video/BUILD.gn index 3598afdcb6..88e8489d54 100644 --- a/common_video/BUILD.gn +++ b/common_video/BUILD.gn @@ -23,10 +23,22 @@ rtc_library("common_video") { "h264/h264_common.h", "h264/pps_parser.cc", "h264/pps_parser.h", + "h264/prefix_parser.cc", + "h264/prefix_parser.h", "h264/sps_parser.cc", "h264/sps_parser.h", "h264/sps_vui_rewriter.cc", "h264/sps_vui_rewriter.h", + "h265/h265_bitstream_parser.cc", + "h265/h265_bitstream_parser.h", + "h265/h265_common.cc", + "h265/h265_common.h", + "h265/h265_pps_parser.cc", + "h265/h265_pps_parser.h", + "h265/h265_sps_parser.cc", + "h265/h265_sps_parser.h", + "h265/h265_vps_parser.cc", + "h265/h265_vps_parser.h", "include/bitrate_adjuster.h", "include/incoming_video_stream.h", "include/quality_limitation_reason.h", diff --git a/common_video/h264/prefix_parser.cc b/common_video/h264/prefix_parser.cc new file mode 100644 index 0000000000..b2f58df985 --- /dev/null +++ b/common_video/h264/prefix_parser.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/prefix_parser.h" + +#include +#include + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" + +namespace { +typedef absl::optional OptionalPrefix; + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return OptionalPrefix(); \ + } +} // namespace + +namespace webrtc { + +PrefixParser::PrefixState::PrefixState() = default; +PrefixParser::PrefixState::PrefixState(const PrefixState&) = default; +PrefixParser::PrefixState::~PrefixState() = default; + +// General note: this is based off the 02/2016 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +// Unpack RBSP and parse SVC extension state from the supplied buffer. +absl::optional PrefixParser::ParsePrefix( + const uint8_t* data, + size_t length) { + std::vector unpacked_buffer = H264::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + return ParsePrefixUpToSvcExtension(reader); +} + +absl::optional +PrefixParser::ParsePrefixUpToSvcExtension(BitstreamReader& reader) { + // Now, we need to use a bit buffer to parse through the actual SVC extension + // format. See Section 7.3.1 ("NAL unit syntax") and 7.3.1.1 ("NAL unit header + // SVC extension syntax") of the H.264 standard for a complete description. + PrefixState svc_extension; + + // Make sure the svc_extension_flag is on. 
+ bool svc_extension_flag = reader.ReadBit(); + if (!svc_extension_flag) + return OptionalPrefix(); + + // idr_flag: u(1) + svc_extension.idr_flag = reader.Read(); + // priority_id: u(6) + svc_extension.priority_id = reader.ReadBits(6); + // no_inter_layer_pred_flag: u(1) + svc_extension.no_inter_layer_pred_flag = reader.Read(); + // dependency_id: u(3) + svc_extension.dependency_id = reader.ReadBits(3); + // quality_id: u(4) + svc_extension.quality_id = reader.ReadBits(4); + // temporal_id: u(3) + svc_extension.temporal_id = reader.ReadBits(3); + // use_ref_base_pic_flag: u(1) + svc_extension.use_ref_base_pic_flag = reader.Read(); + // discardable_flag: u(1) + svc_extension.discardable_flag = reader.Read(); + // output_flag: u(1) + svc_extension.output_flag = reader.Read(); + + return OptionalPrefix(svc_extension); +} + +} // namespace webrtc diff --git a/common_video/h264/prefix_parser.h b/common_video/h264/prefix_parser.h new file mode 100644 index 0000000000..14fa4619d6 --- /dev/null +++ b/common_video/h264/prefix_parser.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_PREFIX_PARSER_H_ +#define COMMON_VIDEO_H264_PREFIX_PARSER_H_ + +#include "absl/types/optional.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// A class for parsing out SVC extension data from an H264 prefix NALU +class PrefixParser { + public: + // The parsed state of the SVC extension. Only some select values are stored. + // Add more as they are actually needed. 
+ struct PrefixState { + PrefixState(); + PrefixState(const PrefixState&); + ~PrefixState(); + + uint32_t idr_flag = 0; + uint32_t priority_id = 0; + uint32_t no_inter_layer_pred_flag = 1; + uint32_t dependency_id = 0; + uint32_t quality_id = 0; + uint32_t temporal_id = 0; + uint32_t use_ref_base_pic_flag = 0; + uint32_t discardable_flag = 1; + uint32_t output_flag = 1; + }; + + // Unpack RBSP and parse prefix state from the supplied buffer. + static absl::optional ParsePrefix(const uint8_t* data, size_t length); + + protected: + // Parse the prefix NAL, up till the SVC extension part, for a bit buffer + // where RBSP decoding has already been performed. + static absl::optional ParsePrefixUpToSvcExtension( + BitstreamReader& reader); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H264_PREFIX_PARSER_H_ diff --git a/common_video/h265/h265_bitstream_parser.cc b/common_video/h265/h265_bitstream_parser.cc new file mode 100644 index 0000000000..5b31e2f468 --- /dev/null +++ b/common_video/h265/h265_bitstream_parser.cc @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "common_video/h265/h265_bitstream_parser.h" + +#include + +#include +#include + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" +#include "rtc_base/bitstream_reader.h" + +namespace { + +const int kMaxAbsQpDeltaValue = 51; +const int kMinQpValue = 0; +const int kMaxQpValue = 51; + +} // namespace + +namespace webrtc { + +#define RETURN_ON_FAIL(x, res) \ + if (!(x)) { \ + RTC_LOG_F(LS_ERROR) << "FAILED: " #x; \ + return res; \ + } + +#define RETURN_INV_ON_FAIL(x) RETURN_ON_FAIL(x, kInvalidStream) + +H265BitstreamParser::H265BitstreamParser() {} +H265BitstreamParser::~H265BitstreamParser() {} + +// General note: this is based off the 08/2021 version of the H.265 standard, +// section 7.3.6.1. You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 +H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu( + const uint8_t* source, + size_t source_length, + uint8_t nalu_type) { + if (!sps_ || !pps_) + return kInvalidStream; + + last_slice_qp_delta_ = absl::nullopt; + const std::vector slice_rbsp = + H265::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H265::kNaluTypeSize) + return kInvalidStream; + + BitstreamReader slice_reader(slice_rbsp); + slice_reader.ConsumeBits(H265::kNaluTypeSize * 8); + + // first_slice_segment_in_pic_flag: u(1) + bool first_slice_segment_in_pic_flag = slice_reader.Read(); + if (H265::NaluType::kBlaWLp <= nalu_type && + nalu_type <= H265::NaluType::kRsvIrapVcl23) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_pic_parameter_set_id: ue(v) + slice_reader.ReadExponentialGolomb(); + bool dependent_slice_segment_flag = 0; + if (!first_slice_segment_in_pic_flag) { + if (pps_->dependent_slice_segments_enabled_flag) { + // dependent_slice_segment_flag: u(1) + dependent_slice_segment_flag = slice_reader.Read(); + } + + // slice_segment_address: u(v) + int32_t log2_ctb_size_y = 
sps_->log2_min_luma_coding_block_size_minus3 + 3 + + sps_->log2_diff_max_min_luma_coding_block_size; + uint32_t ctb_size_y = 1 << log2_ctb_size_y; + uint32_t pic_width_in_ctbs_y = sps_->pic_width_in_luma_samples / ctb_size_y; + if (sps_->pic_width_in_luma_samples % ctb_size_y) + pic_width_in_ctbs_y++; + + uint32_t pic_height_in_ctbs_y = + sps_->pic_height_in_luma_samples / ctb_size_y; + if (sps_->pic_height_in_luma_samples % ctb_size_y) + pic_height_in_ctbs_y++; + + uint32_t slice_segment_address_bits = + H265::Log2(pic_height_in_ctbs_y * pic_width_in_ctbs_y); + slice_reader.ConsumeBits(slice_segment_address_bits); + } + + if (dependent_slice_segment_flag == 0) { + for (uint32_t i = 0; i < pps_->num_extra_slice_header_bits; i++) { + // slice_reserved_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_type: ue(v) + uint32_t slice_type = 0; + slice_reader.ReadExponentialGolomb(); + if (pps_->output_flag_present_flag) { + // pic_output_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (sps_->separate_colour_plane_flag) { + // colour_plane_id: u(2) + slice_reader.ConsumeBits(2); + } + uint32_t num_long_term_sps = 0; + uint32_t num_long_term_pics = 0; + std::vector lt_idx_sps; + std::vector used_by_curr_pic_lt_flag; + bool short_term_ref_pic_set_sps_flag = false; + uint32_t short_term_ref_pic_set_idx = 0; + H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set; + bool slice_temporal_mvp_enabled_flag = 0; + if (nalu_type != H265::NaluType::kIdrWRadl && + nalu_type != H265::NaluType::kIdrNLp) { + // slice_pic_order_cnt_lsb: u(v) + uint32_t slice_pic_order_cnt_lsb_bits = + sps_->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(slice_pic_order_cnt_lsb_bits); + // short_term_ref_pic_set_sps_flag: u(1) + short_term_ref_pic_set_sps_flag = slice_reader.Read(); + if (!short_term_ref_pic_set_sps_flag) { + absl::optional ref_pic_set = + H265SpsParser::ParseShortTermRefPicSet( + sps_->num_short_term_ref_pic_sets, + sps_->num_short_term_ref_pic_sets, 
sps_->short_term_ref_pic_set, + *sps_, slice_reader); + if (ref_pic_set) { + short_term_ref_pic_set = *ref_pic_set; + } else { + return kInvalidStream; + } + } else if (sps_->num_short_term_ref_pic_sets > 1) { + // short_term_ref_pic_set_idx: u(v) + uint32_t short_term_ref_pic_set_idx_bits = + H265::Log2(sps_->num_short_term_ref_pic_sets); + if ((1 << short_term_ref_pic_set_idx_bits) < + sps_->num_short_term_ref_pic_sets) { + short_term_ref_pic_set_idx_bits++; + } + if (short_term_ref_pic_set_idx_bits > 0) { + short_term_ref_pic_set_idx = + slice_reader.ReadBits(short_term_ref_pic_set_idx_bits); + } + } + if (sps_->long_term_ref_pics_present_flag) { + if (sps_->num_long_term_ref_pics_sps > 0) { + // num_long_term_sps: ue(v) + num_long_term_sps = slice_reader.ReadExponentialGolomb(); + } + // num_long_term_sps: ue(v) + num_long_term_pics = slice_reader.ReadExponentialGolomb(); + lt_idx_sps.resize(num_long_term_sps + num_long_term_pics, 0); + used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, + 0); + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (i < num_long_term_sps) { + if (sps_->num_long_term_ref_pics_sps > 1) { + // lt_idx_sps: u(v) + uint32_t lt_idx_sps_bits = + H265::Log2(sps_->num_long_term_ref_pics_sps); + lt_idx_sps[i] = slice_reader.ReadBits(lt_idx_sps_bits); + } + } else { + // poc_lsb_lt: u(v) + uint32_t poc_lsb_lt_bits = + sps_->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(poc_lsb_lt_bits); + // used_by_curr_pic_lt_flag: u(1) + used_by_curr_pic_lt_flag[i] = slice_reader.Read(); + } + // delta_poc_msb_present_flag: u(1) + bool delta_poc_msb_present_flag = slice_reader.Read(); + if (delta_poc_msb_present_flag) { + // delta_poc_msb_cycle_lt: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } + } + if (sps_->sps_temporal_mvp_enabled_flag) { + // slice_temporal_mvp_enabled_flag: u(1) + slice_temporal_mvp_enabled_flag = slice_reader.Read(); + } + } + + if 
(sps_->sample_adaptive_offset_enabled_flag) { + // slice_sao_luma_flag: u(1) + slice_reader.ConsumeBits(1); + uint32_t chroma_array_type = + sps_->separate_colour_plane_flag == 0 ? sps_->chroma_format_idc : 0; + if (chroma_array_type != 0) { + // slice_sao_chroma_flag: u(1) + slice_reader.ConsumeBits(1); + } + } + + if (slice_type == H265::SliceType::kP || + slice_type == H265::SliceType::kB) { + // num_ref_idx_active_override_flag: u(1) + bool num_ref_idx_active_override_flag = slice_reader.Read(); + uint32_t num_ref_idx_l0_active_minus1 = + pps_->num_ref_idx_l0_default_active_minus1; + uint32_t num_ref_idx_l1_active_minus1 = + pps_->num_ref_idx_l1_default_active_minus1; + if (num_ref_idx_active_override_flag) { + // num_ref_idx_l0_active_minus1: ue(v) + num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); + if (slice_type == H265::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + num_ref_idx_l1_active_minus1 = slice_reader.ReadExponentialGolomb(); + } + } + uint32_t num_pic_total_curr = CalcNumPocTotalCurr( + num_long_term_sps, num_long_term_pics, lt_idx_sps, + used_by_curr_pic_lt_flag, short_term_ref_pic_set_sps_flag, + short_term_ref_pic_set_idx, short_term_ref_pic_set); + if (pps_->lists_modification_present_flag && num_pic_total_curr > 1) { + // ref_pic_lists_modification() + uint32_t list_entry_bits = H265::Log2(num_pic_total_curr); + if ((1 << list_entry_bits) < num_pic_total_curr) { + list_entry_bits++; + } + // ref_pic_list_modification_flag_l0: u(1) + bool ref_pic_list_modification_flag_l0 = slice_reader.Read(); + if (ref_pic_list_modification_flag_l0) { + for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) { + // list_entry_l0: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + if (slice_type == H265::SliceType::kB) { + // ref_pic_list_modification_flag_l1: u(1) + bool ref_pic_list_modification_flag_l1 = slice_reader.Read(); + if (ref_pic_list_modification_flag_l1) { + for (uint32_t i = 0; i < 
num_ref_idx_l1_active_minus1; i++) { + // list_entry_l1: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + } + } + if (slice_type == H265::SliceType::kB) { + // mvd_l1_zero_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (pps_->cabac_init_present_flag) { + // cabac_init_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (slice_temporal_mvp_enabled_flag) { + bool collocated_from_l0_flag = false; + if (slice_type == H265::SliceType::kB) { + // collocated_from_l0_flag: u(1) + collocated_from_l0_flag = slice_reader.Read(); + } + if ((collocated_from_l0_flag && num_ref_idx_l0_active_minus1 > 0) || + (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) { + // collocated_ref_idx: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } + if ((pps_->weighted_pred_flag && slice_type == H265::SliceType::kP) || + (pps_->weighted_bipred_flag && slice_type == H265::SliceType::kB)) { + // pred_weight_table() + // TODO(piasy): Do we need support for pred_weight_table()? + RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; + return kUnsupportedStream; + } + // five_minus_max_num_merge_cand: ue(v) + slice_reader.ReadExponentialGolomb(); + // TODO(piasy): motion_vector_resolution_control_idc? + } + } + + // slice_qp_delta: se(v) + int32_t last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); + if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) { + // Something has gone wrong, and the parsed value is invalid. 
+ RTC_LOG(LS_WARNING) << "Parsed QP value out of range."; + return kInvalidStream; + } + + last_slice_qp_delta_ = last_slice_qp_delta; + + return kOk; +} + +uint32_t H265BitstreamParser::CalcNumPocTotalCurr( + uint32_t num_long_term_sps, uint32_t num_long_term_pics, + const std::vector lt_idx_sps, + const std::vector used_by_curr_pic_lt_flag, + bool short_term_ref_pic_set_sps_flag, + uint32_t short_term_ref_pic_set_idx, + const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set) { + uint32_t num_poc_total_curr = 0; + uint32_t curr_sps_idx; + + bool used_by_curr_pic_lt[16]; + uint32_t num_long_term = num_long_term_sps + num_long_term_pics; + + for (uint32_t i = 0; i < num_long_term; i++) { + if (i < num_long_term_sps) { + used_by_curr_pic_lt[i] = + sps_->used_by_curr_pic_lt_sps_flag[lt_idx_sps[i]]; + } else { + used_by_curr_pic_lt[i] = used_by_curr_pic_lt_flag[i]; + } + } + + if (short_term_ref_pic_set_sps_flag) { + curr_sps_idx = short_term_ref_pic_set_idx; + } else { + curr_sps_idx = sps_->num_short_term_ref_pic_sets; + } + + if (sps_->short_term_ref_pic_set.size() <= curr_sps_idx) { + if (curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag) { + return 0; + } + } + + const H265SpsParser::ShortTermRefPicSet* ref_pic_set; + if (curr_sps_idx < sps_->short_term_ref_pic_set.size()) { + ref_pic_set = &(sps_->short_term_ref_pic_set[curr_sps_idx]); + } else { + ref_pic_set = &short_term_ref_pic_set; + } + + for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s0_flag[i]) { + num_poc_total_curr++; + } + } + + for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s1_flag[i]) { + num_poc_total_curr++; + } + } + + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (used_by_curr_pic_lt[i]) { + num_poc_total_curr++; + } + } + + return num_poc_total_curr; +} + +void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { + H265::NaluType 
nalu_type = H265::ParseNaluType(slice[0]); + if (nalu_type == H265::NaluType::kSps) { + sps_ = H265SpsParser::ParseSps(slice + H265::kNaluTypeSize, + length - H265::kNaluTypeSize); + if (!sps_) { + RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream."; + } + } else if (nalu_type == H265::NaluType::kPps) { + pps_ = H265PpsParser::ParsePps(slice + H265::kNaluTypeSize, + length - H265::kNaluTypeSize); + if (!pps_) { + RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream."; + } + } else if (nalu_type <= H265::NaluType::kRsvIrapVcl23) { + Result res = ParseNonParameterSetNalu(slice, length, nalu_type); + if (res != kOk) { + RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res; + } + } +} + +void H265BitstreamParser::ParseBitstream(const uint8_t* bitstream, + size_t length) { + std::vector nalu_indices = + H265::FindNaluIndices(bitstream, length); + for (const H265::NaluIndex& index : nalu_indices) + ParseSlice(&bitstream[index.payload_start_offset], index.payload_size); +} + +bool H265BitstreamParser::GetLastSliceQp(int* qp) const { + if (!last_slice_qp_delta_ || !pps_) { + return false; + } + const int parsed_qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_; + if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) { + RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; + return false; + } + *qp = parsed_qp; + return true; +} + +void H265BitstreamParser::ParseBitstream( + rtc::ArrayView bitstream) { + ParseBitstream(bitstream.data(), bitstream.size()); +} + +absl::optional H265BitstreamParser::GetLastSliceQp() const { + int qp; + bool success = GetLastSliceQp(&qp); + return success ? 
absl::optional(qp) : absl::nullopt; +} + +} // namespace webrtc diff --git a/common_video/h265/h265_bitstream_parser.h b/common_video/h265/h265_bitstream_parser.h new file mode 100644 index 0000000000..b38cf628e5 --- /dev/null +++ b/common_video/h265/h265_bitstream_parser.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ +#define COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ +#include +#include + +#include "absl/types/optional.h" +#include "api/video_codecs/bitstream_parser.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" + +namespace webrtc { + +// Stateful H265 bitstream parser (due to SPS/PPS). Used to parse out QP values +// from the bitstream. +// TODO(pbos): Unify with RTP SPS parsing and only use one H265 parser. +// TODO(pbos): If/when this gets used on the receiver side CHECKs must be +// removed and gracefully abort as we have no control over receive-side +// bitstreams. +class H265BitstreamParser : public BitstreamParser { + public: + H265BitstreamParser(); + ~H265BitstreamParser() override; + + // These are here for backwards-compatability for the time being. + void ParseBitstream(const uint8_t* bitstream, size_t length); + bool GetLastSliceQp(int* qp) const; + + // New interface. 
+ void ParseBitstream(rtc::ArrayView bitstream) override; + absl::optional GetLastSliceQp() const override; + + protected: + enum Result { + kOk, + kInvalidStream, + kUnsupportedStream, + }; + void ParseSlice(const uint8_t* slice, size_t length); + Result ParseNonParameterSetNalu(const uint8_t* source, + size_t source_length, + uint8_t nalu_type); + + uint32_t CalcNumPocTotalCurr( + uint32_t num_long_term_sps, + uint32_t num_long_term_pics, + const std::vector lt_idx_sps, + const std::vector used_by_curr_pic_lt_flag, + bool short_term_ref_pic_set_sps_flag, + uint32_t short_term_ref_pic_set_idx, + const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set); + + // SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices. + absl::optional sps_; + absl::optional pps_; + + // Last parsed slice QP. + absl::optional last_slice_qp_delta_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ diff --git a/common_video/h265/h265_common.cc b/common_video/h265/h265_common.cc new file mode 100644 index 0000000000..aa0cb87430 --- /dev/null +++ b/common_video/h265/h265_common.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_common.h" +#include "common_video/h264/h264_common.h" + +namespace webrtc { +namespace H265 { + +const uint8_t kNaluTypeMask = 0x7E; + +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + std::vector indices = H264::FindNaluIndices(buffer, buffer_size); + std::vector results; + for (auto& index : indices) { + results.push_back({index.start_offset, index.payload_start_offset, index.payload_size}); + } + return results; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast((data & kNaluTypeMask) >> 1); +} + +std::vector ParseRbsp(const uint8_t* data, size_t length) { + return H264::ParseRbsp(data, length); +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + H264::WriteRbsp(bytes, length, destination); +} + +uint32_t Log2(uint32_t value) { + uint32_t result = 0; + // If value is not a power of two an additional bit is required + // to account for the ceil() of log2() below. + if ((value & (value - 1)) != 0) { + ++result; + } + while (value > 0) { + value >>= 1; + ++result; + } + + return result; +} + +} // namespace H265 +} // namespace webrtc diff --git a/common_video/h265/h265_common.h b/common_video/h265/h265_common.h new file mode 100644 index 0000000000..a829195a10 --- /dev/null +++ b/common_video/h265/h265_common.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_H265_H265_COMMON_H_ +#define COMMON_VIDEO_H265_H265_COMMON_H_ + +#include +#include + +#include "rtc_base/buffer.h" + +namespace webrtc { + +namespace H265 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +const size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. +const size_t kNaluShortStartSequenceSize = 3; + +// The size of the NALU type byte (2). +const size_t kNaluTypeSize = 2; + +enum NaluType : uint8_t { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kRsvIrapVcl23 = 23, + kVps = 32, + kSps = 33, + kPps = 34, + kAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kAP = 48, + kFU = 49 +}; + +enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2 }; + +struct NaluIndex { + // Start index of NALU, including start sequence. + size_t start_offset; + // Start index of NALU payload, typically type header. + size_t payload_start_offset; + // Length of NALU payload, in bytes, counting from payload_start_offset. + size_t payload_size; +}; + +// Returns a vector of the NALU indices in the given buffer. +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size); + +// Get the NAL type from the header byte immediately following start sequence. +NaluType ParseNaluType(uint8_t data); + +// Methods for parsing and writing RBSP. See section 7.4.2 of the H265 spec. 
+// +// The following sequences are illegal, and need to be escaped when encoding: +// 00 00 00 -> 00 00 03 00 +// 00 00 01 -> 00 00 03 01 +// 00 00 02 -> 00 00 03 02 +// And things in the source that look like the emulation byte pattern (00 00 03) +// need to have an extra emulation byte added, so it's removed when decoding: +// 00 00 03 -> 00 00 03 03 +// +// Decoding is simply a matter of finding any 00 00 03 sequence and removing +// the 03 emulation byte. + +// Parse the given data and remove any emulation byte escaping. +std::vector ParseRbsp(const uint8_t* data, size_t length); + +// Write the given data to the destination buffer, inserting and emulation +// bytes in order to escape any data the could be interpreted as a start +// sequence. +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination); + +uint32_t Log2(uint32_t value); +} // namespace H265 +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_COMMON_H_ diff --git a/common_video/h265/h265_pps_parser.cc b/common_video/h265/h265_pps_parser.cc new file mode 100644 index 0000000000..c3ea0d5125 --- /dev/null +++ b/common_video/h265/h265_pps_parser.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_pps_parser.h" + +#include +#include + +#include "absl/types/optional.h" +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_sps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return absl::nullopt; \ + } + +namespace { +const int kMaxPicInitQpDeltaValue = 25; +const int kMinPicInitQpDeltaValue = -26; +} // namespace + +namespace webrtc { + +// General note: this is based off the 06/2019 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +absl::optional H265PpsParser::ParsePps( + const uint8_t* data, + size_t length) { + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. + return ParseInternal(H265::ParseRbsp(data, length)); +} + +bool H265PpsParser::ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id) { + RTC_DCHECK(pps_id); + RTC_DCHECK(sps_id); + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. 
+ std::vector unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + *pps_id = reader.ReadExponentialGolomb(); + *sps_id = reader.ReadExponentialGolomb(); + return reader.Ok(); +} + +absl::optional H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type) { + std::vector unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader slice_reader(unpacked_buffer); + + // first_mb_in_slice: ue(v) + slice_reader.ReadExponentialGolomb(); + // slice_type: ue(v) + slice_reader.ReadExponentialGolomb(); + + // first_slice_segment_in_pic_flag: u(1) + slice_reader.ConsumeBits(1); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + if (nalu_type >= H265::NaluType::kBlaWLp && + nalu_type <= H265::NaluType::kRsvIrapVcl23) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + + // slice_pic_parameter_set_id: ue(v) + uint32_t slice_pic_parameter_set_id = slice_reader.ReadExponentialGolomb(); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + return slice_pic_parameter_set_id; +} + +absl::optional H265PpsParser::ParseInternal( + rtc::ArrayView buffer) { + BitstreamReader reader(buffer); + PpsState pps; + + if(!ParsePpsIdsInternal(reader, pps.id, pps.sps_id)){ + return absl::nullopt; + } + + // dependent_slice_segments_enabled_flag: u(1) + pps.dependent_slice_segments_enabled_flag = reader.Read(); + // output_flag_present_flag: u(1) + pps.output_flag_present_flag = reader.Read(); + // num_extra_slice_header_bits: u(3) + pps.num_extra_slice_header_bits = reader.ReadBits(3); + // sign_data_hiding_enabled_flag: u(1) + reader.ConsumeBits(1); + // cabac_init_present_flag: u(1) + pps.cabac_init_present_flag = reader.Read(); + // num_ref_idx_l0_default_active_minus1: ue(v) + pps.num_ref_idx_l0_default_active_minus1 = reader.ReadExponentialGolomb(); + // num_ref_idx_l1_default_active_minus1: ue(v) + pps.num_ref_idx_l1_default_active_minus1 = 
reader.ReadExponentialGolomb(); + // init_qp_minus26: se(v) + pps.pic_init_qp_minus26 = reader.ReadSignedExponentialGolomb(); + // Sanity-check parsed value + if (pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue || + pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) { + return absl::nullopt; + } + // constrained_intra_pred_flag: u(1) + reader.ConsumeBits(1); + // transform_skip_enabled_flag: u(1) + reader.ConsumeBits(1); + // cu_qp_delta_enabled_flag: u(1) + bool cu_qp_delta_enabled_flag = reader.Read(); + if (cu_qp_delta_enabled_flag) { + // diff_cu_qp_delta_depth: ue(v) + reader.ReadExponentialGolomb(); + } + // pps_cb_qp_offset: se(v) + reader.ReadSignedExponentialGolomb(); + // pps_cr_qp_offset: se(v) + reader.ReadSignedExponentialGolomb(); + // pps_slice_chroma_qp_offsets_present_flag: u(1) + reader.ConsumeBits(1); + // weighted_pred_flag: u(1) + pps.weighted_pred_flag = reader.Read(); + // weighted_bipred_flag: u(1) + pps.weighted_bipred_flag = reader.Read(); + // transquant_bypass_enabled_flag: u(1) + reader.ConsumeBits(1); + // tiles_enabled_flag: u(1) + bool tiles_enabled_flag = reader.Read(); + // entropy_coding_sync_enabled_flag: u(1) + reader.ConsumeBits(1); + if (tiles_enabled_flag) { + // num_tile_columns_minus1: ue(v) + uint32_t num_tile_columns_minus1 = reader.ReadExponentialGolomb(); + // num_tile_rows_minus1: ue(v) + uint32_t num_tile_rows_minus1 = reader.ReadExponentialGolomb(); + // uniform_spacing_flag: u(1) + bool uniform_spacing_flag = reader.Read(); + if (!uniform_spacing_flag) { + for (uint32_t i = 0; i < num_tile_columns_minus1; i++) { + // column_width_minus1: ue(v) + reader.ReadExponentialGolomb(); + } + for (uint32_t i = 0; i < num_tile_rows_minus1; i++) { + // row_height_minus1: ue(v) + reader.ReadExponentialGolomb(); + } + // loop_filter_across_tiles_enabled_flag: u(1) + reader.ConsumeBits(1); + } + } + // pps_loop_filter_across_slices_enabled_flag: u(1) + reader.ConsumeBits(1); + // deblocking_filter_control_present_flag: u(1) + 
bool deblocking_filter_control_present_flag = reader.Read(); + if (deblocking_filter_control_present_flag) { + // deblocking_filter_override_enabled_flag: u(1) + reader.ConsumeBits(1); + // pps_deblocking_filter_disabled_flag: u(1) + bool pps_deblocking_filter_disabled_flag = reader.Read(); + if (!pps_deblocking_filter_disabled_flag) { + // pps_beta_offset_div2: se(v) + reader.ReadSignedExponentialGolomb(); + // pps_tc_offset_div2: se(v) + reader.ReadSignedExponentialGolomb(); + } + } + // pps_scaling_list_data_present_flag: u(1) + bool pps_scaling_list_data_present_flag = 0; + pps_scaling_list_data_present_flag = reader.Read(); + if (pps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!H265SpsParser::ParseScalingListData(reader)) { + return absl::nullopt; + } + } + // lists_modification_present_flag: u(1) + pps.lists_modification_present_flag = reader.Read(); + // log2_parallel_merge_level_minus2: ue(v) + reader.ReadExponentialGolomb(); + // slice_segment_header_extension_present_flag: u(1) + reader.ConsumeBits(1); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return pps; +} + +bool H265PpsParser::ParsePpsIdsInternal(BitstreamReader& reader, + uint32_t& pps_id, + uint32_t& sps_id) { + // pic_parameter_set_id: ue(v) + pps_id = reader.ReadExponentialGolomb(); + if (!reader.Ok()) + return false; + // seq_parameter_set_id: ue(v) + sps_id = reader.ReadExponentialGolomb(); + if (!reader.Ok()) + return false; + return true; +} + +} // namespace webrtc diff --git a/common_video/h265/h265_pps_parser.h b/common_video/h265/h265_pps_parser.h new file mode 100644 index 0000000000..a618121965 --- /dev/null +++ b/common_video/h265/h265_pps_parser.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_PPS_PARSER_H_ +#define COMMON_VIDEO_H265_PPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "rtc_base/bitstream_reader.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H265 NALU. +class H265PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool dependent_slice_segments_enabled_flag = 0; + bool cabac_init_present_flag = 0; + bool output_flag_present_flag = 0; + uint32_t num_extra_slice_header_bits = 0; + uint32_t num_ref_idx_l0_default_active_minus1 = 0; + uint32_t num_ref_idx_l1_default_active_minus1 = 0; + int32_t pic_init_qp_minus26 = 0; + bool weighted_pred_flag = 0; + bool weighted_bipred_flag = 0; + bool lists_modification_present_flag = 0; + uint32_t id = 0; + uint32_t sps_id = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static absl::optional ParsePps(const uint8_t* data, size_t length); + + static bool ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id); + + static absl::optional ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type); + + protected: + // Parse the PPS state, for a bit buffer where RBSP decoding has already been + // performed. 
+ static absl::optional ParseInternal( + rtc::ArrayView buffer); + static bool ParsePpsIdsInternal(BitstreamReader& reader, + uint32_t& pps_id, + uint32_t& sps_id); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_PPS_PARSER_H_ diff --git a/common_video/h265/h265_sps_parser.cc b/common_video/h265/h265_sps_parser.cc new file mode 100644 index 0000000000..d9003cb10a --- /dev/null +++ b/common_video/h265/h265_sps_parser.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_sps_parser.h" + +#include +#include + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +namespace { +typedef absl::optional OptionalSps; +typedef absl::optional + OptionalShortTermRefPicSet; +} // namespace + +namespace webrtc { + +H265SpsParser::SpsState::SpsState() = default; + +H265SpsParser::ShortTermRefPicSet::ShortTermRefPicSet() = default; + +// General note: this is based off the 06/2019 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse SPS state from the supplied buffer. 
+absl::optional H265SpsParser::ParseSps( + const uint8_t* data, + size_t length) { + return ParseSpsInternal(H265::ParseRbsp(data, length)); +} + +bool H265SpsParser::ParseScalingListData(BitstreamReader& reader) { + uint32_t scaling_list_pred_mode_flag[4][6]; + uint32_t scaling_list_pred_matrix_id_delta[4][6]; + int32_t scaling_list_dc_coef_minus8[4][6]; + int32_t scaling_list[4][6][64]; + for (int size_id = 0; size_id < 4; size_id++) { + for (int matrix_id = 0; matrix_id < 6; + matrix_id += (size_id == 3) ? 3 : 1) { + // scaling_list_pred_mode_flag: u(1) + scaling_list_pred_mode_flag[size_id][matrix_id] = reader.Read(); + if (!scaling_list_pred_mode_flag[size_id][matrix_id]) { + // scaling_list_pred_matrix_id_delta: ue(v) + scaling_list_pred_matrix_id_delta[size_id][matrix_id] = + reader.ReadExponentialGolomb(); + } else { + int32_t next_coef = 8; + uint32_t coef_num = std::min(64, 1 << (4 + (size_id << 1))); + if (size_id > 1) { + // scaling_list_dc_coef_minus8: se(v) + scaling_list_dc_coef_minus8[size_id - 2][matrix_id] = + reader.ReadSignedExponentialGolomb(); + next_coef = scaling_list_dc_coef_minus8[size_id - 2][matrix_id]; + } + for (uint32_t i = 0; i < coef_num; i++) { + // scaling_list_delta_coef: se(v) + int32_t scaling_list_delta_coef = + reader.ReadSignedExponentialGolomb(); + next_coef = (next_coef + scaling_list_delta_coef + 256) % 256; + scaling_list[size_id][matrix_id][i] = next_coef; + } + } + } + } + return true; +} + +absl::optional +H265SpsParser::ParseShortTermRefPicSet( + uint32_t st_rps_idx, + uint32_t num_short_term_ref_pic_sets, + const std::vector& + short_term_ref_pic_set, + H265SpsParser::SpsState& sps, + BitstreamReader& reader) { + H265SpsParser::ShortTermRefPicSet ref_pic_set; + + bool inter_ref_pic_set_prediction_flag = false; + if (st_rps_idx != 0) { + // inter_ref_pic_set_prediction_flag: u(1) + inter_ref_pic_set_prediction_flag = reader.Read(); + } + if (inter_ref_pic_set_prediction_flag) { + uint32_t delta_idx_minus1 = 0; + if 
(st_rps_idx == num_short_term_ref_pic_sets) { + // delta_idx_minus1: ue(v) + delta_idx_minus1 = reader.ReadExponentialGolomb(); + } + // delta_rps_sign: u(1) + reader.ConsumeBits(1); + // abs_delta_rps_minus1: ue(v) + reader.ReadExponentialGolomb(); + uint32_t ref_rps_idx = st_rps_idx - (delta_idx_minus1 + 1); + uint32_t num_delta_pocs = 0; + if (short_term_ref_pic_set[ref_rps_idx].inter_ref_pic_set_prediction_flag) { + auto& used_by_curr_pic_flag = + short_term_ref_pic_set[ref_rps_idx].used_by_curr_pic_flag; + auto& use_delta_flag = short_term_ref_pic_set[ref_rps_idx].use_delta_flag; + if (used_by_curr_pic_flag.size() != use_delta_flag.size()) { + return OptionalShortTermRefPicSet(); + } + for (uint32_t i = 0; i < used_by_curr_pic_flag.size(); i++) { + if (used_by_curr_pic_flag[i] || use_delta_flag[i]) { + num_delta_pocs++; + } + } + } else { + num_delta_pocs = short_term_ref_pic_set[ref_rps_idx].num_negative_pics + + short_term_ref_pic_set[ref_rps_idx].num_positive_pics; + } + ref_pic_set.used_by_curr_pic_flag.resize(num_delta_pocs + 1, 0); + ref_pic_set.use_delta_flag.resize(num_delta_pocs + 1, 1); + for (uint32_t j = 0; j <= num_delta_pocs; j++) { + // used_by_curr_pic_flag: u(1) + ref_pic_set.used_by_curr_pic_flag[j] = reader.Read(); + if (!ref_pic_set.used_by_curr_pic_flag[j]) { + // use_delta_flag: u(1) + ref_pic_set.use_delta_flag[j] = reader.Read(); + } + } + } else { + // num_negative_pics: ue(v) + ref_pic_set.num_negative_pics = reader.ReadExponentialGolomb(); + // num_positive_pics: ue(v) + ref_pic_set.num_positive_pics = reader.ReadExponentialGolomb(); + + ref_pic_set.delta_poc_s0_minus1.resize(ref_pic_set.num_negative_pics, 0); + ref_pic_set.used_by_curr_pic_s0_flag.resize(ref_pic_set.num_negative_pics, + 0); + for (uint32_t i = 0; i < ref_pic_set.num_negative_pics; i++) { + // delta_poc_s0_minus1: ue(v) + ref_pic_set.delta_poc_s0_minus1[i] = reader.ReadExponentialGolomb(); + // used_by_curr_pic_s0_flag: u(1) + ref_pic_set.used_by_curr_pic_s0_flag[i] 
= reader.Read(); + } + ref_pic_set.delta_poc_s1_minus1.resize(ref_pic_set.num_positive_pics, 0); + ref_pic_set.used_by_curr_pic_s1_flag.resize(ref_pic_set.num_positive_pics, + 0); + for (uint32_t i = 0; i < ref_pic_set.num_positive_pics; i++) { + // delta_poc_s1_minus1: ue(v) + ref_pic_set.delta_poc_s1_minus1[i] = reader.ReadExponentialGolomb(); + // used_by_curr_pic_s1_flag: u(1) + ref_pic_set.used_by_curr_pic_s1_flag[i] = reader.Read(); + } + } + + return OptionalShortTermRefPicSet(ref_pic_set); +} + +absl::optional H265SpsParser::ParseSpsInternal( + rtc::ArrayView buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual HEVC SPS + // format. See Section 7.3.2.2.1 ("General sequence parameter set data + // syntax") of the H.265 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). + // frame_crop_*_offset -> crop information + SpsState sps; + + // sps_video_parameter_set_id: u(4) + uint32_t sps_video_parameter_set_id = 0; + sps_video_parameter_set_id = reader.ReadBits(4); + // sps_max_sub_layers_minus1: u(3) + uint32_t sps_max_sub_layers_minus1 = 0; + sps_max_sub_layers_minus1 = reader.ReadBits(3); + sps.sps_max_sub_layers_minus1 = sps_max_sub_layers_minus1; + sps.sps_max_dec_pic_buffering_minus1.resize(sps_max_sub_layers_minus1 + 1, 0); + // sps_temporal_id_nesting_flag: u(1) + reader.ConsumeBits(1); + // profile_tier_level(1, sps_max_sub_layers_minus1). We are acutally not + // using them, so read/skip over it. 
+ // general_profile_space+general_tier_flag+general_prfile_idc: u(8) + reader.ConsumeBits(8); + // general_profile_compatabilitiy_flag[32] + reader.ConsumeBits(32); + // general_progressive_source_flag + interlaced_source_flag+ + // non-packed_constraint flag + frame_only_constraint_flag: u(4) + reader.ConsumeBits(4); + // general_profile_idc decided flags or reserved. u(43) + reader.ConsumeBits(43); + // general_inbld_flag or reserved 0: u(1) + reader.ConsumeBits(1); + // general_level_idc: u(8) + reader.ConsumeBits(8); + // if max_sub_layers_minus1 >=1, read the sublayer profile information + std::vector sub_layer_profile_present_flags; + std::vector sub_layer_level_present_flags; + uint32_t sub_layer_profile_present = 0; + uint32_t sub_layer_level_present = 0; + for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) { + // sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) + sub_layer_profile_present = reader.Read(); + sub_layer_level_present = reader.Read(); + sub_layer_profile_present_flags.push_back(sub_layer_profile_present); + sub_layer_level_present_flags.push_back(sub_layer_level_present); + } + if (sps_max_sub_layers_minus1 > 0) { + for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) { + // reserved 2 bits: u(2) + reader.ConsumeBits(2); + } + } + for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) { + if (sub_layer_profile_present_flags[k]) { // + // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) + reader.ConsumeBits(8); + // profile_compatability_flag: u(32) + reader.ConsumeBits(32); + // sub_layer progressive_source_flag/interlaced_source_flag/ + // non_packed_constraint_flag/frame_only_constraint_flag: u(4) + reader.ConsumeBits(4); + // following 43-bits are profile_idc specific. We simply read/skip it. + // u(43) + reader.ConsumeBits(43); + // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) + reader.ConsumeBits(1); + } + if (sub_layer_level_present_flags[k]) { + // sub_layer_level_idc: u(8) + reader.ConsumeBits(8); + } + } + // sps_seq_parameter_set_id: ue(v) + sps.id = reader.ReadExponentialGolomb(); + // chrome_format_idc: ue(v) + sps.chroma_format_idc = reader.ReadExponentialGolomb(); + if (sps.chroma_format_idc == 3) { + // seperate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.Read(); + } + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + // pic_width_in_luma_samples: ue(v) + pic_width_in_luma_samples = reader.ReadExponentialGolomb(); + // pic_height_in_luma_samples: ue(v) + pic_height_in_luma_samples = reader.ReadExponentialGolomb(); + // conformance_window_flag: u(1) + bool conformance_window_flag = reader.Read(); + + uint32_t conf_win_left_offset = 0; + uint32_t conf_win_right_offset = 0; + uint32_t conf_win_top_offset = 0; + uint32_t conf_win_bottom_offset = 0; + if (conformance_window_flag) { + // conf_win_left_offset: ue(v) + conf_win_left_offset = reader.ReadExponentialGolomb(); + // conf_win_right_offset: ue(v) + conf_win_right_offset = reader.ReadExponentialGolomb(); + // conf_win_top_offset: ue(v) + conf_win_top_offset = reader.ReadExponentialGolomb(); + // conf_win_bottom_offset: ue(v) + conf_win_bottom_offset = reader.ReadExponentialGolomb(); + } + + // bit_depth_luma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // bit_depth_chroma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + sps.log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + uint32_t sps_sub_layer_ordering_info_present_flag = 0; + // sps_sub_layer_ordering_info_present_flag: u(1) + sps_sub_layer_ordering_info_present_flag = reader.Read(); + for (uint32_t i = (sps_sub_layer_ordering_info_present_flag != 0) + ? 
0 + : sps_max_sub_layers_minus1; + i <= sps_max_sub_layers_minus1; i++) { + // sps_max_dec_pic_buffering_minus1: ue(v) + sps.sps_max_dec_pic_buffering_minus1[i] = reader.ReadExponentialGolomb(); + // sps_max_num_reorder_pics: ue(v) + reader.ReadExponentialGolomb(); + // sps_max_latency_increase_plus1: ue(v) + reader.ReadExponentialGolomb(); + } + // log2_min_luma_coding_block_size_minus3: ue(v) + sps.log2_min_luma_coding_block_size_minus3 = reader.ReadExponentialGolomb(); + // log2_diff_max_min_luma_coding_block_size: ue(v) + sps.log2_diff_max_min_luma_coding_block_size = reader.ReadExponentialGolomb(); + // log2_min_luma_transform_block_size_minus2: ue(v) + reader.ReadExponentialGolomb(); + // log2_diff_max_min_luma_transform_block_size: ue(v) + reader.ReadExponentialGolomb(); + // max_transform_hierarchy_depth_inter: ue(v) + reader.ReadExponentialGolomb(); + // max_transform_hierarchy_depth_intra: ue(v) + reader.ReadExponentialGolomb(); + // scaling_list_enabled_flag: u(1) + bool scaling_list_enabled_flag = reader.Read(); + if (scaling_list_enabled_flag) { + // sps_scaling_list_data_present_flag: u(1) + bool sps_scaling_list_data_present_flag = reader.Read(); + if (sps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!ParseScalingListData(reader)) { + return OptionalSps(); + } + } + } + + // amp_enabled_flag: u(1) + reader.ConsumeBits(1); + // sample_adaptive_offset_enabled_flag: u(1) + sps.sample_adaptive_offset_enabled_flag = reader.Read(); + // pcm_enabled_flag: u(1) + bool pcm_enabled_flag = reader.Read(); + if (pcm_enabled_flag) { + // pcm_sample_bit_depth_luma_minus1: u(4) + reader.ConsumeBits(4); + // pcm_sample_bit_depth_chroma_minus1: u(4) + reader.ConsumeBits(4); + // log2_min_pcm_luma_coding_block_size_minus3: ue(v) + reader.ReadExponentialGolomb(); + // log2_diff_max_min_pcm_luma_coding_block_size: ue(v) + reader.ReadExponentialGolomb(); + // pcm_loop_filter_disabled_flag: u(1) + reader.ConsumeBits(1); + } + + // 
num_short_term_ref_pic_sets: ue(v) + sps.num_short_term_ref_pic_sets = reader.ReadExponentialGolomb(); + sps.short_term_ref_pic_set.resize(sps.num_short_term_ref_pic_sets); + for (uint32_t st_rps_idx = 0; st_rps_idx < sps.num_short_term_ref_pic_sets; + st_rps_idx++) { + // st_ref_pic_set() + OptionalShortTermRefPicSet ref_pic_set = + ParseShortTermRefPicSet(st_rps_idx, sps.num_short_term_ref_pic_sets, + sps.short_term_ref_pic_set, sps, reader); + if (ref_pic_set) { + sps.short_term_ref_pic_set[st_rps_idx] = *ref_pic_set; + } else { + return OptionalSps(); + } + } + + // long_term_ref_pics_present_flag: u(1) + sps.long_term_ref_pics_present_flag = reader.Read(); + if (sps.long_term_ref_pics_present_flag) { + // num_long_term_ref_pics_sps: ue(v) + sps.num_long_term_ref_pics_sps = reader.ReadExponentialGolomb(); + sps.used_by_curr_pic_lt_sps_flag.resize(sps.num_long_term_ref_pics_sps, 0); + for (uint32_t i = 0; i < sps.num_long_term_ref_pics_sps; i++) { + // lt_ref_pic_poc_lsb_sps: u(v) + uint32_t lt_ref_pic_poc_lsb_sps_bits = + sps.log2_max_pic_order_cnt_lsb_minus4 + 4; + reader.ConsumeBits(lt_ref_pic_poc_lsb_sps_bits); + // used_by_curr_pic_lt_sps_flag: u(1) + sps.used_by_curr_pic_lt_sps_flag[i] = reader.Read(); + } + } + + // sps_temporal_mvp_enabled_flag: u(1) + sps.sps_temporal_mvp_enabled_flag = reader.Read(); + + // Far enough! We don't use the rest of the SPS. + + sps.vps_id = sps_video_parameter_set_id; + + sps.pic_width_in_luma_samples = pic_width_in_luma_samples; + sps.pic_height_in_luma_samples = pic_height_in_luma_samples; + + // Start with the resolution determined by the pic_width/pic_height fields. + sps.width = pic_width_in_luma_samples; + sps.height = pic_height_in_luma_samples; + + if (conformance_window_flag) { + int sub_width_c = + ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) && + (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) + ? 
2 + : 1; + // the offset includes the pixel within conformance window. so don't need to + // +1 as per spec + sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset); + sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset); + } + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalSps(sps); +} + +} // namespace webrtc diff --git a/common_video/h265/h265_sps_parser.h b/common_video/h265/h265_sps_parser.h new file mode 100644 index 0000000000..494db97c1f --- /dev/null +++ b/common_video/h265/h265_sps_parser.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_ + +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "rtc_base/bitstream_reader.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. +class H265SpsParser { + public: + + struct ShortTermRefPicSet { + ShortTermRefPicSet(); + + uint32_t inter_ref_pic_set_prediction_flag = 0; + std::vector used_by_curr_pic_flag; + std::vector use_delta_flag; + uint32_t num_negative_pics = 0; + uint32_t num_positive_pics = 0; + std::vector delta_poc_s0_minus1; + std::vector used_by_curr_pic_s0_flag; + std::vector delta_poc_s1_minus1; + std::vector used_by_curr_pic_s1_flag; + }; + + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. 
+ struct SpsState { + SpsState(); + + uint32_t sps_max_sub_layers_minus1; + uint32_t chroma_format_idc = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; + std::vector sps_max_dec_pic_buffering_minus1; + uint32_t log2_min_luma_coding_block_size_minus3 = 0; + uint32_t log2_diff_max_min_luma_coding_block_size = 0; + uint32_t sample_adaptive_offset_enabled_flag = 0; + uint32_t num_short_term_ref_pic_sets = 0; + std::vector short_term_ref_pic_set; + uint32_t long_term_ref_pics_present_flag = 0; + uint32_t num_long_term_ref_pics_sps = 0; + std::vector used_by_curr_pic_lt_sps_flag; + uint32_t sps_temporal_mvp_enabled_flag = 0; + uint32_t width = 0; + uint32_t height = 0; + uint32_t id = 0; + uint32_t vps_id = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. + static absl::optional ParseSps(const uint8_t* data, size_t length); + + static bool ParseScalingListData(BitstreamReader& reader); + + static absl::optional ParseShortTermRefPicSet( + uint32_t st_rps_idx, uint32_t num_short_term_ref_pic_sets, + const std::vector& ref_pic_sets, + SpsState& sps, BitstreamReader& reader); + + protected: + // Parse the SPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional ParseSpsInternal( + rtc::ArrayView buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_ diff --git a/common_video/h265/h265_vps_parser.cc b/common_video/h265/h265_vps_parser.cc new file mode 100644 index 0000000000..91f51ca280 --- /dev/null +++ b/common_video/h265/h265_vps_parser.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_vps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +H265VpsParser::VpsState::VpsState() = default; + +// General note: this is based off the 06/2019 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional H265VpsParser::ParseVps( + const uint8_t* data, + size_t length) { + return ParseInternal(H265::ParseRbsp(data, length)); +} + +absl::optional H265VpsParser::ParseInternal( + rtc::ArrayView buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual HEVC VPS + // format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the + // H.265 standard for a complete description. + VpsState vps; + + // vps_video_parameter_set_id: u(4) + vps.id = reader.ReadBits(4); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return vps; +} + +} // namespace webrtc diff --git a/common_video/h265/h265_vps_parser.h b/common_video/h265/h265_vps_parser.h new file mode 100644 index 0000000000..a17d0ea822 --- /dev/null +++ b/common_video/h265/h265_vps_parser.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out sequence parameter set (VPS) data from an H265 NALU. +class H265VpsParser { + public: + // The parsed state of the VPS. Only some select values are stored. + // Add more as they are actually needed. + struct VpsState { + VpsState(); + + uint32_t id = 0; + }; + + // Unpack RBSP and parse VPS state from the supplied buffer. + static absl::optional ParseVps(const uint8_t* data, size_t length); + + protected: + // Parse the VPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional ParseInternal( + rtc::ArrayView buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_ diff --git a/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc b/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc index d88f124f9e..c51fd79b29 100644 --- a/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc +++ b/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc @@ -104,6 +104,8 @@ rtclog2::FrameDecodedEvents::Codec ConvertToProtoFormat(VideoCodecType codec) { return rtclog2::FrameDecodedEvents::CODEC_AV1; case VideoCodecType::kVideoCodecH264: return rtclog2::FrameDecodedEvents::CODEC_H264; + case VideoCodecType::kVideoCodecH265: + return rtclog2::FrameDecodedEvents::CODEC_H265; case VideoCodecType::kVideoCodecMultiplex: // This codec type is afaik not used. 
return rtclog2::FrameDecodedEvents::CODEC_UNKNOWN; diff --git a/logging/rtc_event_log/rtc_event_log2.proto b/logging/rtc_event_log/rtc_event_log2.proto index a541533dcc..f90ed12587 100644 --- a/logging/rtc_event_log/rtc_event_log2.proto +++ b/logging/rtc_event_log/rtc_event_log2.proto @@ -293,6 +293,7 @@ message FrameDecodedEvents { CODEC_VP9 = 3; CODEC_AV1 = 4; CODEC_H264 = 5; + CODEC_H265 = 6; } // required diff --git a/logging/rtc_event_log/rtc_event_log_parser.cc b/logging/rtc_event_log/rtc_event_log_parser.cc index 406818b1ab..a1f1669b63 100644 --- a/logging/rtc_event_log/rtc_event_log_parser.cc +++ b/logging/rtc_event_log/rtc_event_log_parser.cc @@ -314,6 +314,10 @@ VideoCodecType GetRuntimeCodecType(rtclog2::FrameDecodedEvents::Codec codec) { return VideoCodecType::kVideoCodecAV1; case rtclog2::FrameDecodedEvents::CODEC_H264: return VideoCodecType::kVideoCodecH264; +#ifdef WEBRTC_USE_H265 + case rtclog2::FrameDecodedEvents::CODEC_H265: + return VideoCodecType::kVideoCodecH265; +#endif case rtclog2::FrameDecodedEvents::CODEC_UNKNOWN: RTC_LOG(LS_ERROR) << "Unknown codec type. 
Assuming " "VideoCodecType::kVideoCodecMultiplex"; diff --git a/media/base/media_constants.cc b/media/base/media_constants.cc index da5e7a8adf..46e65c01b2 100644 --- a/media/base/media_constants.cc +++ b/media/base/media_constants.cc @@ -104,6 +104,7 @@ const char kVp8CodecName[] = "VP8"; const char kVp9CodecName[] = "VP9"; const char kAv1CodecName[] = "AV1"; const char kH264CodecName[] = "H264"; +const char kH265CodecName[] = "H265"; // RFC 6184 RTP Payload Format for H.264 video const char kH264FmtpProfileLevelId[] = "profile-level-id"; @@ -113,6 +114,13 @@ const char kH264FmtpSpropParameterSets[] = "sprop-parameter-sets"; const char kH264FmtpSpsPpsIdrInKeyframe[] = "sps-pps-idr-in-keyframe"; const char kH264ProfileLevelConstrainedBaseline[] = "42e01f"; const char kH264ProfileLevelConstrainedHigh[] = "640c1f"; +#ifdef WEBRTC_USE_H265 +// RFC 7798 RTP Payload Format for H.265 video +const char kH265FmtpProfileSpace[] = "profile-space"; +const char kH265FmtpProfileId[] = "profile-id"; +const char kH265FmtpTierFlag[] = "tier-flag"; +const char kH265FmtpLevelId[] = "level-id"; +#endif const int kDefaultVideoMaxFramerate = 60; diff --git a/media/base/media_constants.h b/media/base/media_constants.h index 16c5db92b9..3330e136af 100644 --- a/media/base/media_constants.h +++ b/media/base/media_constants.h @@ -124,6 +124,7 @@ RTC_EXPORT extern const char kVp8CodecName[]; RTC_EXPORT extern const char kVp9CodecName[]; RTC_EXPORT extern const char kAv1CodecName[]; RTC_EXPORT extern const char kH264CodecName[]; +RTC_EXPORT extern const char kH265CodecName[]; // RFC 6184 RTP Payload Format for H.264 video RTC_EXPORT extern const char kH264FmtpProfileLevelId[]; @@ -134,6 +135,13 @@ extern const char kH264FmtpSpsPpsIdrInKeyframe[]; extern const char kH264ProfileLevelConstrainedBaseline[]; extern const char kH264ProfileLevelConstrainedHigh[]; +#ifdef WEBRTC_USE_H265 +// RFC 7798 RTP Payload Format for H.265 video +RTC_EXPORT extern const char kH265FmtpProfileSpace[]; +RTC_EXPORT 
extern const char kH265FmtpProfileId[]; +RTC_EXPORT extern const char kH265FmtpTierFlag[]; +RTC_EXPORT extern const char kH265FmtpLevelId[]; +#endif extern const int kDefaultVideoMaxFramerate; extern const size_t kConferenceMaxNumSpatialLayers; diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn index ce9c409cff..7fc37fbbff 100644 --- a/modules/rtp_rtcp/BUILD.gn +++ b/modules/rtp_rtcp/BUILD.gn @@ -191,6 +191,8 @@ rtc_library("rtp_rtcp") { "source/rtp_format.h", "source/rtp_format_h264.cc", "source/rtp_format_h264.h", + "source/rtp_format_h265.cc", + "source/rtp_format_h265.h", "source/rtp_format_video_generic.cc", "source/rtp_format_video_generic.h", "source/rtp_format_vp8.cc", @@ -240,6 +242,8 @@ rtc_library("rtp_rtcp") { "source/video_rtp_depacketizer_generic.h", "source/video_rtp_depacketizer_h264.cc", "source/video_rtp_depacketizer_h264.h", + "source/video_rtp_depacketizer_h265.cc", + "source/video_rtp_depacketizer_h265.h", "source/video_rtp_depacketizer_raw.cc", "source/video_rtp_depacketizer_raw.h", "source/video_rtp_depacketizer_vp8.cc", diff --git a/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc b/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc index f1e4eddb4b..aaa0ba3f57 100644 --- a/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc +++ b/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc @@ -19,6 +19,7 @@ #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h" namespace webrtc { @@ -31,6 +32,8 @@ std::unique_ptr CreateVideoRtpDepacketizer( return std::make_unique(); case kVideoCodecVP9: return std::make_unique(); + case kVideoCodecH265: + return std::make_unique(); case kVideoCodecAV1: return std::make_unique(); case kVideoCodecGeneric: diff --git a/modules/rtp_rtcp/source/h265_sps_parser.cc 
b/modules/rtp_rtcp/source/h265_sps_parser.cc new file mode 100644 index 0000000000..6e174f6304 --- /dev/null +++ b/modules/rtp_rtcp/source/h265_sps_parser.cc @@ -0,0 +1,189 @@ +/* + * Intel License + */ + +#include "webrtc/modules/rtp_rtcp/source/h265_sps_parser.h" + +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/bytebuffer.h" +#include "webrtc/base/logging.h" + +#include + +#define RETURN_FALSE_ON_FAIL(x) \ + if (!(x)) { \ + return false; \ + } + +namespace webrtc { + +H265SpsParser::H265SpsParser(const uint8_t* sps, size_t byte_length) + : sps_(sps), byte_length_(byte_length), width_(), height_() { +} + +bool H265SpsParser::Parse() { + // General note: this is based off the 04/2015 version of the H.265 standard. + // You can find it on this page: + // http://www.itu.int/rec/T-REC-H.265 + + const char* sps_bytes = reinterpret_cast(sps_); + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard, similar to H264. + rtc::ByteBufferWriter rbsp_buffer; + for (size_t i = 0; i < byte_length_;) { + // Be careful about over/underflow here. byte_length_ - 3 can underflow, and + // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ + // above, and that expression will produce the number of bytes left in + // the stream including the byte at i. + if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 && + sps_[i + 2] == 3) { + // Two rbsp bytes + the emulation byte. + rbsp_buffer.WriteBytes(sps_bytes + i, 2); + i += 3; + } else { + // Single rbsp byte. + rbsp_buffer.WriteBytes(sps_bytes + i, 1); + i++; + } + } + + // Now, we need to use a bit buffer to parse through the actual HEVC SPS + // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the + // H.265 standard for a complete description. 
+ // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // Unlike H264, for H265, the picture size is indicated by pic_width_in_luma_samples + // and pic_height_in_luma_samples, if conformance_window_flag !=1; + // When conformance_window_flag is 1, the width is adjusted with con_win_xx_offset + // + rtc::BitBuffer parser(reinterpret_cast(rbsp_buffer.Data()), + rbsp_buffer.Length()); + + // The golomb values we have to read, not just consume. + uint32_t golomb_ignored; + + // separate_colour_plane_flag is optional (assumed 0), but has implications + // about the ChromaArrayType, which modifies how we treat crop coordinates. + uint32_t separate_colour_plane_flag = 0; + // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is + // 0. It defaults to 1, when not specified. + uint32_t chroma_format_idc = 1; + + + // sps_video_parameter_set_id: u(4) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); + // sps_max_sub_layers_minus1: u(3) + uint32_t sps_max_sub_layers_minus1 = 0; + RETURN_FALSE_ON_FAIL(parser.ReadBits(&sps_max_sub_layers_minus1, 3)); + // sps_temporal_id_nesting_flag: u(1) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + // profile_tier_level(1, sps_max_sub_layers_minus1). We are acutally not + // using them, so read/skip over it. + // general_profile_space+general_tier_flag+general_prfile_idc: u(8) + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + // general_profile_compatabilitiy_flag[32] + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(4)); + // general_progressive_source_flag + interlaced_source_flag+ non-packed_constraint + // flag + frame_only_constraint_flag: u(4) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); + // general_profile_idc decided flags or reserved. 
u(43) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(43)); + // general_inbld_flag or reserved 0: u(1) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + // general_level_idc: u(8) + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + // if max_sub_layers_minus1 >=1, read the sublayer profile information + std::vector sub_layer_profile_present_flags; + std::vector sub_layer_level_present_flags; + uint32_t sub_layer_profile_present = 0; + uint32_t sub_layer_level_present = 0; + for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) { + //sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) + RETURN_FALSE_ON_FAIL(parser.ReadBits(&sub_layer_profile_present, 1)); + RETURN_FALSE_ON_FAIL(parser.ReadBits(&sub_layer_level_present, 1)); + sub_layer_profile_present_flags.push_back(sub_layer_profile_present); + sub_layer_level_present_flags.push_back(sub_layer_level_present); + } + if (sps_max_sub_layers_minus1 > 0) { + for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) { + // reserved 2 bits: u(2) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(2)); + } + } + for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) { + if(sub_layer_profile_present_flags[k]) {// + // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + // profile_compatability_flag: u(32) + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(4)); + // sub_layer progressive_source_flag/interlaced_source_flag/ + // non_packed_constraint_flag/frame_only_constraint_flag: u(4) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); + // following 43-bits are profile_idc specific. We simply read/skip it. u(43) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(43)); + // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) + RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + } + if (sub_layer_level_present_flags[k]) { + // sub_layer_level_idc: u(8) + RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + } + } + //sps_seq_parameter_set_id: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + // chrome_format_idc: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc)); + if (chroma_format_idc == 3) { + // seperate_colour_plane_flag: u(1) + RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1)); + } + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + // pic_width_in_luma_samples: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_luma_samples)); + // pic_height_in_luma_samples: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_height_in_luma_samples)); + // conformance_window_flag: u(1) + uint32_t conformance_window_flag = 0; + RETURN_FALSE_ON_FAIL(parser.ReadBits(&conformance_window_flag, 1)); + + uint32_t conf_win_left_offset = 0; + uint32_t conf_win_right_offset = 0; + uint32_t conf_win_top_offset = 0; + uint32_t conf_win_bottom_offset = 0; + if (conformance_window_flag) { + // conf_win_left_offset: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_left_offset)); + // conf_win_right_offset: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_right_offset)); + // conf_win_top_offset: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_top_offset)); + // conf_win_bottom_offset: ue(v) + RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_bottom_offset)); + } + + //For enough to get the resolution information. 
calcaluate according to HEVC spec 7.4.3.2 + int width = 0; + int height = 0; + + width = pic_width_in_luma_samples; + height = pic_height_in_luma_samples; + + if (conformance_window_flag) { + int sub_width_c = ((1 == chroma_format_idc) || (2 == chroma_format_idc)) && + (0 == separate_colour_plane_flag) ? 2 : 1; + int sub_height_c = (1 == chroma_format_idc) && (0 == separate_colour_plane_flag) ? 2 : 1; + //the offset includes the pixel within conformance window. so don't need to +1 as per spec + width -= sub_width_c*(conf_win_right_offset + conf_win_left_offset); + height -= sub_height_c*(conf_win_top_offset + conf_win_bottom_offset); + } + + width_ = width; + height_ = height; + return true; + +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/h265_sps_parser.h b/modules/rtp_rtcp/source/h265_sps_parser.h new file mode 100644 index 0000000000..6b08b0959f --- /dev/null +++ b/modules/rtp_rtcp/source/h265_sps_parser.h @@ -0,0 +1,31 @@ +/* + * Intel License + */ + +#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ +#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ + +#include "webrtc/base/common.h" + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. +// Currently, only resolution is read without being ignored. +class H265SpsParser { + public: + H265SpsParser(const uint8_t* sps, size_t byte_length); + // Parses the SPS to completion. Returns true if the SPS was parsed correctly. 
+ bool Parse(); + uint16_t width() { return width_; } + uint16_t height() { return height_; } + + private: + const uint8_t* const sps_; + const size_t byte_length_; + + uint16_t width_; + uint16_t height_; +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ diff --git a/modules/rtp_rtcp/source/rtp_format.cc b/modules/rtp_rtcp/source/rtp_format.cc index 7550b70f69..59d6cdd98b 100644 --- a/modules/rtp_rtcp/source/rtp_format.cc +++ b/modules/rtp_rtcp/source/rtp_format.cc @@ -14,11 +14,13 @@ #include "absl/types/variant.h" #include "modules/rtp_rtcp/source/rtp_format_h264.h" +#include "modules/rtp_rtcp/source/rtp_format_h265.h" #include "modules/rtp_rtcp/source/rtp_format_video_generic.h" #include "modules/rtp_rtcp/source/rtp_format_vp8.h" #include "modules/rtp_rtcp/source/rtp_format_vp9.h" #include "modules/rtp_rtcp/source/rtp_packetizer_av1.h" #include "modules/video_coding/codecs/h264/include/h264_globals.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" #include "rtc_base/checks.h" @@ -43,6 +45,12 @@ std::unique_ptr RtpPacketizer::Create( return std::make_unique(payload, limits, h264.packetization_mode); } + case kVideoCodecH265: { + const auto& h265 = + absl::get(rtp_video_header.video_type_header); + return std::make_unique(payload, limits, + h265.packetization_mode); + } case kVideoCodecVP8: { const auto& vp8 = absl::get(rtp_video_header.video_type_header); diff --git a/modules/rtp_rtcp/source/rtp_format_h265.cc b/modules/rtp_rtcp/source/rtp_format_h265.cc new file mode 100644 index 0000000000..611bf9e42b --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_format_h265.cc @@ -0,0 +1,367 @@ +/* + * Intel License + */ + +#include + +#include "absl/types/optional.h" +#include "absl/types/variant.h" + +#include "common_video/h264/h264_common.h" +#include 
"common_video/h265/h265_common.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "modules/rtp_rtcp/source/rtp_format_h265.h" +#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "rtc_base/logging.h" + +using namespace rtc; + +namespace webrtc { +namespace { + +enum NaluType { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kVps = 32, + kHevcSps = 33, + kHevcPps = 34, + kHevcAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kHevcAp = 48, + kHevcFu = 49 +}; + +/* + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | PayloadHdr (Type=49) | FU header | DONL (cond) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| +*/ +// Unlike H.264, HEVC NAL header is 2-bytes. +static const size_t kHevcNalHeaderSize = 2; +// H.265's FU is constructed of 2-byte payload header, and 1-byte FU header +static const size_t kHevcFuHeaderSize = 1; +static const size_t kHevcLengthFieldSize = 2; + +enum HevcNalHdrMasks { + kHevcFBit = 0x80, + kHevcTypeMask = 0x7E, + kHevcLayerIDHMask = 0x1, + kHevcLayerIDLMask = 0xF8, + kHevcTIDMask = 0x7, + kHevcTypeMaskN = 0x81, + kHevcTypeMaskInFuHeader = 0x3F +}; + +// Bit masks for FU headers. +enum HevcFuDefs { kHevcSBit = 0x80, kHevcEBit = 0x40, kHevcFuTypeBit = 0x3F }; + +} // namespace + +RtpPacketizerH265::RtpPacketizerH265( + rtc::ArrayView payload, + PayloadSizeLimits limits, + H265PacketizationMode packetization_mode) + : limits_(limits), + num_packets_left_(0) { + // Guard against uninitialized memory in packetization_mode. 
+ RTC_CHECK(packetization_mode == H265PacketizationMode::NonInterleaved || + packetization_mode == H265PacketizationMode::SingleNalUnit); + + for (const auto& nalu : + H264::FindNaluIndices(payload.data(), payload.size())) { + input_fragments_.push_back( + payload.subview(nalu.payload_start_offset, nalu.payload_size)); + } + + if (!GeneratePackets(packetization_mode)) { + // If failed to generate all the packets, discard already generated + // packets in case the caller would ignore return value and still try to + // call NextPacket(). + num_packets_left_ = 0; + while (!packets_.empty()) { + packets_.pop(); + } + } +} + +RtpPacketizerH265::~RtpPacketizerH265() {} + +size_t RtpPacketizerH265::NumPackets() const { + return num_packets_left_; +} + +bool RtpPacketizerH265::GeneratePackets( + H265PacketizationMode packetization_mode) { + // For HEVC we follow non-interleaved mode for the packetization, + // and don't support single-nalu mode at present. + for (size_t i = 0; i < input_fragments_.size();) { + int fragment_len = input_fragments_[i].size(); + int single_packet_capacity = limits_.max_payload_len; + if (input_fragments_.size() == 1) + single_packet_capacity -= limits_.single_packet_reduction_len; + else if (i == 0) + single_packet_capacity -= limits_.first_packet_reduction_len; + else if (i + 1 == input_fragments_.size()) { + // Pretend that last fragment is larger instead of making last packet + // smaller. + single_packet_capacity -= limits_.last_packet_reduction_len; + } + if (fragment_len > single_packet_capacity) { + PacketizeFu(i); + ++i; + } else { + PacketizeSingleNalu(i); + ++i; + } + } + return true; +} + +bool RtpPacketizerH265::PacketizeFu(size_t fragment_index) { + // Fragment payload into packets (FU). + // Strip out the original header and leave room for the FU header. 
+ rtc::ArrayView fragment = input_fragments_[fragment_index]; + PayloadSizeLimits limits = limits_; + limits.max_payload_len -= kHevcFuHeaderSize + kHevcNalHeaderSize; + + // Update single/first/last packet reductions unless it is single/first/last + // fragment. + if (input_fragments_.size() != 1) { + // if this fragment is put into a single packet, it might still be the + // first or the last packet in the whole sequence of packets. + if (fragment_index == input_fragments_.size() - 1) { + limits.single_packet_reduction_len = limits_.last_packet_reduction_len; + } else if (fragment_index == 0) { + limits.single_packet_reduction_len = limits_.first_packet_reduction_len; + } else { + limits.single_packet_reduction_len = 0; + } + } + if (fragment_index != 0) + limits.first_packet_reduction_len = 0; + if (fragment_index != input_fragments_.size() - 1) + limits.last_packet_reduction_len = 0; + + // Strip out the original header. + size_t payload_left = fragment.size() - kHevcNalHeaderSize; + int offset = kHevcNalHeaderSize; + + std::vector payload_sizes = SplitAboutEqually(payload_left, limits); + if (payload_sizes.empty()) + return false; + + for (size_t i = 0; i < payload_sizes.size(); ++i) { + int packet_length = payload_sizes[i]; + RTC_CHECK_GT(packet_length, 0); + uint16_t header = (fragment[0] << 8) | fragment[1]; + packets_.push(PacketUnit(fragment.subview(offset, packet_length), + /*first_fragment=*/i == 0, + /*last_fragment=*/i == payload_sizes.size() - 1, + false, header)); + offset += packet_length; + payload_left -= packet_length; + } + num_packets_left_ += payload_sizes.size(); + RTC_CHECK_EQ(0, payload_left); + return true; +} + + +bool RtpPacketizerH265::PacketizeSingleNalu(size_t fragment_index) { + // Add a single NALU to the queue, no aggregation. 
+ size_t payload_size_left = limits_.max_payload_len; + if (input_fragments_.size() == 1) + payload_size_left -= limits_.single_packet_reduction_len; + else if (fragment_index == 0) + payload_size_left -= limits_.first_packet_reduction_len; + else if (fragment_index + 1 == input_fragments_.size()) + payload_size_left -= limits_.last_packet_reduction_len; + rtc::ArrayView fragment = input_fragments_[fragment_index]; + if (payload_size_left < fragment.size()) { + RTC_LOG(LS_ERROR) << "Failed to fit a fragment to packet in SingleNalu " + "packetization mode. Payload size left " + << payload_size_left << ", fragment length " + << fragment.size() << ", packet capacity " + << limits_.max_payload_len; + return false; + } + RTC_CHECK_GT(fragment.size(), 0u); + packets_.push(PacketUnit(fragment, true /* first */, true /* last */, + false /* aggregated */, fragment[0])); + ++num_packets_left_; + return true; +} + +int RtpPacketizerH265::PacketizeAp(size_t fragment_index) { + // Aggregate fragments into one packet (STAP-A). + size_t payload_size_left = limits_.max_payload_len; + if (input_fragments_.size() == 1) + payload_size_left -= limits_.single_packet_reduction_len; + else if (fragment_index == 0) + payload_size_left -= limits_.first_packet_reduction_len; + int aggregated_fragments = 0; + size_t fragment_headers_length = 0; + rtc::ArrayView fragment = input_fragments_[fragment_index]; + RTC_CHECK_GE(payload_size_left, fragment.size()); + ++num_packets_left_; + + auto payload_size_needed = [&] { + size_t fragment_size = fragment.size() + fragment_headers_length; + if (input_fragments_.size() == 1) { + // Single fragment, single packet, payload_size_left already adjusted + // with limits_.single_packet_reduction_len. + return fragment_size; + } + if (fragment_index == input_fragments_.size() - 1) { + // Last fragment, so StrapA might be the last packet. 
+ return fragment_size + limits_.last_packet_reduction_len; + } + return fragment_size; + }; + + while (payload_size_left >= payload_size_needed()) { + RTC_CHECK_GT(fragment.size(), 0); + packets_.push(PacketUnit(fragment, aggregated_fragments == 0, false, true, + fragment[0])); + payload_size_left -= fragment.size(); + payload_size_left -= fragment_headers_length; + + fragment_headers_length = kHevcLengthFieldSize; + // If we are going to try to aggregate more fragments into this packet + // we need to add the STAP-A NALU header and a length field for the first + // NALU of this packet. + if (aggregated_fragments == 0) + fragment_headers_length += kHevcNalHeaderSize + kHevcLengthFieldSize; + ++aggregated_fragments; + + // Next fragment. + ++fragment_index; + if (fragment_index == input_fragments_.size()) + break; + fragment = input_fragments_[fragment_index]; + } + RTC_CHECK_GT(aggregated_fragments, 0); + packets_.back().last_fragment = true; + return fragment_index; +} + +bool RtpPacketizerH265::NextPacket(RtpPacketToSend* rtp_packet) { + RTC_DCHECK(rtp_packet); + + if (packets_.empty()) { + return false; + } + + PacketUnit packet = packets_.front(); + + if (packet.first_fragment && packet.last_fragment) { + // Single NAL unit packet. 
+ size_t bytes_to_send = packet.source_fragment.size(); + uint8_t* buffer = rtp_packet->AllocatePayload(bytes_to_send); + memcpy(buffer, packet.source_fragment.data(), bytes_to_send); + packets_.pop(); + input_fragments_.pop_front(); + } else if (packet.aggregated) { + bool is_last_packet = num_packets_left_ == 1; + NextAggregatePacket(rtp_packet, is_last_packet); + } else { + NextFragmentPacket(rtp_packet); + } + rtp_packet->SetMarker(packets_.empty()); + --num_packets_left_; + return true; +} + +void RtpPacketizerH265::NextAggregatePacket(RtpPacketToSend* rtp_packet, + bool last) { + size_t payload_capacity = rtp_packet->FreeCapacity(); + RTC_CHECK_GE(payload_capacity, kHevcNalHeaderSize); + uint8_t* buffer = rtp_packet->AllocatePayload(payload_capacity); + + PacketUnit* packet = &packets_.front(); + RTC_CHECK(packet->first_fragment); + uint8_t payload_hdr_h = packet->header >> 8; + uint8_t payload_hdr_l = packet->header & 0xFF; + uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask; + + payload_hdr_h = + (payload_hdr_h & kHevcTypeMaskN) | (kHevcAp << 1) | layer_id_h; + + buffer[0] = payload_hdr_h; + buffer[1] = payload_hdr_l; + int index = kHevcNalHeaderSize; + bool is_last_fragment = packet->last_fragment; + while (packet->aggregated) { + // Add NAL unit length field. + rtc::ArrayView fragment = packet->source_fragment; + ByteWriter::WriteBigEndian(&buffer[index], fragment.size()); + index += kHevcLengthFieldSize; + // Add NAL unit. + memcpy(&buffer[index], fragment.data(), fragment.size()); + index += fragment.size(); + packets_.pop(); + input_fragments_.pop_front(); + if (is_last_fragment) + break; + packet = &packets_.front(); + is_last_fragment = packet->last_fragment; + } + RTC_CHECK(is_last_fragment); + rtp_packet->SetPayloadSize(index); +} + +void RtpPacketizerH265::NextFragmentPacket(RtpPacketToSend* rtp_packet) { + PacketUnit* packet = &packets_.front(); + // NAL unit fragmented over multiple packets (FU). 
+ // We do not send original NALU header, so it will be replaced by the + // PayloadHdr of the first packet. + uint8_t payload_hdr_h = + packet->header >> 8; // 1-bit F, 6-bit type, 1-bit layerID highest-bit + uint8_t payload_hdr_l = packet->header & 0xFF; + uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask; + uint8_t fu_header = 0; + // S | E |6 bit type. + fu_header |= (packet->first_fragment ? kHevcSBit : 0); + fu_header |= (packet->last_fragment ? kHevcEBit : 0); + uint8_t type = (payload_hdr_h & kHevcTypeMask) >> 1; + fu_header |= type; + // Now update payload_hdr_h with FU type. + payload_hdr_h = + (payload_hdr_h & kHevcTypeMaskN) | (kHevcFu << 1) | layer_id_h; + rtc::ArrayView fragment = packet->source_fragment; + uint8_t* buffer = rtp_packet->AllocatePayload( + kHevcFuHeaderSize + kHevcNalHeaderSize + fragment.size()); + buffer[0] = payload_hdr_h; + buffer[1] = payload_hdr_l; + buffer[2] = fu_header; + + if (packet->last_fragment) { + memcpy(buffer + kHevcFuHeaderSize + kHevcNalHeaderSize, fragment.data(), + fragment.size()); + } else { + memcpy(buffer + kHevcFuHeaderSize + kHevcNalHeaderSize, fragment.data(), + fragment.size()); + } + packets_.pop(); +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_format_h265.h b/modules/rtp_rtcp/source/rtp_format_h265.h new file mode 100644 index 0000000000..6811ea8356 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_format_h265.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ +#define MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/rtp_rtcp/source/rtp_format.h" +#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class RtpPacketizerH265 : public RtpPacketizer { + public: + // Initialize with payload from encoder. + // The payload_data must be exactly one encoded H.265 frame. + RtpPacketizerH265(rtc::ArrayView payload, + PayloadSizeLimits limits, + H265PacketizationMode packetization_mode); + + ~RtpPacketizerH265() override; + + RtpPacketizerH265(const RtpPacketizerH265&) = delete; + RtpPacketizerH265& operator=(const RtpPacketizerH265&) = delete; + + size_t NumPackets() const override; + + // Get the next payload with H.265 payload header. + // buffer is a pointer to where the output will be written. + // bytes_to_send is an output variable that will contain number of bytes + // written to buffer. The parameter last_packet is true for the last packet of + // the frame, false otherwise (i.e., call the function again to get the + // next packet). + // Returns true on success or false if there was no payload to packetize. 
+ bool NextPacket(RtpPacketToSend* rtp_packet) override; + + private: + struct Packet { + Packet(size_t offset, + size_t size, + bool first_fragment, + bool last_fragment, + bool aggregated, + uint16_t header) + : offset(offset), + size(size), + first_fragment(first_fragment), + last_fragment(last_fragment), + aggregated(aggregated), + header(header) {} + + size_t offset; + size_t size; + bool first_fragment; + bool last_fragment; + bool aggregated; + uint16_t header; // Different from H264 + }; + struct PacketUnit { + PacketUnit(rtc::ArrayView source_fragment, + bool first_fragment, + bool last_fragment, + bool aggregated, + uint16_t header) + : source_fragment(source_fragment), + first_fragment(first_fragment), + last_fragment(last_fragment), + aggregated(aggregated), + header(header) {} + + rtc::ArrayView source_fragment; + bool first_fragment; + bool last_fragment; + bool aggregated; + uint16_t header; + }; + typedef std::queue PacketQueue; + std::deque> input_fragments_; + std::queue packets_; + + bool GeneratePackets(H265PacketizationMode packetization_mode); + bool PacketizeFu(size_t fragment_index); + int PacketizeAp(size_t fragment_index); + bool PacketizeSingleNalu(size_t fragment_index); + + void NextAggregatePacket(RtpPacketToSend* rtp_packet, bool last); + void NextFragmentPacket(RtpPacketToSend* rtp_packet); + + const PayloadSizeLimits limits_; + size_t num_packets_left_; +}; +} // namespace webrtc +#endif // MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc index 05428ff289..3ff884034f 100644 --- a/modules/rtp_rtcp/source/rtp_sender_video.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video.cc @@ -815,6 +815,7 @@ uint8_t RTPSenderVideo::GetTemporalId(const RTPVideoHeader& header) { uint8_t operator()(const RTPVideoHeaderLegacyGeneric&) { return kNoTemporalIdx; } + uint8_t operator()(const RTPVideoHeaderH265&) { return kNoTemporalIdx; } uint8_t 
operator()(const absl::monostate&) { return kNoTemporalIdx; } }; return absl::visit(TemporalIdGetter(), header.video_type_header); diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h index 115b17d36d..e10de135b7 100644 --- a/modules/rtp_rtcp/source/rtp_video_header.h +++ b/modules/rtp_rtcp/source/rtp_video_header.h @@ -25,6 +25,7 @@ #include "api/video/video_rotation.h" #include "api/video/video_timing.h" #include "modules/video_coding/codecs/h264/include/h264_globals.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" @@ -40,6 +41,7 @@ using RTPVideoTypeHeader = absl::variant; struct RTPVideoHeader { diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.cc new file mode 100644 index 0000000000..a6de581e7e --- /dev/null +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.cc @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h" + +#include +#include +#include +#include + +#include "absl/base/macros.h" +#include "absl/types/optional.h" +#include "absl/types/variant.h" +#include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#include "rtc_base/checks.h" +#include "rtc_base/copy_on_write_buffer.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +enum NaluType { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kVps = 32, + kHevcSps = 33, + kHevcPps = 34, + kHevcAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kHevcAp = 48, + kHevcFu = 49 +}; + +/* + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | PayloadHdr (Type=49) | FU header | DONL (cond) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| +*/ +// Unlike H.264, HEVC NAL header is 2-bytes. +static const size_t kHevcNalHeaderSize = 2; +// H.265's FU is constructed of 2-byte payload header, and 1-byte FU header +static const size_t kHevcFuHeaderSize = 1; +static const size_t kHevcLengthFieldSize = 2; +static const size_t kHevcApHeaderSize = + kHevcNalHeaderSize + kHevcLengthFieldSize; + +enum HevcNalHdrMasks { + kHevcFBit = 0x80, + kHevcTypeMask = 0x7E, + kHevcLayerIDHMask = 0x1, + kHevcLayerIDLMask = 0xF8, + kHevcTIDMask = 0x7, + kHevcTypeMaskN = 0x81, + kHevcTypeMaskInFuHeader = 0x3F +}; + +// Bit masks for FU headers. 
+enum HevcFuDefs { kHevcSBit = 0x80, kHevcEBit = 0x40, kHevcFuTypeBit = 0x3F }; + +// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer. +bool ParseApStartOffsets(const uint8_t* nalu_ptr, + size_t length_remaining, + std::vector* offsets) { + size_t offset = 0; + while (length_remaining > 0) { + // Buffer doesn't contain room for additional nalu length. + if (length_remaining < sizeof(uint16_t)) + return false; + uint16_t nalu_size = ByteReader::ReadBigEndian(nalu_ptr); + nalu_ptr += sizeof(uint16_t); + length_remaining -= sizeof(uint16_t); + if (nalu_size > length_remaining) + return false; + nalu_ptr += nalu_size; + length_remaining -= nalu_size; + + offsets->push_back(offset + kHevcApHeaderSize); + offset += kHevcLengthFieldSize + nalu_size; + } + return true; +} + +absl::optional ProcessApOrSingleNalu( + rtc::CopyOnWriteBuffer rtp_payload) { + const uint8_t* const payload_data = rtp_payload.cdata(); + absl::optional parsed_payload( + absl::in_place); + parsed_payload->video_payload = rtp_payload; + parsed_payload->video_header.width = 0; + parsed_payload->video_header.height = 0; + parsed_payload->video_header.codec = kVideoCodecH265; + parsed_payload->video_header.is_first_packet_in_frame = true; + auto& h265_header = parsed_payload->video_header.video_type_header + .emplace(); + + const uint8_t* nalu_start = payload_data + kHevcNalHeaderSize; + const size_t nalu_length = rtp_payload.size() - kHevcNalHeaderSize; + uint8_t nal_type = (payload_data[0] & kHevcTypeMask) >> 1; + std::vector nalu_start_offsets; + if (nal_type == H265::NaluType::kAP) { + // Skip the StapA header (StapA NAL type + length). 
+ if (rtp_payload.size() <= kHevcApHeaderSize) { + RTC_LOG(LS_ERROR) << "AP header truncated."; + return absl::nullopt; + } + + if (!ParseApStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { + RTC_LOG(LS_ERROR) << "AP packet with incorrect NALU packet lengths."; + return absl::nullopt; + } + + h265_header.packetization_type = kH265AP; + // nal_type = (payload_data[kHevcApHeaderSize] & kHevcTypeMask) >> 1; + } else { + h265_header.packetization_type = kH265SingleNalu; + nalu_start_offsets.push_back(0); + } + h265_header.nalu_type = nal_type; + parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; + + nalu_start_offsets.push_back(rtp_payload.size() + kHevcLengthFieldSize); // End offset. + for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { + size_t start_offset = nalu_start_offsets[i]; + // End offset is actually start offset for next unit, excluding length field + // so remove that from this units length. + size_t end_offset = nalu_start_offsets[i + 1] - kHevcLengthFieldSize; + if (end_offset - start_offset < kHevcNalHeaderSize) { // Same as H.264. + RTC_LOG(LS_ERROR) << "AP packet too short"; + return absl::nullopt; + } + + H265NaluInfo nalu; + nalu.type = (payload_data[start_offset] & kHevcTypeMask) >> 1; + nalu.vps_id = -1; + nalu.sps_id = -1; + nalu.pps_id = -1; + start_offset += kHevcNalHeaderSize; + switch (nalu.type) { + case H265::NaluType::kVps: { + absl::optional vps = H265VpsParser::ParseVps( + &payload_data[start_offset], end_offset - start_offset); + if (vps) { + nalu.vps_id = vps->id; + } else { + RTC_LOG(LS_WARNING) << "Failed to parse VPS id from VPS slice."; + } + break; + } + case H265::NaluType::kSps: { + // TODO: Check if VUI is present in SPS and if it needs to be modified to + // avoid excessive decoder latency. + + // Copy any previous data first (likely just the first header). 
+ std::unique_ptr output_buffer(new rtc::Buffer()); + if (start_offset) + output_buffer->AppendData(payload_data, start_offset); + + absl::optional sps = H265SpsParser::ParseSps( + &payload_data[start_offset], end_offset - start_offset); + + if (sps) { + parsed_payload->video_header.width = sps->width; + parsed_payload->video_header.height = sps->height; + nalu.sps_id = sps->id; + nalu.vps_id = sps->vps_id; + } else { + RTC_LOG(LS_WARNING) + << "Failed to parse SPS and VPS id from SPS slice."; + } + parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey; + break; + } + case H265::NaluType::kPps: { + uint32_t pps_id; + uint32_t sps_id; + if (H265PpsParser::ParsePpsIds(&payload_data[start_offset], + end_offset - start_offset, &pps_id, + &sps_id)) { + nalu.pps_id = pps_id; + nalu.sps_id = sps_id; + } else { + RTC_LOG(LS_WARNING) + << "Failed to parse PPS id and SPS id from PPS slice."; + } + break; + } + case H265::NaluType::kIdrWRadl: + case H265::NaluType::kIdrNLp: + case H265::NaluType::kCra: + parsed_payload->video_header.frame_type = + VideoFrameType::kVideoFrameKey; + ABSL_FALLTHROUGH_INTENDED; + case H265::NaluType::kTrailN: + case H265::NaluType::kTrailR: { + absl::optional pps_id = + H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp( + &payload_data[start_offset], end_offset - start_offset, + nalu.type); + if (pps_id) { + nalu.pps_id = *pps_id; + } else { + RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " + << static_cast(nalu.type); + } + break; + } + // Slices below don't contain SPS or PPS ids. 
+      case H265::NaluType::kAud:
+      case H265::NaluType::kTsaN:
+      case H265::NaluType::kTsaR:
+      case H265::NaluType::kStsaN:
+      case H265::NaluType::kStsaR:
+      case H265::NaluType::kRadlN:
+      case H265::NaluType::kRadlR:
+      case H265::NaluType::kBlaWLp:
+      case H265::NaluType::kBlaWRadl:
+      case H265::NaluType::kPrefixSei:
+      case H265::NaluType::kSuffixSei:
+        break;
+      case H265::NaluType::kAP:
+      case H265::NaluType::kFU:
+        RTC_LOG(LS_WARNING) << "Unexpected AP or FU received.";
+        return absl::nullopt;
+    }
+
+    if (h265_header.nalus_length == kMaxNalusPerPacket) {
+      RTC_LOG(LS_WARNING)
+          << "Received packet containing more than " << kMaxNalusPerPacket
+          << " NAL units. Will not keep track sps and pps ids for all of them.";
+    } else {
+      h265_header.nalus[h265_header.nalus_length++] = nalu;
+    }
+  }
+  return parsed_payload;
+}
+
+absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuNalu(
+    rtc::CopyOnWriteBuffer rtp_payload) {
+  if (rtp_payload.size() < kHevcFuHeaderSize + kHevcNalHeaderSize) {
+    RTC_LOG(LS_ERROR) << "FU-A NAL units truncated.";
+    return absl::nullopt;
+  }
+  absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
+      absl::in_place);
+
+  uint8_t f = rtp_payload.cdata()[0] & kHevcFBit;
+  uint8_t layer_id_h = rtp_payload.cdata()[0] & kHevcLayerIDHMask;
+  uint8_t layer_id_l_unshifted = rtp_payload.cdata()[1] & kHevcLayerIDLMask;
+  uint8_t tid = rtp_payload.cdata()[1] & kHevcTIDMask;
+
+  uint8_t original_nal_type = rtp_payload.cdata()[2] & kHevcTypeMaskInFuHeader;
+  bool first_fragment = rtp_payload.cdata()[2] & kHevcSBit;
+  H265NaluInfo nalu;
+  nalu.type = original_nal_type;
+  nalu.vps_id = -1;
+  nalu.sps_id = -1;
+  nalu.pps_id = -1;
+  if (first_fragment) {
+    // The slice payload starts after the 2-byte NAL header plus the 1-byte FU
+    // header, so the length must exclude both. Subtracting only
+    // kHevcFuHeaderSize here would over-read the buffer by 2 bytes.
+    absl::optional<uint32_t> pps_id =
+        H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
+            rtp_payload.cdata() + kHevcNalHeaderSize + kHevcFuHeaderSize,
+            rtp_payload.size() - kHevcNalHeaderSize - kHevcFuHeaderSize,
+            nalu.type);
+    if (pps_id) {
+      nalu.pps_id = *pps_id;
+    } else {
+      RTC_LOG(LS_WARNING)
+          << "Failed to parse PPS from first fragment of FU NAL "
+             "unit with original type: "
+          <<
static_cast(nalu.type); + } + rtp_payload = rtp_payload.Slice(1, rtp_payload.size() - 1); + rtp_payload.MutableData()[0] = f | original_nal_type << 1 | layer_id_h; + rtp_payload.MutableData()[1] = layer_id_l_unshifted | tid; + parsed_payload->video_payload = std::move(rtp_payload); + } else { + parsed_payload->video_payload = rtp_payload.Slice( + kHevcNalHeaderSize + kHevcFuHeaderSize, + rtp_payload.size() - kHevcNalHeaderSize - kHevcFuHeaderSize); + } + + if (original_nal_type == H265::NaluType::kIdrWRadl + || original_nal_type == H265::NaluType::kIdrNLp + || original_nal_type == H265::NaluType::kCra) { + parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey; + } else { + parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; + } + parsed_payload->video_header.width = 0; + parsed_payload->video_header.height = 0; + parsed_payload->video_header.codec = kVideoCodecH265; + parsed_payload->video_header.is_first_packet_in_frame = first_fragment; + auto& h265_header = parsed_payload->video_header.video_type_header + .emplace(); + h265_header.packetization_type = kH265FU; + h265_header.nalu_type = original_nal_type; + if (first_fragment) { + h265_header.nalus[h265_header.nalus_length] = nalu; + h265_header.nalus_length = 1; + } + return parsed_payload; +} + +} // namespace + +absl::optional +VideoRtpDepacketizerH265::Parse(rtc::CopyOnWriteBuffer rtp_payload) { + if (rtp_payload.size() == 0) { + RTC_LOG(LS_ERROR) << "Empty payload."; + return absl::nullopt; + } + + uint8_t nal_type = (rtp_payload.cdata()[0] & kHevcTypeMask) >> 1; + + if (nal_type == H265::NaluType::kFU) { + // Fragmented NAL units (FU-A). + return ParseFuNalu(std::move(rtp_payload)); + } else { + // We handle STAP-A and single NALU's the same way here. The jitter buffer + // will depacketize the STAP-A into NAL units later. + // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. 
+ return ProcessApOrSingleNalu(std::move(rtp_payload)); + } +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h b/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h new file mode 100644 index 0000000000..4ae90cb6fe --- /dev/null +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_ +#define MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_ + +#include "absl/types/optional.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" +#include "rtc_base/copy_on_write_buffer.h" + +namespace webrtc { +class VideoRtpDepacketizerH265 : public VideoRtpDepacketizer { + public: + ~VideoRtpDepacketizerH265() override = default; + + absl::optional Parse( + rtc::CopyOnWriteBuffer rtp_payload) override; +}; +} // namespace webrtc + +#endif // MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_ diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 59236058e3..c82ea90e4d 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -180,6 +180,8 @@ rtc_library("video_coding") { "h264_sprop_parameter_sets.h", "h264_sps_pps_tracker.cc", "h264_sps_pps_tracker.h", + "h265_vps_sps_pps_tracker.cc", + "h265_vps_sps_pps_tracker.h", "include/video_codec_initializer.h", "internal_defines.h", "loss_notification_controller.cc", diff --git a/modules/video_coding/codecs/h264/include/h264_globals.h b/modules/video_coding/codecs/h264/include/h264_globals.h index b61dc8c507..c4b84380aa 100644 --- 
a/modules/video_coding/codecs/h264/include/h264_globals.h +++ b/modules/video_coding/codecs/h264/include/h264_globals.h @@ -78,6 +78,17 @@ struct RTPVideoHeaderH264 { // The packetization mode of this transport. Packetization mode // determines which packetization types are allowed when packetizing. H264PacketizationMode packetization_mode; + // Running cuter for every frame to determin frame decodable + // depending along with Temporal ID (obtained from RTP header extn). + // '0' if PictureID does not exist. + uint16_t picture_id; + // For support slice-based transmission, mark end of a frame so that + // the H.264 packetizer will not set marker bit for the last fragment of + // current outgoing data if it does not contain last fragment of the frame; + // and will treat the first fragment of the frame as continuous playload, so + // that it will not create FU header or STAP-A header on first fragment if + // contains last fragment of the frame. + bool has_last_fragement; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/h265/include/h265_globals.h b/modules/video_coding/codecs/h265/include/h265_globals.h new file mode 100644 index 0000000000..b69237ae50 --- /dev/null +++ b/modules/video_coding/codecs/h265/include/h265_globals.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains codec dependent definitions that are needed in +// order to compile the WebRTC codebase, even if this codec is not used. 
+ +#ifndef MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ +#define MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ + +#include "modules/video_coding/codecs/h264/include/h264_globals.h" + +namespace webrtc { + +// The packetization types that we support: single, aggregated, and fragmented. +enum H265PacketizationTypes { + kH265SingleNalu, // This packet contains a single NAL unit. + kH265AP, // This packet contains aggregation Packet. + // If this packet has an associated NAL unit type, + // it'll be for the first such aggregated packet. + kH265FU, // This packet contains a FU (fragmentation + // unit) packet, meaning it is a part of a frame + // that was too large to fit into a single packet. +}; + +struct H265NaluInfo { + uint8_t type; + int vps_id; + int sps_id; + int pps_id; +}; + +enum class H265PacketizationMode { + NonInterleaved = 0, // Mode 1 - STAP-A, FU-A is allowed + SingleNalUnit // Mode 0 - only single NALU allowed +}; + +struct RTPVideoHeaderH265 { + // The NAL unit type. If this is a header for a fragmented packet, it's the + // NAL unit type of the original data. If this is the header for an aggregated + // packet, it's the NAL unit type of the first NAL unit in the packet. + uint8_t nalu_type; + H265PacketizationTypes packetization_type; + H265NaluInfo nalus[kMaxNalusPerPacket]; + size_t nalus_length; + // The packetization type of this buffer - single, aggregated or fragmented. + H265PacketizationMode packetization_mode; + // Running cuter for every frame to determin frame decodable + // depending along with Temporal ID (obtained from RTP header extn). + // '0' if PictureID does not exist. 
+ uint16_t picture_id; + // For support slice-based transmission, mark end of a frame so that + // the H.265 packetizer will not set marker bit for the last fragment of + // current outgoing data if it does not contain last fragment of the frame; + // and will treat the first fragment of the frame as continuous playload, so + // that it will not create FU header or STAP-A header on first fragment if + // contains last fragment of the frame. + bool has_last_fragement; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc index 80744e2d8c..7951445db1 100644 --- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc +++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc @@ -93,6 +93,10 @@ int MultiplexEncoderAdapter::InitEncode( key_frame_interval_ = video_codec.H264()->keyFrameInterval; video_codec.H264()->keyFrameInterval = 0; break; + case kVideoCodecH265: + key_frame_interval_ = video_codec.H265()->keyFrameInterval; + video_codec.H265()->keyFrameInterval = 0; + break; default: break; } diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc index 637a20cfc9..565a127819 100644 --- a/modules/video_coding/encoded_frame.cc +++ b/modules/video_coding/encoded_frame.cc @@ -140,6 +140,10 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) { _codecSpecificInfo.codecType = kVideoCodecAV1; break; } + case kVideoCodecH265: { + _codecSpecificInfo.codecType = kVideoCodecH265; + break; + } default: { _codecSpecificInfo.codecType = kVideoCodecGeneric; break; diff --git a/modules/video_coding/h265_vps_sps_pps_tracker.cc b/modules/video_coding/h265_vps_sps_pps_tracker.cc new file mode 100644 index 0000000000..84a6c35771 --- /dev/null +++ b/modules/video_coding/h265_vps_sps_pps_tracker.cc @@ -0,0 +1,315 @@ 
+/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/h265_vps_sps_pps_tracker.h" + +#include +#include +#include + +#include "absl/types/variant.h" +#include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "modules/video_coding/codecs/h264/include/h264_globals.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#include "modules/video_coding/frame_object.h" +#include "modules/video_coding/packet_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace video_coding { + +namespace { +const uint8_t start_code_h265[] = {0, 0, 0, 1}; +} // namespace + +H265VpsSpsPpsTracker::FixedBitstream H265VpsSpsPpsTracker::CopyAndFixBitstream( + rtc::ArrayView bitstream, + RTPVideoHeader* video_header) { + RTC_DCHECK(video_header); + RTC_DCHECK(video_header->codec == kVideoCodecH265); + + auto& h265_header = + absl::get(video_header->video_type_header); + + bool append_vps_sps_pps = false; + auto vps = vps_data_.end(); + auto sps = sps_data_.end(); + auto pps = pps_data_.end(); + + for (size_t i = 0; i < h265_header.nalus_length; ++i) { + const H265NaluInfo& nalu = h265_header.nalus[i]; + switch (nalu.type) { + case H265::NaluType::kVps: { + vps_data_[nalu.vps_id].size = 0; + break; + } + case H265::NaluType::kSps: { + sps_data_[nalu.sps_id].vps_id = nalu.vps_id; + sps_data_[nalu.sps_id].width = video_header->width; + sps_data_[nalu.sps_id].height 
= video_header->height; + break; + } + case H265::NaluType::kPps: { + pps_data_[nalu.pps_id].sps_id = nalu.sps_id; + break; + } + case H265::NaluType::kIdrWRadl: + case H265::NaluType::kIdrNLp: + case H265::NaluType::kCra: { + // If this is the first packet of an IDR, make sure we have the required + // SPS/PPS and also calculate how much extra space we need in the buffer + // to prepend the SPS/PPS to the bitstream with start codes. + if (video_header->is_first_packet_in_frame) { + if (nalu.pps_id == -1) { + RTC_LOG(LS_WARNING) << "No PPS id in IDR nalu."; + return {kRequestKeyframe}; + } + + pps = pps_data_.find(nalu.pps_id); + if (pps == pps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No PPS with id " << nalu.pps_id << " received"; + return {kRequestKeyframe}; + } + + sps = sps_data_.find(pps->second.sps_id); + if (sps == sps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No SPS with id << " << pps->second.sps_id << " received"; + return {kRequestKeyframe}; + } + + vps = vps_data_.find(sps->second.vps_id); + if (vps == vps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No VPS with id " << sps->second.vps_id << " received"; + return {kRequestKeyframe}; + } + + // Since the first packet of every keyframe should have its width and + // height set we set it here in the case of it being supplied out of + // band. + video_header->width = sps->second.width; + video_header->height = sps->second.height; + + // If the VPS/SPS/PPS was supplied out of band then we will have saved + // the actual bitstream in |data|. + // This branch is not verified. + if (vps->second.data && sps->second.data && pps->second.data) { + RTC_DCHECK_GT(vps->second.size, 0); + RTC_DCHECK_GT(sps->second.size, 0); + RTC_DCHECK_GT(pps->second.size, 0); + append_vps_sps_pps = true; + } + } + break; + } + default: + break; + } + } + + RTC_CHECK(!append_vps_sps_pps || + (sps != sps_data_.end() && pps != pps_data_.end())); + + // Calculate how much space we need for the rest of the bitstream. 
+ size_t required_size = 0; + + if (append_vps_sps_pps) { + required_size += vps->second.size + sizeof(start_code_h265); + required_size += sps->second.size + sizeof(start_code_h265); + required_size += pps->second.size + sizeof(start_code_h265); + } + + if (h265_header.packetization_type == kH265AP) { + const uint8_t* nalu_ptr = bitstream.data() + 1; + while (nalu_ptr < bitstream.data() + bitstream.size()) { + RTC_DCHECK(video_header->is_first_packet_in_frame); + required_size += sizeof(start_code_h265); + + // The first two bytes describe the length of a segment. + uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1]; + nalu_ptr += 2; + + required_size += segment_length; + nalu_ptr += segment_length; + } + } else { + // TODO: in h.264 this is "h264_header.nalus_length > 0" + if (video_header->is_first_packet_in_frame) + required_size += sizeof(start_code_h265); + required_size += bitstream.size(); + } + + // Then we copy to the new buffer. + H265VpsSpsPpsTracker::FixedBitstream fixed; + fixed.bitstream.EnsureCapacity(required_size); + + if (append_vps_sps_pps) { + // Insert VPS. + fixed.bitstream.AppendData(start_code_h265); + fixed.bitstream.AppendData(vps->second.data.get(), vps->second.size); + + // Insert SPS. + fixed.bitstream.AppendData(start_code_h265); + fixed.bitstream.AppendData(sps->second.data.get(), sps->second.size); + + // Insert PPS. + fixed.bitstream.AppendData(start_code_h265); + fixed.bitstream.AppendData(pps->second.data.get(), pps->second.size); + + // Update codec header to reflect the newly added SPS and PPS. 
+    H265NaluInfo vps_info;
+    vps_info.type = H265::NaluType::kVps;
+    vps_info.vps_id = vps->first;
+    vps_info.sps_id = -1;
+    vps_info.pps_id = -1;
+    H265NaluInfo sps_info;
+    sps_info.type = H265::NaluType::kSps;
+    sps_info.vps_id = vps->first;
+    sps_info.sps_id = sps->first;
+    sps_info.pps_id = -1;
+    H265NaluInfo pps_info;
+    pps_info.type = H265::NaluType::kPps;
+    pps_info.vps_id = vps->first;
+    pps_info.sps_id = sps->first;
+    pps_info.pps_id = pps->first;
+    // Three NAL infos (VPS, SPS, PPS) are appended below, so require room
+    // for all three. "+ 2" would allow writing one slot past the array end.
+    if (h265_header.nalus_length + 3 <= kMaxNalusPerPacket) {
+      h265_header.nalus[h265_header.nalus_length++] = vps_info;
+      h265_header.nalus[h265_header.nalus_length++] = sps_info;
+      h265_header.nalus[h265_header.nalus_length++] = pps_info;
+    } else {
+      RTC_LOG(LS_WARNING) << "Not enough space in H.265 codec header to insert "
+                             "SPS/PPS provided out-of-band.";
+    }
+  }
+
+  // Copy the rest of the bitstream and insert start codes.
+  if (h265_header.packetization_type == kH265AP) {
+    const uint8_t* nalu_ptr = bitstream.data() + 1;
+    while (nalu_ptr < bitstream.data() + bitstream.size()) {
+      fixed.bitstream.AppendData(start_code_h265);
+
+      // The first two bytes describe the length of a segment.
+      uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1];
+      nalu_ptr += 2;
+      size_t copy_end = nalu_ptr - bitstream.data() + segment_length;
+      if (copy_end > bitstream.size()) {
+        return {kDrop};
+      }
+      fixed.bitstream.AppendData(nalu_ptr, segment_length);
+      nalu_ptr += segment_length;
+    }
+  } else {
+    // For h.264 it is "h264_header.nalus_length > 0"
+    if (video_header->is_first_packet_in_frame) {
+      fixed.bitstream.AppendData(start_code_h265);
+    }
+    fixed.bitstream.AppendData(bitstream.data(), bitstream.size());
+  }
+
+  fixed.action = kInsert;
+  return fixed;
+}
+
+void H265VpsSpsPpsTracker::InsertVpsSpsPpsNalus(
+    const std::vector<uint8_t>& vps,
+    const std::vector<uint8_t>& sps,
+    const std::vector<uint8_t>& pps) {
+  constexpr size_t kNaluHeaderOffset = 1;
+  if (vps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "VPS size " << vps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  // Bug fix: this previously compared the VPS NAL type against kSps and
+  // logged an SPS message, so any valid VPS was rejected here.
+  if ((vps[0] & 0x7e) >> 1 != H265::NaluType::kVps) {
+    RTC_LOG(LS_WARNING) << "VPS Nalu header missing";
+    return;
+  }
+  if (sps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "SPS size " << sps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  if ((sps[0] & 0x7e) >> 1 != H265::NaluType::kSps) {
+    RTC_LOG(LS_WARNING) << "SPS Nalu header missing";
+    return;
+  }
+  if (pps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "PPS size " << pps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  if ((pps[0] & 0x7e) >> 1 != H265::NaluType::kPps) {
+    RTC_LOG(LS_WARNING) << "PPS Nalu header missing";
+    return;
+  }
+  absl::optional<H265VpsParser::VpsState> parsed_vps = H265VpsParser::ParseVps(
+      vps.data() + kNaluHeaderOffset, vps.size() - kNaluHeaderOffset);
+  absl::optional<H265SpsParser::SpsState> parsed_sps = H265SpsParser::ParseSps(
+      sps.data() + kNaluHeaderOffset, sps.size() - kNaluHeaderOffset);
+  absl::optional<H265PpsParser::PpsState> parsed_pps = H265PpsParser::ParsePps(
+      pps.data() + kNaluHeaderOffset, pps.size() - kNaluHeaderOffset);
+
+  if (!parsed_vps) {
+    RTC_LOG(LS_WARNING) << "Failed to parse
VPS."; + } + + if (!parsed_sps) { + RTC_LOG(LS_WARNING) << "Failed to parse SPS."; + } + + if (!parsed_pps) { + RTC_LOG(LS_WARNING) << "Failed to parse PPS."; + } + + if (!parsed_vps || !parsed_pps || !parsed_sps) { + return; + } + + VpsInfo vps_info; + vps_info.size = vps.size(); + uint8_t* vps_data = new uint8_t[vps_info.size]; + memcpy(vps_data, vps.data(), vps_info.size); + vps_info.data.reset(vps_data); + vps_data_[parsed_vps->id] = std::move(vps_info); + + SpsInfo sps_info; + sps_info.size = sps.size(); + sps_info.width = parsed_sps->width; + sps_info.height = parsed_sps->height; + sps_info.vps_id = parsed_sps->vps_id; + uint8_t* sps_data = new uint8_t[sps_info.size]; + memcpy(sps_data, sps.data(), sps_info.size); + sps_info.data.reset(sps_data); + sps_data_[parsed_sps->id] = std::move(sps_info); + + PpsInfo pps_info; + pps_info.size = pps.size(); + pps_info.sps_id = parsed_pps->sps_id; + uint8_t* pps_data = new uint8_t[pps_info.size]; + memcpy(pps_data, pps.data(), pps_info.size); + pps_info.data.reset(pps_data); + pps_data_[parsed_pps->id] = std::move(pps_info); + + RTC_LOG(LS_INFO) << "Inserted SPS id " << parsed_sps->id << " and PPS id " + << parsed_pps->id << " (referencing SPS " + << parsed_pps->sps_id << ")"; +} + +} // namespace video_coding +} // namespace webrtc diff --git a/modules/video_coding/h265_vps_sps_pps_tracker.h b/modules/video_coding/h265_vps_sps_pps_tracker.h new file mode 100644 index 0000000000..1aa22d76ad --- /dev/null +++ b/modules/video_coding/h265_vps_sps_pps_tracker.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_
+#define MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/rtp_rtcp/source/rtp_video_header.h"
+#include "rtc_base/copy_on_write_buffer.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class H265VpsSpsPpsTracker {
+ public:
+  enum PacketAction { kInsert, kDrop, kRequestKeyframe };
+  struct FixedBitstream {
+    PacketAction action;
+    rtc::CopyOnWriteBuffer bitstream;
+  };
+
+  FixedBitstream CopyAndFixBitstream(rtc::ArrayView<const uint8_t> bitstream,
+                                     RTPVideoHeader* video_header);
+
+  void InsertVpsSpsPpsNalus(const std::vector<uint8_t>& vps,
+                            const std::vector<uint8_t>& sps,
+                            const std::vector<uint8_t>& pps);
+
+ private:
+  struct VpsInfo {
+    size_t size = 0;
+    std::unique_ptr<uint8_t[]> data;
+  };
+
+  struct PpsInfo {
+    int sps_id = -1;
+    size_t size = 0;
+    std::unique_ptr<uint8_t[]> data;
+  };
+
+  struct SpsInfo {
+    int vps_id = -1;
+    size_t size = 0;
+    int width = -1;
+    int height = -1;
+    std::unique_ptr<uint8_t[]> data;
+  };
+
+  std::map<uint32_t, VpsInfo> vps_data_;
+  std::map<uint32_t, PpsInfo> pps_data_;
+  std::map<uint32_t, SpsInfo> sps_data_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 261ffb11c1..b827b03d19 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -20,6 +20,7 @@
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
+#include "modules/video_coding/codecs/h265/include/h265_globals.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/system/rtc_export.h"
@@ -89,13 +90,26 @@ struct CodecSpecificInfoH264 {
   uint8_t 
temporal_idx; bool base_layer_sync; bool idr_frame; + int16_t picture_id; // Required by temporal scalability + bool last_fragment_in_frame; }; + +struct CodecSpecificInfoH265 { + H265PacketizationMode packetization_mode; + bool idr_frame; + bool last_fragment_in_frame; + int16_t picture_id; + int dependencies[5]; + int dtis[10]; +}; + static_assert(std::is_pod::value, ""); union CodecSpecificInfoUnion { CodecSpecificInfoVP8 VP8; CodecSpecificInfoVP9 VP9; CodecSpecificInfoH264 H264; + CodecSpecificInfoH265 H265; }; static_assert(std::is_pod::value, ""); diff --git a/modules/video_coding/jitter_buffer_common.h b/modules/video_coding/jitter_buffer_common.h index 6ccfe39199..5d032d2f1b 100644 --- a/modules/video_coding/jitter_buffer_common.h +++ b/modules/video_coding/jitter_buffer_common.h @@ -54,6 +54,9 @@ enum VCMFrameBufferStateEnum { }; enum { kH264StartCodeLengthBytes = 4 }; +#ifdef WEBRTC_USE_H265 +enum { kH265StartCodeLengthBytes = 4 }; +#endif } // namespace webrtc #endif // MODULES_VIDEO_CODING_JITTER_BUFFER_COMMON_H_ diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc index 324248ab36..bebb36735f 100644 --- a/modules/video_coding/packet.cc +++ b/modules/video_coding/packet.cc @@ -44,8 +44,13 @@ VCMPacket::VCMPacket(const uint8_t* ptr, markerBit(rtp_header.markerBit), timesNacked(-1), completeNALU(kNaluIncomplete), +#ifdef WEBRTC_USE_H265 + insertStartCode((videoHeader.codec == kVideoCodecH264 || videoHeader.codec == kVideoCodecH265) && + videoHeader.is_first_packet_in_frame), +#else insertStartCode(videoHeader.codec == kVideoCodecH264 && videoHeader.is_first_packet_in_frame), +#endif video_header(videoHeader), packet_info(rtp_header, receive_time) { if (is_first_packet_in_frame() && markerBit) { diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc index adc18ddce0..65a8a8f06e 100644 --- a/modules/video_coding/packet_buffer.cc +++ b/modules/video_coding/packet_buffer.cc @@ -23,10 +23,16 @@ 
#include "api/rtp_packet_info.h" #include "api/video/video_frame_type.h" #include "common_video/h264/h264_common.h" +#ifdef WEBRTC_USE_H265 +#include "common_video/h265/h265_common.h" +#endif #include "modules/rtp_rtcp/source/rtp_header_extensions.h" #include "modules/rtp_rtcp/source/rtp_packet_received.h" #include "modules/rtp_rtcp/source/rtp_video_header.h" #include "modules/video_coding/codecs/h264/include/h264_globals.h" +#ifdef WEBRTC_USE_H265 +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#endif #include "rtc_base/checks.h" #include "rtc_base/logging.h" #include "rtc_base/numerics/mod_ops.h" @@ -240,13 +246,23 @@ std::vector> PacketBuffer::FindFrames( bool has_h264_pps = false; bool has_h264_idr = false; bool is_h264_keyframe = false; + + bool is_h265 = false; +#ifdef WEBRTC_USE_H265 + is_h265 = buffer_[start_index]->codec() == kVideoCodecH265; + bool has_h265_sps = false; + bool has_h265_pps = false; + bool has_h265_idr = false; + bool is_h265_keyframe = false; +#endif + int idr_width = -1; int idr_height = -1; bool full_frame_found = false; while (true) { ++tested_packets; - if (!is_h264) { + if (!is_h264 && !is_h265) { if (buffer_[start_index] == nullptr || buffer_[start_index]->is_first_packet_in_frame()) { full_frame_found = buffer_[start_index] != nullptr; @@ -284,6 +300,33 @@ std::vector> PacketBuffer::FindFrames( } } } +#ifdef WEBRTC_USE_H265 + if (is_h265 && !is_h265_keyframe) { + const auto* h265_header = absl::get_if( + &buffer_[start_index]->video_header.video_type_header); + if (!h265_header || h265_header->nalus_length >= kMaxNalusPerPacket) + return found_frames; + for (size_t j = 0; j < h265_header->nalus_length; ++j) { + if (h265_header->nalus[j].type == H265::NaluType::kSps) { + has_h265_sps = true; + } else if (h265_header->nalus[j].type == H265::NaluType::kPps) { + has_h265_pps = true; + } else if (h265_header->nalus[j].type == H265::NaluType::kIdrWRadl + || h265_header->nalus[j].type == H265::NaluType::kIdrNLp + || 
h265_header->nalus[j].type == H265::NaluType::kCra) { + has_h265_idr = true; + } + } + if ((has_h265_sps && has_h265_pps) || has_h265_idr) { + is_h265_keyframe = true; + if (buffer_[start_index]->width() > 0 && + buffer_[start_index]->height() > 0) { + idr_width = buffer_[start_index]->width(); + idr_height = buffer_[start_index]->height(); + } + } + } +#endif if (tested_packets == buffer_.size()) break; @@ -296,7 +339,7 @@ std::vector> PacketBuffer::FindFrames( // the timestamp of that packet is the same as this one. This may cause // the PacketBuffer to hand out incomplete frames. // See: https://bugs.chromium.org/p/webrtc/issues/detail?id=7106 - if (is_h264 && (buffer_[start_index] == nullptr || + if ((is_h264 || is_h265) && (buffer_[start_index] == nullptr || buffer_[start_index]->timestamp != frame_timestamp)) { break; } @@ -342,7 +385,45 @@ std::vector> PacketBuffer::FindFrames( } } - if (is_h264 || full_frame_found) { +#ifdef WEBRTC_USE_H265 + if (is_h265) { + // Warn if this is an unsafe frame. + if (has_h265_idr && (!has_h265_sps || !has_h265_pps)) { + RTC_LOG(LS_WARNING) + << "Received H.265-IDR frame " + << "(SPS: " << has_h265_sps << ", PPS: " << has_h265_pps << "). " + << "Treating as delta frame since " + << "WebRTC-SpsPpsIdrIsH265Keyframe is always enabled."; + } + + // Now that we have decided whether to treat this frame as a key frame + // or delta frame in the frame buffer, we update the field that + // determines if the RtpFrameObject is a key frame or delta frame. + const size_t first_packet_index = start_seq_num % buffer_.size(); + if (is_h265_keyframe) { + buffer_[first_packet_index]->video_header.frame_type = + VideoFrameType::kVideoFrameKey; + if (idr_width > 0 && idr_height > 0) { + // IDR frame was finalized and we have the correct resolution for + // IDR; update first packet to have same resolution as IDR. 
+ buffer_[first_packet_index]->video_header.width = idr_width; + buffer_[first_packet_index]->video_header.height = idr_height; + } + } else { + buffer_[first_packet_index]->video_header.frame_type = + VideoFrameType::kVideoFrameDelta; + } + + // If this is not a key frame, make sure there are no gaps in the + // packet sequence numbers up until this point. + if (!is_h265_keyframe && missing_packets_.upper_bound(start_seq_num) != + missing_packets_.begin()) { + return found_frames; + } + } +#endif + + if (is_h264 || is_h265 || full_frame_found) { const uint16_t end_seq_num = seq_num + 1; // Use uint16_t type to handle sequence number wrap around case. uint16_t num_packets = end_seq_num - start_seq_num; diff --git a/modules/video_coding/session_info.cc b/modules/video_coding/session_info.cc index bee763c9ae..18ebf9acd4 100644 --- a/modules/video_coding/session_info.cc +++ b/modules/video_coding/session_info.cc @@ -145,6 +145,21 @@ std::vector VCMSessionInfo::GetNaluInfos() const { } return nalu_infos; } +#ifdef WEBRTC_USE_H265 +std::vector VCMSessionInfo::GetH265NaluInfos() const { + if (packets_.empty() || packets_.front().video_header.codec != kVideoCodecH265) + return std::vector(); + std::vector nalu_infos; + for (const VCMPacket& packet : packets_) { + const auto& h265 = + absl::get(packet.video_header.video_type_header); + for (size_t i = 0; i < h265.nalus_length; ++i) { + nalu_infos.push_back(h265.nalus[i]); + } + } + return nalu_infos; +} +#endif void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) { if (packets_.empty()) @@ -204,6 +219,11 @@ size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer, // TODO(pbos): Remove H264 parsing from this step and use a fragmentation // header supplied by the H264 depacketizer. 
const size_t kH264NALHeaderLengthInBytes = 1; +#ifdef WEBRTC_USE_H265 + const size_t kH265NALHeaderLengthInBytes = 2; + const auto* h265 = + absl::get_if(&packet.video_header.video_type_header); +#endif const size_t kLengthFieldLength = 2; const auto* h264 = absl::get_if(&packet.video_header.video_type_header); @@ -229,6 +249,36 @@ size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer, packet.sizeBytes = required_length; return packet.sizeBytes; } +#ifdef WEBRTC_USE_H265 + else if (h265 && h265->packetization_type == kH265AP) { + // Similar to H264, for H265 aggregation packets, we rely on jitter buffer + // to remove the two length bytes between each NAL unit, and potentially add + // start codes. + size_t required_length = 0; + const uint8_t* nalu_ptr = + packet_buffer + kH265NALHeaderLengthInBytes; // skip payloadhdr + while (nalu_ptr < packet_buffer + packet.sizeBytes) { + size_t length = BufferToUWord16(nalu_ptr); + required_length += + length + (packet.insertStartCode ? kH265StartCodeLengthBytes : 0); + nalu_ptr += kLengthFieldLength + length; + } + ShiftSubsequentPackets(packet_it, required_length); + nalu_ptr = packet_buffer + kH265NALHeaderLengthInBytes; + uint8_t* frame_buffer_ptr = frame_buffer + offset; + while (nalu_ptr < packet_buffer + packet.sizeBytes) { + size_t length = BufferToUWord16(nalu_ptr); + nalu_ptr += kLengthFieldLength; + // since H265 shares the same start code as H264, use the same Insert + // function to handle start code. + frame_buffer_ptr += Insert(nalu_ptr, length, packet.insertStartCode, + const_cast(frame_buffer_ptr)); + nalu_ptr += length; + } + packet.sizeBytes = required_length; + return packet.sizeBytes; + } +#endif ShiftSubsequentPackets( packet_it, packet.sizeBytes + (packet.insertStartCode ? 
kH264StartCodeLengthBytes : 0)); @@ -455,7 +505,22 @@ int VCMSessionInfo::InsertPacket(const VCMPacket& packet, IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) { last_packet_seq_num_ = packet.seqNum; } +#ifdef WEBRTC_USE_H265 + } else if (packet.codec() == kVideoCodecH265) { + frame_type_ = packet.video_header.frame_type; + if (packet.is_first_packet_in_frame() && + (first_packet_seq_num_ == -1 || + IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) { + first_packet_seq_num_ = packet.seqNum; + } + if (packet.markerBit && + (last_packet_seq_num_ == -1 || + IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) { + last_packet_seq_num_ = packet.seqNum; + } +#else } else { +#endif // Only insert media packets between first and last packets (when // available). // Placing check here, as to properly account for duplicate packets. diff --git a/modules/video_coding/session_info.h b/modules/video_coding/session_info.h index 846352a8ae..bbe5b7f08c 100644 --- a/modules/video_coding/session_info.h +++ b/modules/video_coding/session_info.h @@ -65,6 +65,9 @@ class VCMSessionInfo { int Tl0PicId() const; std::vector GetNaluInfos() const; +#ifdef WEBRTC_USE_H265 + std::vector GetH265NaluInfos() const; +#endif void SetGofInfo(const GofInfoVP9& gof_info, size_t idx); diff --git a/rtc_base/experiments/min_video_bitrate_experiment.cc b/rtc_base/experiments/min_video_bitrate_experiment.cc index f37c4e9c76..bb43e6df15 100644 --- a/rtc_base/experiments/min_video_bitrate_experiment.cc +++ b/rtc_base/experiments/min_video_bitrate_experiment.cc @@ -100,6 +100,7 @@ absl::optional GetExperimentalMinVideoBitrate(VideoCodecType type) { return min_bitrate_av1.GetOptional(); case kVideoCodecH264: return min_bitrate_h264.GetOptional(); + case kVideoCodecH265: case kVideoCodecGeneric: case kVideoCodecMultiplex: return absl::nullopt; diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index 252d92e81a..e8972e2cd8 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -727,6 +727,17 @@ if 
(is_ios || is_mac) { "objc/components/video_codec/RTCH264ProfileLevelId.h", "objc/components/video_codec/RTCH264ProfileLevelId.mm", ] + + if (rtc_use_h265) { + sources += [ + "objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h", + "objc/components/video_codec/RTCCodecSpecificInfoH265.h", + "objc/components/video_codec/RTCCodecSpecificInfoH265.mm", + "objc/components/video_codec/RTCH265ProfileLevelId.h", + "objc/components/video_codec/RTCH265ProfileLevelId.mm", + ] + } + if (is_ios) { sources += [ "objc/components/video_codec/UIDevice+H264Profile.h", @@ -1429,6 +1440,16 @@ if (is_ios || is_mac) { "objc/api/video_codec/RTCVideoEncoderSimulcast.h", ] + if (rtc_use_h265) { + common_objc_headers += [ + "objc/components/video_codec/RTCH265ProfileLevelId.h", + "objc/components/video_codec/RTCVideoDecoderFactoryH265.h", + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoEncoderFactoryH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.h", + ] + } + if (!build_with_chromium) { common_objc_headers += [ "objc/api/logging/RTCCallbackLogger.h", @@ -1582,6 +1603,11 @@ if (is_ios || is_mac) { "objc/components/video_codec/RTCVideoDecoderH264.h", "objc/components/video_codec/RTCVideoEncoderFactoryH264.h", "objc/components/video_codec/RTCVideoEncoderH264.h", + "objc/components/video_codec/RTCH265ProfileLevelId.h", + "objc/components/video_codec/RTCVideoDecoderFactoryH265.h", + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoEncoderFactoryH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.h", "objc/components/video_frame_buffer/RTCCVPixelBuffer.h", "objc/helpers/RTCDispatcher.h", "objc/helpers/RTCYUVHelper.h", @@ -1589,6 +1615,15 @@ if (is_ios || is_mac) { "objc/components/video_codec/RTCVideoEncoderFactorySimulcast.h", "objc/api/video_codec/RTCVideoEncoderSimulcast.h", ] + if (rtc_use_h265) { + sources += [ + "objc/components/video_codec/RTCH265ProfileLevelId.h", + 
"objc/components/video_codec/RTCVideoDecoderFactoryH265.h", + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoEncoderFactoryH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.h", + ] + } if (!build_with_chromium) { sources += [ "objc/api/logging/RTCCallbackLogger.h", @@ -1794,6 +1829,7 @@ if (is_ios || is_mac) { } rtc_library("videotoolbox_objc") { + defines = [] visibility = [ "*" ] allow_poison = [ "audio_codecs" ] # TODO(bugs.webrtc.org/8396): Remove. sources = [ @@ -1807,13 +1843,27 @@ if (is_ios || is_mac) { "objc/components/video_codec/RTCVideoEncoderH264.mm", ] + if (rtc_use_h265) { + sources += [ + "objc/components/video_codec/RTCVideoDecoderFactoryH265.h", + "objc/components/video_codec/RTCVideoDecoderFactoryH265.m", + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoDecoderH265.mm", + "objc/components/video_codec/RTCVideoEncoderFactoryH265.h", + "objc/components/video_codec/RTCVideoEncoderFactoryH265.m", + "objc/components/video_codec/RTCVideoEncoderH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.mm", + ] + defines += [ "WEBRTC_USE_H265" ] + } + configs += [ "..:common_objc", ":used_from_extension", ] if (is_ios && rtc_apprtcmobile_broadcast_extension) { - defines = [ "RTC_APPRTCMOBILE_BROADCAST_EXTENSION" ] + defines += [ "RTC_APPRTCMOBILE_BROADCAST_EXTENSION" ] } deps = [ diff --git a/sdk/android/BUILD.gn b/sdk/android/BUILD.gn index 8fe98cb2b0..f44ec60f39 100644 --- a/sdk/android/BUILD.gn +++ b/sdk/android/BUILD.gn @@ -393,6 +393,7 @@ if (is_android) { rtc_android_library("hwcodecs_java") { visibility = [ "*" ] sources = [ + "api/org/webrtc/VideoCapabilityParser.java", "api/org/webrtc/HardwareVideoDecoderFactory.java", "api/org/webrtc/HardwareVideoEncoderFactory.java", "api/org/webrtc/PlatformSoftwareVideoDecoderFactory.java", diff --git a/sdk/android/api/org/webrtc/HardwareVideoEncoderFactory.java 
b/sdk/android/api/org/webrtc/HardwareVideoEncoderFactory.java index b48a39aaa6..c71a6faa78 100644 --- a/sdk/android/api/org/webrtc/HardwareVideoEncoderFactory.java +++ b/sdk/android/api/org/webrtc/HardwareVideoEncoderFactory.java @@ -13,6 +13,8 @@ import static org.webrtc.MediaCodecUtils.EXYNOS_PREFIX; import static org.webrtc.MediaCodecUtils.INTEL_PREFIX; import static org.webrtc.MediaCodecUtils.QCOM_PREFIX; +import static org.webrtc.MediaCodecUtils.HISI_PREFIX; +import static org.webrtc.MediaCodecUtils.IMG_PREFIX; import android.media.MediaCodecInfo; import android.media.MediaCodecList; @@ -46,6 +48,9 @@ public class HardwareVideoEncoderFactory implements VideoEncoderFactory { @Nullable private final EglBase14.Context sharedContext; private final boolean enableIntelVp8Encoder; private final boolean enableH264HighProfile; + private final String extraMediaCodecFile = "sdcard/mediaCodec.xml"; + private final VideoCapabilityParser vcp = new VideoCapabilityParser(); + @Nullable private final Predicate codecAllowedPredicate; /** @@ -132,9 +137,10 @@ public VideoEncoder createEncoder(VideoCodecInfo input) { public VideoCodecInfo[] getSupportedCodecs() { List supportedCodecInfos = new ArrayList(); // Generate a list of supported codecs in order of preference: - // VP8, VP9, H264 (high profile), H264 (baseline profile) and AV1. + // VP8, VP9, H265 (optional), H264 (high profile), and H264 (baseline profile). 
    for (VideoCodecMimeType type : new VideoCodecMimeType[] {VideoCodecMimeType.VP8,
-             VideoCodecMimeType.VP9, VideoCodecMimeType.H264, VideoCodecMimeType.AV1}) {
+             VideoCodecMimeType.VP9, VideoCodecMimeType.H264,
+             VideoCodecMimeType.H265}) {
       MediaCodecInfo codec = findCodecForType(type);
       if (codec != null) {
         String name = type.name();
@@ -201,6 +207,8 @@ private boolean isHardwareSupportedInCurrentSdk(MediaCodecInfo info, VideoCodecM
         return isHardwareSupportedInCurrentSdkVp9(info);
       case H264:
         return isHardwareSupportedInCurrentSdkH264(info);
+      case H265:
+        return isHardwareSupportedInCurrentSdkH265(info);
       case AV1:
         return false;
     }
@@ -234,6 +242,19 @@ private boolean isHardwareSupportedInCurrentSdkH264(MediaCodecInfo info) {
     return name.startsWith(QCOM_PREFIX) || name.startsWith(EXYNOS_PREFIX);
   }
 
+  private boolean isHardwareSupportedInCurrentSdkH265(MediaCodecInfo info) {
+    String name = info.getName();
+    // QCOM H265 encoder is supported in KITKAT or later.
+    return (name.startsWith(QCOM_PREFIX) && Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT)
+        // Exynos H265 encoder is supported in LOLLIPOP or later.
+        || (name.startsWith(EXYNOS_PREFIX)
+            && Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP)
+        // Hisi H265 encoder seems to be supported. Needs more testing.
+ || (name.startsWith(HISI_PREFIX) && Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) + || (name.startsWith(IMG_PREFIX) && Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) + || vcp.isExtraHardwareSupported(name, "video/hevc", vcp.parseWithTag(vcp.loadWithDom(extraMediaCodecFile), "Decoders")); + } + private boolean isMediaCodecAllowed(MediaCodecInfo info) { if (codecAllowedPredicate == null) { return true; diff --git a/sdk/android/api/org/webrtc/VideoCapabilityParser.java b/sdk/android/api/org/webrtc/VideoCapabilityParser.java new file mode 100644 index 0000000000..b749e3c73a --- /dev/null +++ b/sdk/android/api/org/webrtc/VideoCapabilityParser.java @@ -0,0 +1,84 @@ +/* + * Copyright 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+package org.webrtc;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.webrtc.Logging;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+public class VideoCapabilityParser {
+
+  public Document loadWithDom(String xmlFilePath) {
+    Document document = null;
+    File file = new File(xmlFilePath);
+    if (file.exists()) {
+      try {
+        InputStream inputStream = new FileInputStream(file);
+        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
+        document = documentBuilder.parse(inputStream);
+      } catch (FileNotFoundException e) {
+      } catch (ParserConfigurationException e) {
+      } catch (IOException e) {
+      } catch (SAXException e) {
+      }
+    }
+    return document;
+  }
+
+  public ArrayList<HashMap<String, String>> parseWithTag(Document document, String tag) {
+    if (document == null) {
+      return null;
+    }
+    ArrayList<HashMap<String, String>> extraMediaCodecList = new ArrayList<>();
+    NodeList sList = document.getElementsByTagName(tag);
+    for (int i = 0; i < sList.getLength(); i++) {
+      Element encoded = (Element) sList.item(i);
+      NodeList nodeList = encoded.getElementsByTagName("MediaCodec");
+      for (int j = 0; j < nodeList.getLength(); j++) {
+        HashMap<String, String> map = new HashMap<>();
+        Node node = nodeList.item(j);
+        map.put("name", node.getAttributes().getNamedItem("name").getNodeValue());
+        map.put("type", node.getAttributes().getNamedItem("type").getNodeValue());
+        extraMediaCodecList.add(map);
+      }
+    }
+    return extraMediaCodecList;
+  }
+
+  public boolean isExtraHardwareSupported(String name , String type, 
ArrayList> extraMediaCodecMap){ + boolean result = false; + if (extraMediaCodecMap != null) { + for (HashMap item : extraMediaCodecMap){ + if (name.startsWith(item.get("name")) && type.startsWith(item.get("type"))){ + result=true; + break; + } + } + } + return result; + } +} diff --git a/sdk/android/src/java/org/webrtc/HardwareVideoEncoder.java b/sdk/android/src/java/org/webrtc/HardwareVideoEncoder.java index b6a2f7fd00..cf84b5e676 100644 --- a/sdk/android/src/java/org/webrtc/HardwareVideoEncoder.java +++ b/sdk/android/src/java/org/webrtc/HardwareVideoEncoder.java @@ -607,7 +607,7 @@ protected void deliverEncodedImage() { } final ByteBuffer frameBuffer; - if (isKeyFrame && codecType == VideoCodecMimeType.H264) { + if (isKeyFrame && (codecType == VideoCodecMimeType.H264 || codecType == VideoCodecMimeType.H265)) { Logging.d(TAG, "Prepending config frame of size " + configBuffer.capacity() + " to output buffer with offset " + info.offset + ", size " + info.size); diff --git a/sdk/android/src/java/org/webrtc/MediaCodecUtils.java b/sdk/android/src/java/org/webrtc/MediaCodecUtils.java index d5ccae9688..0329baf4a2 100644 --- a/sdk/android/src/java/org/webrtc/MediaCodecUtils.java +++ b/sdk/android/src/java/org/webrtc/MediaCodecUtils.java @@ -31,6 +31,8 @@ class MediaCodecUtils { static final String QCOM_PREFIX = "OMX.qcom."; static final String[] SOFTWARE_IMPLEMENTATION_PREFIXES = { "OMX.google.", "OMX.SEC.", "c2.android"}; + static final String HISI_PREFIX = "OMX.hisi."; + static final String IMG_PREFIX = "OMX.IMG."; // NV12 color format supported by QCOM codec, but not declared in MediaCodec - // see /hardware/qcom/media/mm-core/inc/OMX_QCOMExtns.h @@ -84,6 +86,7 @@ static Map getCodecProperties(VideoCodecMimeType type, boolean h switch (type) { case VP8: case VP9: + case H265: case AV1: return new HashMap(); case H264: diff --git a/sdk/android/src/java/org/webrtc/MediaCodecVideoDecoderFactory.java b/sdk/android/src/java/org/webrtc/MediaCodecVideoDecoderFactory.java 
index bf591dda26..5b910125fe 100644
--- a/sdk/android/src/java/org/webrtc/MediaCodecVideoDecoderFactory.java
+++ b/sdk/android/src/java/org/webrtc/MediaCodecVideoDecoderFactory.java
@@ -63,9 +63,10 @@ public VideoDecoder createDecoder(VideoCodecInfo codecType) {
   public VideoCodecInfo[] getSupportedCodecs() {
     List<VideoCodecInfo> supportedCodecInfos = new ArrayList<>();
     // Generate a list of supported codecs in order of preference:
-    // VP8, VP9, H264 (high profile), and H264 (baseline profile).
+    // VP8, VP9, H.265(optional), H264 (high profile), and H264 (baseline profile).
     for (VideoCodecMimeType type : new VideoCodecMimeType[] {VideoCodecMimeType.VP8,
-             VideoCodecMimeType.VP9, VideoCodecMimeType.H264, VideoCodecMimeType.AV1}) {
+             VideoCodecMimeType.VP9, VideoCodecMimeType.H264,
+             VideoCodecMimeType.H265}) {
       MediaCodecInfo codec = findCodecForType(type);
       if (codec != null) {
         String name = type.name();
diff --git a/sdk/android/src/java/org/webrtc/VideoCodecMimeType.java b/sdk/android/src/java/org/webrtc/VideoCodecMimeType.java
index 26a030919d..1f206476a2 100644
--- a/sdk/android/src/java/org/webrtc/VideoCodecMimeType.java
+++ b/sdk/android/src/java/org/webrtc/VideoCodecMimeType.java
@@ -15,6 +15,7 @@ enum VideoCodecMimeType {
   VP8("video/x-vnd.on2.vp8"),
   VP9("video/x-vnd.on2.vp9"),
   H264("video/avc"),
+  H265("video/hevc"),
   AV1("video/av01");
 
   private final String mimeType;
diff --git a/sdk/android/src/jni/video_encoder_wrapper.cc b/sdk/android/src/jni/video_encoder_wrapper.cc
index 1a841e629e..b5684c331d 100644
--- a/sdk/android/src/jni/video_encoder_wrapper.cc
+++ b/sdk/android/src/jni/video_encoder_wrapper.cc
@@ -13,6 +13,9 @@
 #include <vector>
 
 #include "common_video/h264/h264_common.h"
+#ifdef WEBRTC_USE_H265
+#include "common_video/h265/h265_common.h"
+#endif
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
@@ -351,6 
+354,11 @@ int VideoEncoderWrapper::ParseQp(rtc::ArrayView buffer) { qp = h264_bitstream_parser_.GetLastSliceQp().value_or(-1); success = (qp >= 0); break; +#ifdef WEBRTC_USE_H265 + case kVideoCodecH265: + success = h265_bitstream_parser_.GetLastSliceQp(&qp); + break; +#endif default: // Default is to not provide QP. success = false; break; diff --git a/sdk/android/src/jni/video_encoder_wrapper.h b/sdk/android/src/jni/video_encoder_wrapper.h index 5c5aab7588..4d845c24c5 100644 --- a/sdk/android/src/jni/video_encoder_wrapper.h +++ b/sdk/android/src/jni/video_encoder_wrapper.h @@ -21,6 +21,9 @@ #include "absl/types/optional.h" #include "api/video_codecs/video_encoder.h" #include "common_video/h264/h264_bitstream_parser.h" +#ifdef WEBRTC_USE_H265 +#include "common_video/h265/h265_bitstream_parser.h" +#endif #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" #include "modules/video_coding/svc/scalable_video_controller_no_layering.h" #include "rtc_base/synchronization/mutex.h" @@ -104,6 +107,9 @@ class VideoEncoderWrapper : public VideoEncoder { VideoCodec codec_settings_; EncoderInfo encoder_info_; H264BitstreamParser h264_bitstream_parser_; +#ifdef WEBRTC_USE_H265 + H265BitstreamParser h265_bitstream_parser_; +#endif // Fills frame dependencies in codec-agnostic format. 
ScalableVideoControllerNoLayering svc_controller_; diff --git a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h index 902925936b..5f7d068299 100644 --- a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h +++ b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h @@ -22,6 +22,7 @@ class VideoEncoderFactory; class VideoDecoderFactory; class AudioProcessing; struct PeerConnectionDependencies; +class PeerConnectionFactoryInterface; } // namespace webrtc @@ -88,6 +89,10 @@ NS_ASSUME_NONNULL_BEGIN dependencies:(std::unique_ptr)dependencies delegate:(nullable id)delegate; + +- (instancetype)initWithNativePeerConnectionFactory:( + rtc::scoped_refptr)factory; + @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm index 84c6a878c7..245af64f06 100644 --- a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm +++ b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm @@ -274,6 +274,13 @@ - (instancetype)initWithNativeAudioEncoderFactory: } return self; } +- (instancetype)initWithNativePeerConnectionFactory: + (rtc::scoped_refptr)factory { + if (self = [self initNative]) { + _nativeFactory = factory; + } + return self; +} - (RTC_OBJC_TYPE(RTCAudioSource) *)audioSourceWithConstraints: (nullable RTC_OBJC_TYPE(RTCMediaConstraints) *)constraints { diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h new file mode 100644 index 0000000000..ca179f34cb --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h @@ -0,0 +1,26 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +/* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264+Private.h */ + +#import "RTCCodecSpecificInfoH265.h" + +#include "modules/video_coding/include/video_codec_interface.h" + +NS_ASSUME_NONNULL_BEGIN + +/* Interfaces for converting to/from internal C++ formats. */ +@interface RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) +() + + - (webrtc::CodecSpecificInfo)nativeCodecSpecificInfo; + +@end + +NS_ASSUME_NONNULL_END diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h new file mode 100644 index 0000000000..7e2811c7b3 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h @@ -0,0 +1,28 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +/* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264.h. */ + +#import + +#import "RTCCodecSpecificInfo.h" +#import "RTCMacros.h" + +/** Class for H265 specific config. 
*/ +typedef NS_ENUM(NSUInteger, RTCH265PacketizationMode) { + RTCH265PacketizationModeNonInterleaved = 0, // Mode 1 - STAP-A, FU-A is allowed + RTCH265PacketizationModeSingleNalUnit // Mode 0 - only single NALU allowed +}; + +RTC_OBJC_EXPORT +@interface RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) : NSObject + +@property(nonatomic, assign) RTCH265PacketizationMode packetizationMode; + +@end diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm new file mode 100644 index 0000000000..1db943ed6e --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm @@ -0,0 +1,28 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + /* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264.mm */ + +#import "RTCCodecSpecificInfoH265+Private.h" + +// H265 specific settings. 
+@implementation RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) + +@synthesize packetizationMode = _packetizationMode; + +- (webrtc::CodecSpecificInfo)nativeCodecSpecificInfo { + webrtc::CodecSpecificInfo codecSpecificInfo; + codecSpecificInfo.codecType = webrtc::kVideoCodecH265; + codecSpecificInfo.codecSpecific.H265.packetization_mode = + (webrtc::H265PacketizationMode)_packetizationMode; + + return codecSpecificInfo; +} + +@end diff --git a/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m b/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m index f4a97a8659..4662a60bae 100644 --- a/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m +++ b/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m @@ -17,6 +17,10 @@ #import "api/video_codec/RTCVideoDecoderVP8.h" #import "api/video_codec/RTCVideoDecoderVP9.h" #import "base/RTCVideoCodecInfo.h" +#ifdef WEBRTC_USE_H265 +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoDecoderH265.h" +#endif @implementation RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) @@ -42,10 +46,18 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) RTC_OBJC_TYPE(RTCVideoCodecInfo) *vp8Info = [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:kRTCVideoCodecVp8Name]; +#ifdef WEBRTC_USE_H265 + RTC_OBJC_TYPE(RTCVideoCodecInfo) *h265Info = + [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:kRTCVideoCodecH265Name]; +#endif + NSMutableArray *result = [@[ constrainedHighInfo, constrainedBaselineInfo, vp8Info, +#ifdef WEBRTC_USE_H265 + h265Info, +#endif ] mutableCopy]; if ([RTC_OBJC_TYPE(RTCVideoDecoderVP9) isSupported]) { @@ -74,6 +86,13 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) return [RTC_OBJC_TYPE(RTCVideoDecoderAV1) av1Decoder]; } +#ifdef WEBRTC_USE_H265 + if (@available(iOS 11, *)) { + if ([info.name isEqualToString:kRTCVideoCodecH265Name]) { + return [[RTC_OBJC_TYPE(RTCVideoDecoderH265) alloc] init]; + } + } +#endif return nil; } diff --git 
a/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m b/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m index 06c4e8c22f..fc0ccd3d42 100644 --- a/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m +++ b/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m @@ -18,6 +18,11 @@ #import "api/video_codec/RTCVideoEncoderVP9.h" #import "base/RTCVideoCodecInfo.h" +#ifdef WEBRTC_USE_H265 +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoEncoderH265.h" +#endif + @implementation RTC_OBJC_TYPE (RTCDefaultVideoEncoderFactory) @synthesize preferredCodec; @@ -60,6 +65,11 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoEncoderFactory) addObject:[[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:kRTCVideoCodecAv1Name]]; } +#ifdef WEBRTC_USE_H265 + [result + addObject:[[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:kRTCVideoCodecH265Name]]; +#endif + return result; } @@ -74,6 +84,12 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoEncoderFactory) } else if ([info.name isEqualToString:kRTCVideoCodecAv1Name] && [RTC_OBJC_TYPE(RTCVideoEncoderAV1) isSupported]) { return [RTC_OBJC_TYPE(RTCVideoEncoderAV1) av1Encoder]; +#ifdef WEBRTC_USE_H265 + } else if (@available(iOS 11, *)) { + if ([info.name isEqualToString:kRTCVideoCodecH265Name]) { + return [[RTC_OBJC_TYPE(RTCVideoEncoderH265) alloc] initWithCodecInfo:info]; + } +#endif } return nil; diff --git a/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h new file mode 100644 index 0000000000..8e3486d06d --- /dev/null +++ b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h @@ -0,0 +1,16 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#import + +#import "RTCMacros.h" + +RTC_OBJC_EXPORT extern NSString *const kRTCVideoCodecH265Name; +RTC_OBJC_EXPORT extern NSString *const kRTCLevel31Main; diff --git a/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm new file mode 100644 index 0000000000..fe93aceb79 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm @@ -0,0 +1,18 @@ +/* + * Copyright 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCH265ProfileLevelId.h" + +#include "media/base/media_constants.h" + +NSString *const kRTCVideoCodecH265Name = @(cricket::kH265CodecName); +// TODO(jianjunz): This is value is not correct. +NSString *const kRTCLevel31Main = @"4d001f"; diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.h b/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.h new file mode 100644 index 0000000000..f51a482e75 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.h @@ -0,0 +1,19 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoDecoderFactory.h" + +RTC_OBJC_EXPORT +API_AVAILABLE(ios(11.0)) +@interface RTC_OBJC_TYPE (RTCVideoDecoderFactoryH265) : NSObject +@end diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.m b/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.m new file mode 100644 index 0000000000..e3b950a8d6 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderFactoryH265.m @@ -0,0 +1,27 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#import "RTCVideoDecoderFactoryH265.h" + +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoDecoderH265.h" + +@implementation RTC_OBJC_TYPE (RTCVideoDecoderFactoryH265) + +- (NSArray *)supportedCodecs { + NSString* codecName = kRTCVideoCodecH265Name; + return @[ [[RTCVideoCodecInfo alloc] initWithName:codecName parameters:nil] ]; +} + +- (id)createDecoder:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)info { + return [[RTC_OBJC_TYPE(RTCVideoDecoderH265) alloc] init]; +} + +@end diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderH265.h b/sdk/objc/components/video_codec/RTCVideoDecoderH265.h new file mode 100644 index 0000000000..7c34b642d0 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderH265.h @@ -0,0 +1,19 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoDecoder.h" + +RTC_OBJC_EXPORT +API_AVAILABLE(ios(11.0)) +@interface RTC_OBJC_TYPE (RTCVideoDecoderH265) : NSObject +@end diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm b/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm new file mode 100644 index 0000000000..5654112531 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCVideoDecoderH265.h" + +#import + +#import "base/RTCVideoFrame.h" +#import "base/RTCVideoFrameBuffer.h" +#import "components/video_frame_buffer/RTCCVPixelBuffer.h" +#import "helpers.h" +#import "helpers/scoped_cftyperef.h" + +#if defined(WEBRTC_IOS) +#import "helpers/UIDevice+RTCDevice.h" +#endif + +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "sdk/objc/components/video_codec/nalu_rewriter.h" + +// Struct that we pass to the decoder per frame to decode. We receive it again +// in the decoder callback. +struct RTCH265FrameDecodeParams { + RTCH265FrameDecodeParams(RTCVideoDecoderCallback cb, int64_t ts) + : callback(cb), timestamp(ts) {} + RTCVideoDecoderCallback callback; + int64_t timestamp; +}; + +// This is the callback function that VideoToolbox calls when decode is +// complete. 
+void h265DecompressionOutputCallback(void* decoder, + void* params, + OSStatus status, + VTDecodeInfoFlags infoFlags, + CVImageBufferRef imageBuffer, + CMTime timestamp, + CMTime duration) { + std::unique_ptr decodeParams( + reinterpret_cast(params)); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to decode frame. Status: " << status; + return; + } + // TODO(tkchin): Handle CVO properly. + RTC_OBJC_TYPE(RTCCVPixelBuffer) *frameBuffer = + [[RTC_OBJC_TYPE(RTCCVPixelBuffer) alloc] initWithPixelBuffer:imageBuffer]; + RTC_OBJC_TYPE(RTCVideoFrame) *decodedFrame = [[RTC_OBJC_TYPE(RTCVideoFrame) alloc] + initWithBuffer:frameBuffer + rotation:RTCVideoRotation_0 + timeStampNs:CMTimeGetSeconds(timestamp) * rtc::kNumNanosecsPerSec]; + decodedFrame.timeStamp = decodeParams->timestamp; + decodeParams->callback(decodedFrame); +} + +// Decoder. +@implementation RTC_OBJC_TYPE (RTCVideoDecoderH265) { + CMVideoFormatDescriptionRef _videoFormat; + VTDecompressionSessionRef _decompressionSession; + RTCVideoDecoderCallback _callback; + OSStatus _error; +} + +- (instancetype)init { + self = [super init]; + return self; +} + +- (void)dealloc { + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; +} + +- (NSInteger)startDecodeWithNumberOfCores:(int)numberOfCores { + return WEBRTC_VIDEO_CODEC_OK; +} + +- (NSInteger)decode:(RTC_OBJC_TYPE(RTCEncodedImage) *)inputImage + missingFrames:(BOOL)missingFrames + codecSpecificInfo:(__nullable id)info + renderTimeMs:(int64_t)renderTimeMs { + RTC_DCHECK(inputImage.buffer); + + if (_error != noErr) { + RTC_LOG(LS_WARNING) << "Last frame decode failed."; + _error = noErr; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + rtc::ScopedCFTypeRef inputFormat = + rtc::ScopedCF(webrtc::CreateH265VideoFormatDescription( + (uint8_t*)inputImage.buffer.bytes, inputImage.buffer.length)); + if (inputFormat) { + CMVideoDimensions dimensions = + CMVideoFormatDescriptionGetDimensions(inputFormat.get()); + RTC_LOG(LS_INFO) << "Resolution: " << 
dimensions.width << " x " + << dimensions.height; + // Check if the video format has changed, and reinitialize decoder if + // needed. + if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) { + [self setVideoFormat:inputFormat.get()]; + int resetDecompressionSessionError = [self resetDecompressionSession]; + if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) { + return resetDecompressionSessionError; + } + } + } + if (!_videoFormat) { + // We received a frame but we don't have format information so we can't + // decode it. + // This can happen after backgrounding. We need to wait for the next + // sps/pps before we can resume so we request a keyframe by returning an + // error. + RTC_LOG(LS_WARNING) << "Missing video format. Frame with sps/pps required."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + CMSampleBufferRef sampleBuffer = nullptr; + if (!webrtc::H265AnnexBBufferToCMSampleBuffer( + (uint8_t*)inputImage.buffer.bytes, inputImage.buffer.length, + _videoFormat, &sampleBuffer)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(sampleBuffer); + VTDecodeFrameFlags decodeFlags = + kVTDecodeFrame_EnableAsynchronousDecompression; + std::unique_ptr frameDecodeParams; + frameDecodeParams.reset( + new RTCH265FrameDecodeParams(_callback, inputImage.timeStamp)); + OSStatus status = VTDecompressionSessionDecodeFrame( + _decompressionSession, sampleBuffer, decodeFlags, + frameDecodeParams.release(), nullptr); +#if defined(WEBRTC_IOS) + // Re-initialize the decoder if we have an invalid session while the app is + // active and retry the decode request. 
+ if (status == kVTInvalidSessionErr && + [self resetDecompressionSession] == WEBRTC_VIDEO_CODEC_OK) { + frameDecodeParams.reset( + new RTCH265FrameDecodeParams(_callback, inputImage.timeStamp)); + status = VTDecompressionSessionDecodeFrame( + _decompressionSession, sampleBuffer, decodeFlags, + frameDecodeParams.release(), nullptr); + } +#endif + CFRelease(sampleBuffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to decode frame with code: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)setCallback:(RTCVideoDecoderCallback)callback { + _callback = callback; +} + +- (NSInteger)releaseDecoder { + // Need to invalidate the session so that callbacks no longer occur and it + // is safe to null out the callback. + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +#pragma mark - Private + +- (int)resetDecompressionSession { + [self destroyDecompressionSession]; + + // Need to wait for the first SPS to initialize decoder. + if (!_videoFormat) { + return WEBRTC_VIDEO_CODEC_OK; + } + + // Set keys for OpenGL and IOSurface compatibilty, which makes the encoder + // create pixel buffers with GPU backed memory. The intent here is to pass + // the pixel buffers directly so we avoid a texture upload later during + // rendering. This currently is moot because we are converting back to an + // I420 frame after decode, but eventually we will be able to plumb + // CVPixelBuffers directly to the renderer. + // TODO(tkchin): Maybe only set OpenGL/IOSurface keys if we know that that + // we can pass CVPixelBuffers as native handles in decoder output. 
+ static size_t const attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#elif defined(WEBRTC_MAC) + kCVPixelBufferOpenGLCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, + kCVPixelBufferPixelFormatTypeKey + }; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = + CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, + pixelFormat}; + CFDictionaryRef attributes = + CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + VTDecompressionOutputCallbackRecord record = { + h265DecompressionOutputCallback, + nullptr, + }; + OSStatus status = + VTDecompressionSessionCreate(nullptr, _videoFormat, nullptr, attributes, + &record, &_decompressionSession); + CFRelease(attributes); + if (status != noErr) { + [self destroyDecompressionSession]; + return WEBRTC_VIDEO_CODEC_ERROR; + } + [self configureDecompressionSession]; + + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureDecompressionSession { + RTC_DCHECK(_decompressionSession); +#if defined(WEBRTC_IOS) + // VTSessionSetProperty(_decompressionSession, + // kVTDecompressionPropertyKey_RealTime, kCFBooleanTrue); +#endif +} + +- (void)destroyDecompressionSession { + if (_decompressionSession) { +#if defined(WEBRTC_IOS) + if ([UIDevice isIOS11OrLater]) { + VTDecompressionSessionWaitForAsynchronousFrames(_decompressionSession); + } +#endif + VTDecompressionSessionInvalidate(_decompressionSession); + CFRelease(_decompressionSession); + _decompressionSession = nullptr; + } +} + +- (void)setVideoFormat:(CMVideoFormatDescriptionRef)videoFormat { + if (_videoFormat == videoFormat) { + return; 
+ } + if (_videoFormat) { + CFRelease(_videoFormat); + } + _videoFormat = videoFormat; + if (_videoFormat) { + CFRetain(_videoFormat); + } +} + +- (NSString *)implementationName { + return @"VideoToolbox"; +} + +@end diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.h b/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.h new file mode 100644 index 0000000000..ad5d6b80da --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.h @@ -0,0 +1,19 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoEncoderFactory.h" + +RTC_OBJC_EXPORT +API_AVAILABLE(ios(11.0)) +@interface RTC_OBJC_TYPE (RTCVideoEncoderFactoryH265) : NSObject +@end diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.m b/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.m new file mode 100644 index 0000000000..6f89d1346d --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderFactoryH265.m @@ -0,0 +1,38 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#import "RTCVideoEncoderFactoryH265.h" + +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoEncoderH265.h" + +@implementation RTC_OBJC_TYPE (RTCVideoEncoderFactoryH265) + +- (NSArray *)supportedCodecs { + NSMutableArray *codecs = [NSMutableArray array]; + NSString *codecName = kRTCVideoCodecH265Name; + + NSDictionary* mainParams = @{ + @"profile-level-id" : kRTCLevel31Main, + @"level-asymmetry-allowed" : @"1", + @"packetization-mode" : @"1", + }; + RTCVideoCodecInfo* constrainedBaselineInfo = + [[RTCVideoCodecInfo alloc] initWithName:codecName parameters:mainParams]; + [codecs addObject:constrainedBaselineInfo]; + + return [codecs copy]; +} + +- (id)createEncoder:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)info { + return [[RTC_OBJC_TYPE(RTCVideoEncoderH265) alloc] initWithCodecInfo:info]; +} + +@end diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderH265.h b/sdk/objc/components/video_codec/RTCVideoEncoderH265.h new file mode 100644 index 0000000000..1431565774 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderH265.h @@ -0,0 +1,23 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoCodecInfo.h" +#import "RTCVideoEncoder.h" + +RTC_OBJC_EXPORT +API_AVAILABLE(ios(11.0)) +@interface RTC_OBJC_TYPE (RTCVideoEncoderH265) : NSObject + +- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo; + +@end diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm b/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm new file mode 100644 index 0000000000..323c3bf5c7 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCVideoEncoderH265.h" + +#import +#include + +#if defined(WEBRTC_IOS) +#import "helpers/UIDevice+RTCDevice.h" +#endif +#import "RTCCodecSpecificInfoH265.h" +#import "RTCH265ProfileLevelId.h" +#import "api/peerconnection/RTCVideoCodecInfo+Private.h" +#import "base/RTCI420Buffer.h" +#import "base/RTCVideoFrame.h" +#import "base/RTCVideoFrameBuffer.h" +#import "components/video_frame_buffer/RTCCVPixelBuffer.h" +#import "helpers.h" + +#include "common_video/h265/h265_bitstream_parser.h" +#include "common_video/include/bitrate_adjuster.h" +#include "libyuv/convert_from.h" +#include "modules/include/module_common_types.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h" +#include "system_wrappers/include/clock.h" + +@interface RTC_OBJC_TYPE (RTCVideoEncoderH265) +() + +- (void)frameWasEncoded:(OSStatus)status + 
flags:(VTEncodeInfoFlags)infoFlags + sampleBuffer:(CMSampleBufferRef)sampleBuffer + width:(int32_t)width + height:(int32_t)height + renderTimeMs:(int64_t)renderTimeMs + timestamp:(uint32_t)timestamp + rotation:(RTCVideoRotation)rotation; + +@end + +namespace { // anonymous namespace + +// The ratio between kVTCompressionPropertyKey_DataRateLimits and +// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher +// than the average bit rate to avoid undershooting the target. +const float kLimitToAverageBitRateFactor = 1.5f; +// These thresholds deviate from the default h265 QP thresholds, as they +// have been found to work better on devices that support VideoToolbox +const int kLowh265QpThreshold = 28; +const int kHighh265QpThreshold = 39; + +// Struct that we pass to the encoder per frame to encode. We receive it again +// in the encoder callback. +struct API_AVAILABLE(ios(11.0)) RTCFrameEncodeParams { + RTCFrameEncodeParams(RTC_OBJC_TYPE(RTCVideoEncoderH265) * e, + int32_t w, + int32_t h, + int64_t rtms, + uint32_t ts, + RTCVideoRotation r) + : encoder(e), + width(w), + height(h), + render_time_ms(rtms), + timestamp(ts), + rotation(r) {} + + RTC_OBJC_TYPE(RTCVideoEncoderH265) * encoder; + int32_t width; + int32_t height; + int64_t render_time_ms; + uint32_t timestamp; + RTCVideoRotation rotation; +}; + +// We receive I420Frames as input, but we need to feed CVPixelBuffers into the +// encoder. This performs the copy and format conversion. +// TODO(tkchin): See if encoder will accept i420 frames and compare performance. 
+bool CopyVideoFrameToNV12PixelBuffer(id frameBuffer, + CVPixelBufferRef pixelBuffer) { + RTC_DCHECK(pixelBuffer); + RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), + kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); + RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), + frameBuffer.height); + RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), + frameBuffer.width); + + CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0); + if (cvRet != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet; + return false; + } + + uint8_t* dstY = reinterpret_cast( + CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0)); + int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0); + uint8_t* dstUV = reinterpret_cast( + CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1)); + int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1); + // Convert I420 to NV12. + int ret = libyuv::I420ToNV12( + frameBuffer.dataY, frameBuffer.strideY, frameBuffer.dataU, + frameBuffer.strideU, frameBuffer.dataV, frameBuffer.strideV, dstY, + dstStrideY, dstUV, dstStrideUV, frameBuffer.width, frameBuffer.height); + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); + if (ret) { + RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret; + return false; + } + return true; +} + +CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) { + if (!pixel_buffer_pool) { + RTC_LOG(LS_ERROR) << "Failed to get pixel buffer pool."; + return nullptr; + } + CVPixelBufferRef pixel_buffer; + CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, &pixel_buffer); + if (ret != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret; + // We probably want to drop frames here, since failure probably means + // that the pool is empty. 
+ return nullptr; + } + return pixel_buffer; +} + +// This is the callback function that VideoToolbox calls when encode is +// complete. From inspection this happens on its own queue. +void compressionOutputCallback(void *encoder, + void *params, + OSStatus status, + VTEncodeInfoFlags infoFlags, + CMSampleBufferRef sampleBuffer) + API_AVAILABLE(ios(11.0)) { + RTC_CHECK(params); + std::unique_ptr encodeParams( + reinterpret_cast(params)); + RTC_CHECK(encodeParams->encoder); + [encodeParams->encoder frameWasEncoded:status + flags:infoFlags + sampleBuffer:sampleBuffer + width:encodeParams->width + height:encodeParams->height + renderTimeMs:encodeParams->render_time_ms + timestamp:encodeParams->timestamp + rotation:encodeParams->rotation]; +} +} // namespace + +@implementation RTC_OBJC_TYPE (RTCVideoEncoderH265) { + RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo; + std::unique_ptr _bitrateAdjuster; + uint32_t _targetBitrateBps; + uint32_t _encoderBitrateBps; + CFStringRef _profile; + RTCVideoEncoderCallback _callback; + int32_t _width; + int32_t _height; + VTCompressionSessionRef _compressionSession; + RTCVideoCodecMode _mode; + int framesLeft; + + webrtc::H265BitstreamParser _h265BitstreamParser; + std::vector _nv12ScaleBuffer; +} + +// .5 is set as a mininum to prevent overcompensating for large temporary +// overshoots. We don't want to degrade video quality too badly. +// .95 is set to prevent oscillations. When a lower bitrate is set on the +// encoder than previously set, its output seems to have a brief period of +// drastically reduced bitrate, so we want to avoid that. In steady state +// conditions, 0.95 seems to give us better overall bitrate over long periods +// of time. 
+- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo { + if (self = [super init]) { + _codecInfo = codecInfo; + _bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95)); + RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH265Name]); + } + return self; +} + +- (void)dealloc { + [self destroyCompressionSession]; +} + +- (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)settings + numberOfCores:(int)numberOfCores { + RTC_DCHECK(settings); + RTC_DCHECK([settings.name isEqualToString:kRTCVideoCodecH265Name]); + + _width = settings.width; + _height = settings.height; + _mode = settings.mode; + + // We can only set average bitrate on the HW encoder. + _targetBitrateBps = settings.startBitrate; + _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); + + // TODO(tkchin): Try setting payload size via + // kVTCompressionPropertyKey_Maxh265SliceBytes. + + return [self resetCompressionSession]; +} + +- (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame + codecSpecificInfo:(nullable id)codecSpecificInfo + frameTypes:(NSArray *)frameTypes { + // RTC_DCHECK_EQ(frame.width, _width); + // RTC_DCHECK_EQ(frame.height, _height); + if (!_callback || !_compressionSession) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + BOOL isKeyframeRequired = NO; + + // Get a pixel buffer from the pool and copy frame data over. + CVPixelBufferPoolRef pixelBufferPool = + VTCompressionSessionGetPixelBufferPool(_compressionSession); + +#if defined(WEBRTC_IOS) + if (!pixelBufferPool) { + // Kind of a hack. On backgrounding, the compression session seems to get + // invalidated, which causes this pool call to fail when the application + // is foregrounded and frames are being sent for encoding again. + // Resetting the session when this happens fixes the issue. + // In addition we request a keyframe so video can recover quickly. 
+ [self resetCompressionSession]; + pixelBufferPool = + VTCompressionSessionGetPixelBufferPool(_compressionSession); + isKeyframeRequired = YES; + RTC_LOG(LS_INFO) << "Resetting compression session due to invalid pool."; + } +#endif + + CVPixelBufferRef pixelBuffer = nullptr; + if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) { + // Native frame buffer + RTC_OBJC_TYPE(RTCCVPixelBuffer) *rtcPixelBuffer = + (RTC_OBJC_TYPE(RTCCVPixelBuffer) *)frame.buffer; + if (![rtcPixelBuffer requiresCropping]) { + // This pixel buffer might have a higher resolution than what the + // compression session is configured to. The compression session can + // handle that and will output encoded frames in the configured + // resolution regardless of the input pixel buffer resolution. + pixelBuffer = rtcPixelBuffer.pixelBuffer; + CVBufferRetain(pixelBuffer); + } else { + // Cropping required, we need to crop and scale to a new pixel buffer. + pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + int dstWidth = CVPixelBufferGetWidth(pixelBuffer); + int dstHeight = CVPixelBufferGetHeight(pixelBuffer); + if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) { + int size = + [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight]; + _nv12ScaleBuffer.resize(size); + } else { + _nv12ScaleBuffer.clear(); + } + _nv12ScaleBuffer.shrink_to_fit(); + if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer + withTempBuffer:_nv12ScaleBuffer.data()]) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + } + + if (!pixelBuffer) { + // We did not have a native frame buffer + pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(pixelBuffer); + if (!CopyVideoFrameToNV12PixelBuffer([frame.buffer toI420], pixelBuffer)) { + RTC_LOG(LS_ERROR) << "Failed to copy frame data."; + CVBufferRelease(pixelBuffer); + return 
WEBRTC_VIDEO_CODEC_ERROR;
+    }
+  }
+
+  // Check if we need a keyframe.
+  if (!isKeyframeRequired && frameTypes) {
+    for (NSNumber *frameType in frameTypes) {
+      if ((RTCFrameType)frameType.intValue == RTCFrameTypeVideoFrameKey) {
+        isKeyframeRequired = YES;
+        break;
+      }
+    }
+  }
+
+  CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
+  CFDictionaryRef frameProperties = nullptr;
+  if (isKeyframeRequired) {
+    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
+    CFTypeRef values[] = {kCFBooleanTrue};
+    frameProperties = CreateCFTypeDictionary(keys, values, 1);
+  }
+
+  std::unique_ptr<RTCFrameEncodeParams> encodeParams;
+  encodeParams.reset(new RTCFrameEncodeParams(
+      self, _width, _height, frame.timeStampNs / rtc::kNumNanosecsPerMillisec,
+      frame.timeStamp, frame.rotation));
+
+  // Update the bitrate if needed.
+  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()];
+
+  OSStatus status = VTCompressionSessionEncodeFrame(
+      _compressionSession, pixelBuffer, presentationTimeStamp, kCMTimeInvalid,
+      frameProperties, encodeParams.release(), nullptr);
+  if (frameProperties) {
+    CFRelease(frameProperties);
+  }
+  if (pixelBuffer) {
+    CVBufferRelease(pixelBuffer);
+  }
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "Failed to encode frame with code: " << status;
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+- (void)setCallback:(RTCVideoEncoderCallback)callback {
+  _callback = callback;
+}
+
+- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate {
+  _targetBitrateBps = 1000 * bitrateKbit;
+  _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
+  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()];
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+- (NSInteger)resolutionAlignment {
+  return 1;
+}
+
+- (BOOL)applyAlignmentToAllSimulcastLayers {
+  return NO;
+}
+
+- (BOOL)supportsNativeHandle {
+  return YES;
+}
+
+#pragma mark - Private
+
+- (NSInteger)releaseEncoder {
+  // Need to destroy 
so that the session is invalidated and won't use the + // callback anymore. Do not remove callback until the session is invalidated + // since async encoder callbacks can occur until invalidation. + [self destroyCompressionSession]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (int)resetCompressionSession { + [self destroyCompressionSession]; + + // Set source image buffer attributes. These attributes will be present on + // buffers retrieved from the encoder's pixel buffer pool. + const size_t attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#elif defined(WEBRTC_MAC) + kCVPixelBufferOpenGLCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, + kCVPixelBufferPixelFormatTypeKey + }; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = + CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, + pixelFormat}; + CFDictionaryRef sourceAttributes = + CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + CFMutableDictionaryRef encoder_specs = nullptr; +#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) + // Currently hw accl is supported above 360p on mac, below 360p + // the compression session will be created with hw accl disabled. 
+ encoder_specs = + CFDictionaryCreateMutable(nullptr, 1, &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + CFDictionarySetValue( + encoder_specs, + kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, + kCFBooleanTrue); +#endif + OSStatus status = VTCompressionSessionCreate( + nullptr, // use default allocator + _width, _height, kCMVideoCodecType_HEVC, + encoder_specs, // use hardware accelerated encoder if available + sourceAttributes, + nullptr, // use default compressed data allocator + compressionOutputCallback, nullptr, &_compressionSession); + if (sourceAttributes) { + CFRelease(sourceAttributes); + sourceAttributes = nullptr; + } + if (encoder_specs) { + CFRelease(encoder_specs); + encoder_specs = nullptr; + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } +#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) + CFBooleanRef hwaccl_enabled = nullptr; + status = VTSessionCopyProperty( + _compressionSession, + kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, nullptr, + &hwaccl_enabled); + if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) { + RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled"; + } else { + RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled"; + } +#endif + [self configureCompressionSession]; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureCompressionSession { + RTC_DCHECK(_compressionSession); + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, + true); + // SetVTSessionProperty(_compressionSession, + // kVTCompressionPropertyKey_ProfileLevel, _profile); + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_AllowFrameReordering, false); + [self setEncoderBitrateBps:_targetBitrateBps]; + // TODO(tkchin): Look at entropy mode and colorspace matrices. 
+ // TODO(tkchin): Investigate to see if there's any way to make this work. + // May need it to interop with Android. Currently this call just fails. + // On inspecting encoder output on iOS8, this value is set to 6. + // internal::SetVTSessionProperty(compression_session_, + // kVTCompressionPropertyKey_MaxFrameDelayCount, + // 1); + + // Set a relatively large value for keyframe emission (7200 frames or 4 minutes). + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, 7200); + SetVTSessionProperty( + _compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240); + OSStatus status = + VTCompressionSessionPrepareToEncodeFrames(_compressionSession); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Compression session failed to prepare encode frames."; + } +} + +- (void)destroyCompressionSession { + if (_compressionSession) { + VTCompressionSessionInvalidate(_compressionSession); + CFRelease(_compressionSession); + _compressionSession = nullptr; + } +} + +- (NSString *)implementationName { + return @"VideoToolbox"; +} + +- (void)setBitrateBps:(uint32_t)bitrateBps { + if (_encoderBitrateBps != bitrateBps) { + [self setEncoderBitrateBps:bitrateBps]; + } +} + +- (void)setEncoderBitrateBps:(uint32_t)bitrateBps { + if (_compressionSession) { + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_AverageBitRate, bitrateBps); + + // TODO(tkchin): Add a helper method to set array value. 
+    int64_t dataLimitBytesPerSecondValue =
+        static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8);
+    CFNumberRef bytesPerSecond =
+        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type,
+                       &dataLimitBytesPerSecondValue);
+    int64_t oneSecondValue = 1;
+    CFNumberRef oneSecond = CFNumberCreate(
+        kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue);
+    const void* nums[2] = {bytesPerSecond, oneSecond};
+    CFArrayRef dataRateLimits =
+        CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks);
+    OSStatus status = VTSessionSetProperty(
+        _compressionSession, kVTCompressionPropertyKey_DataRateLimits,
+        dataRateLimits);
+    if (bytesPerSecond) {
+      CFRelease(bytesPerSecond);
+    }
+    if (oneSecond) {
+      CFRelease(oneSecond);
+    }
+    if (dataRateLimits) {
+      CFRelease(dataRateLimits);
+    }
+    if (status != noErr) {
+      RTC_LOG(LS_ERROR) << "Failed to set data rate limit";
+    }
+
+    _encoderBitrateBps = bitrateBps;
+  }
+}
+
+- (void)frameWasEncoded:(OSStatus)status
+                  flags:(VTEncodeInfoFlags)infoFlags
+           sampleBuffer:(CMSampleBufferRef)sampleBuffer
+                  width:(int32_t)width
+                 height:(int32_t)height
+           renderTimeMs:(int64_t)renderTimeMs
+              timestamp:(uint32_t)timestamp
+               rotation:(RTCVideoRotation)rotation {
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "h265 encode failed.";
+    return;
+  }
+  if (infoFlags & kVTEncodeInfo_FrameDropped) {
+    RTC_LOG(LS_INFO) << "h265 encoder dropped a frame.";
+    return;
+  }
+
+  BOOL isKeyframe = NO;
+  CFArrayRef attachments =
+      CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0);
+  if (attachments != nullptr && CFArrayGetCount(attachments)) {
+    CFDictionaryRef attachment =
+        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
+    isKeyframe =
+        !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
+  }
+
+  if (isKeyframe) {
+    RTC_LOG(LS_INFO) << "Generated keyframe";
+  }
+
+  __block std::unique_ptr<rtc::Buffer> buffer = std::make_unique<rtc::Buffer>();
+  if (!webrtc::H265CMSampleBufferToAnnexBBuffer(sampleBuffer, isKeyframe, buffer.get())) {
+    return;
+  }
+
+ 
RTC_OBJC_TYPE(RTCEncodedImage) *frame = [[RTC_OBJC_TYPE(RTCEncodedImage) alloc] init]; + frame.buffer = [NSData dataWithBytesNoCopy:buffer->data() + length:buffer->size() + freeWhenDone:NO]; + frame.encodedWidth = width; + frame.encodedHeight = height; + frame.frameType = + isKeyframe ? RTCFrameTypeVideoFrameKey : RTCFrameTypeVideoFrameDelta; + frame.captureTimeMs = renderTimeMs; + frame.timeStamp = timestamp; + frame.rotation = rotation; + frame.contentType = (_mode == RTCVideoCodecModeScreensharing) + ? RTCVideoContentTypeScreenshare + : RTCVideoContentTypeUnspecified; + frame.flags = webrtc::VideoSendTiming::kInvalid; + + int qp; + _h265BitstreamParser.ParseBitstream(buffer->data(), buffer->size()); + _h265BitstreamParser.GetLastSliceQp(&qp); + frame.qp = @(qp); + + BOOL res = _callback(frame, [[RTC_OBJC_TYPE(RTCCodecSpecificInfoH265) alloc] init]); + if (!res) { + RTC_LOG(LS_ERROR) << "Encode callback failed."; + return; + } + _bitrateAdjuster->Update(frame.buffer.length); +} + +- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings { + return [[RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) alloc] + initWithThresholdsLow:kLowh265QpThreshold + high:kHighh265QpThreshold]; +} + +@end diff --git a/sdk/objc/components/video_codec/nalu_rewriter.cc b/sdk/objc/components/video_codec/nalu_rewriter.cc index b7330e1f9c..f2be2a2d43 100644 --- a/sdk/objc/components/video_codec/nalu_rewriter.cc +++ b/sdk/objc/components/video_codec/nalu_rewriter.cc @@ -224,6 +224,218 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, return true; } +#ifdef WEBRTC_USE_H265 +bool H265CMSampleBufferToAnnexBBuffer( + CMSampleBufferRef hvcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer) { + RTC_DCHECK(hvcc_sample_buffer); + + // Get format description from the sample buffer. 
+  CMVideoFormatDescriptionRef description =
+      CMSampleBufferGetFormatDescription(hvcc_sample_buffer);
+  if (description == nullptr) {
+    RTC_LOG(LS_ERROR) << "Failed to get sample buffer's description.";
+    return false;
+  }
+
+  // Get parameter set information.
+  int nalu_header_size = 0;
+  size_t param_set_count = 0;
+  OSStatus status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
+      description, 0, nullptr, nullptr, &param_set_count, &nalu_header_size);
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "Failed to get parameter set.";
+    return false;
+  }
+  RTC_CHECK_EQ(nalu_header_size, kAvccHeaderByteSize);
+  RTC_DCHECK_EQ(param_set_count, 3);
+
+  // Truncate any previous data in the buffer without changing its capacity.
+  annexb_buffer->SetSize(0);
+
+  size_t nalu_offset = 0;
+  std::vector<size_t> frag_offsets;
+  std::vector<size_t> frag_lengths;
+
+  // Place all parameter sets at the front of buffer.
+  if (is_keyframe) {
+    size_t param_set_size = 0;
+    const uint8_t* param_set = nullptr;
+    for (size_t i = 0; i < param_set_count; ++i) {
+      status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
+          description, i, &param_set, &param_set_size, nullptr, nullptr);
+      if (status != noErr) {
+        RTC_LOG(LS_ERROR) << "Failed to get parameter set.";
+        return false;
+      }
+      // Update buffer.
+      annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes));
+      annexb_buffer->AppendData(reinterpret_cast<const char*>(param_set),
+                                param_set_size);
+      // Update fragmentation.
+      frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes));
+      frag_lengths.push_back(param_set_size);
+      nalu_offset += sizeof(kAnnexBHeaderBytes) + param_set_size;
+    }
+  }
+
+  // Get block buffer from the sample buffer.
+  CMBlockBufferRef block_buffer =
+      CMSampleBufferGetDataBuffer(hvcc_sample_buffer);
+  if (block_buffer == nullptr) {
+    RTC_LOG(LS_ERROR) << "Failed to get sample buffer's block buffer.";
+    return false;
+  }
+  CMBlockBufferRef contiguous_buffer = nullptr;
+  // Make sure block buffer is contiguous. 
+  if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) {
+    status = CMBlockBufferCreateContiguous(
+        nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer);
+    if (status != noErr) {
+      RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: "
+                        << status;
+      return false;
+    }
+  } else {
+    contiguous_buffer = block_buffer;
+    // Retain to make cleanup easier.
+    CFRetain(contiguous_buffer);
+    block_buffer = nullptr;
+  }
+
+  // Now copy the actual data.
+  char* data_ptr = nullptr;
+  size_t block_buffer_size = CMBlockBufferGetDataLength(contiguous_buffer);
+  status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, nullptr,
+                                       &data_ptr);
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "Failed to get block buffer data.";
+    CFRelease(contiguous_buffer);
+    return false;
+  }
+  size_t bytes_remaining = block_buffer_size;
+  while (bytes_remaining > 0) {
+    // The size type here must match |nalu_header_size|, we expect 4 bytes.
+    // Read the length of the next packet of data. Must convert from big endian
+    // to host endian.
+    RTC_DCHECK_GE(bytes_remaining, (size_t)nalu_header_size);
+    uint32_t* uint32_data_ptr = reinterpret_cast<uint32_t*>(data_ptr);
+    uint32_t packet_size = CFSwapInt32BigToHost(*uint32_data_ptr);
+    // Update buffer.
+    annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes));
+    annexb_buffer->AppendData(data_ptr + nalu_header_size, packet_size);
+    // Update fragmentation. 
+    frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes));
+    frag_lengths.push_back(packet_size);
+    nalu_offset += sizeof(kAnnexBHeaderBytes) + packet_size;
+
+    size_t bytes_written = packet_size + sizeof(kAnnexBHeaderBytes);
+    bytes_remaining -= bytes_written;
+    data_ptr += bytes_written;
+  }
+  RTC_DCHECK_EQ(bytes_remaining, (size_t)0);
+
+  CFRelease(contiguous_buffer);
+  return true;
+}
+
+bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer,
+                                      size_t annexb_buffer_size,
+                                      CMVideoFormatDescriptionRef video_format,
+                                      CMSampleBufferRef* out_sample_buffer) {
+  RTC_DCHECK(annexb_buffer);
+  RTC_DCHECK(out_sample_buffer);
+  RTC_DCHECK(video_format);
+  *out_sample_buffer = nullptr;
+
+  AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
+  if (reader.SeekToNextNaluOfType(H265::kVps)) {
+    // Buffer contains a VPS NALU - skip it and the following SPS and PPS
+    const uint8_t* data;
+    size_t data_len;
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read VPS";
+      return false;
+    }
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read SPS";
+      return false;
+    }
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read PPS";
+      return false;
+    }
+  } else {
+    // No VPS NALU - start reading from the first NALU in the buffer
+    reader.SeekToStart();
+  }
+
+  // Allocate memory as a block buffer.
+  // TODO(tkchin): figure out how to use a pool.
+  CMBlockBufferRef block_buffer = nullptr;
+  OSStatus status = CMBlockBufferCreateWithMemoryBlock(
+      nullptr, nullptr, reader.BytesRemaining(), nullptr, nullptr, 0,
+      reader.BytesRemaining(), kCMBlockBufferAssureMemoryNowFlag,
+      &block_buffer);
+  if (status != kCMBlockBufferNoErr) {
+    RTC_LOG(LS_ERROR) << "Failed to create block buffer.";
+    return false;
+  }
+
+  // Make sure block buffer is contiguous. 
+ CMBlockBufferRef contiguous_buffer = nullptr; + if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) { + status = CMBlockBufferCreateContiguous( + nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: " + << status; + CFRelease(block_buffer); + return false; + } + } else { + contiguous_buffer = block_buffer; + block_buffer = nullptr; + } + + // Get a raw pointer into allocated memory. + size_t block_buffer_size = 0; + char* data_ptr = nullptr; + status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, + &block_buffer_size, &data_ptr); + if (status != kCMBlockBufferNoErr) { + RTC_LOG(LS_ERROR) << "Failed to get block buffer data pointer."; + CFRelease(contiguous_buffer); + return false; + } + RTC_DCHECK(block_buffer_size == reader.BytesRemaining()); + + // Write Avcc NALUs into block buffer memory. + AvccBufferWriter writer(reinterpret_cast(data_ptr), + block_buffer_size); + while (reader.BytesRemaining() > 0) { + const uint8_t* nalu_data_ptr = nullptr; + size_t nalu_data_size = 0; + if (reader.ReadNalu(&nalu_data_ptr, &nalu_data_size)) { + writer.WriteNalu(nalu_data_ptr, nalu_data_size); + } + } + + // Create sample buffer. 
+  status = CMSampleBufferCreate(nullptr, contiguous_buffer, true, nullptr,
+                                nullptr, video_format, 1, 0, nullptr, 0,
+                                nullptr, out_sample_buffer);
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "Failed to create sample buffer.";
+    CFRelease(contiguous_buffer);
+    return false;
+  }
+  CFRelease(contiguous_buffer);
+  return true;
+}
+#endif
+
 CMVideoFormatDescriptionRef CreateVideoFormatDescription(
     const uint8_t* annexb_buffer,
     size_t annexb_buffer_size) {
@@ -254,6 +466,43 @@ CMVideoFormatDescriptionRef CreateVideoFormatDescription(
   return description;
 }
 
+#ifdef WEBRTC_USE_H265
+CMVideoFormatDescriptionRef CreateH265VideoFormatDescription(
+    const uint8_t* annexb_buffer,
+    size_t annexb_buffer_size) {
+  const uint8_t* param_set_ptrs[3] = {};
+  size_t param_set_sizes[3] = {};
+  AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
+  // Skip everything before the VPS, then read the VPS, SPS and PPS
+  if (!reader.SeekToNextNaluOfType(H265::kVps)) {
+    return nullptr;
+  }
+  if (!reader.ReadNalu(&param_set_ptrs[0], &param_set_sizes[0])) {
+    RTC_LOG(LS_ERROR) << "Failed to read VPS";
+    return nullptr;
+  }
+  if (!reader.ReadNalu(&param_set_ptrs[1], &param_set_sizes[1])) {
+    RTC_LOG(LS_ERROR) << "Failed to read SPS";
+    return nullptr;
+  }
+  if (!reader.ReadNalu(&param_set_ptrs[2], &param_set_sizes[2])) {
+    RTC_LOG(LS_ERROR) << "Failed to read PPS";
+    return nullptr;
+  }
+
+  // Parse the VPS, SPS and PPS into a CMVideoFormatDescription. 
+ CMVideoFormatDescriptionRef description = nullptr; + OSStatus status = CMVideoFormatDescriptionCreateFromHEVCParameterSets( + kCFAllocatorDefault, 3, param_set_ptrs, param_set_sizes, 4, nullptr, + &description); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create video format description."; + return nullptr; + } + return description; +} +#endif + AnnexBBufferReader::AnnexBBufferReader(const uint8_t* annexb_buffer, size_t length) : start_(annexb_buffer), length_(length) { @@ -300,6 +549,19 @@ bool AnnexBBufferReader::SeekToNextNaluOfType(NaluType type) { } return false; } + +#ifdef WEBRTC_USE_H265 +bool AnnexBBufferReader::SeekToNextNaluOfType(H265::NaluType type) { + for (; offset_ != offsets_.end(); ++offset_) { + if (offset_->payload_size < 1) + continue; + if (H265::ParseNaluType(*(start_ + offset_->payload_start_offset)) == type) + return true; + } + return false; +} +#endif + AvccBufferWriter::AvccBufferWriter(uint8_t* const avcc_buffer, size_t length) : start_(avcc_buffer), offset_(0), length_(length) { RTC_DCHECK(avcc_buffer); diff --git a/sdk/objc/components/video_codec/nalu_rewriter.h b/sdk/objc/components/video_codec/nalu_rewriter.h index c6474971e2..b020efb087 100644 --- a/sdk/objc/components/video_codec/nalu_rewriter.h +++ b/sdk/objc/components/video_codec/nalu_rewriter.h @@ -18,6 +18,10 @@ #include #include "common_video/h264/h264_common.h" +#ifdef WEBRTC_USE_H265 +#include "common_video/h265/h265_common.h" +#endif +#include "modules/include/module_common_types.h" #include "rtc_base/buffer.h" using webrtc::H264::NaluIndex; @@ -43,6 +47,30 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, CMSampleBufferRef* out_sample_buffer, CMMemoryPoolRef memory_pool); +#ifdef WEBRTC_USE_H265 +// Converts a sample buffer emitted from the VideoToolbox encoder into a buffer +// suitable for RTP. The sample buffer is in hvcc format whereas the rtp buffer +// needs to be in Annex B format. 
Data is written directly to |annexb_buffer| +// and a new RTPFragmentationHeader is returned in |out_header|. +bool H265CMSampleBufferToAnnexBBuffer( + CMSampleBufferRef hvcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer) + __OSX_AVAILABLE_STARTING(__MAC_10_12, __IPHONE_11_0); + + // Converts a buffer received from RTP into a sample buffer suitable for the +// VideoToolbox decoder. The RTP buffer is in annex b format whereas the sample +// buffer is in hvcc format. +// If |is_keyframe| is true then |video_format| is ignored since the format will +// be read from the buffer. Otherwise |video_format| must be provided. +// Caller is responsible for releasing the created sample buffer. +bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, + size_t annexb_buffer_size, + CMVideoFormatDescriptionRef video_format, + CMSampleBufferRef* out_sample_buffer) + __OSX_AVAILABLE_STARTING(__MAC_10_12, __IPHONE_11_0); +#endif + // Returns a video format description created from the sps/pps information in // the Annex B buffer. If there is no such information, nullptr is returned. // The caller is responsible for releasing the description. @@ -50,6 +78,13 @@ CMVideoFormatDescriptionRef CreateVideoFormatDescription( const uint8_t* annexb_buffer, size_t annexb_buffer_size); +#ifdef WEBRTC_USE_H265 +CMVideoFormatDescriptionRef CreateH265VideoFormatDescription( + const uint8_t* annexb_buffer, + size_t annexb_buffer_size) + __OSX_AVAILABLE_STARTING(__MAC_10_12, __IPHONE_11_0); +#endif + // Helper class for reading NALUs from an RTP Annex B buffer. class AnnexBBufferReader final { public: @@ -74,6 +109,9 @@ class AnnexBBufferReader final { // Return true if a NALU of the desired type is found, false if we // reached the end instead bool SeekToNextNaluOfType(H264::NaluType type); +#ifdef WEBRTC_USE_H265 + bool SeekToNextNaluOfType(H265::NaluType type); +#endif private: // Returns the the next offset that contains NALU data. 
diff --git a/sdk/objc/native/src/objc_video_encoder_factory.mm b/sdk/objc/native/src/objc_video_encoder_factory.mm index d4ea79cc88..cd7474a7ec 100644 --- a/sdk/objc/native/src/objc_video_encoder_factory.mm +++ b/sdk/objc/native/src/objc_video_encoder_factory.mm @@ -16,6 +16,9 @@ #import "base/RTCVideoEncoder.h" #import "base/RTCVideoEncoderFactory.h" #import "components/video_codec/RTCCodecSpecificInfoH264+Private.h" +#ifdef WEBRTC_USE_H265 +#import "components/video_codec/RTCCodecSpecificInfoH265+Private.h" +#endif #import "sdk/objc/api/peerconnection/RTCEncodedImage+Private.h" #import "sdk/objc/api/peerconnection/RTCVideoCodecInfo+Private.h" #import "sdk/objc/api/peerconnection/RTCVideoEncoderSettings+Private.h" @@ -58,6 +61,11 @@ int32_t RegisterEncodeCompleteCallback(EncodedImageCallback *callback) override if ([info isKindOfClass:[RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) class]]) { codecSpecificInfo = [(RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) *)info nativeCodecSpecificInfo]; +#ifdef WEBRTC_USE_H265 + } else if ([info isKindOfClass:[RTC_OBJC_TYPE(RTCCodecSpecificInfoH265) class]]) { + // if ([info isKindOfClass:[RTCCodecSpecificInfoH265 class]]) { + codecSpecificInfo = [(RTCCodecSpecificInfoH265 *)info nativeCodecSpecificInfo]; +#endif } EncodedImageCallback::Result res = callback->OnEncodedImage(encodedImage, &codecSpecificInfo); diff --git a/test/scenario/video_stream.cc b/test/scenario/video_stream.cc index 3fcef570c7..e9c64592ac 100644 --- a/test/scenario/video_stream.cc +++ b/test/scenario/video_stream.cc @@ -204,10 +204,29 @@ CreateH264SpecificSettings(VideoStreamConfig config) { return nullptr; } +#ifdef WEBRTC_USE_H265 +rtc::scoped_refptr +CreateH265SpecificSettings(VideoStreamConfig config) { + RTC_DCHECK_EQ(config.encoder.layers.temporal, 1); + RTC_DCHECK_EQ(config.encoder.layers.spatial, 1); + + VideoCodecH265 h265_settings = VideoEncoder::GetDefaultH265Settings(); + h265_settings.frameDroppingOn = config.encoder.frame_dropping; + 
h265_settings.keyFrameInterval = + config.encoder.key_frame_interval.value_or(0); + return new rtc::RefCountedObject< + VideoEncoderConfig::H265EncoderSpecificSettings>(h265_settings); +} +#endif + rtc::scoped_refptr CreateEncoderSpecificSettings(VideoStreamConfig config) { using Codec = VideoStreamConfig::Encoder::Codec; switch (config.encoder.codec) { +#ifdef WEBRTC_USE_H265 + case Codec::kVideoCodecH265: + return CreateH265SpecificSettings(config); +#endif case Codec::kVideoCodecH264: return CreateH264SpecificSettings(config); case Codec::kVideoCodecVP8: diff --git a/test/video_codec_settings.h b/test/video_codec_settings.h index 5ef4ed3e4a..79b3a8dacc 100644 --- a/test/video_codec_settings.h +++ b/test/video_codec_settings.h @@ -54,6 +54,11 @@ static void CodecSettings(VideoCodecType codec_type, VideoCodec* settings) { case kVideoCodecVP9: *(settings->VP9()) = VideoEncoder::GetDefaultVp9Settings(); return; +#ifdef WEBRTC_USE_H265 + case kVideoCodecH265: + *(settings->H265()) = VideoEncoder::GetDefaultH265Settings(); + return; +#endif case kVideoCodecH264: // TODO(brandtr): Set `qpMax` here, when the OpenH264 wrapper supports it. 
*(settings->H264()) = VideoEncoder::GetDefaultH264Settings(); diff --git a/video/rtp_video_stream_receiver2.cc b/video/rtp_video_stream_receiver2.cc index 14632445e0..43f4686da0 100644 --- a/video/rtp_video_stream_receiver2.cc +++ b/video/rtp_video_stream_receiver2.cc @@ -39,6 +39,7 @@ #include "modules/video_coding/frame_object.h" #include "modules/video_coding/h264_sprop_parameter_sets.h" #include "modules/video_coding/h264_sps_pps_tracker.h" +#include "modules/video_coding/h265_vps_sps_pps_tracker.h" #include "modules/video_coding/nack_requester.h" #include "modules/video_coding/packet_buffer.h" #include "rtc_base/checks.h" @@ -614,7 +615,31 @@ void RtpVideoStreamReceiver2::OnReceivedPayloadData( packet->video_payload = std::move(fixed.bitstream); break; } + } else if (packet->codec() == kVideoCodecH265) { + // Only when we start to receive packets will we know what payload type + // that will be used. When we know the payload type insert the correct + // sps/pps into the tracker. + if (packet->payload_type != last_payload_type_) { + last_payload_type_ = packet->payload_type; + InsertSpsPpsIntoTracker(packet->payload_type); + } + video_coding::H265VpsSpsPpsTracker::FixedBitstream fixed = + h265_tracker_.CopyAndFixBitstream( + rtc::MakeArrayView(codec_payload.cdata(), codec_payload.size()), + &packet->video_header); + + switch (fixed.action) { + case video_coding::H265VpsSpsPpsTracker::kRequestKeyframe: + rtcp_feedback_buffer_.RequestKeyFrame(); + rtcp_feedback_buffer_.SendBufferedRtcpFeedback(); + ABSL_FALLTHROUGH_INTENDED; + case video_coding::H265VpsSpsPpsTracker::kDrop: + return; + case video_coding::H265VpsSpsPpsTracker::kInsert: + packet->video_payload = std::move(fixed.bitstream); + break; + } } else { packet->video_payload = std::move(codec_payload); } diff --git a/video/rtp_video_stream_receiver2.h b/video/rtp_video_stream_receiver2.h index cb43b2c11f..16d956c7e7 100644 --- a/video/rtp_video_stream_receiver2.h +++ b/video/rtp_video_stream_receiver2.h @@ 
-38,6 +38,7 @@
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
 #include "modules/video_coding/h264_sps_pps_tracker.h"
+#include "modules/video_coding/h265_vps_sps_pps_tracker.h"
 #include "modules/video_coding/loss_notification_controller.h"
 #include "modules/video_coding/nack_requester.h"
 #include "modules/video_coding/packet_buffer.h"
@@ -359,6 +360,8 @@ class RtpVideoStreamReceiver2 : public LossNotificationSender,
       RTC_GUARDED_BY(packet_sequence_checker_);
   video_coding::H264SpsPpsTracker tracker_
       RTC_GUARDED_BY(packet_sequence_checker_);
+  video_coding::H265VpsSpsPpsTracker h265_tracker_
+      RTC_GUARDED_BY(packet_sequence_checker_);
   // Maps payload id to the depacketizer.
   std::map<uint8_t, std::unique_ptr<VideoRtpDepacketizer>> payload_type_map_
diff --git a/video/send_statistics_proxy.cc b/video/send_statistics_proxy.cc
index fabe908e0b..31e1412f15 100644
--- a/video/send_statistics_proxy.cc
+++ b/video/send_statistics_proxy.cc
@@ -47,6 +47,9 @@ enum HistogramCodecType {
   kVideoVp9 = 2,
   kVideoH264 = 3,
   kVideoAv1 = 4,
+#ifdef WEBRTC_USE_H265
+  kVideoH265 = 5,
+#endif
   kVideoMax = 64,
 };
@@ -76,6 +79,10 @@ HistogramCodecType PayloadNameToHistogramCodecType(
       return kVideoH264;
     case kVideoCodecAV1:
       return kVideoAv1;
+#ifdef WEBRTC_USE_H265
+    case kVideoCodecH265:
+      return kVideoH265;
+#endif
     default:
       return kVideoUnknown;
   }
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 791bdad0e9..956a575056 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -121,7 +121,13 @@ bool RequiresEncoderReset(const VideoCodec& prev_send_codec,
         return true;
       }
       break;
-
+#ifdef WEBRTC_USE_H265
+    case kVideoCodecH265:
+      if (new_send_codec.H265() != prev_send_codec.H265()) {
+        return true;
+      }
+      break;
+#endif
     default:
       break;
   }