From 93f9c901125abb1855181786077ed53be79a4d04 Mon Sep 17 00:00:00 2001 From: Alessandro Ros Date: Sat, 2 Sep 2023 20:56:34 +0200 Subject: [PATCH] h265: cleanup DTS extractor (#75) --- pkg/codecs/h265/dts_extractor.go | 61 +++++++------------ pkg/codecs/h265/dts_extractor_test.go | 86 ++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 40 deletions(-) diff --git a/pkg/codecs/h265/dts_extractor.go b/pkg/codecs/h265/dts_extractor.go index b44548c..1a33396 100644 --- a/pkg/codecs/h265/dts_extractor.go +++ b/pkg/codecs/h265/dts_extractor.go @@ -8,9 +8,9 @@ import ( "github.com/bluenviron/mediacommon/pkg/codecs/h264" ) -func getPictureOrderCount(buf []byte, sps *SPS, pps *PPS) (uint32, uint32, error) { +func getPTSDTSDiff(buf []byte, sps *SPS, pps *PPS) (uint32, error) { if len(buf) < 12 { - return 0, 0, fmt.Errorf("not enough bits") + return 0, fmt.Errorf("not enough bits") } buf = h264.EmulationPreventionRemove(buf[:12]) @@ -22,70 +22,70 @@ func getPictureOrderCount(buf []byte, sps *SPS, pps *PPS) (uint32, uint32, error firstSliceSegmentInPicFlag, err := bits.ReadFlag(buf, &pos) if err != nil { - return 0, 0, err + return 0, err } if !firstSliceSegmentInPicFlag { - return 0, 0, fmt.Errorf("first_slice_segment_in_pic_flag = 0 is not supported") + return 0, fmt.Errorf("first_slice_segment_in_pic_flag = 0 is not supported") } if typ >= NALUType_BLA_W_LP && typ <= NALUType_RSV_IRAP_VCL23 { _, err := bits.ReadFlag(buf, &pos) // no_output_of_prior_pics_flag if err != nil { - return 0, 0, err + return 0, err } } _, err = bits.ReadGolombUnsigned(buf, &pos) // slice_pic_parameter_set_id if err != nil { - return 0, 0, err + return 0, err } if pps.NumExtraSliceHeaderBits > 0 { err := bits.HasSpace(buf, pos, int(pps.NumExtraSliceHeaderBits)) if err != nil { - return 0, 0, err + return 0, err } pos += int(pps.NumExtraSliceHeaderBits) } sliceType, err := bits.ReadGolombUnsigned(buf, &pos) // slice_type if err != nil { - return 0, 0, err + return 0, err } if pps.OutputFlagPresentFlag { _, err := bits.ReadFlag(buf, &pos) // pic_output_flag if err != nil { - return 0, 0, err + return 0, err } } if sps.SeparateColourPlaneFlag { _, err := bits.ReadBits(buf, &pos, 2) // colour_plane_id if err != nil { - return 0, 0, err + return 0, err } } - picOrderCntLsb, err := bits.ReadBits(buf, &pos, int(sps.Log2MaxPicOrderCntLsbMinus4+4)) + _, err = bits.ReadBits(buf, &pos, int(sps.Log2MaxPicOrderCntLsbMinus4+4)) // pic_order_cnt_lsb if err != nil { - return 0, 0, err + return 0, err } shortTermRefPicSetSpsFlag, err := bits.ReadFlag(buf, &pos) if err != nil { - return 0, 0, err + return 0, err } if shortTermRefPicSetSpsFlag { - return 0, 0, fmt.Errorf("short_term_ref_pic_set_sps_flag = true is not supported") + return 0, fmt.Errorf("short_term_ref_pic_set_sps_flag = true is not supported") } var rps SPS_ShortTermRefPicSet err = rps.unmarshal(buf, &pos, uint32(len(sps.ShortTermRefPicSets)), uint32(len(sps.ShortTermRefPicSets)), nil) if err != nil { - return 0, 0, err + return 0, err } var v uint32 @@ -95,30 +95,18 @@ func getPictureOrderCount(buf []byte, sps *SPS, pps *PPS) (uint32, uint32, error v = sps.MaxNumReorderPics[0] - uint32(len(rps.DeltaPocS1Minus1)) } else if typ == NALUType_TRAIL_R || typ == NALUType_RASL_R { if len(rps.DeltaPocS0Minus1) == 0 { - return 0, 0, fmt.Errorf("invalid delta_poc_s0_minus1") + return 0, fmt.Errorf("invalid delta_poc_s0_minus1") } v = rps.DeltaPocS0Minus1[0] + sps.MaxNumReorderPics[0] - 1 } } else { // I or P-frame if len(rps.DeltaPocS0Minus1) == 0 { - return 0, 0, fmt.Errorf("invalid delta_poc_s0_minus1") + return 0, fmt.Errorf("invalid delta_poc_s0_minus1") } v = rps.DeltaPocS0Minus1[0] + sps.MaxNumReorderPics[0] } - dtsPOC := uint32(picOrderCntLsb) - v - dtsPOC &= ((1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1) - - return uint32(picOrderCntLsb), dtsPOC, nil -} - -func getPictureOrderCountDiff(a uint32, b uint32, sps *SPS) int32 { - max := uint32(1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) - d := (a - b) & (max - 1) - if d > (max / 2) { - return int32(d) - int32(max) - } - return int32(d) + return v, nil } // DTSExtractor allows to extract DTS from PTS. @@ -140,7 +128,6 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati for _, nalu := range au { typ := NALUType((nalu[0] >> 1) & 0b111111) - switch typ { case NALUType_SPS_NUT: var spsp SPS @@ -182,18 +169,15 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati return pts, nil } - var poc uint32 - var dtsPOC uint32 + var samplesDiff uint32 switch { case idr != nil: - poc = 0 - dtsPOC = poc - 2 - dtsPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1) + samplesDiff = d.spsp.MaxNumReorderPics[0] case nonIDR != nil: var err error - poc, dtsPOC, err = getPictureOrderCount(nonIDR, d.spsp, d.ppsp) + samplesDiff, err = getPTSDTSDiff(nonIDR, d.spsp, d.ppsp) if err != nil { return 0, err } @@ -202,8 +186,7 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati return 0, fmt.Errorf("access unit doesn't contain an IDR or non-IDR NALU") } - pocDiff := getPictureOrderCountDiff(poc, dtsPOC, d.spsp) - timeDiff := time.Duration(pocDiff) * time.Second * + timeDiff := time.Duration(samplesDiff) * time.Second * time.Duration(d.spsp.VUI.TimingInfo.NumUnitsInTick) / time.Duration(d.spsp.VUI.TimingInfo.TimeScale) dts := pts - timeDiff diff --git a/pkg/codecs/h265/dts_extractor_test.go b/pkg/codecs/h265/dts_extractor_test.go index c59e764..84d8c74 100644 --- a/pkg/codecs/h265/dts_extractor_test.go +++ b/pkg/codecs/h265/dts_extractor_test.go @@ -19,7 +19,91 @@ func TestDTSExtractor(t *testing.T) { sequence []sequenceSample }{ { - "no timing info", + "with timing info, IDR", + []sequenceSample{ + { + [][]byte{ + { // VPS + 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x60, + 0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x03, 0x00, 0x78, 0x99, 0x98, 0x09, + }, + { // SPS + 0x42, 0x01, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03, + 0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, + 0x00, 0x78, 0xa0, 0x03, 0xc0, 0x80, 0x10, 0xe5, + 0x96, 0x66, 0x69, 0x24, 0xca, 0xe0, 0x10, 0x00, + 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, + 0xe0, 0x80, + }, + { // PPS + 0x44, 0x1, 0xc1, 0x72, 0xb4, 0x62, 0x40, + }, + { // IDR_W_RADL + 0x26, 0x1, 0xaf, 0x8, 0x42, 0x23, 0x48, 0x8a, 0x43, 0xe2, + }, + }, + 0, + -66666666 * time.Nanosecond, + }, + { + [][]byte{{ // TRAIL_R + 0x02, 0x01, 0xd0, 0x19, 0x5f, 0x8c, 0xb4, 0x42, + 0x49, 0x20, 0x40, 0x11, 0x16, 0x92, 0x93, 0xea, + 0x54, 0x57, 0x4e, 0x0a, + }}, + 100 * time.Millisecond, + -33333333 * time.Nanosecond, + }, + { + [][]byte{{ // TRAIL_R + 0x02, 0x01, 0xe0, 0x44, 0x97, 0xe0, 0x81, 0x20, + 0x44, 0x52, 0x62, 0x7a, 0x1b, 0x88, 0x0b, 0x21, + 0x26, 0x5f, 0x10, 0x9c, + }}, + 66666666 * time.Nanosecond, + 0, + }, + { + [][]byte{{ // TRAIL_N + 0x00, 0x01, 0xe0, 0x24, 0xff, 0xfa, 0x24, 0x0a, + 0x42, 0x25, 0x8c, 0x18, 0xe6, 0x1c, 0xea, 0x5a, + 0x5d, 0x07, 0xc1, 0x8f, + }}, + 33333333 * time.Nanosecond, + 33333333 * time.Nanosecond, + }, + { + [][]byte{{ // TRAIL_R + 0x02, 0x01, 0xd0, 0x30, 0x97, 0xd7, 0xdc, 0xf9, + 0x0c, 0x10, 0x11, 0x11, 0x20, 0x42, 0x11, 0x18, + 0x63, 0xa5, 0x18, 0x55, + }}, + 200 * time.Millisecond, + 66666667 * time.Nanosecond, + }, + { + [][]byte{{ // TRAIL_R + 0x02, 0x01, 0xe0, 0xa2, 0x25, 0xd7, 0xf7, 0x08, + 0x12, 0x04, 0x45, 0xa1, 0x83, 0xc0, 0x97, 0x53, + 0xa3, 0x5e, 0x78, 0x14, + }}, + 166666666 * time.Nanosecond, + 100 * time.Millisecond, + }, + { + [][]byte{{ // TRAIL_N + 0x00, 0x01, 0xe0, 0x82, 0x3f, 0x5f, 0xf6, 0x89, + 0x02, 0x90, 0x88, 0xa3, 0x0c, 0x7d, 0x27, 0x0c, + 0xd4, 0xd9, 0xc2, 0xa5, + }}, + 133333333 * time.Nanosecond, + 133333333 * time.Nanosecond, + }, + }, + }, + { + "no timing info, CRA", []sequenceSample{ { [][]byte{