From fb8f12befc2be2188790348c7e9db525a3bb78b0 Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Wed, 16 Aug 2023 13:17:58 +0200 Subject: [PATCH] h264, h265: improve performance --- pkg/codecs/h264/dts_extractor.go | 105 +++++++++++++++---------------- pkg/codecs/h265/dts_extractor.go | 49 ++++++--------- 2 files changed, 70 insertions(+), 84 deletions(-) diff --git a/pkg/codecs/h264/dts_extractor.go b/pkg/codecs/h264/dts_extractor.go index 648a25c..9103c88 100644 --- a/pkg/codecs/h264/dts_extractor.go +++ b/pkg/codecs/h264/dts_extractor.go @@ -50,20 +50,6 @@ func getPictureOrderCount(buf []byte, sps *SPS) (uint32, error) { return uint32(picOrderCntLsb), nil } -func findPictureOrderCount(au [][]byte, sps *SPS) (uint32, error) { - for _, nalu := range au { - typ := NALUType(nalu[0] & 0x1F) - if typ == NALUTypeNonIDR { - poc, err := getPictureOrderCount(nalu, sps) - if err != nil { - return 0, err - } - return poc, nil - } - } - return 0, fmt.Errorf("POC not found") -} - func getPictureOrderCountDiff(a uint32, b uint32, sps *SPS) int32 { max := uint32(1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) d := (a - b) & (max - 1) @@ -93,7 +79,8 @@ func NewDTSExtractor() *DTSExtractor { } func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Duration, bool, error) { - idrPresent := false + var idr []byte + var nonIDR []byte for _, nalu := range au { typ := NALUType(nalu[0] & 0x1F) @@ -114,7 +101,10 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati } case NALUTypeIDR: - idrPresent = true + idr = nalu + + case NALUTypeNonIDR: + nonIDR = nalu } } @@ -130,7 +120,8 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati return 0, false, fmt.Errorf("pic_order_cnt_type = 1 is not supported yet") } - if idrPresent { + switch { + case idr != nil: d.expectedPOC = 0 d.pauseDTS = 0 @@ -139,56 +130,60 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati } return d.prevDTS + (pts-d.prevDTS)/time.Duration(d.reorderedFrames+1), false, nil - } - d.expectedPOC += uint32(d.pocIncrement) - d.expectedPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1) + case nonIDR != nil: + d.expectedPOC += uint32(d.pocIncrement) + d.expectedPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1) - if d.pauseDTS > 0 { - d.pauseDTS-- - return d.prevDTS + 1*time.Millisecond, true, nil - } + if d.pauseDTS > 0 { + d.pauseDTS-- + return d.prevDTS + 1*time.Millisecond, true, nil + } - poc, err := findPictureOrderCount(au, d.spsp) - if err != nil { - return 0, false, err - } + poc, err := getPictureOrderCount(nonIDR, d.spsp) + if err != nil { + return 0, false, err + } - if d.pocIncrement == 2 && (poc%2) != 0 { - d.pocIncrement = 1 - d.expectedPOC /= 2 - } + if d.pocIncrement == 2 && (poc%2) != 0 { + d.pocIncrement = 1 + d.expectedPOC /= 2 + } - pocDiff := int(getPictureOrderCountDiff(poc, d.expectedPOC, d.spsp)) + d.reorderedFrames*d.pocIncrement + pocDiff := int(getPictureOrderCountDiff(poc, d.expectedPOC, d.spsp)) + d.reorderedFrames*d.pocIncrement - if pocDiff < 0 { - if pocDiff < -20 { - return 0, false, fmt.Errorf("POC difference between frames is too big (%d)", pocDiff) + if pocDiff < 0 { + if pocDiff < -20 { + return 0, false, fmt.Errorf("POC difference between frames is too big (%d)", pocDiff) + } + + // this happens when there are B-frames immediately following an IDR frame + d.reorderedFrames -= pocDiff + d.pauseDTS = -pocDiff + return d.prevDTS + 1*time.Millisecond, true, nil } - // this happens when there are B-frames immediately following an IDR frame - d.reorderedFrames -= pocDiff - d.pauseDTS = -pocDiff - return d.prevDTS + 1*time.Millisecond, true, nil - } + if pocDiff == 0 { + return pts, false, nil + } - if pocDiff == 0 { - return pts, false, nil - } + if pocDiff > 20 { + return 0, false, fmt.Errorf("POC difference between frames is too big (%d)", pocDiff) + } - if pocDiff > 20 { - return 0, false, fmt.Errorf("POC difference between frames is too big (%d)", pocDiff) - } + reorderedFrames := (pocDiff)/d.pocIncrement - d.reorderedFrames + if reorderedFrames > d.reorderedFrames { + // reordered frames detected, add them to the count and pause DTS + d.pauseDTS = (reorderedFrames - d.reorderedFrames - 1) + d.reorderedFrames = reorderedFrames + return d.prevDTS + 1*time.Millisecond, false, nil + } - reorderedFrames := (pocDiff)/d.pocIncrement - d.reorderedFrames - if reorderedFrames > d.reorderedFrames { - // reordered frames detected, add them to the count and pause DTS - d.pauseDTS = (reorderedFrames - d.reorderedFrames - 1) - d.reorderedFrames = reorderedFrames - return d.prevDTS + 1*time.Millisecond, false, nil - } + return d.prevDTS + (pts-d.prevDTS)*time.Duration(d.pocIncrement)/time.Duration(pocDiff+d.pocIncrement), false, nil - return d.prevDTS + (pts-d.prevDTS)*time.Duration(d.pocIncrement)/time.Duration(pocDiff+d.pocIncrement), false, nil + default: + return 0, false, fmt.Errorf("access unit doesn't contain an IDR or non-IDR NALU") + } } // Extract extracts the DTS of an access unit. diff --git a/pkg/codecs/h265/dts_extractor.go b/pkg/codecs/h265/dts_extractor.go index ab90321..b44548c 100644 --- a/pkg/codecs/h265/dts_extractor.go +++ b/pkg/codecs/h265/dts_extractor.go @@ -112,31 +112,13 @@ func getPictureOrderCount(buf []byte, sps *SPS, pps *PPS) (uint32, uint32, error return uint32(picOrderCntLsb), dtsPOC, nil } -func findPictureOrderCount(au [][]byte, sps *SPS, pps *PPS) (uint32, uint32, error) { - for _, nalu := range au { - typ := NALUType((nalu[0] >> 1) & 0b111111) - switch typ { - case NALUType_TRAIL_N, NALUType_TRAIL_R, NALUType_CRA_NUT, NALUType_RASL_N, NALUType_RASL_R: - poc, dtsPOC, err := getPictureOrderCount(nalu, sps, pps) - if err != nil { - return 0, 0, err - } - return poc, dtsPOC, nil - } +func getPictureOrderCountDiff(a uint32, b uint32, sps *SPS) int32 { + max := uint32(1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) + d := (a - b) & (max - 1) + if d > (max / 2) { + return int32(d) - int32(max) } - return 0, 0, fmt.Errorf("POC not found") -} - -func getPictureOrderCountDiff(poc1 uint32, poc2 uint32, sps *SPS) int32 { - diff := int32(poc1) - int32(poc2) - switch { - case diff < -((1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 3)) - 1): - diff += (1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) - - case diff > ((1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 3)) - 1): - diff -= (1 << (sps.Log2MaxPicOrderCntLsbMinus4 + 4)) - } - return diff + return int32(d) } // DTSExtractor allows to extract DTS from PTS. @@ -153,7 +135,8 @@ func NewDTSExtractor() *DTSExtractor { } func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Duration, error) { - idrPresent := false + var idr []byte + var nonIDR []byte for _, nalu := range au { typ := NALUType((nalu[0] >> 1) & 0b111111) @@ -176,7 +159,10 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati d.ppsp = &ppsp case NALUType_IDR_W_RADL, NALUType_IDR_N_LP: - idrPresent = true + idr = nalu + + case NALUType_TRAIL_N, NALUType_TRAIL_R, NALUType_CRA_NUT, NALUType_RASL_N, NALUType_RASL_R: + nonIDR = nalu } } @@ -199,16 +185,21 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati var poc uint32 var dtsPOC uint32 - if idrPresent { + switch { + case idr != nil: poc = 0 dtsPOC = poc - 2 dtsPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1) - } else { + + case nonIDR != nil: var err error - poc, dtsPOC, err = findPictureOrderCount(au, d.spsp, d.ppsp) + poc, dtsPOC, err = getPictureOrderCount(nonIDR, d.spsp, d.ppsp) if err != nil { return 0, err } + + default: + return 0, fmt.Errorf("access unit doesn't contain an IDR or non-IDR NALU") } pocDiff := getPictureOrderCountDiff(poc, dtsPOC, d.spsp)