From b7e21dbb957b738883608f9b3c2826bc3da52f16 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Sat, 29 Jun 2024 13:41:50 +0000 Subject: [PATCH] Add H.264 decoder This adds an H.264 decoder based on WebCodecs. --- core/decoders/h264.js | 346 ++++++++++++++++++++++++++++++++++++++++++ core/display.js | 41 +++++ core/encodings.js | 2 + core/rfb.js | 6 + 4 files changed, 395 insertions(+) create mode 100644 core/decoders/h264.js diff --git a/core/decoders/h264.js b/core/decoders/h264.js new file mode 100644 index 000000000..4ba0f657d --- /dev/null +++ b/core/decoders/h264.js @@ -0,0 +1,346 @@ +/* + * noVNC: HTML5 VNC client + * Copyright (C) 2024 The noVNC Authors + * Licensed under MPL 2.0 (see LICENSE.txt) + * + * See README.md for usage and integration instructions. + * + */ + +import * as Log from '../util/logging.js'; + +class H264Parser { + constructor(data) { + this._data = data; + this._index = 0; + this.profileIdc = null; + this.constraintSet = null; + this.levelIdc = null; + } + + _getStartSequenceLen(index) { + let data = this._data; + if (data[index + 0] == 0 && data[index + 1] == 0 && data[index + 2] == 0 && data[index + 3] == 1) { + return 4; + } + if (data[index + 0] == 0 && data[index + 1] == 0 && data[index + 2] == 1) { + return 3; + } + return 0; + } + + _indexOfNextNalUnit(index) { + let data = this._data; + for (let i = index; i < data.length; ++i) { + if (this._getStartSequenceLen(i) != 0) { + return i; + } + } + return -1; + } + + _parseSps(index) { + this.profileIdc = this._data[index]; + this.constraintSet = this._data[index + 1]; + this.levelIdc = this._data[index + 2]; + } + + _parseNalUnit(index) { + const firstByte = this._data[index]; + if (firstByte & 0x80) { + throw new Error('H264 parsing sanity check failed, forbidden zero bit is set'); + } + const unitType = firstByte & 0x1f; + + switch (unitType) { + case 1: // coded slice, non-idr + return { slice: true }; + case 5: // coded slice, idr + return { slice: true, key: true }; + case 6: // sei + return {}; + case 7: // sps + this._parseSps(index + 1); + return {}; + case 8: // pps + return {}; + default: + Log.Warn("Unhandled unit type: ", unitType); + break; + } + return {}; + } + + parse() { + const startIndex = this._index; + let isKey = false; + + while (this._index < this._data.length) { + const startSequenceLen = this._getStartSequenceLen(this._index); + if (startSequenceLen == 0) { + throw new Error('Invalid start sequence in bit stream'); + } + + const { slice, key } = this._parseNalUnit(this._index + startSequenceLen); + + let nextIndex = this._indexOfNextNalUnit(this._index + startSequenceLen); + if (nextIndex == -1) { + this._index = this._data.length; + } else { + this._index = nextIndex; + } + + if (key) { + isKey = true; + } + if (slice) { + break; + } + } + + if (startIndex === this._index) { + return null; + } + + return { + frame: this._data.subarray(startIndex, this._index), + key: isKey, + }; + } +} + +class H264Context { + constructor(owner, width, height) { + this.lastUsed = 0; + this._owner = owner; + this._width = width; + this._height = height; + this._profileIdc = null; + this._constraintSet = null; + this._levelIdc = null; + this._decoder = null; + this._pendingFrames = []; + } + + _handleFrame(frame) { + let pending = this._pendingFrames.shift(); + if (pending === undefined) { + throw new Error("Pending frame queue empty when receiving frame from decoder"); + } + + if (pending.timestamp != frame.timestamp) { + throw new Error("Video frame timestamp mismatch. Expected " + + frame.timestamp + " but but got " + pending.timestamp); + } + + pending.frame = frame; + pending.ready = true; + pending.resolve(); + + if (!pending.keep) { + frame.close(); + } + } + + _flushPendingFrames() { + let pendingFrames = this._pendingFrames; + this._pendingFrames = []; + + for (const pending of pendingFrames) { + pending.failed = true; + pending.resolve(); + } + } + + _handleError(e) { + Log.Warn("Failed to decode frame:", e); + this._flushPendingFrames(); + this._owner._requestKeyFrame(); + } + + _configureDecoder(profileIdc, constraintSet, levelIdc) { + if (this._decoder === null || this._decoder.state === 'closed') { + this._decoder = new VideoDecoder({ + output: frame => this._handleFrame(frame), + error: e => this._handleError(e), + }); + } + const codec = 'avc1.' + + profileIdc.toString(16).padStart(2, '0') + + constraintSet.toString(16).padStart(2, '0') + + levelIdc.toString(16).padStart(2, '0'); + this._decoder.configure({ + codec: codec, + codecWidth: this._width, + codecHeight: this._height, + optimizeForLatency: true, + }); + } + + _preparePendingFrame(timestamp) { + let pending = { + timestamp: timestamp, + promise: null, + resolve: null, + frame: null, + ready: false, + failed: false, + keep: false, + }; + pending.promise = new Promise((resolve) => { + pending.resolve = resolve; + }); + this._pendingFrames.push(pending); + + return pending; + } + + decode(payload) { + let parser = new H264Parser(payload); + let result = null; + + // Ideally, this timestamp should come from the server, but we'll just + // approximate it instead. + let timestamp = Math.round(window.performance.now() * 1e3); + + while (true) { + let encodedFrame = parser.parse(); + if (encodedFrame === null) { + break; + } + + if (parser.profileIdc !== null) { + self._profileIdc = parser.profileIdc; + self._constraintSet = parser.constraintSet; + self._levelIdc = parser.levelIdc; + } + + if (this._decoder === null || this._decoder.state !== 'configured') { + if (!encodedFrame.key) { + Log.Warn("Missing key frame. Can't decode until one arrives"); + continue; + } + if (self._profileIdc === null) { + Log.Warn('Cannot config decoder. Have not received SPS and PPS yet.'); + continue; + } + this._configureDecoder(self._profileIdc, self._constraintSet, + self._levelIdc); + } + + result = this._preparePendingFrame(timestamp); + + const chunk = new EncodedVideoChunk({ + timestamp: timestamp, + type: encodedFrame.key ? 'key' : 'delta', + data: encodedFrame.frame, + }); + + try { + this._decoder.decode(chunk); + } catch (e) { + Log.Warn("Failed to decode:", e); + } + + // If the decoder wants to use the timestamp for approximating a + // minimum frame duration, it's better to have a non-zero difference + // between frames, so we will assume 60 Hz refresh rate as it is the + // most common one. + timestamp += 16667; + } + + // We only keep last frame of each payload + if (result !== null) { + result.keep = true; + } + + return result; + } +} + +export default class H264Decoder { + constructor(requestKeyFrame) { + this._tick = 0; + this._contexts = {}; + this._requestKeyFrame = requestKeyFrame; + } + + static check() { + return 'VideoDecoder' in window; + } + + _contextId(x, y, width, height) { + return [x, y, width, height].join(','); + } + + _findOldestContextId() { + let oldestTick = 0; + let oldestKey = undefined; + for (const [key, value] of Object.entries(this._contexts)) { + if (value.lastUsed < oldestTick) { + oldestTick = value.lastUsed; + oldestKey = key; + } + } + return oldestKey; + } + + _createContext(x, y, width, height) { + const maxContexts = 64; + if (Object.keys(this._contexts).length >= maxContexts) { + let oldestContextId = this._findOldestContextId(); + delete this._contexts[oldestContextId]; + } + let context = new H264Context(this, width, height); + this._contexts[this._contextId(x, y, width, height)] = context; + return context; + } + + _getContext(x, y, width, height) { + let context = this._contexts[this._contextId(x, y, width, height)]; + return context !== undefined ? context : this._createContext(x, y, width, height); + } + + _resetContext(x, y, width, height) { + delete this._contexts[this._contextId(x, y, width, height)]; + } + + _resetAllContexts() { + this._contexts = {}; + } + + decodeRect(x, y, width, height, sock, display, depth) { + const resetContextFlag = 1; + const resetAllContextsFlag = 2; + + if (sock.rQwait("h264 header", 8)) { + return false; + } + + const length = sock.rQshift32(); + const flags = sock.rQshift32(); + + if (sock.rQwait("h264 payload", length, 8)) { + return false; + } + + if (flags & resetAllContextsFlag) { + this._resetAllContexts(); + } else if (flags & resetContextFlag) { + this._resetContext(this._contextId(x, y, width, height)); + } + + let context = this._getContext(x, y, width, height); + context.lastUsed = this._tick++; + + if (length !== 0) { + let payload = sock.rQshiftBytes(length, false); + let frame = context.decode(payload); + if (frame !== null) { + display.videoFrame(x, y, width, height, frame); + } + } + + return true; + } +} diff --git a/core/display.js b/core/display.js index fcd626999..1a30b7246 100644 --- a/core/display.js +++ b/core/display.js @@ -380,6 +380,17 @@ export default class Display { }); } + videoFrame(x, y, width, height, frame) { + this._renderQPush({ + 'type': 'frame', + 'frame': frame, + 'x': x, + 'y': y, + 'width': width, + 'height': height + }); + } + blitImage(x, y, width, height, arr, offset, fromQueue) { if (this._renderQ.length !== 0 && !fromQueue) { // NB(directxman12): it's technically more performant here to use preallocated arrays, @@ -511,6 +522,36 @@ export default class Display { ready = false; } break; + case 'frame': + if (a.frame.ready) { + // The encoded frame may be larger than the rect due to + // limitations of the encoder, so we need to crop the + // frame. + let frame = a.frame.frame; + if (frame.codedWidth < a.width || frame.codedHeight < a.height) { + Log.Warn("Decoded video frame does not cover its full rectangle area. Expecting at least " + + a.width + "x" + a.height + " but got " + + frame.codedWidth + "x" + frame.codedHeight); + } + const sx = 0; + const sy = 0; + const sw = a.width; + const sh = a.height; + const dx = a.x; + const dy = a.y; + const dw = sw; + const dh = sh; + this._drawCtx.drawImage(frame, sx, sy, sw, sh, dx, dy, dw, dh); + this._damage(a.x, a.y, a.width, a.height); + frame.close(); + } else if (!a.frame.failed) { + let display = this; + a.frame.promise.then(() => { + display._scanRenderQ(); + }); + ready = false; + } + break; } if (ready) { diff --git a/core/encodings.js b/core/encodings.js index 1a79989d1..aa1fd4bbc 100644 --- a/core/encodings.js +++ b/core/encodings.js @@ -15,6 +15,7 @@ export const encodings = { encodingZRLE: 16, encodingTightPNG: -260, encodingJPEG: 21, + encodingH264: 50, pseudoEncodingQualityLevel9: -23, pseudoEncodingQualityLevel0: -32, @@ -44,6 +45,7 @@ export function encodingName(num) { case encodings.encodingZRLE: return "ZRLE"; case encodings.encodingTightPNG: return "TightPNG"; case encodings.encodingJPEG: return "JPEG"; + case encodings.encodingH264: return "H.264"; default: return "[unknown encoding " + num + "]"; } } diff --git a/core/rfb.js b/core/rfb.js index fdb6d5e38..823150e2d 100644 --- a/core/rfb.js +++ b/core/rfb.js @@ -35,6 +35,7 @@ import TightDecoder from "./decoders/tight.js"; import TightPNGDecoder from "./decoders/tightpng.js"; import ZRLEDecoder from "./decoders/zrle.js"; import JPEGDecoder from "./decoders/jpeg.js"; +import H264Decoder from "./decoders/h264.js"; // How many seconds to wait for a disconnect to finish const DISCONNECT_TIMEOUT = 3; @@ -249,6 +250,8 @@ export default class RFB extends EventTargetMixin { this._decoders[encodings.encodingTightPNG] = new TightPNGDecoder(); this._decoders[encodings.encodingZRLE] = new ZRLEDecoder(); this._decoders[encodings.encodingJPEG] = new JPEGDecoder(); + this._decoders[encodings.encodingH264] = + new H264Decoder(() => this._requestKeyFrame()); // NB: nothing that needs explicit teardown should be done // before this point, since this can throw an exception @@ -2125,6 +2128,9 @@ export default class RFB extends EventTargetMixin { encs.push(encodings.encodingCopyRect); // Only supported with full depth support if (this._fbDepth == 24) { + if (H264Decoder.check()) { + encs.push(encodings.encodingH264); + } encs.push(encodings.encodingTight); encs.push(encodings.encodingTightPNG); encs.push(encodings.encodingZRLE);