diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d99004c..78dc371 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -32,11 +32,11 @@ jobs: run: xcrun swift test -Xswiftc -DGITHUB_ACTIONS list - name: Test FFmpeg Decoder - timeout-minutes: 5 + timeout-minutes: 7 run: xcrun swift test --skip-build --filter ffmpegDecoder - name: Test Internal Decoder - timeout-minutes: 5 + timeout-minutes: 7 run: xcrun swift test --skip-build --filter internalDecoder - name: Periphery diff --git a/.gitignore b/.gitignore index 5feb79f..30a0631 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ /.VSCodeCounter/ /.index-build/ Tests/Resources/* -!Tests/Resources/sintel.* +!Tests/Resources/sintel*.* !Tests/Resources/README.md diff --git a/README.md b/README.md index 1da6f00..82a2998 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ For more details on performance, refer to the [Accuracy](#accuracy) section belo #### Supported Formats - PGS (`.mkv`, `.sup`) -- VobSub (`.sub`, `.idx`) +- VobSub (`.mkv`, `.sub`, `.idx`) ### Building the Project diff --git a/Sources/macSubtitleOCR/Extensions/BinaryIntegerExtensions.swift b/Sources/macSubtitleOCR/Extensions/BinaryIntegerExtensions.swift new file mode 100644 index 0000000..d41a77f --- /dev/null +++ b/Sources/macSubtitleOCR/Extensions/BinaryIntegerExtensions.swift @@ -0,0 +1,14 @@ +// +// BinaryIntegerExtensions.swift +// macSubtitleOCR +// +// Created by Ethan Dye on 10/20/24. +// Copyright © 2024 Ethan Dye. All rights reserved. +// + +extension BinaryInteger { + /// Returns a formatted hexadecimal string with `0x` prefix. + func hex() -> String { + String(format: "0x%0\(MemoryLayout.size)X", self as! CVarArg) + } +} diff --git a/Sources/macSubtitleOCR/FileHandler.swift b/Sources/macSubtitleOCR/FileHandler.swift index 5f31899..dd0a5c3 100644 --- a/Sources/macSubtitleOCR/FileHandler.swift +++ b/Sources/macSubtitleOCR/FileHandler.swift @@ -2,7 +2,7 @@ // FileHandler.swift // macSubtitleOCR // -// Created by Ethan Dye on 10/16/24. +// Created by Ethan Dye on 10/17/24. // Copyright © 2024 Ethan Dye. All rights reserved. // @@ -16,6 +16,9 @@ struct FileHandler { } func saveSRTFile(for result: macSubtitleOCRResult) throws { + if result.srt.isEmpty { + return + } let srtFilePath = URL(fileURLWithPath: outputDirectory).appendingPathComponent("track_\(result.trackNumber).srt") let srt = SRT(subtitles: result.srt.sorted { $0.index < $1.index }) srt.write(toFileAt: srtFilePath) diff --git a/Sources/macSubtitleOCR/MKV/EBML/EBML.swift b/Sources/macSubtitleOCR/MKV/EBML/EBML.swift index edbfb4f..2302260 100644 --- a/Sources/macSubtitleOCR/MKV/EBML/EBML.swift +++ b/Sources/macSubtitleOCR/MKV/EBML/EBML.swift @@ -14,6 +14,7 @@ enum EBML { static let chapters: UInt32 = 0x1043_A770 static let cluster: UInt32 = 0x1F43_B675 static let codecID: UInt32 = 0x86 + static let codecPrivate: UInt32 = 0x63A2 static let segmentID: UInt32 = 0x1853_8067 static let simpleBlock: UInt32 = 0xA3 static let timestamp: UInt32 = 0xE7 diff --git a/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift b/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift index bd6611b..569c94d 100644 --- a/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift +++ b/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift @@ -9,39 +9,37 @@ import Foundation import os -private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "ebml") +private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "EBML") // Helper function to read variable-length integers (VINT) from MKV (up to 8 bytes) -func readVINT(from fileHandle: FileHandle, unmodified: Bool = false) -> UInt64 { +func readVINT(from fileHandle: FileHandle, elementSize: Bool = false) -> UInt64 { guard let firstByte = fileHandle.readData(ofLength: 1).first else { return 0 } var length: UInt8 = 1 var mask: UInt8 = 0x80 // Find how many bytes are needed for the VINT (variable integer) - while (firstByte & mask) == 0 { + while mask != 0, firstByte & mask == 0 { length += 1 mask >>= 1 } - // Adjust mask based on length and unmodified flag - mask = (mask == 0x10) ? 0xFF : (length == 1 && !unmodified) ? firstByte : mask - 1 - - var value = UInt64(firstByte & mask) + var value = UInt64(firstByte) + if elementSize { + value &= ~UInt64(mask) + } - if length > 1 { - for byte in fileHandle.readData(ofLength: Int(length - 1)) { - value = (value << 8) | UInt64(byte) - } + for byte in fileHandle.readData(ofLength: Int(length - 1)) { + value = (value << 8) | UInt64(byte) } - logger.debug("VINT: 0x\(String(format: "%08X", value))") + logger.debug("VINT: \(value.hex())") return value } // Helper function to read an EBML element's ID and size -func readEBMLElement(from fileHandle: FileHandle, unmodified: Bool = false) -> (elementID: UInt32, elementSize: UInt64) { - let elementID = readVINT(from: fileHandle, unmodified: unmodified) - let elementSize = readVINT(from: fileHandle, unmodified: true) +func readEBMLElement(from fileHandle: FileHandle) -> (elementID: UInt32, elementSize: UInt64) { + let elementID = readVINT(from: fileHandle) + let elementSize = readVINT(from: fileHandle, elementSize: true) return (UInt32(elementID), elementSize) } diff --git a/Sources/macSubtitleOCR/MKV/MKVFileHandler.swift b/Sources/macSubtitleOCR/MKV/MKVFileHandler.swift index 78e71d9..01ff07c 100644 --- a/Sources/macSubtitleOCR/MKV/MKVFileHandler.swift +++ b/Sources/macSubtitleOCR/MKV/MKVFileHandler.swift @@ -81,7 +81,7 @@ class MKVFileHandler { return (elementSize, elementID) } else { // Skip over the element's data by seeking to its end - logger.debug("Found: \(elementID), but not \(targetID), skipping element") + logger.debug("\(elementID.hex()) != \(targetID.hex()), skipping element") fileHandle.seek(toFileOffset: fileHandle.offsetInFile + elementSize) } previousOffset = fileHandle.offsetInFile diff --git a/Sources/macSubtitleOCR/MKV/MKVHelpers.swift b/Sources/macSubtitleOCR/MKV/MKVHelpers.swift index 5de95ff..d597437 100644 --- a/Sources/macSubtitleOCR/MKV/MKVHelpers.swift +++ b/Sources/macSubtitleOCR/MKV/MKVHelpers.swift @@ -9,28 +9,33 @@ import Foundation // Function to read a fixed length number of bytes and convert in into a (Un)signed integer -func readFixedLengthNumber(fileHandle: FileHandle, length: Int, signed: Bool = false) -> Int64 { +func readFixedLengthNumber(fileHandle: FileHandle, length: Int) -> Int64 { let data = fileHandle.readData(ofLength: length) var result: Int64 = 0 - for byte in data { result = result << 8 | Int64(byte) } - - if signed, data.first! & 0x80 != 0 { - result -= Int64(1) << (8 * length) // Apply two's complement for signed integers - } - return result } // Encode the absolute timestamp as 4 bytes in big-endian format for PGS -func encodePTSForPGS(_ timestamp: Int64) -> [UInt8] { +func encodePTSForPGS(_ timestamp: UInt64) -> [UInt8] { withUnsafeBytes(of: UInt32(timestamp).bigEndian) { Array($0) } } -// Calculate the absolute timestamp with 90 kHz accuracy for PGS format -func calcAbsPTSForPGS(_ clusterTimestamp: Int64, _ blockTimestamp: Int64, _ timestampScale: Double) -> Int64 { +func encodePTSForVobSub(_ timestamp: UInt64) -> [UInt8] { + var buffer = [UInt8](repeating: 0, count: 5) // 5-byte buffer + + buffer[0] = (buffer[0] & 0xF1) | UInt8((timestamp >> 29) & 0x0E) + buffer[1] = UInt8((timestamp >> 22) & 0xFF) + buffer[2] = UInt8(((timestamp >> 14) & 0xFE) | 1) + buffer[3] = UInt8((timestamp >> 7) & 0xFF) + buffer[4] = UInt8((timestamp << 1) & 0xFF) + return buffer +} + +// Calculate the absolute timestamp with 90 kHz accuracy +func calcAbsPTS(_ clusterTimestamp: Int64, _ blockTimestamp: Int64) -> UInt64 { // The block timestamp is relative, so we add it to the cluster timestamp - Int64(((Double(clusterTimestamp) + Double(blockTimestamp)) / timestampScale) * 90000000) + UInt64((Double(clusterTimestamp) + Double(blockTimestamp)) * 90) } diff --git a/Sources/macSubtitleOCR/MKV/MKVSubtitleExtractor.swift b/Sources/macSubtitleOCR/MKV/MKVSubtitleExtractor.swift index ba078e6..40b93a5 100644 --- a/Sources/macSubtitleOCR/MKV/MKVSubtitleExtractor.swift +++ b/Sources/macSubtitleOCR/MKV/MKVSubtitleExtractor.swift @@ -11,12 +11,24 @@ import os class MKVSubtitleExtractor: MKVTrackParser { func saveSubtitleTrackData(trackNumber: Int, outputDirectory: URL) { - let trackPath = outputDirectory.appendingPathComponent("\(trackNumber)").appendingPathExtension("sup").path + let codecType = tracks[trackNumber].codecId + let fileExtension = (codecType == "S_HDMV/PGS") ? "sup" : "sub" + let trackPath = outputDirectory.appendingPathComponent("track_\(trackNumber)").appendingPathExtension(fileExtension) + .path if FileManager.default.createFile(atPath: trackPath, contents: tracks[trackNumber].trackData, attributes: nil) { logger.debug("Created file at path: \(trackPath)") } else { logger.error("Failed to create file at path: \(trackPath)!") } + + if fileExtension == "sub" { + let idxPath = outputDirectory.appendingPathComponent("track_\(trackNumber)").appendingPathExtension("idx") + do { + try tracks[trackNumber].idxData?.write(to: idxPath, atomically: true, encoding: .utf8) + } catch { + logger.error("Failed to write idx file at path: \(idxPath)") + } + } } } diff --git a/Sources/macSubtitleOCR/MKV/MKVTrack.swift b/Sources/macSubtitleOCR/MKV/MKVTrack.swift index c5bbc27..75a4cd8 100644 --- a/Sources/macSubtitleOCR/MKV/MKVTrack.swift +++ b/Sources/macSubtitleOCR/MKV/MKVTrack.swift @@ -12,4 +12,5 @@ struct MKVTrack { var trackNumber: Int var codecId: String var trackData: Data + var idxData: String? } diff --git a/Sources/macSubtitleOCR/MKV/MKVTrackParser.swift b/Sources/macSubtitleOCR/MKV/MKVTrackParser.swift index 19af5ed..439a376 100644 --- a/Sources/macSubtitleOCR/MKV/MKVTrackParser.swift +++ b/Sources/macSubtitleOCR/MKV/MKVTrackParser.swift @@ -12,11 +12,12 @@ import os class MKVTrackParser: MKVFileHandler { // MARK: - Properties - var tracks: [MKVTrack] = [] + private(set) var tracks: [MKVTrack] = [] + private(set) var codecPrivate = [Int: String]() // MARK: - Functions - func parseTracks(codec: String) throws { + func parseTracks(codec: [String]) throws { guard findElement(withID: EBML.segmentID) as? (UInt64, UInt32) != nil else { fatalError("Segment element not found in file: \(filePath)") } @@ -27,13 +28,13 @@ class MKVTrackParser: MKVFileHandler { let endOfTracksOffset = fileHandle.offsetInFile + tracksSize - var trackNumbers = [Int]() + var tracks = [Int: String]() while fileHandle.offsetInFile < endOfTracksOffset { if let (elementID, elementSize) = tryParseElement() { if elementID == EBML.trackEntryID { logger.debug("Found TrackEntry element") if let track = parseTrackEntry(codec: codec) { - trackNumbers.append(track) + tracks[track.0] = track.1 } } else if elementID == EBML.chapters { break @@ -43,13 +44,17 @@ class MKVTrackParser: MKVFileHandler { } } - let trackData = extractTrackData(trackNumber: trackNumbers) + let trackData = extractTrackData(from: tracks) trackData?.enumerated().forEach { index, data in - tracks.append(MKVTrack(trackNumber: index, codecId: codec, trackData: data)) + self.tracks.append(MKVTrack( + trackNumber: index, + codecId: tracks[index + 1]!, + trackData: data, + idxData: codecPrivate[index + 1])) } } - func extractTrackData(trackNumber: [Int]) -> [Data]? { + func extractTrackData(from tracks: [Int: String]) -> [Data]? { fileHandle.seek(toFileOffset: 0) // Step 1: Locate the Segment element @@ -58,7 +63,7 @@ class MKVTrackParser: MKVFileHandler { // swiftformat:disable:next redundantSelf logger.debug("Found Segment, Size: \(segmentSize), End Offset: \(segmentEndOffset), EOF: \(self.endOfFile)") - var trackData = [Data](repeating: Data(), count: trackNumber.count) + var trackData = [Data](repeating: Data(), count: tracks.count) // Step 2: Parse Clusters within the Segment while fileHandle.offsetInFile < segmentEndOffset { @@ -74,7 +79,7 @@ class MKVTrackParser: MKVFileHandler { // Step 4: Parse Blocks (SimpleBlock or Block) within each Cluster parseBlocks( within: clusterEndOffset, - trackNumber: trackNumber, + trackNumber: tracks, clusterTimestamp: clusterTimestamp, trackData: &trackData) } @@ -84,7 +89,7 @@ class MKVTrackParser: MKVFileHandler { // MARK: - Methods - private func parseTrackEntry(codec: String) -> Int? { + private func parseTrackEntry(codec: [String]) -> (Int, String)? { var trackNumber: Int? var trackType: UInt8? var codecId: String? @@ -92,10 +97,10 @@ class MKVTrackParser: MKVFileHandler { while let (elementID, elementSize) = tryParseElement() { switch elementID { case EBML.trackNumberID: - trackNumber = Int((fileHandle.readData(ofLength: 1).first)!) + trackNumber = Int((fileHandle.readData(ofLength: Int(elementSize)).first)!) logger.debug("Found track number: \(trackNumber!)") case EBML.trackTypeID: // Unused by us, left for debugging - trackType = fileHandle.readData(ofLength: 1).first + trackType = fileHandle.readData(ofLength: Int(elementSize)).first logger.debug("Found track type: \(trackType!)") case EBML.codecID: var data = fileHandle.readData(ofLength: Int(elementSize)) @@ -109,8 +114,21 @@ class MKVTrackParser: MKVFileHandler { } if let trackNumber, let codecId { - if codecId == codec { - return trackNumber + if codecId == "S_VOBSUB" { + while let (elementID, elementSize) = tryParseElement() { + switch elementID { + case EBML.codecPrivate: + var data = fileHandle.readData(ofLength: Int(elementSize)) + data.removeNullBytes() + codecPrivate[trackNumber] = String(data: data, encoding: .ascii) + default: + fileHandle.seek(toFileOffset: fileHandle.offsetInFile + elementSize) + } + if codecPrivate[trackNumber] != nil { break } + } + } + if codec.contains(codecId) { + return (trackNumber, codecId) } } return nil @@ -123,7 +141,7 @@ class MKVTrackParser: MKVFileHandler { return nil } - private func parseBlocks(within clusterEndOffset: UInt64, trackNumber: [Int], clusterTimestamp: Int64, + private func parseBlocks(within clusterEndOffset: UInt64, trackNumber: [Int: String], clusterTimestamp: Int64, trackData: inout [Data]) { while fileHandle.offsetInFile < clusterEndOffset { // swiftformat:disable:next redundantSelf @@ -142,9 +160,9 @@ class MKVTrackParser: MKVFileHandler { // Step 5: Read the track number in the block and compare it guard let (blockTrackNumber, blockTimestamp) = readTrackNumber(from: fileHandle) as? (UInt64, Int64) else { continue } - if trackNumber.contains(Int(blockTrackNumber)) { + if trackNumber[Int(blockTrackNumber)] == "S_HDMV/PGS" { // Step 6: Calculate and encode the timestamp as 4 bytes in big-endian (PGS format) - let absPTS = calcAbsPTSForPGS(clusterTimestamp, blockTimestamp, timestampScale) + let absPTS = calcAbsPTS(clusterTimestamp, blockTimestamp) let pgsPTS = encodePTSForPGS(absPTS) // Step 7: Read the block data and add needed PGS headers and timestamps @@ -154,14 +172,62 @@ class MKVTrackParser: MKVFileHandler { var offset = 0 while (offset + 3) <= raw.count { let segmentSize = min(Int(raw.getUInt16BE(at: offset + 1)! + 3), raw.count - offset) - logger.debug("Segment size \(segmentSize) at \(offset) type 0x\(String(format: "%02x", raw[offset]))") + logger.debug("Segment size \(segmentSize) at \(offset) type \(raw[offset].hex())") blockData.append(pgsHeader) blockData.append(raw.subdata(in: offset ..< segmentSize + offset)) offset += segmentSize } - trackData[trackNumber.firstIndex { $0 == Int(blockTrackNumber) }!].append(blockData) + trackData[Int(blockTrackNumber - 1)].append(blockData) + } else if trackNumber[Int(blockTrackNumber)] == "S_VOBSUB" { + // swiftformat:disable all + // Step 6: Calculate and encode the timestamp as 5 bytes in big-endian (VobSub format) + let absPTS = calcAbsPTS(clusterTimestamp, blockTimestamp) + let vobSubPTS = encodePTSForVobSub(absPTS) + var segmentSize = Int(blockSize - (fileHandle.offsetInFile - blockStartOffset)) + let pesLength = withUnsafeBytes(of: UInt16(min(segmentSize, 2028)).bigEndian) { Array($0) } + // 2028 is the maximum size of a VobSub segment, so we need to split the data into multiple segments + // The first segment will contain the PTS data, while the rest will not, so it only gets 2019 bytes of data + // The rest of the segments will get 2024 bytes of data + + // Step 7: Read the block data and add needed VobSub headers and timestamps + var vobSubHeader = Data([0x00, 0x00, 0x01, 0xBA, // PS packet start code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, // Null system clock reference + 0x00, 0x00, 0x00, // Null multiplexer rate + 0x00, // Stuffing length + 0x00, 0x00, 0x01, 0xBD]) // PES packet start code + vobSubHeader.append(contentsOf: pesLength) // PES packet length + vobSubHeader.append(contentsOf: [0x00, // PES miscellaneous data + 0x80, // PTS DTS flag + UInt8(vobSubPTS.count)]) // PTS data length + vobSubHeader.append(contentsOf: vobSubPTS) // PTS data + vobSubHeader.append(contentsOf: [0x00]) // Null stream ID + vobSubHeader.append(fileHandle.readData(ofLength: min(segmentSize, 2019))) + + segmentSize -= min(segmentSize, 2019) + + while segmentSize > 0 { + let nextSegmentSize = min(segmentSize, 2028) + let pesLength = withUnsafeBytes(of: UInt16(nextSegmentSize).bigEndian) { Array($0) } + vobSubHeader.append(contentsOf: [0x00, 0x00, 0x01, 0xBA, // PS packet start code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, // Null system clock reference + 0x00, 0x00, 0x00, // Null multiplexer rate + 0x00, // Stuffing length + 0x00, 0x00, 0x01, 0xBD]) // PES packet start code + vobSubHeader.append(contentsOf: pesLength) // PES packet length + vobSubHeader.append(contentsOf: [0x00, // PES miscellaneous data + 0x00, // PTS DTS flag + 0x00]) // PTS data length + vobSubHeader.append(contentsOf: [0x00]) // Null stream ID + vobSubHeader.append(fileHandle.readData(ofLength: min(segmentSize, 2024))) + segmentSize -= min(segmentSize, 2024) + } + + trackData[Int(blockTrackNumber - 1)].append(vobSubHeader) + let offset = String(format: "%09X", trackData[Int(blockTrackNumber - 1)].count - vobSubHeader.count) + codecPrivate[Int(blockTrackNumber)]?.append("\ntimestamp: \(formatTime(absPTS)), filepos: \(offset)") + // swiftformat:enable all } else { // Skip this block because it's for a different track fileHandle.seek(toFileOffset: blockStartOffset + blockSize) @@ -169,9 +235,19 @@ class MKVTrackParser: MKVFileHandler { } } + private func formatTime(_ time: UInt64) -> String { + let time = TimeInterval(time) / 90000 + let hours = Int(time) / 3600 + let minutes = (Int(time) % 3600) / 60 + let seconds = Int(time) % 60 + let milliseconds = Int((time - TimeInterval(Int(time))) * 1000) + + return String(format: "%02d:%02d:%02d:%03d", hours, minutes, seconds, milliseconds) + } + // Function to read the track number, timestamp, and lacing type (if any) from a Block or SimpleBlock header private func readTrackNumber(from fileHandle: FileHandle) -> (UInt64?, Int64) { - let trackNumber = readVINT(from: fileHandle, unmodified: true) + let trackNumber = readVINT(from: fileHandle, elementSize: true) let timestamp = readFixedLengthNumber(fileHandle: fileHandle, length: 2) let suffix = fileHandle.readData(ofLength: 1).first ?? 0 diff --git a/Sources/macSubtitleOCR/SubtitleProcessor.swift b/Sources/macSubtitleOCR/SubtitleProcessor.swift index f5db341..44b0989 100644 --- a/Sources/macSubtitleOCR/SubtitleProcessor.swift +++ b/Sources/macSubtitleOCR/SubtitleProcessor.swift @@ -2,7 +2,7 @@ // SubtitleProcessor.swift // macSubtitleOCR // -// Created by Ethan Dye on 10/16/24. +// Created by Ethan Dye on 10/17/24. // Copyright © 2024 Ethan Dye. All rights reserved. // diff --git a/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift b/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift index 857c57f..eca58d2 100644 --- a/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift +++ b/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift @@ -107,7 +107,7 @@ struct PGS { case 0x16, 0x17: offset += segmentLength default: - logger.warning("Unknown segment type: \(segmentType, format: .hex), skipping...") + logger.warning("Unknown segment type: \(segmentType.hex()), skipping...") offset += segmentLength return nil } diff --git a/Sources/macSubtitleOCR/Subtitles/SRT/SRT.swift b/Sources/macSubtitleOCR/Subtitles/SRT/SRT.swift index 910271b..ebb00c2 100644 --- a/Sources/macSubtitleOCR/Subtitles/SRT/SRT.swift +++ b/Sources/macSubtitleOCR/Subtitles/SRT/SRT.swift @@ -12,7 +12,7 @@ import os struct SRT { // MARK: - Properties - private var subtitles: [Subtitle] = [] + private var subtitles: [Subtitle] private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "SRT") // MARK: - Getters / Setters @@ -45,6 +45,7 @@ struct SRT { let nextSubtitle = subtitles[subtitle.index + 1] if nextSubtitle.startTimestamp! <= subtitle.endTimestamp! { logger.warning("Fixing subtitle index \(subtitle.index) end timestamp!") + logger.warning("Got \(subtitle.endTimestamp!) and \(nextSubtitle.startTimestamp!)") if nextSubtitle.startTimestamp! - subtitle.startTimestamp! > 5 { endTimestamp = subtitle.startTimestamp! + 5 } else { diff --git a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift index 2b2706c..5f7af11 100644 --- a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift +++ b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift @@ -22,11 +22,16 @@ struct VobSub { let subData = try subFile.readToEnd()! subFile.closeFile() let idx = VobSubIDX(URL(filePath: idx)) - subData.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in - extractSubtitleImages(buffer: pointer, idx: idx) + subData.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in + extractSubtitleImages(buffer: buffer, idx: idx) } } + init(_ buffer: UnsafeRawBufferPointer, _ idxData: String) throws { + let idx = VobSubIDX(idxData) + extractSubtitleImages(buffer: buffer, idx: idx) + } + // MARK: - Methods private mutating func extractSubtitleImages(buffer: UnsafeRawBufferPointer, idx: VobSubIDX) { diff --git a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubIDX.swift b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubIDX.swift index 49b396f..298086c 100644 --- a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubIDX.swift +++ b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubIDX.swift @@ -26,6 +26,14 @@ struct VobSubIDX { } } + init(_ idxData: String) { + do { + try parseIdxFile(idxData: idxData) + } catch { + fatalError("Failed to parse IDX file: \(error)") + } + } + // MARK: - Methods private mutating func parseIdxFile(idxData: String) throws { diff --git a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubParser.swift b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubParser.swift index 79cf70b..2b6a355 100644 --- a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubParser.swift +++ b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSubParser.swift @@ -43,7 +43,8 @@ struct VobSubParser { repeat { let startOffset = offset guard buffer.loadUnaligned(fromByteOffset: offset, as: UInt32.self).bigEndian == MPEG2PacketType.psPacket else { - fatalError("Failed to find PS packet at offset \(offset)") + logger.warning("No PS packet at offset \(offset), trying to decode anyway") + break } offset += 4 @@ -54,7 +55,8 @@ struct VobSubParser { guard buffer.loadUnaligned(fromByteOffset: offset, as: UInt32.self).bigEndian == MPEG2PacketType.pesPacket else { - fatalError("Failed to find PES packet at offset \(offset)") + logger.warning("No PES packet at offset \(offset), trying to decode anyway") + break } offset += 4 @@ -79,7 +81,7 @@ struct VobSubParser { presentationTimestamp += UInt64(buffer[offset + ptsDataLength - 3] & 0xFE) << 14 presentationTimestamp += UInt64(buffer[offset + ptsDataLength - 4]) << 22 presentationTimestamp += UInt64(buffer[offset] & 0x0E) << 29 - subtitle.startTimestamp = TimeInterval(presentationTimestamp) / 90 / 1000 + subtitle.startTimestamp = TimeInterval(presentationTimestamp) / 90000 } offset += ptsDataLength @@ -115,7 +117,7 @@ struct VobSubParser { } while offset < nextOffset && controlHeaderCopied < controlSize! if controlHeaderCopied < controlSize! { - logger.warning("Failed to read control header completely") + logger.warning("Failed to read control header completely, \(controlHeaderCopied)/\(controlSize!)") for _ in controlHeaderCopied ..< controlSize! { controlHeader.append(0xFF) } diff --git a/Sources/macSubtitleOCR/macSubtitleOCR.swift b/Sources/macSubtitleOCR/macSubtitleOCR.swift index 684533c..cc3b049 100644 --- a/Sources/macSubtitleOCR/macSubtitleOCR.swift +++ b/Sources/macSubtitleOCR/macSubtitleOCR.swift @@ -93,9 +93,9 @@ struct macSubtitleOCR: AsyncParsableCommand { input.replacingOccurrences(of: ".sub", with: ".idx")) let result = try await processSubtitle(sub.subtitles, trackNumber: 0) results.append(result) - } else if input.hasSuffix(".mkv") { + } else if input.hasSuffix(".mkv") || input.hasSuffix(".mks") { let mkvStream = MKVSubtitleExtractor(filePath: input) - try mkvStream.parseTracks(codec: "S_HDMV/PGS") + try mkvStream.parseTracks(codec: ["S_HDMV/PGS", "S_VOBSUB"]) for track in mkvStream.tracks { logger.debug("Found subtitle track: \(track.trackNumber), Codec: \(track.codecId)") if experimentalOptions.saveSubtitleFile { @@ -104,13 +104,21 @@ struct macSubtitleOCR: AsyncParsableCommand { outputDirectory: URL(fileURLWithPath: outputDirectory)) } - // Open the PGS data stream - let pgs: PGS = try mkvStream.tracks[track.trackNumber].trackData - .withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in - try PGS(buffer) - } - let result = try await processSubtitle(pgs.subtitles, trackNumber: track.trackNumber) - results.append(result) + if track.codecId == "S_HDMV/PGS" { + let pgs: PGS = try track.trackData + .withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in + try PGS(buffer) + } + let result = try await processSubtitle(pgs.subtitles, trackNumber: track.trackNumber) + results.append(result) + } else if track.codecId == "S_VOBSUB" { + let vobSub: VobSub = try track.trackData + .withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in + try VobSub(buffer, track.idxData ?? "") + } + let result = try await processSubtitle(vobSub.subtitles, trackNumber: track.trackNumber) + results.append(result) + } } } else if input.hasSuffix(".sup") { // Open the PGS data stream diff --git a/Tests/Resources/sintel.mkv b/Tests/Resources/sintel_pgs.mks similarity index 100% rename from Tests/Resources/sintel.mkv rename to Tests/Resources/sintel_pgs.mks diff --git a/Tests/Resources/sintel_pgs_vobsub.mks b/Tests/Resources/sintel_pgs_vobsub.mks new file mode 100644 index 0000000..60a2f06 Binary files /dev/null and b/Tests/Resources/sintel_pgs_vobsub.mks differ diff --git a/Tests/Resources/sintel_vobsub.mks b/Tests/Resources/sintel_vobsub.mks new file mode 100644 index 0000000..184cec6 Binary files /dev/null and b/Tests/Resources/sintel_vobsub.mks differ diff --git a/Tests/TestFilePaths.swift b/Tests/TestFilePaths.swift index 46476f0..d1c8373 100644 --- a/Tests/TestFilePaths.swift +++ b/Tests/TestFilePaths.swift @@ -11,7 +11,9 @@ import Foundation enum TestFilePaths: CaseIterable { case sup case sub - case mkv + case mkv_pgs + case mkv_vobsub + case mkv_pgs_vobsub var path: String { switch self { @@ -19,8 +21,12 @@ enum TestFilePaths: CaseIterable { Bundle.module.url(forResource: "sintel.sup", withExtension: nil)!.path case .sub: Bundle.module.url(forResource: "sintel.sub", withExtension: nil)!.path - case .mkv: - Bundle.module.url(forResource: "sintel.mkv", withExtension: nil)!.path + case .mkv_pgs: + Bundle.module.url(forResource: "sintel_pgs.mks", withExtension: nil)!.path + case .mkv_vobsub: + Bundle.module.url(forResource: "sintel_vobsub.mks", withExtension: nil)!.path + case .mkv_pgs_vobsub: + Bundle.module.url(forResource: "sintel_pgs_vobsub.mks", withExtension: nil)!.path } } } diff --git a/Tests/macSubtitleOCRTests.swift b/Tests/macSubtitleOCRTests.swift index cbc96f6..fb455e7 100644 --- a/Tests/macSubtitleOCRTests.swift +++ b/Tests/macSubtitleOCRTests.swift @@ -42,7 +42,7 @@ private func runTest(with options: [String]) async throws { try compareOutputs(with: outputPath, track: 0) // Compare output for track 1 if it's an MKV file - if options[0] == TestFilePaths.mkv.path { + if options[0].contains(".mks") { try compareOutputs(with: outputPath, track: 1) } }