diff --git a/Sources/macSubtitleOCR/MKV/EBML/EBML.swift b/Sources/macSubtitleOCR/MKV/EBML/EBML.swift index 2302260..89f06ba 100644 --- a/Sources/macSubtitleOCR/MKV/EBML/EBML.swift +++ b/Sources/macSubtitleOCR/MKV/EBML/EBML.swift @@ -15,6 +15,8 @@ enum EBML { static let cluster: UInt32 = 0x1F43_B675 static let codecID: UInt32 = 0x86 static let codecPrivate: UInt32 = 0x63A2 + static let language: UInt32 = 0x22B59C + static let languageBCP47: UInt32 = 0x22B59D static let segmentID: UInt32 = 0x1853_8067 static let simpleBlock: UInt32 = 0xA3 static let timestamp: UInt32 = 0xE7 diff --git a/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift b/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift index f771b52..f299e91 100644 --- a/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift +++ b/Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift @@ -12,7 +12,7 @@ import os struct EBMLParser { // MARK: - Properties - private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "EBML") + // private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "EBML") private let fileHandle: FileHandle // MARK: - Lifecycle @@ -43,7 +43,7 @@ struct EBMLParser { for byte in fileHandle.readData(ofLength: Int(length - 1)) { value = (value << 8) | UInt64(byte) } - logger.debug("VINT: \(value.hex())") + // logger.debug("VINT: \(value.hex())") return value } diff --git a/Sources/macSubtitleOCR/MKV/MKV.swift b/Sources/macSubtitleOCR/MKV/MKV.swift index a8ef0f1..1410d29 100644 --- a/Sources/macSubtitleOCR/MKV/MKV.swift +++ b/Sources/macSubtitleOCR/MKV/MKV.swift @@ -13,7 +13,8 @@ struct MKV { // MARK: - Properties private(set) var tracks: [MKVTrack] = [] - private(set) var codecPrivate = [Int: String]() + private var codecPrivate = [Int: String]() + private var languages = [Int: String]() let fileHandle: FileHandle let endOfFile: UInt64 @@ -74,7 +75,8 @@ struct MKV { trackNumber: index, codecID: subtitleTracks[index + 1]!, trackData: data, - idxData: codecPrivate[index + 1])) + idxData: codecPrivate[index + 1], + language: languages[index + 1])) } } @@ -135,6 +137,7 @@ struct MKV { var trackNumber: Int? var trackType: UInt8? var codecID: String? + var language: String? while let (elementID, elementSize) = tryParseElement() { switch elementID { @@ -149,6 +152,12 @@ struct MKV { data.removeNullBytes() codecID = String(data: data, encoding: .ascii) logger.debug("Found codec ID: \(codecID!)") + case EBML.language, EBML.languageBCP47: + var data = fileHandle.readData(ofLength: Int(elementSize)) + data.removeNullBytes() + language = String(data: data, encoding: .ascii) + logger.debug("Found language: \(language!)") + languages[trackNumber!] = language default: fileHandle.seek(toFileOffset: fileHandle.offsetInFile + elementSize) } @@ -323,9 +332,10 @@ struct MKV { // If, by chance, we find a TimestampScale element, update it from the default if elementID == EBML.timestampScale { - timestampScale = Double(readFixedLengthNumber(fileHandle: fileHandle, length: Int(elementSize))) + let timestampScale = Double(readFixedLengthNumber(fileHandle: fileHandle, length: Int(elementSize))) + self.timestampScale = timestampScale // swiftformat:disable:next redundantSelf - // logger.debug("Found timestamp scale: \(self.timestampScale)") + logger.debug("Found timestamp scale: \(timestampScale)") continue } diff --git a/Sources/macSubtitleOCR/MKV/MKVTrack.swift b/Sources/macSubtitleOCR/MKV/MKVTrack.swift index 4a530bc..03124b6 100644 --- a/Sources/macSubtitleOCR/MKV/MKVTrack.swift +++ b/Sources/macSubtitleOCR/MKV/MKVTrack.swift @@ -13,4 +13,5 @@ struct MKVTrack { var codecID: String var trackData: Data var idxData: String? + var language: String? } diff --git a/Sources/macSubtitleOCR/macSubtitleOCR.swift b/Sources/macSubtitleOCR/macSubtitleOCR.swift index 6bc0def..32266e3 100644 --- a/Sources/macSubtitleOCR/macSubtitleOCR.swift +++ b/Sources/macSubtitleOCR/macSubtitleOCR.swift @@ -68,7 +68,7 @@ struct macSubtitleOCR: AsyncParsableCommand { // MARK: - Entrypoint - func run() async throws { + mutating func run() async throws { let fileHandler = FileHandler(outputDirectory: outputDirectory) let results = try await processInput() try await saveResults(fileHandler: fileHandler, results: results) @@ -76,7 +76,7 @@ struct macSubtitleOCR: AsyncParsableCommand { // MARK: - Methods - private func processInput() async throws -> [macSubtitleOCRResult] { + private mutating func processInput() async throws -> [macSubtitleOCRResult] { if ffmpegDecoder { try await processFFmpegDecoder() } else { @@ -84,7 +84,7 @@ struct macSubtitleOCR: AsyncParsableCommand { } } - private func processInternalDecoder() async throws -> [macSubtitleOCRResult] { + private mutating func processInternalDecoder() async throws -> [macSubtitleOCRResult] { var results: [macSubtitleOCRResult] = [] if input.hasSuffix(".sub") || input.hasSuffix(".idx") { @@ -103,6 +103,9 @@ struct macSubtitleOCR: AsyncParsableCommand { trackNumber: track.trackNumber, outputDirectory: URL(fileURLWithPath: outputDirectory)) } + if track.language != nil { + languages += ",\(track.language!)" + } if track.codecID == "S_HDMV/PGS" { let pgs: PGS = try track.trackData diff --git a/Tests/macSubtitleOCRTests.swift b/Tests/macSubtitleOCRTests.swift index fb455e7..342ba32 100644 --- a/Tests/macSubtitleOCRTests.swift +++ b/Tests/macSubtitleOCRTests.swift @@ -36,7 +36,7 @@ private func runTest(with options: [String]) async throws { let outputPath = options[1] // Run tests - let runner = try macSubtitleOCR.parse(options) + var runner = try macSubtitleOCR.parse(options) try await runner.run() try compareOutputs(with: outputPath, track: 0)