Skip to content

Commit

Permalink
Use language tags in MKV for hints in OCR
Browse files Browse the repository at this point in the history
Fixes #28

Signed-off-by: Ethan Dye <[email protected]>
  • Loading branch information
ecdye committed Oct 23, 2024
1 parent 48ae722 commit 34e4e37
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 10 deletions.
2 changes: 2 additions & 0 deletions Sources/macSubtitleOCR/MKV/EBML/EBML.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ enum EBML {
static let cluster: UInt32 = 0x1F43_B675
static let codecID: UInt32 = 0x86
static let codecPrivate: UInt32 = 0x63A2
// Element IDs for a track's language tag. The MKV track parser matches either
// ID and decodes the element payload as an ASCII string.
static let language: UInt32 = 0x22B59C
// NOTE(review): presumably the BCP 47 form of the language element — confirm
// the ID against the Matroska element specification.
static let languageBCP47: UInt32 = 0x22B59D
static let segmentID: UInt32 = 0x1853_8067
static let simpleBlock: UInt32 = 0xA3
static let timestamp: UInt32 = 0xE7
Expand Down
4 changes: 2 additions & 2 deletions Sources/macSubtitleOCR/MKV/EBML/EBMLParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import os
struct EBMLParser {
// MARK: - Properties

private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "EBML")
// private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "EBML")
private let fileHandle: FileHandle

// MARK: - Lifecycle
Expand Down Expand Up @@ -43,7 +43,7 @@ struct EBMLParser {
for byte in fileHandle.readData(ofLength: Int(length - 1)) {
value = (value << 8) | UInt64(byte)
}
logger.debug("VINT: \(value.hex())")
// logger.debug("VINT: \(value.hex())")

return value
}
Expand Down
18 changes: 14 additions & 4 deletions Sources/macSubtitleOCR/MKV/MKV.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ struct MKV {
// MARK: - Properties

private(set) var tracks: [MKVTrack] = []
private(set) var codecPrivate = [Int: String]()
private var codecPrivate = [Int: String]()
private var languages = [Int: String]()

let fileHandle: FileHandle
let endOfFile: UInt64
Expand Down Expand Up @@ -74,7 +75,8 @@ struct MKV {
trackNumber: index,
codecID: subtitleTracks[index + 1]!,
trackData: data,
idxData: codecPrivate[index + 1]))
idxData: codecPrivate[index + 1],
language: languages[index + 1]))
}
}

Expand Down Expand Up @@ -135,6 +137,7 @@ struct MKV {
var trackNumber: Int?
var trackType: UInt8?
var codecID: String?
var language: String?

while let (elementID, elementSize) = tryParseElement() {
switch elementID {
Expand All @@ -149,6 +152,12 @@ struct MKV {
data.removeNullBytes()
codecID = String(data: data, encoding: .ascii)
logger.debug("Found codec ID: \(codecID!)")
case EBML.language, EBML.languageBCP47:
// Read the raw language tag and strip NUL padding before decoding it.
var data = fileHandle.readData(ofLength: Int(elementSize))
data.removeNullBytes()
language = String(data: data, encoding: .ascii)
// Bind instead of force-unwrapping: decoding yields nil for non-ASCII bytes,
// and the track number is nil if this element is parsed before it. Either
// case previously crashed the parser.
// NOTE(review): a tag seen before the track number is skipped here; if that
// ordering occurs in real files, record the language after the loop instead.
if let language = language, let trackNumber = trackNumber {
    logger.debug("Found language: \(language)")
    languages[trackNumber] = language
}
default:
fileHandle.seek(toFileOffset: fileHandle.offsetInFile + elementSize)
}
Expand Down Expand Up @@ -323,9 +332,10 @@ struct MKV {

// If, by chance, we find a TimestampScale element, update it from the default
if elementID == EBML.timestampScale {
timestampScale = Double(readFixedLengthNumber(fileHandle: fileHandle, length: Int(elementSize)))
let timestampScale = Double(readFixedLengthNumber(fileHandle: fileHandle, length: Int(elementSize)))
self.timestampScale = timestampScale
// swiftformat:disable:next redundantSelf
// logger.debug("Found timestamp scale: \(self.timestampScale)")
logger.debug("Found timestamp scale: \(timestampScale)")
continue
}

Expand Down
1 change: 1 addition & 0 deletions Sources/macSubtitleOCR/MKV/MKVTrack.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ struct MKVTrack {
// Codec identifier string for the track (compared against values such as "S_HDMV/PGS").
var codecID: String
// Data extracted for this track.
var trackData: Data
// Optional text taken from the track's codec-private entry, when one was found.
var idxData: String?
// Optional language tag read from the track's language element, when present.
var language: String?
}
9 changes: 6 additions & 3 deletions Sources/macSubtitleOCR/macSubtitleOCR.swift
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,23 @@ struct macSubtitleOCR: AsyncParsableCommand {

// MARK: - Entrypoint

func run() async throws {
mutating func run() async throws {
let fileHandler = FileHandler(outputDirectory: outputDirectory)
let results = try await processInput()
try await saveResults(fileHandler: fileHandler, results: results)
}

// MARK: - Methods

private func processInput() async throws -> [macSubtitleOCRResult] {
private mutating func processInput() async throws -> [macSubtitleOCRResult] {
if ffmpegDecoder {
try await processFFmpegDecoder()
} else {
try await processInternalDecoder()
}
}

private func processInternalDecoder() async throws -> [macSubtitleOCRResult] {
private mutating func processInternalDecoder() async throws -> [macSubtitleOCRResult] {
var results: [macSubtitleOCRResult] = []

if input.hasSuffix(".sub") || input.hasSuffix(".idx") {
Expand All @@ -103,6 +103,9 @@ struct macSubtitleOCR: AsyncParsableCommand {
trackNumber: track.trackNumber,
outputDirectory: URL(fileURLWithPath: outputDirectory))
}
// Append the track's language tag, when the container supplied one, to the
// comma-separated language list. Optional binding replaces the nil-check plus
// force unwrap; behavior is unchanged.
if let trackLanguage = track.language {
    languages += ",\(trackLanguage)"
}

if track.codecID == "S_HDMV/PGS" {
let pgs: PGS = try track.trackData
Expand Down
2 changes: 1 addition & 1 deletion Tests/macSubtitleOCRTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ private func runTest(with options: [String]) async throws {
let outputPath = options[1]

// Run tests
let runner = try macSubtitleOCR.parse(options)
var runner = try macSubtitleOCR.parse(options)
try await runner.run()

try compareOutputs(with: outputPath, track: 0)
Expand Down

0 comments on commit 34e4e37

Please sign in to comment.