From a5892ec11048bb86e8f00e2037d52d506a973409 Mon Sep 17 00:00:00 2001 From: Ethan Dye Date: Thu, 24 Oct 2024 15:36:18 -0600 Subject: [PATCH] Add option to disable l to I correction Signed-off-by: Ethan Dye --- .../Subtitles/SubtitleProcessor.swift | 14 ++++++++++---- Sources/macSubtitleOCR/macSubtitleOCR.swift | 7 +++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Sources/macSubtitleOCR/Subtitles/SubtitleProcessor.swift b/Sources/macSubtitleOCR/Subtitles/SubtitleProcessor.swift index a963796..6621a0d 100644 --- a/Sources/macSubtitleOCR/Subtitles/SubtitleProcessor.swift +++ b/Sources/macSubtitleOCR/Subtitles/SubtitleProcessor.swift @@ -52,14 +52,15 @@ struct SubtitleProcessor { private let language: String private let fastMode: Bool private let disableLanguageCorrection: Bool + private let disableICorrection: Bool private let forceOldAPI: Bool private let outputDirectory: String private let maxConcurrentTasks: Int private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "SubtitleProcessor") init(subtitles: [Subtitle], trackNumber: Int, invert: Bool, saveImages: Bool, language: String, fastMode: Bool, - disableLanguageCorrection: Bool, - forceOldAPI: Bool, outputDirectory: String, maxConcurrentTasks: Int) { + disableLanguageCorrection: Bool, disableICorrection: Bool, forceOldAPI: Bool, outputDirectory: String, + maxConcurrentTasks: Int) { self.subtitles = subtitles self.trackNumber = trackNumber self.invert = invert @@ -67,6 +68,7 @@ struct SubtitleProcessor { self.language = language self.fastMode = fastMode self.disableLanguageCorrection = disableLanguageCorrection + self.disableICorrection = disableICorrection self.forceOldAPI = forceOldAPI self.outputDirectory = outputDirectory self.maxConcurrentTasks = maxConcurrentTasks @@ -103,8 +105,12 @@ struct SubtitleProcessor { } let (subtitleText, subtitleLines) = await recognizeText(from: subImage) - let pattern = #"\bl\b"# // Replace l with I when it's a single character - subtitle.text = subtitleText.replacingOccurrences(of: pattern, with: "I", options: .regularExpression) + if language.contains("en"), !disableICorrection { + let pattern = #"\bl\b"# // Replace l with I when it's a single character + subtitle.text = subtitleText.replacingOccurrences(of: pattern, with: "I", options: .regularExpression) + } else { + subtitle.text = subtitleText + } subtitle.imageData = nil // Clear the image data to save memory let jsonOut = SubtitleJSONResult(index: subIndex, lines: subtitleLines, text: subtitleText) diff --git a/Sources/macSubtitleOCR/macSubtitleOCR.swift b/Sources/macSubtitleOCR/macSubtitleOCR.swift index c82cfc3..1044811 100644 --- a/Sources/macSubtitleOCR/macSubtitleOCR.swift +++ b/Sources/macSubtitleOCR/macSubtitleOCR.swift @@ -26,14 +26,13 @@ struct ExperimentalOptions: ParsableArguments { var disableLanguageCorrection = false } -// The main struct representing the macSubtitleOCR command-line tool. @main struct macSubtitleOCR: AsyncParsableCommand { // MARK: - Properties static let configuration = CommandConfiguration( commandName: "macSubtitleOCR", - abstract: "macSubtitleOCR - Convert bitmap subtitles into SubRip format using the macOS OCR engine") + abstract: "macSubtitleOCR - Convert bitmap subtitles into SubRip format using the macOS Vision framework") @Argument(help: "Input subtitle file (supported formats: .sup, .sub, .idx, .mkv)") var input: String @@ -63,6 +62,9 @@ struct macSubtitleOCR: AsyncParsableCommand { @Flag(help: "Use FFmpeg decoder") var ffmpegDecoder = false + @Flag(help: "Disable correction of 'l' to 'I' in OCR results") + var disableICorrection = false + @OptionGroup(title: "Experimental Options", visibility: .hidden) var experimentalOptions: ExperimentalOptions @@ -164,6 +166,7 @@ struct macSubtitleOCR: AsyncParsableCommand { language: languages, fastMode: experimentalOptions.fastMode, disableLanguageCorrection: experimentalOptions.disableLanguageCorrection, + disableICorrection: disableICorrection, forceOldAPI: experimentalOptions.forceOldAPI, outputDirectory: outputDirectory, maxConcurrentTasks: maxThreads)