Skip to content

Commit

Permalink
Two-pass detection to balance speed and accuracy
Browse files Browse the repository at this point in the history
  • Loading branch information
cyanzhong committed Dec 2, 2024
1 parent 90da8f9 commit 38bfdc9
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
57 changes: 40 additions & 17 deletions TextGrabber2/Sources/App.swift
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,15 @@ final class App: NSObject, NSApplicationDelegate {
item.setOn(SMAppService.mainApp.isEnabled)
return item
}()

/// Locates the status item's button on screen.
///
/// - Returns: The button's rectangle in screen coordinates, paired with the screen
///   of the button's window (falling back to `NSScreen.main`), or `nil` when the
///   status item has no button or the button is not attached to a window.
func statusItemPosition() -> (CGRect, NSScreen?)? {
  guard let button = statusItem.button, let window = button.window else {
    Logger.log(.error, "Missing button or window to provide positioning info")
    return nil
  }

  // `convertToScreen(_:)` expects a rectangle in window coordinates, whereas
  // `button.frame` is expressed in the superview's coordinate space. Convert the
  // button's bounds to window coordinates first (`to: nil`) so the result stays
  // correct even if the button is not placed at its window's origin.
  let rectInWindow = button.convert(button.bounds, to: nil)
  return (window.convertToScreen(rectInWindow), window.screen ?? .main)
}
}

// MARK: - Life Cycle
Expand Down Expand Up @@ -220,6 +229,7 @@ private extension App {
return Logger.assertFail("Missing menu to proceed")
}

currentResult = nil
pasteboardChangeCount = NSPasteboard.general.changeCount
clipboardItem.isHidden = NSPasteboard.general.isEmpty
saveImageItem.isEnabled = false
Expand All @@ -232,23 +242,36 @@ private extension App {
howToItem.isHidden = true

Task {
let resultData = await Recognizer.detect(image: image)
currentResult = resultData

hintItem.title = resultData.candidates.isEmpty ? Localized.menuTitleHintCapture : Localized.menuTitleHintCopy
howToItem.isHidden = !resultData.candidates.isEmpty
copyAllItem.isHidden = resultData.candidates.count < 2
saveImageItem.isEnabled = true

let separator = NSMenuItem.separator()
menu.insertItem(separator, at: menu.index(of: howToItem) + 1)
menu.removeItems { $0 is ResultItem }

for text in resultData.candidates.reversed() {
let item = ResultItem(title: text)
item.addAction { NSPasteboard.general.string = text }
menu.insertItem(item, at: menu.index(of: separator) + 1)
}
let fastResult = await Recognizer.detect(image: image, level: .fast)
showResult(fastResult, in: menu)

let accurateResult = await Recognizer.detect(image: image, level: .accurate)
showResult(accurateResult, in: menu)
}
}

/// Refreshes the menu so that it reflects `resultData`.
///
/// Returns early when `resultData` equals the stored `currentResult`. Otherwise it
/// stores the new result, updates the hint, how-to, copy-all and save-image items,
/// and replaces the existing `ResultItem` entries with one entry per candidate.
///
/// - Parameters:
///   - resultData: The recognition result to display.
///   - menu: The menu whose items are updated.
func showResult(_ resultData: Recognizer.ResultData, in menu: NSMenu) {
  if currentResult == resultData {
    #if DEBUG
      Logger.log(.debug, "No change in result data")
    #endif
    return
  }

  currentResult = resultData

  let candidates = resultData.candidates
  let hasCandidates = !candidates.isEmpty
  hintItem.title = hasCandidates ? Localized.menuTitleHintCopy : Localized.menuTitleHintCapture
  howToItem.isHidden = hasCandidates
  copyAllItem.isHidden = candidates.count < 2
  saveImageItem.isEnabled = true

  // NOTE(review): every call that gets past the early return inserts a new
  // separator here and nothing in this method removes an earlier one; confirm
  // that stale separators are cleaned up elsewhere.
  let divider = NSMenuItem.separator()
  menu.insertItem(divider, at: menu.index(of: howToItem) + 1)
  menu.removeItems { $0 is ResultItem }

  // Place the candidates directly below the separator, in their original order.
  let firstIndex = menu.index(of: divider) + 1
  for (offset, text) in candidates.enumerated() {
    let entry = ResultItem(title: text)
    entry.addAction { NSPasteboard.general.string = text }
    menu.insertItem(entry, at: firstIndex + offset)
  }
}
}
10 changes: 9 additions & 1 deletion TextGrabber2/Sources/Extensions/NSWindow+Extension.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@ extension NSWindow {
}

var preferredRect = originalRect
preferredRect.origin.x += preferredRect.size.width - Constants.preferredWidth
preferredRect.size.width = Constants.preferredWidth

// Ensure the window always appears below the status item, within the screen boundaries
if let (rect, screen) = (NSApp.delegate as? App)?.statusItemPosition() {
preferredRect.origin.x = min(
max(rect.minX - Constants.breathPadding, Constants.breathPadding),
(screen?.frame.width ?? 1e6) - Constants.preferredWidth - Constants.breathPadding
)
}

swizzled_setFrame(preferredRect, display: display, animate: animate)
}
}
Expand All @@ -41,5 +48,6 @@ extension NSWindow {
private extension NSWindow {
  /// Layout metrics used when adjusting the window frame.
  enum Constants {
    /// Width applied to the window's preferred frame.
    static let preferredWidth = 240.0
    /// Padding used when clamping the window's horizontal position.
    static let breathPadding = 8.0
  }
}
7 changes: 3 additions & 4 deletions TextGrabber2/Sources/Recognizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import AppKit
https://developer.apple.com/documentation/vision/recognizing_text_in_images
*/
enum Recognizer {
struct ResultData {
struct ResultData: Equatable {
let candidates: [String]

init(candidates: [String]) {
Expand All @@ -37,7 +37,7 @@ enum Recognizer {
}
}

static func detect(image: CGImage) async -> ResultData {
static func detect(image: CGImage, level: VNRequestTextRecognitionLevel) async -> ResultData {
await withCheckedContinuation { continuation in
let request = VNRecognizeTextRequest { request, error in
let candidates = request.results?
Expand All @@ -49,8 +49,7 @@ enum Recognizer {
}
}

// Prefer accuracy over speed
request.recognitionLevel = .accurate
request.recognitionLevel = level
request.usesLanguageCorrection = true
request.automaticallyDetectsLanguage = true

Expand Down

0 comments on commit 38bfdc9

Please sign in to comment.