vicliv committed on
Commit
6dadbc1
·
1 Parent(s): f607576

Speed up screenshot OCR by downscaling before tesseract

Browse files
Files changed (1) hide show
  1. app/screenshot.py +29 -8
app/screenshot.py CHANGED
@@ -51,16 +51,28 @@ SECOND_PASS_MIN_SHRINK = 0.02
51
  # OCR via tesseract subprocess
52
  # ──────────────────────────────────────────────────────────────
53
 
 
 
 
54
  def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
55
  """Call `tesseract` CLI, parse TSV output, return (x, y, w, h) boxes."""
 
 
 
 
 
 
 
 
 
56
  tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
57
  try:
58
- Image.fromarray(image).save(tmp.name)
59
  result = subprocess.run(
60
  ["tesseract", tmp.name, "stdout", "--psm", "3", "tsv"],
61
  capture_output=True,
62
  text=True,
63
- timeout=30,
64
  )
65
  except FileNotFoundError:
66
  print("[screenshot] tesseract binary not found")
@@ -105,12 +117,21 @@ def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
105
  continue
106
  if conf < min_conf:
107
  continue
108
- boxes.append((
109
- int(cols[idx_left]),
110
- int(cols[idx_top]),
111
- int(cols[idx_width]),
112
- int(cols[idx_height]),
113
- ))
 
 
 
 
 
 
 
 
 
114
  return boxes
115
 
116
 
 
51
  # OCR via tesseract subprocess
52
  # ──────────────────────────────────────────────────────────────
53
 
54
+ OCR_MAX_DIM = 1500 # downscale before tesseract for speed; boxes scaled back
55
+
56
+
57
  def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
58
  """Call `tesseract` CLI, parse TSV output, return (x, y, w, h) boxes."""
59
+ h, w = image.shape[:2]
60
+ scale = 1.0
61
+ ocr_image = image
62
+ if max(h, w) > OCR_MAX_DIM:
63
+ scale = OCR_MAX_DIM / float(max(h, w))
64
+ new_w = max(1, int(round(w * scale)))
65
+ new_h = max(1, int(round(h * scale)))
66
+ ocr_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
67
+
68
  tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
69
  try:
70
+ Image.fromarray(ocr_image).save(tmp.name)
71
  result = subprocess.run(
72
  ["tesseract", tmp.name, "stdout", "--psm", "3", "tsv"],
73
  capture_output=True,
74
  text=True,
75
+ timeout=60,
76
  )
77
  except FileNotFoundError:
78
  print("[screenshot] tesseract binary not found")
 
117
  continue
118
  if conf < min_conf:
119
  continue
120
+ if scale != 1.0:
121
+ inv = 1.0 / scale
122
+ boxes.append((
123
+ int(round(int(cols[idx_left]) * inv)),
124
+ int(round(int(cols[idx_top]) * inv)),
125
+ int(round(int(cols[idx_width]) * inv)),
126
+ int(round(int(cols[idx_height]) * inv)),
127
+ ))
128
+ else:
129
+ boxes.append((
130
+ int(cols[idx_left]),
131
+ int(cols[idx_top]),
132
+ int(cols[idx_width]),
133
+ int(cols[idx_height]),
134
+ ))
135
  return boxes
136
 
137