Spaces:
Running
Running
Speed up screenshot OCR by downscaling before tesseract
Browse files- app/screenshot.py +29 -8
app/screenshot.py
CHANGED
|
@@ -51,16 +51,28 @@ SECOND_PASS_MIN_SHRINK = 0.02
|
|
| 51 |
# OCR via tesseract subprocess
|
| 52 |
# ──────────────────────────────────────────────────────────────
|
| 53 |
|
|
|
|
|
|
|
|
|
|
| 54 |
def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
|
| 55 |
"""Call `tesseract` CLI, parse TSV output, return (x, y, w, h) boxes."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
|
| 57 |
try:
|
| 58 |
-
Image.fromarray(
|
| 59 |
result = subprocess.run(
|
| 60 |
["tesseract", tmp.name, "stdout", "--psm", "3", "tsv"],
|
| 61 |
capture_output=True,
|
| 62 |
text=True,
|
| 63 |
-
timeout=
|
| 64 |
)
|
| 65 |
except FileNotFoundError:
|
| 66 |
print("[screenshot] tesseract binary not found")
|
|
@@ -105,12 +117,21 @@ def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
|
|
| 105 |
continue
|
| 106 |
if conf < min_conf:
|
| 107 |
continue
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
return boxes
|
| 115 |
|
| 116 |
|
|
|
|
| 51 |
# OCR via tesseract subprocess
|
| 52 |
# ──────────────────────────────────────────────────────────────
|
| 53 |
|
| 54 |
+
OCR_MAX_DIM = 1500 # downscale before tesseract for speed; boxes scaled back
|
| 55 |
+
|
| 56 |
+
|
| 57 |
def run_tesseract(image: np.ndarray, min_conf: int = 30) -> list[tuple]:
|
| 58 |
"""Call `tesseract` CLI, parse TSV output, return (x, y, w, h) boxes."""
|
| 59 |
+
h, w = image.shape[:2]
|
| 60 |
+
scale = 1.0
|
| 61 |
+
ocr_image = image
|
| 62 |
+
if max(h, w) > OCR_MAX_DIM:
|
| 63 |
+
scale = OCR_MAX_DIM / float(max(h, w))
|
| 64 |
+
new_w = max(1, int(round(w * scale)))
|
| 65 |
+
new_h = max(1, int(round(h * scale)))
|
| 66 |
+
ocr_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
|
| 67 |
+
|
| 68 |
tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
|
| 69 |
try:
|
| 70 |
+
Image.fromarray(ocr_image).save(tmp.name)
|
| 71 |
result = subprocess.run(
|
| 72 |
["tesseract", tmp.name, "stdout", "--psm", "3", "tsv"],
|
| 73 |
capture_output=True,
|
| 74 |
text=True,
|
| 75 |
+
timeout=60,
|
| 76 |
)
|
| 77 |
except FileNotFoundError:
|
| 78 |
print("[screenshot] tesseract binary not found")
|
|
|
|
| 117 |
continue
|
| 118 |
if conf < min_conf:
|
| 119 |
continue
|
| 120 |
+
if scale != 1.0:
|
| 121 |
+
inv = 1.0 / scale
|
| 122 |
+
boxes.append((
|
| 123 |
+
int(round(int(cols[idx_left]) * inv)),
|
| 124 |
+
int(round(int(cols[idx_top]) * inv)),
|
| 125 |
+
int(round(int(cols[idx_width]) * inv)),
|
| 126 |
+
int(round(int(cols[idx_height]) * inv)),
|
| 127 |
+
))
|
| 128 |
+
else:
|
| 129 |
+
boxes.append((
|
| 130 |
+
int(cols[idx_left]),
|
| 131 |
+
int(cols[idx_top]),
|
| 132 |
+
int(cols[idx_width]),
|
| 133 |
+
int(cols[idx_height]),
|
| 134 |
+
))
|
| 135 |
return boxes
|
| 136 |
|
| 137 |
|