Fix: Patch TextDetector to have a better detection on single digit

LmeSzinc 2024-05-19 20:41:19 +08:00
parent 9795103f4d
commit 17c4a13d4f
2 changed files with 49 additions and 10 deletions


@@ -1,15 +1,13 @@
-import re
 import time
 from datetime import timedelta

-import cv2
 import numpy as np
 from pponnxcr.predict_system import BoxedResult

 import module.config.server as server
 from module.base.button import ButtonWrapper
 from module.base.decorator import cached_property
-from module.base.utils import area_pad, corner2area, crop, extract_white_letters, float2str
+from module.base.utils import *
 from module.exception import ScriptError
 from module.logger import logger
 from module.ocr.models import OCR_MODEL, TextSystem
@@ -423,6 +421,12 @@ class Duration(Ocr):

 class OcrWhiteLetterOnComplexBackground(Ocr):
     white_preprocess = True
+    # 0.6 by default, 0.2 for lower
+    box_thresh = 0.2
+    # (x, y) Enlarge detected boxes to `min_box`
+    # So standalone digits can be better detected
+    # Note that min_box should be 4px larger than the actual letter
+    min_box = None

     def pre_process(self, image):
         if self.white_preprocess:
@@ -430,12 +434,46 @@ class OcrWhiteLetterOnComplexBackground(Ocr):
             image = cv2.merge([image, image, image])
         return image

+    @staticmethod
+    def enlarge_box(box, min_box):
+        area = corner2area(box)
+        center = (int(x) for x in area_center(area))
+        size_x, size_y = area_size(area)
+        min_x, min_y = min_box
+        if size_x < min_x or size_y < min_y:
+            size_x = max(size_x, min_x) // 2
+            size_y = max(size_y, min_y) // 2
+            area = area_offset((-size_x, -size_y, size_x, size_y), center)
+            box = area2corner(area)
+            box = np.array([box[0], box[1], box[3], box[2]]).astype(np.float32)
+            return box
+        else:
+            return box
+
+    def enlarge_boxes(self, boxes):
+        if self.min_box is None:
+            return boxes
+        boxes = [self.enlarge_box(box, self.min_box) for box in boxes]
+        boxes = np.array(boxes)
+        return boxes
+
     def detect_and_ocr(self, *args, **kwargs):
         # Try hard to lower TextSystem.box_thresh
         backup = self.model.text_detector.box_thresh
         self.model.text_detector.box_thresh = 0.2
-        result = super().detect_and_ocr(*args, **kwargs)
-        self.model.text_detector.box_thresh = backup
+        # Patch TextDetector
+        text_detector = self.model.text_detector
+
+        def text_detector_with_min_box(*args, **kwargs):
+            dt_boxes, elapse = text_detector(*args, **kwargs)
+            dt_boxes = self.enlarge_boxes(dt_boxes)
+            return dt_boxes, elapse
+
+        self.model.text_detector = text_detector_with_min_box
+        try:
+            result = super().detect_and_ocr(*args, **kwargs)
+        finally:
+            self.model.text_detector.box_thresh = backup
+            self.model.text_detector = text_detector
         return result
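The enlarge_box geometry above can be illustrated in isolation. Below is a minimal standalone sketch (plain numpy, not the repo's corner2area/area_center/area_offset helpers): if a detected quadrilateral is smaller than min_box, grow it symmetrically around its center so a lone digit gets enough surrounding pixels; the box is assumed to be four (x, y) corner points as returned by the pponnxcr text detector, and enlarge_to_min_box is an illustrative name, not project code.

import numpy as np

def enlarge_to_min_box(box, min_box=(16, 20)):
    # box: four (x, y) corner points of one detected text region
    box = np.asarray(box, dtype=np.float32)
    x_min, y_min = box.min(axis=0)
    x_max, y_max = box.max(axis=0)
    size_x, size_y = x_max - x_min, y_max - y_min
    min_x, min_y = min_box
    if size_x >= min_x and size_y >= min_y:
        return box  # already large enough, keep the detector's box
    # Keep the center, expand each side to at least half of min_box
    center_x, center_y = (x_min + x_max) / 2, (y_min + y_max) / 2
    half_x, half_y = max(size_x, min_x) / 2, max(size_y, min_y) / 2
    return np.array([
        [center_x - half_x, center_y - half_y],  # top-left
        [center_x + half_x, center_y - half_y],  # top-right
        [center_x + half_x, center_y + half_y],  # bottom-right
        [center_x - half_x, center_y + half_y],  # bottom-left
    ], dtype=np.float32)

# A 9x13 px box around a standalone digit becomes 16x20 px around the same center
digit_box = [(100, 40), (109, 40), (109, 53), (100, 53)]
print(enlarge_to_min_box(digit_box))

Subclasses that leave min_box as None keep the detector's raw boxes; setting a tuple opts into this enlargement.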


@@ -1,6 +1,5 @@
 import re
-import cv2

 from pponnxcr.predict_system import BoxedResult

 from module.base.utils import area_center, area_in_area
@@ -28,6 +27,8 @@ class OcrItemName(Ocr):

 class OcrPlannerResult(OcrWhiteLetterOnComplexBackground, OcrItemName):
+    min_box = (16, 20)
+
     def __init__(self):
         # Planner currently CN only
         super().__init__(OCR_RESULT, lang='cn')
@@ -65,10 +66,10 @@ class OcrPlannerResult(OcrWhiteLetterOnComplexBackground, OcrItemName):
         return super().detect_and_ocr(image, *args, **kwargs)

     def pre_process(self, image):
-        r, g, b = cv2.split(image)
-        cv2.max(r, g, dst=r)
-        cv2.max(r, b, dst=r)
-        image = cv2.merge([r, r, r])
+        # gray = rgb2gray(image)
+        # from PIL import Image
+        # Image.fromarray(gray).show()
+        # image = cv2.merge([gray, gray, gray])
         return image
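For reference, the temporary monkey-patch in detect_and_ocr follows a generic wrap-and-restore pattern: swap the detector callable for a wrapper that post-processes its boxes, and restore the original in a finally block so later calls are unaffected. The following self-contained toy sketch shows only that pattern; FakeTextSystem, run_with_patched_detector, and grow_boxes are made-up names, not project or pponnxcr code.

class FakeTextSystem:
    # Toy stand-in for an OCR model whose text_detector is a plain callable attribute
    def __init__(self):
        self.text_detector = lambda image: ([[(0, 0), (8, 0), (8, 10), (0, 10)]], 0.01)

    def detect_and_ocr(self, image):
        boxes, _elapse = self.text_detector(image)
        return boxes


def run_with_patched_detector(model, post_process, image):
    original = model.text_detector

    def patched_detector(*args, **kwargs):
        boxes, elapse = original(*args, **kwargs)
        return post_process(boxes), elapse  # e.g. enlarge undersized boxes

    model.text_detector = patched_detector
    try:
        return model.detect_and_ocr(image)
    finally:
        # Restore even if OCR raised, so the next caller sees the unpatched detector
        model.text_detector = original


def grow_boxes(boxes):
    # placeholder post-processing; the commit above enlarges boxes to min_box
    return boxes


print(run_with_patched_detector(FakeTextSystem(), grow_boxes, image=None))

With that pattern in the base class, OcrPlannerResult only needs to set min_box = (16, 20) to get the single-digit-friendly detection.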