From 1f47d63af85ceb141ac3996783cbad5924653846 Mon Sep 17 00:00:00 2001 From: LmeSzinc <37934724+LmeSzinc@users.noreply.github.com> Date: Sun, 17 Sep 2023 08:09:00 +0800 Subject: [PATCH] Fix: Assignment duration and name OCR --- module/ocr/ocr.py | 21 ++++++++++++++++----- tasks/assignment/ui.py | 13 +++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/module/ocr/ocr.py b/module/ocr/ocr.py index 09810e3ff..c46f80886 100644 --- a/module/ocr/ocr.py +++ b/module/ocr/ocr.py @@ -145,6 +145,12 @@ class Ocr: text=str([result for result, _ in result_list])) return result_list + def filter_detected(self, result: BoxedResult) -> bool: + """ + Return False to drop result. + """ + return True + def detect_and_ocr(self, image, direct_ocr=False) -> list[BoxedResult]: """ Args: @@ -160,13 +166,14 @@ class Ocr: image = crop(image, self.button.area) image = self.pre_process(image) # ocr - # image = enlarge_canvas(image) results: list[BoxedResult] = self.model.detect_and_ocr(image) # after proces for result in results: if not direct_ocr: result.box += self.button.area[:2] result.box = tuple(corner2area(result.box)) + + results = [result for result in results if self.filter_detected(result)] results = merge_buttons(results, thres_x=self.merge_thres_x, thres_y=self.merge_thres_y) for result in results: result.ocr_text = self.after_process(result.ocr_text) @@ -366,14 +373,18 @@ class Duration(Ocr): @classmethod def timedelta_regex(cls, lang): regex_str = { - 'cn': r'((?P<days>\d{1,2})天)?' + 'cn': r'^(?P<prefix>.*?)' + r'((?P<days>\d{1,2})天)?' r'((?P<hours>\d{1,2})小时)?' r'((?P<minutes>\d{1,2})分钟)?' - r'((?P<seconds>\d{1,2})秒)', - 'en': r'((?P<days>\d{1,2})\s*d\s*)?' + r'((?P<seconds>\d{1,2})秒)?' + r'$', + 'en': r'^(?P<prefix>.*?)' + r'((?P<days>\d{1,2})\s*d\s*)?' r'((?P<hours>\d{1,2})\s*h\s*)?' r'((?P<minutes>\d{1,2})\s*m\s*)?' - r'((?P<seconds>\d{1,2})\s*s)' + r'((?P<seconds>\d{1,2})\s*s)?' 
+ r'$' }[lang] return re.compile(regex_str) diff --git a/tasks/assignment/ui.py b/tasks/assignment/ui.py index b7270218e..fc4a62b7b 100644 --- a/tasks/assignment/ui.py +++ b/tasks/assignment/ui.py @@ -60,16 +60,13 @@ class AssignmentOcr(Ocr): return None return re.compile('|'.join('(?P<%s>%s)' % pair for pair in rules)) + def filter_detected(self, result) -> bool: + # Drop duration rows + res = Duration.timedelta_regex(self.lang).search(result.ocr_text) + return not bool(res.group('seconds')) + def after_process(self, result: str): result = super().after_process(result) - # Drop duration - result = Duration.timedelta_regex(self.lang).sub('', result) - result = result.strip() - - if self.lang == 'cn': - # Hourglass icon may be detected as "豆" - result = result.replace('豆', '') - result = re.sub(r'\d$', '', result) if self.ocr_regex is None: return result