Fix: Assignment duration and name OCR

2024-11-22 08:37:42 +00:00 · 2023-09-17 08:09:00 +08:00 · 2023-09-17 08:09:00 +08:00 · 1f47d63af8
commit 1f47d63af8
parent 59b3c59f99
2 changed files with 21 additions and 13 deletions
--- a/module/ocr/ocr.py
+++ b/module/ocr/ocr.py
@@ -145,6 +145,12 @@ class Ocr:
                    text=str([result for result, _ in result_list]))
        return result_list

+    def filter_detected(self, result: BoxedResult) -> bool:
+        """
+        Hook for subclasses: return False to drop a detected result before merging.
+        """
+        return True
+
    def detect_and_ocr(self, image, direct_ocr=False) -> list[BoxedResult]:
        """
        Args:
@@ -160,13 +166,14 @@ class Ocr:
            image = crop(image, self.button.area)
        image = self.pre_process(image)
        # ocr
-        # image = enlarge_canvas(image)
        results: list[BoxedResult] = self.model.detect_and_ocr(image)
        # after process
        for result in results:
            if not direct_ocr:
                result.box += self.button.area[:2]
            result.box = tuple(corner2area(result.box))
+
+        results = [result for result in results if self.filter_detected(result)]
        results = merge_buttons(results, thres_x=self.merge_thres_x, thres_y=self.merge_thres_y)
        for result in results:
            result.ocr_text = self.after_process(result.ocr_text)
@@ -366,14 +373,18 @@ class Duration(Ocr):
    @classmethod
    def timedelta_regex(cls, lang):
        regex_str = {
-            'cn': r'((?P<days>\d{1,2})天)?'
+            'cn': r'^(?P<prefix>.*?)'
+                  r'((?P<days>\d{1,2})天)?'
                  r'((?P<hours>\d{1,2})小时)?'
                  r'((?P<minutes>\d{1,2})分钟)?'
-                  r'((?P<seconds>\d{1,2})秒)',
-            'en': r'((?P<days>\d{1,2})\s*d\s*)?'
+                  r'((?P<seconds>\d{1,2})秒)?'
+                  r'$',
+            'en': r'^(?P<prefix>.*?)'
+                  r'((?P<days>\d{1,2})\s*d\s*)?'
                  r'((?P<hours>\d{1,2})\s*h\s*)?'
                  r'((?P<minutes>\d{1,2})\s*m\s*)?'
-                  r'((?P<seconds>\d{1,2})\s*s)'
+                  r'((?P<seconds>\d{1,2})\s*s)?'
+                  r'$'
        }[lang]
        return re.compile(regex_str)

--- a/tasks/assignment/ui.py
+++ b/tasks/assignment/ui.py
@@ -60,16 +60,13 @@ class AssignmentOcr(Ocr):
            return None
        return re.compile('|'.join('(?P<%s>%s)' % pair for pair in rules))

+    def filter_detected(self, result) -> bool:
+        # Drop rows ending in a duration with a seconds part; a failed search (None) keeps the row
+        res = Duration.timedelta_regex(self.lang).search(result.ocr_text)
+        return not (res and res.group('seconds'))
+
    def after_process(self, result: str):
        result = super().after_process(result)
-        # Drop duration
-        result = Duration.timedelta_regex(self.lang).sub('', result)
-        result = result.strip()
-
-        if self.lang == 'cn':
-            # Hourglass icon may be detected as "豆"
-            result = result.replace('豆', '')
-            result = re.sub(r'\d$', '', result)

        if self.ocr_regex is None:
            return result