StarRailCopilot/module/ocr/ocr.py

import re
import time
from datetime import timedelta

import cv2
import numpy as np
from pponnxcr.predict_system import BoxedResult

import module.config.server as server
from module.base.button import ButtonWrapper
from module.base.decorator import cached_property
from module.base.utils import area_pad, corner2area, crop, extract_white_letters, float2str
from module.exception import ScriptError
from module.logger import logger
from module.ocr.models import OCR_MODEL, TextSystem
from module.ocr.utils import merge_buttons


class OcrResultButton:
    """
    One OCR detection wrapped as a button-like object,
    optionally tied to the keyword that the recognized text matched.
    """

    def __init__(self, boxed_result: BoxedResult, matched_keyword):
        """
        Args:
            boxed_result: BoxedResult from ppocr-onnx, its `box`, `ocr_text` and `score` are read.
            matched_keyword: Keyword object or None
        """
        box = boxed_result.box
        text = boxed_result.ocr_text

        self.area = box
        # NOTE(review): presumably a negative pad enlarges the area - confirm against area_pad()
        self.search = area_pad(box, pad=-20)
        # self.color =
        self.button = box

        # Named after the keyword when there is one, otherwise after the raw OCR text
        self.matched_keyword = matched_keyword
        self.name = text if matched_keyword is None else str(matched_keyword)

        self.text = text
        self.score = boxed_result.score

    def __str__(self):
        return self.name

    __repr__ = __str__

    def __eq__(self, other):
        # Compared by string form, so a button equals anything that prints the same
        mine, theirs = str(self), str(other)
        return mine == theirs

    def __hash__(self):
        # Hash the name to stay consistent with the string-based __eq__
        return hash(self.name)

    def __bool__(self):
        # An existing button is always truthy
        return True

    @property
    def is_keyword_matched(self) -> bool:
        """
        Returns:
            bool: True if a keyword object is attached to this button.
        """
        return self.matched_keyword is not None


class Ocr:
    # Merge results with box distance <= thres
    merge_thres_x = 0
    merge_thres_y = 0

    def __init__(self, button: ButtonWrapper, lang=None, name=None):
        """
        Args:
            button:
            lang: If None, use in-game language
            name: If None, use button.name
        """
        if lang is None:
            lang = server.lang
        if name is None:
            name = button.name

        self.button: ButtonWrapper = button
        self.lang: str = lang
        self.name: str = name

    @cached_property
    def model(self) -> TextSystem:
        """
        OCR model of `self.lang`, fetched from OCR_MODEL on first access and cached afterwards.
        """
        text_system = OCR_MODEL.get_by_lang(self.lang)
        return text_system

    def pre_process(self, image):
        """
        Args:
            image (np.ndarray): Shape (height, width, channel)

        Returns:
            np.ndarray: Shape (width, height)
        """
        return image

    def after_process(self, result):
        """
        Args:
            result (str): '第二行'

        Returns:
            str:
        """
        if result.startswith('UID'):
            result = 'UID'
        return result

    def format_result(self, result):
        """
        Will be overriden.
        """
        return result

    def _log_change(self, attr, func, before):
        after = func(before)
        if after != before:
            logger.attr(f'{self.name} {attr}', f'{before} -> {after}')
        return after

    def ocr_single_line(self, image, direct_ocr=False):
        """
        Recognize one line of text.

        Args:
            image: Screenshot, or the line image itself if `direct_ocr` is True.
            direct_ocr: True to ignore `button` attribute and feed the image to OCR model without cropping.

        Returns:
            Output of `format_result()`, a str unless a subclass converts it.
        """
        started = time.time()
        # pre process
        cropped = image if direct_ocr else crop(image, self.button.area)
        processed = self.pre_process(cropped)
        # ocr, the second item returned by the model is not used here
        text, _ = self.model.ocr_single_line(processed)
        # after process, log each step that changed the text
        text = self._log_change('after', self.after_process, text)
        text = self._log_change('format', self.format_result, text)
        elapsed = float2str(time.time() - started)
        logger.attr(name='%s %ss' % (self.name, elapsed), text=str(text))
        return text

    def ocr_multi_lines(self, image_list):
        """
        Recognize a batch of line images. Images are fed to the model as they are, without cropping.

        Args:
            image_list: List of images, one text line each.

        Returns:
            list: List of (result, score), `result` is the output of `format_result()`.
        """
        started = time.time()
        # pre process
        processed = [self.pre_process(each) for each in image_list]
        # ocr
        recognized = self.model.ocr_lines(processed)
        pairs = [(text, score) for text, score in recognized]
        # after process, all texts go through after_process() before any goes through format_result()
        pairs = [(self.after_process(text), score) for text, score in pairs]
        pairs = [(self.format_result(text), score) for text, score in pairs]
        elapsed = float2str(time.time() - started)
        logger.attr(name="%s %ss" % (self.name, elapsed),
                    text=str([text for text, _ in pairs]))
        return pairs

    def filter_detected(self, result: BoxedResult) -> bool:
        """
        Return False to drop result.
        """
        return True

    def detect_and_ocr(self, image, direct_ocr=False) -> list[BoxedResult]:
        """
        Detect text boxes in the image and recognize the text in each of them.

        Args:
            image: Screenshot, or the image to detect on if `direct_ocr` is True.
            direct_ocr: True to ignore `button` attribute and feed the image to OCR model without cropping.

        Returns:
            list[BoxedResult]: Results that passed `filter_detected()`, merged by `merge_buttons()`,
                with `ocr_text` replaced by the output of `after_process()`.
        """
        started = time.time()
        # pre process
        if direct_ocr:
            cropped = image
        else:
            cropped = crop(image, self.button.area)
        processed = self.pre_process(cropped)
        # ocr
        results: list[BoxedResult] = self.model.detect_and_ocr(processed)
        # after process
        for boxed in results:
            if not direct_ocr:
                # Add the first two values of button.area to the box,
                # presumably to move it from crop coordinates back to screenshot coordinates
                boxed.box += self.button.area[:2]
            boxed.box = tuple(corner2area(boxed.box))

        results = list(filter(self.filter_detected, results))
        results = merge_buttons(results, thres_x=self.merge_thres_x, thres_y=self.merge_thres_y)
        # Texts are processed after merging, so merged results are processed as a whole
        for boxed in results:
            boxed.ocr_text = self.after_process(boxed.ocr_text)

        elapsed = float2str(time.time() - started)
        logger.attr(name='%s %ss' % (self.name, elapsed),
                    text=str([boxed.ocr_text for boxed in results]))
        return results

    def _match_result(
            self,
            result: str,
            keyword_classes,
            lang: str = None,
            ignore_punctuation=True,
            ignore_digit=True):
        """
        Args:
            result (str):
            keyword_classes: A list of `Keyword` class or classes inherited `Keyword`

        Returns:
            If matched, return `Keyword` object or objects inherited `Keyword`
            If not match, return None
        """
        if not isinstance(keyword_classes, list):
            keyword_classes = [keyword_classes]

        # Digits will be considered as the index of keyword
        if ignore_digit:
            if result.isdigit():
                return None

        # Try in current lang
        for keyword_class in keyword_classes:
            try:
                matched = keyword_class.find(
                    result,
                    lang=lang,
                    ignore_punctuation=ignore_punctuation
                )
                return matched
            except ScriptError:
                continue

        return None

    def matched_single_line(
            self,
            image,
            keyword_classes,
            lang: str = None,
            ignore_punctuation=True
    ) -> OcrResultButton:
        """
        Recognize one line of text in `button` area and match it against known keywords.

        Args:
            image: Image to detect
            keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
            lang:
            ignore_punctuation:

        Returns:
            What `_match_result()` returned: the matched keyword,
            or None if it didn't match known keywords.
            Note that no OcrResultButton is built here, despite the annotation.
        """
        text = self.ocr_single_line(image)

        matched = self._match_result(
            text,
            keyword_classes=keyword_classes,
            lang=lang,
            ignore_punctuation=ignore_punctuation,
        )

        logger.attr(name=f'{self.name} matched', text=matched)
        return matched

    def matched_multi_lines(
            self,
            image_list,
            keyword_classes,
            lang: str = None,
            ignore_punctuation=True
    ) -> list[OcrResultButton]:
        """
        Recognize a batch of line images and match each text against known keywords.

        Args:
            image_list: List of images, one text line each.
            keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
            lang:
            ignore_punctuation:

        Returns:
            List of what `_match_result()` returned for each matched line, in the order of `image_list`.
            OCR result which didn't match known keywords will be dropped.
            Note that no OcrResultButton is built here, despite the annotation.
        """
        matched = []
        # ocr_multi_lines() returns (result, score) pairs, only the text takes part in matching
        for text, _score in self.ocr_multi_lines(image_list):
            keyword = self._match_result(
                text,
                keyword_classes=keyword_classes,
                lang=lang,
                ignore_punctuation=ignore_punctuation,
            )
            # _match_result() returns None when nothing matched
            if keyword is not None:
                matched.append(keyword)

        logger.attr(name=f'{self.name} matched',
                    text=matched)
        return matched

    def _product_button(
            self,
            boxed_result: BoxedResult,
            keyword_classes,
            lang: str = None,
            ignore_punctuation=True,
            ignore_digit=True
    ) -> OcrResultButton:
        """
        Build an OcrResultButton from one detected result.

        Args:
            boxed_result: One result of `detect_and_ocr()`.
            keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
            lang: Passed to `_match_result()`.
            ignore_punctuation: Passed to `_match_result()`.
            ignore_digit: Passed to `_match_result()`.

        Returns:
            OcrResultButton: Always a button, its `matched_keyword` is None if nothing matched.
        """
        classes = keyword_classes if isinstance(keyword_classes, list) else [keyword_classes]

        keyword = self._match_result(
            boxed_result.ocr_text,
            keyword_classes=classes,
            lang=lang,
            ignore_punctuation=ignore_punctuation,
            ignore_digit=ignore_digit,
        )
        return OcrResultButton(boxed_result, keyword)

    def matched_ocr(self, image, keyword_classes, direct_ocr=False) -> list[OcrResultButton]:
        """
        Detect and recognize texts, then keep those that match known keywords.

        Args:
            image: Screenshot
            keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
            direct_ocr: True to ignore `button` attribute and feed the image to OCR model without cropping.

        Returns:
            List of matched OcrResultButton.
            OCR result which didn't match known keywords will be dropped.
        """
        detected = self.detect_and_ocr(image, direct_ocr=direct_ocr)

        buttons = []
        for boxed in detected:
            button = self._product_button(boxed, keyword_classes)
            if button.is_keyword_matched:
                buttons.append(button)

        logger.attr(name=f'{self.name} matched', text=buttons)
        return buttons


class Digit(Ocr):
    """
    OCR on a number. Result is an int.
    """

    def __init__(self, button: ButtonWrapper, lang='en', name=None):
        # Same as Ocr but `lang` defaults to 'en' instead of the in-game language
        super().__init__(button, lang=lang, name=name)

    def format_result(self, result) -> int:
        """
        Take the first run of digits in the text.

        Returns:
            int: 0 if the text has no digit.
        """
        result = super().after_process(result)
        logger.attr(name=self.name, text=str(result))

        found = re.search(r'(\d+)', result)
        if found is None:
            logger.warning(f'No digit found in {result}')
            return 0
        return int(found.group(1))


class DigitCounter(Ocr):
    """
    OCR on a counter like `14/15`. Result is a tuple of (current, remain, total).
    """

    def __init__(self, button: ButtonWrapper, lang='en', name=None):
        # Same as Ocr but `lang` defaults to 'en' instead of the in-game language
        super().__init__(button, lang=lang, name=name)

    @classmethod
    def is_format_matched(cls, result) -> bool:
        """
        Returns:
            bool: True if the text contains a '/', digits are not checked.
        """
        return '/' in result

    def format_result(self, result) -> tuple[int, int, int]:
        """
        Do OCR on a counter, such as `14/15`, and returns 14, 1, 15

        Returns:
            tuple[int, int, int]: (current, total - current, total),
                or (0, 0, 0) if the text has no `<digits>/<digits>`.
        """
        result = super().after_process(result)
        logger.attr(name=self.name, text=str(result))

        found = re.search(r'(\d+)\s*/\s*(\d+)', result)
        if found is None:
            logger.warning(f'No digit counter found in {result}')
            return 0, 0, 0

        current, total = int(found.group(1)), int(found.group(2))
        # current = min(current, total)
        remain = total - current
        return current, remain, total


class Duration(Ocr):
    """
    OCR on a duration like `2h 13m`. Result is a timedelta.
    """

    @classmethod
    def timedelta_regex(cls, lang):
        """
        Args:
            lang (str): 'cn' or 'en'

        Returns:
            re.Pattern: Pattern with optional named groups `days`, `hours`, `minutes`, `seconds`,
                surrounded by `prefix` and `suffix`.

        Raises:
            KeyError: If `lang` is neither 'cn' nor 'en'.
        """
        patterns = {
            'cn': r'^(?P<prefix>.*?)'
                  r'((?P<days>\d{1,2})\s*天\s*)?'
                  r'((?P<hours>\d{1,2})\s*小时\s*)?'
                  r'((?P<minutes>\d{1,2})\s*分钟\s*)?'
                  r'((?P<seconds>\d{1,2})\s*秒)?'
                  r'(?P<suffix>[^天时钟秒]*?)$',
            'en': r'^(?P<prefix>.*?)'
                  r'((?P<days>\d{1,2})\s*d\s*)?'
                  r'((?P<hours>\d{1,2})\s*h\s*)?'
                  r'((?P<minutes>\d{1,2})\s*m\s*)?'
                  r'((?P<seconds>\d{1,2})\s*s)?'
                  r'(?P<suffix>[^dhms]*?)$'
        }
        pattern = patterns[lang]
        return re.compile(pattern)

    def after_process(self, result):
        """
        Clean up the OCR text before parsing it as a duration.
        """
        result = super().after_process(result)
        # Drop leading and trailing dots and commas, like the one in "20h."
        result = result.strip('.,。，')
        # Digit 0 in front of "h" is sometimes recognized as letter O
        for wrong in ('Oh', 'oh'):
            result = result.replace(wrong, '0h')
        return result

    def format_result(self, result: str) -> timedelta:
        """
        Do OCR on a duration, such as `18d 2h 13m 30s`, `2h`, `13m 30s`, `9s`

        Returns:
            timedelta: Units that are missing in the text count as 0.
        """
        matched = self.timedelta_regex(self.lang).search(result)
        if not matched:
            return timedelta()
        # Group names are the same as the keyword arguments of timedelta
        units = {
            unit: self._sanitize_number(matched.group(unit))
            for unit in ('days', 'hours', 'minutes', 'seconds')
        }
        return timedelta(**units)

    @staticmethod
    def _sanitize_number(number) -> int:
        """
        Args:
            number: Content of a regex group, None if the group did not take part in the match.

        Returns:
            int: 0 for None.
        """
        return 0 if number is None else int(number)


class OcrWhiteLetterOnComplexBackground(Ocr):
    """
    OCR on white letters, the image is reduced to its white letters before detection.
    """

    def pre_process(self, image):
        """
        Extract white letters and stack the extracted image into 3 identical channels.
        """
        image = extract_white_letters(image, threshold=255)
        image = cv2.merge([image, image, image])
        return image

    def detect_and_ocr(self, *args, **kwargs):
        """
        Same as `Ocr.detect_and_ocr()` but with `box_thresh` of the text detector lowered to 0.2
        during the call.

        Returns:
            list[BoxedResult]:
        """
        # Try hard to lower TextSystem.box_thresh
        detector = self.model.text_detector
        backup = detector.box_thresh
        detector.box_thresh = 0.2
        try:
            return super().detect_and_ocr(*args, **kwargs)
        finally:
            # Restore even if OCR raised, the threshold is set on the model object, not on this Ocr object.
            # NOTE(review): the model is presumably shared by every Ocr of the same lang - confirm in OCR_MODEL
            detector.box_thresh = backup
Add: Task assignment 2023-06-19 00:39:41 +00:00			`import re`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`import time`
Add: Task assignment 2023-06-19 00:39:41 +00:00			`from datetime import timedelta`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
Add: Detect and switch characters 2023-10-22 17:49:48 +00:00			`import cv2`
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`import numpy as np`
Fix: import BoxedResult 2023-09-10 11:23:27 +00:00			`from pponnxcr.predict_system import BoxedResult`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
			`import module.config.server as server`
			`from module.base.button import ButtonWrapper`
			`from module.base.decorator import cached_property`
Add: Detect and switch characters 2023-10-22 17:49:48 +00:00			`from module.base.utils import area_pad, corner2area, crop, extract_white_letters, float2str`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`from module.exception import ScriptError`
			`from module.logger import logger`
Opt: Set rec_batch_num=1 for faster ocr 2023-09-08 15:00:56 +00:00			`from module.ocr.models import OCR_MODEL, TextSystem`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`from module.ocr.utils import merge_buttons`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00

			`class OcrResultButton:`
Add: Record progress of battle pass quests 2024-01-01 16:43:34 +00:00			`def __init__(self, boxed_result: BoxedResult, matched_keyword):`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			`"""`
			`Args:`
			`boxed_result: BoxedResult from ppocr-onnx`
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`matched_keyword: Keyword object or None`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			`"""`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`self.area = boxed_result.box`
			`self.search = area_pad(self.area, pad=-20)`
			`# self.color =`
			`self.button = boxed_result.box`

Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`if matched_keyword is not None:`
			`self.matched_keyword = matched_keyword`
			`self.name = str(matched_keyword)`
			`else:`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`self.matched_keyword = None`
			`self.name = boxed_result.ocr_text`

			`self.text = boxed_result.ocr_text`
			`self.score = boxed_result.score`

			`def __str__(self):`
			`return self.name`

			`__repr__ = __str__`

			`def __eq__(self, other):`
			`return str(self) == str(other)`

			`def __hash__(self):`
			`return hash(self.name)`

			`def __bool__(self):`
			`return True`

Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`@property`
			`def is_keyword_matched(self) -> bool:`
			`return self.matched_keyword is not None`

Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
			`class Ocr:`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`# Merge results with box distance <= thres`
			`merge_thres_x = 0`
			`merge_thres_y = 0`

Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`def __init__(self, button: ButtonWrapper, lang=None, name=None):`
Refactor: Migrate to pponnxcr 2023-09-08 14:23:57 +00:00			`"""`
			`Args:`
			`button:`
			`lang: If None, use in-game language`
			`name: If None, use button.name`
			`"""`
			`if lang is None:`
			`lang = server.lang`
			`if name is None:`
			`name = button.name`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
Refactor: Migrate to pponnxcr 2023-09-08 14:23:57 +00:00			`self.button: ButtonWrapper = button`
			`self.lang: str = lang`
			`self.name: str = name`
Fix: Ocr.lang should be lazy loaded 2023-07-02 07:30:47 +00:00
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`@cached_property`
			`def model(self) -> TextSystem:`
Refactor: Migrate to pponnxcr 2023-09-08 14:23:57 +00:00			`return OCR_MODEL.get_by_lang(self.lang)`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
			`def pre_process(self, image):`
			`"""`
			`Args:`
			`image (np.ndarray): Shape (height, width, channel)`

			`Returns:`
			`np.ndarray: Shape (width, height)`
			`"""`
			`return image`

			`def after_process(self, result):`
			`"""`
			`Args:`
			`result (str): '第二行'`

			`Returns:`
			`str:`
			`"""`
Opt: Faster OCR on rectangle images 2023-06-13 19:26:48 +00:00			`if result.startswith('UID'):`
			`result = 'UID'`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`return result`

Fix: OCR_TRAILBLAZE_POWER length varies by value 2023-06-17 08:19:13 +00:00			`def format_result(self, result):`
			`"""`
			`Will be overriden.`
			`"""`
			`return result`

Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`def _log_change(self, attr, func, before):`
			`after = func(before)`
			`if after != before:`
			`logger.attr(f'{self.name} {attr}', f'{before} -> {after}')`
			`return after`

Fix: Get SimUni points at double event 2023-09-17 01:34:17 +00:00			`def ocr_single_line(self, image, direct_ocr=False):`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`# pre process`
			`start_time = time.time()`
Fix: Get SimUni points at double event 2023-09-17 01:34:17 +00:00			`if not direct_ocr:`
			`image = crop(image, self.button.area)`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`image = self.pre_process(image)`
			`# ocr`
			`result, _ = self.model.ocr_single_line(image)`
			`# after proces`
Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`result = self._log_change('after', self.after_process, result)`
			`result = self._log_change('format', self.format_result, result)`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`logger.attr(name='%s %ss' % (self.name, float2str(time.time() - start_time)),`
			`text=str(result))`
			`return result`

Add: forgotten hall stage goto 2023-06-29 16:32:33 +00:00			`def ocr_multi_lines(self, image_list):`
			`# pre process`
			`start_time = time.time()`
			`image_list = [self.pre_process(image) for image in image_list]`
			`# ocr`
			`result_list = self.model.ocr_lines(image_list)`
			`result_list = [(result, score) for result, score in result_list]`
			`# after process`
			`result_list = [(self.after_process(result), score) for result, score in result_list]`
			`result_list = [(self.format_result(result), score) for result, score in result_list]`
			`logger.attr(name="%s %ss" % (self.name, float2str(time.time() - start_time)),`
			`text=str([result for result, _ in result_list]))`
			`return result_list`

Fix: Assignment duration and name OCR 2023-09-17 00:09:00 +00:00			`def filter_detected(self, result: BoxedResult) -> bool:`
			`"""`
			`Return False to drop result.`
			`"""`
			`return True`

Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`def detect_and_ocr(self, image, direct_ocr=False) -> list[BoxedResult]:`
			`"""`
			`Args:`
			`image:`
			direct_ocr: True to ignore `button` attribute and feed the image to OCR model without cropping.

			`Returns:`

			`"""`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`# pre process`
			`start_time = time.time()`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`if not direct_ocr:`
			`image = crop(image, self.button.area)`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`image = self.pre_process(image)`
			`# ocr`
			`results: list[BoxedResult] = self.model.detect_and_ocr(image)`
			`# after proces`
			`for result in results:`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`if not direct_ocr:`
			`result.box += self.button.area[:2]`
			`result.box = tuple(corner2area(result.box))`
Fix: Assignment duration and name OCR 2023-09-17 00:09:00 +00:00
			`results = [result for result in results if self.filter_detected(result)]`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00			`results = merge_buttons(results, thres_x=self.merge_thres_x, thres_y=self.merge_thres_y)`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			`for result in results:`
			`result.ocr_text = self.after_process(result.ocr_text)`
Add: Merge multi-line OCR results 2023-05-21 07:40:36 +00:00
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`logger.attr(name='%s %ss' % (self.name, float2str(time.time() - start_time)),`
			`text=str([result.ocr_text for result in results]))`
			`return results`

Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`def _match_result(`
			`self,`
			`result: str,`
			`keyword_classes,`
			`lang: str = None,`
			`ignore_punctuation=True,`
			`ignore_digit=True):`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`"""`
			`Args:`
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`result (str):`
			keyword_classes: A list of `Keyword` class or classes inherited `Keyword`

			`Returns:`
			If matched, return `Keyword` object or objects inherited `Keyword`
			`If not match, return None`
			`"""`
			`if not isinstance(keyword_classes, list):`
			`keyword_classes = [keyword_classes]`

			`# Digits will be considered as the index of keyword`
			`if ignore_digit:`
			`if result.isdigit():`
			`return None`

			`# Try in current lang`
			`for keyword_class in keyword_classes:`
			`try:`
			`matched = keyword_class.find(`
			`result,`
			`lang=lang,`
			`ignore_punctuation=ignore_punctuation`
			`)`
			`return matched`
			`except ScriptError:`
			`continue`

			`return None`

			`def matched_single_line(`
			`self,`
			`image,`
			`keyword_classes,`
			`lang: str = None,`
			`ignore_punctuation=True`
			`) -> OcrResultButton:`
			`"""`
			`Args:`
			`image: Image to detect`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`lang:`
			`ignore_punctuation:`

			`Returns:`
			`OcrResultButton: Or None if it didn't matched known keywords.`
			`"""`
			`result = self.ocr_single_line(image)`

			`result = self._match_result(`
			`result,`
			`keyword_classes=keyword_classes,`
			`lang=lang,`
			`ignore_punctuation=ignore_punctuation,`
			`)`

			`logger.attr(name=f'{self.name} matched',`
			`text=result)`
			`return result`

			`def matched_multi_lines(`
			`self,`
			`image_list,`
			`keyword_classes,`
			`lang: str = None,`
			`ignore_punctuation=True`
			`) -> list[OcrResultButton]:`
			`"""`
			`Args:`
			`image_list:`
			keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
			`lang:`
			`ignore_punctuation:`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00
			`Returns:`
			`List of matched OcrResultButton.`
			`OCR result which didn't matched known keywords will be dropped.`
			`"""`
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`results = self.ocr_multi_lines(image_list)`

			`results = [self._match_result(`
			`result,`
			`keyword_classes=keyword_classes,`
			`lang=lang,`
			`ignore_punctuation=ignore_punctuation,`
			`) for result in results]`
			`results = [result for result in results if result.is_keyword_matched]`

			`logger.attr(name=f'{self.name} matched',`
			`text=results)`
			`return results`

			`def _product_button(`
			`self,`
			`boxed_result: BoxedResult,`
			`keyword_classes,`
			`lang: str = None,`
			`ignore_punctuation=True,`
			`ignore_digit=True`
			`) -> OcrResultButton:`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			`if not isinstance(keyword_classes, list):`
			`keyword_classes = [keyword_classes]`

Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`matched_keyword = self._match_result(`
			`boxed_result.ocr_text,`
			`keyword_classes=keyword_classes,`
			`lang=lang,`
			`ignore_punctuation=ignore_punctuation,`
			`ignore_digit=ignore_digit,`
			`)`
			`button = OcrResultButton(boxed_result, matched_keyword)`
			`return button`

			`def matched_ocr(self, image, keyword_classes, direct_ocr=False) -> list[OcrResultButton]:`
			`"""`
			`Args:`
			`image: Screenshot`
			keyword_classes: `Keyword` class or classes inherited `Keyword`, or a list of them.
			direct_ocr: True to ignore `button` attribute and feed the image to OCR model without cropping.
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00			`Returns:`
			`List of matched OcrResultButton.`
			`OCR result which didn't matched known keywords will be dropped.`
			`"""`
Add: Goto dungeon from page_guide 2023-06-14 16:15:14 +00:00			`results = self.detect_and_ocr(image, direct_ocr=direct_ocr)`
Refactor: Get in-game language from plane name 2023-09-15 05:29:22 +00:00
			`results = [self._product_button(result, keyword_classes) for result in results]`
			`results = [result for result in results if result.is_keyword_matched]`

Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`logger.attr(name=f'{self.name} matched',`
Add: Keyword extract 2023-05-22 12:20:31 +00:00			`text=results)`
Add: OCR model and downgrade to py3.10 since onnxruntime doesn't support 3.11 yet 2023-05-21 01:26:58 +00:00			`return results`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00

			`class Digit(Ocr):`
Refactor: Migrate to pponnxcr 2023-09-08 14:23:57 +00:00			`def __init__(self, button: ButtonWrapper, lang='en', name=None):`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`super().__init__(button, lang=lang, name=name)`

Fix: OCR_TRAILBLAZE_POWER length varies by value 2023-06-17 08:19:13 +00:00			`def format_result(self, result) -> int:`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`"""`
			`Returns:`
			`int:`
			`"""`
			`result = super().after_process(result)`
			`logger.attr(name=self.name, text=str(result))`

			`res = re.search(r'(\d+)', result)`
			`if res:`
			`return int(res.group(1))`
			`else:`
			`logger.warning(f'No digit found in {result}')`
			`return 0`


			`class DigitCounter(Ocr):`
Refactor: Migrate to pponnxcr 2023-09-08 14:23:57 +00:00			`def __init__(self, button: ButtonWrapper, lang='en', name=None):`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`super().__init__(button, lang=lang, name=name)`

Chore: Abstract DigitCounter.is_format_matched() 2024-01-01 14:45:22 +00:00			`@classmethod`
			`def is_format_matched(cls, result) -> bool:`
			`return '/' in result`

Fix: OCR_TRAILBLAZE_POWER length varies by value 2023-06-17 08:19:13 +00:00			`def format_result(self, result) -> tuple[int, int, int]:`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`"""`
			Do OCR on a counter, such as `14/15`, and returns 14, 1, 15

			`Returns:`
			`int:`
			`"""`
			`result = super().after_process(result)`
			`logger.attr(name=self.name, text=str(result))`

Chore: Abstract DigitCounter.is_format_matched() 2024-01-01 14:45:22 +00:00			`res = re.search(r'(\d+)\s*/\s*(\d+)', result)`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`if res:`
			`groups = [int(s) for s in res.groups()]`
			`current, total = int(groups[0]), int(groups[1])`
Add: Combat module 2023-06-16 19:15:26 +00:00			`# current = min(current, total)`
Add: Get trailblaze power before and after combat 2023-06-16 17:04:17 +00:00			`return current, total - current, total`
			`else:`
			`logger.warning(f'No digit counter found in {result}')`
			`return 0, 0, 0`
Add: Task assignment 2023-06-19 00:39:41 +00:00

			`class Duration(Ocr):`
Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`@classmethod`
			`def timedelta_regex(cls, lang):`
Fix: Help text & details of assignment - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235492724 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235495266 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235497216 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235500507 2023-06-21 14:08:57 +00:00			`regex_str = {`
Fix: Assignment duration and name OCR 2023-09-17 00:09:00 +00:00			`'cn': r'^(?P<prefix>.*?)'`
Add: Support for event assignments 2023-09-26 07:06:43 +00:00			`r'((?P<days>\d{1,2})\s*天\s*)?'`
			`r'((?P<hours>\d{1,2})\s*小时\s*)?'`
			`r'((?P<minutes>\d{1,2})\s*分钟\s*)?'`
			`r'((?P<seconds>\d{1,2})\s*秒)?'`
			`r'(?P<suffix>[^天时钟秒]*?)$',`
Fix: Assignment duration and name OCR 2023-09-17 00:09:00 +00:00			`'en': r'^(?P<prefix>.*?)'`
			`r'((?P<days>\d{1,2})\s*d\s*)?'`
Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`r'((?P<hours>\d{1,2})\s*h\s*)?'`
			`r'((?P<minutes>\d{1,2})\s*m\s*)?'`
Fix: Assignment duration and name OCR 2023-09-17 00:09:00 +00:00			`r'((?P<seconds>\d{1,2})\s*s)?'`
Add: Support for event assignments 2023-09-26 07:06:43 +00:00			`r'(?P<suffix>[^dhms]*?)$'`
Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`}[lang]`
Fix: Help text & details of assignment - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235492724 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235495266 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235497216 - https://github.com/LmeSzinc/StarRailCopilot/pull/23#discussion_r1235500507 2023-06-21 14:08:57 +00:00			`return re.compile(regex_str)`
Add: Task assignment 2023-06-19 00:39:41 +00:00
Fix: Handle OCR error of "20h." 2023-09-20 04:35:54 +00:00			`def after_process(self, result):`
			`result = super().after_process(result)`
			`result = result.strip('.,。，')`
Fix: handle duration OCR error "Oh" 2023-09-26 19:14:51 +00:00			`result = result.replace('Oh', '0h').replace('oh', '0h')`
Fix: Handle OCR error of "20h." 2023-09-20 04:35:54 +00:00			`return result`

Add: Task assignment 2023-06-19 00:39:41 +00:00			`def format_result(self, result: str) -> timedelta:`
			`"""`
Opt: delay battle pass to next version if possible 2023-07-08 04:09:23 +00:00			Do OCR on a duration, such as `18d 2h 13m 30s`, `2h`, `13m 30s`, `9s`
Add: Task assignment 2023-06-19 00:39:41 +00:00
			`Returns:`
			`timedelta:`
			`"""`
Fix: [EN] Assignment name in multiple rows 2023-09-15 17:37:06 +00:00			`matched = self.timedelta_regex(self.lang).search(result)`
			`if not matched:`
Add: Task assignment 2023-06-19 00:39:41 +00:00			`return timedelta()`
Opt: delay battle pass to next version if possible 2023-07-08 04:09:23 +00:00			`days = self._sanitize_number(matched.group('days'))`
Add: Task assignment 2023-06-19 00:39:41 +00:00			`hours = self._sanitize_number(matched.group('hours'))`
			`minutes = self._sanitize_number(matched.group('minutes'))`
			`seconds = self._sanitize_number(matched.group('seconds'))`
Opt: delay battle pass to next version if possible 2023-07-08 04:09:23 +00:00			`return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)`
Add: Task assignment 2023-06-19 00:39:41 +00:00
Fix: Ocr.lang should be lazy loaded 2023-07-02 07:30:47 +00:00			`@staticmethod`
			`def _sanitize_number(number) -> int:`
Add: Task assignment 2023-06-19 00:39:41 +00:00			`if number is None:`
			`return 0`
			`return int(number)`
Add: Detect and switch characters 2023-10-22 17:49:48 +00:00

			`class OcrWhiteLetterOnComplexBackground(Ocr):`
			`def pre_process(self, image):`
			`image = extract_white_letters(image, threshold=255)`
			`image = cv2.merge([image, image, image])`
			`return image`

			`def detect_and_ocr(self, *args, **kwargs):`
			`# Try hard to lower TextSystem.box_thresh`
			`backup = self.model.text_detector.box_thresh`
			`self.model.text_detector.box_thresh = 0.2`

			`result = super().detect_and_ocr(*args, **kwargs)`

			`self.model.text_detector.box_thresh = backup`
			`return result`