mirror of
https://github.com/Xtao-Labs/iShotaBot.git
synced 2024-11-24 09:15:51 +00:00
102 lines
3.1 KiB
Python
102 lines
3.1 KiB
Python
from typing import List
|
||
|
||
|
||
class cache:
|
||
def __init__(self, start, end):
|
||
self.start = start
|
||
self.end = end
|
||
|
||
def __eq__(self, other):
|
||
if isinstance(other, self.__class__):
|
||
return self.start == other.start and self.end == other.end
|
||
else:
|
||
return False
|
||
|
||
|
||
def merge(intervals: List[cache]):
|
||
"""合并重复区间
|
||
:param intervals: 待去重区间
|
||
:returns: 去重的区间(根据开始位置逆序)
|
||
"""
|
||
if len(intervals) == 0:
|
||
return []
|
||
intervals = sorted(intervals, key=lambda s: s.start)
|
||
outputs = [intervals[0]]
|
||
for s in intervals:
|
||
last_interval = outputs[-1]
|
||
if last_interval.end < s.start:
|
||
outputs.append(s)
|
||
else:
|
||
last_interval.end = max(last_interval.end, s.end)
|
||
return sorted(outputs, key=lambda s: s.start, reverse=True)
|
||
|
||
|
||
def compare(origin: str, dest: str, sensitive: int):
|
||
"""标记重复区间
|
||
:param origin: 待查重文本
|
||
:param dest: 返回的文本
|
||
:param sensitive: 敏感长度
|
||
:returns: 重复区间数组(根据开始位置逆序)
|
||
"""
|
||
length = max(len(origin), len(dest)) ** 2
|
||
matrix = [0 for i in range(length)]
|
||
cache_array: List[cache] = []
|
||
|
||
def convert(index_y: int, index_x: int):
|
||
return index_y * len(origin) + index_x
|
||
|
||
def remove(arr: List[cache], obj: cache):
|
||
return list(filter(lambda s: s != obj, arr))
|
||
|
||
def new_cache(end: int, offset: int):
|
||
start = end - offset
|
||
start = 0 if start < 0 else start + 1
|
||
return cache(start, offset + start)
|
||
|
||
for index, s in enumerate(origin):
|
||
if dest[0] == s:
|
||
matrix[index] = 1
|
||
|
||
for index_x, x in enumerate(dest):
|
||
for index_y, y in enumerate(origin):
|
||
index = convert(index_y, index_x)
|
||
pre_index = convert(index_y - 1, index_x - 1)
|
||
if x == y:
|
||
if index_y == 0:
|
||
matrix[index] = 1
|
||
continue
|
||
matrix[index] = matrix[pre_index] + 1
|
||
if matrix[index] >= sensitive:
|
||
cache_array.append(new_cache(index_y, matrix[index]))
|
||
if matrix[index] > sensitive:
|
||
cache_array = remove(
|
||
cache_array, new_cache(index_y - 1, matrix[pre_index])
|
||
)
|
||
return merge(cache_array)
|
||
|
||
|
||
def render(s: str, flag: List[cache], tag: str):
|
||
"""给重复区间加tag
|
||
:param s: raw text
|
||
:param flag: repeat area Array
|
||
:param tag: used tag, default em
|
||
:returns: tagged text
|
||
"""
|
||
arr = list(s)
|
||
for i in flag:
|
||
arr.insert(i.end, f"</{tag}>")
|
||
arr.insert(i.start, f"<{tag}>")
|
||
return "".join(arr)
|
||
|
||
|
||
def diff_text(origin: str, dest: str, sensitive=4, tag="strong"):
|
||
"""对文本重复对比,给重复部分加tag
|
||
:param origin: 待查重文本
|
||
:param dest: 服务器返回的文本
|
||
:param sensitive: 敏感长度
|
||
:param tag: HTML tag, example a, em
|
||
:returns: 做好标记的文本
|
||
"""
|
||
flag = compare(dest, origin, sensitive)
|
||
return render(dest, flag, tag)
|