async def check_text(text: str):
    """Run *text* through the asoulcnki duplicate-check API.

    Args:
        text: The text to check.

    Returns:
        A ``(image_path, message)`` tuple:

        * ``(path, report)`` when a similar article was found and rendered;
        * ``(None, report)`` when rendering failed but the report is available;
        * ``(None, hint)`` when the API found nothing similar;
        * ``(None, None)`` when the API reported an error or any exception
          was raised (the exception is logged, never propagated).
    """
    try:
        url = 'https://asoulcnki.asia/v1/api/check'
        async with httpx.AsyncClient() as client:
            resp = await client.post(url=url, json={'text': text})
        result = resp.json()

        if result['code'] != 0:
            return None, None

        data = result['data']
        if not data['related']:
            return None, '没有找到重复的小作文捏'

        rate = data['rate']
        # Only the best match (first entry) is reported.
        related = data['related'][0]
        reply_url = str(related['reply_url']).strip()
        reply = related['reply']
        created = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(reply["ctime"]))

        # NOTE(review): reply_url was computed but never shown to the user;
        # it is now attached to the location label as an HTML link. This
        # assumes the client parses HTML in messages/captions -- confirm.
        report = "\n".join([
            '枝网文本复制检测报告',
            '',
            '总复制比 {:.2f}%'.format(rate * 100),
            f'相似小作文: <a href="{reply_url}">地点</a> - {created}',
        ])

        # The user's text is passed as ``diff`` so the overlapping parts get
        # highlighted in the rendered picture.
        image = await render_reply(reply, diff=text)
        return (image or None), report
    except Exception as e:
        # Top-level boundary for the bot command: log and degrade gracefully.
        logger.warning(f"Error in check_text: {e}")
        return None, None


async def random_text(keyword: str = ""):
    """Fetch one article from the asoulcnki ranking API and render it.

    Args:
        keyword: Optional search keyword. When empty, a random ranking page
            (1-100) is requested instead of a keyword search.

    Returns:
        A ``(image_path, message)`` tuple with the same conventions as
        ``check_text``: ``(None, None)`` signals an API error or exception.
    """
    try:
        url = 'https://asoulcnki.asia/v1/api/ranking'
        params = {
            'pageSize': 10,
            'pageNum': 1,
            'timeRangeMode': 0,
            'sortMode': 0,
        }
        if keyword:
            params['keywords'] = keyword
        else:
            params['pageNum'] = random.randint(1, 100)

        async with httpx.AsyncClient() as client:
            resp = await client.get(url=url, params=params)
        result = resp.json()

        if result['code'] != 0:
            return None, None

        replies = result['data']['replies']
        if not replies:
            return None, '没有找到小作文捏'

        reply = random.choice(replies)
        image = await render_reply(reply)
        reply_url = (
            f"https://t.bilibili.com/{reply['dynamic_id']}/#reply{reply['rpid']}"
        )
        # NOTE(review): reply_url was built but never used; it is now the
        # target of the "jump to article" link (assumes HTML parsing).
        caption = f'<a href="{reply_url}">转到小作文</a>'
        return (image or None), caption
    except Exception as e:
        logger.warning(f"Error in random_text: {e}")
        return None, None


async def render_reply(reply: dict, diff: str = ""):
    """Render an API reply into a PNG file and return the file path.

    Args:
        reply: Reply dict as returned by the API; the keys ``m_name``,
            ``like_num``, ``similar_like_sum``, ``similar_count``,
            ``content`` and ``ctime`` are read.
        diff: Optional text to compare against ``reply['content']``; when
            given, overlapping parts are tagged via ``diff_text``.

    Returns:
        The path of the written PNG, or ``None`` if anything failed (the
        exception is logged).
    """
    try:
        article = {
            'username': reply['m_name'],
            'like': reply['like_num'],
            'all_like': reply['similar_like_sum'],
            'quote': reply['similar_count'],
            'text': diff_text(diff, reply['content']) if diff else reply['content'],
            'time': time.strftime("%Y-%m-%d", time.localtime(reply['ctime'])),
        }

        html = await article_tpl.render_async(article=article)
        img_raw = await html_to_pic(
            html, wait=0, viewport={"width": 500, "height": 100})

        # Decode the screenshot and re-encode it as PNG.
        img = Image.open(BytesIO(img_raw))
        buffer = BytesIO()
        # Was ``format('PNG')``: a call to the builtin that only worked
        # because it returns its argument unchanged.
        img.save(buffer, format='PNG')

        # NOTE(review): the output path is fixed, so overlapping requests
        # overwrite each other's picture.
        path = f"data{sep}asoulcnki.png"
        with open(path, 'wb') as out:
            out.write(buffer.getvalue())
        return path
    except Exception as e:
        logger.warning(f"Error in render_reply: {e}")
        return None
def asyncio_setup():
    """Create a fresh asyncio event loop and install it as the current one.

    Used as the replacement for ``uvicorn.loops.asyncio.asyncio_setup`` on
    Windows (see the patch below).
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)


@property
def should_reload(self):
    """Property that always reports ``False``.

    Patched onto ``uvicorn.config.Config`` on Windows (see below).
    """
    return False


if platform.system() == "Windows":
    # Monkey patches applied only on Windows.
    # NOTE(review): presumably works around event-loop incompatibilities
    # between uvicorn and playwright on Windows -- confirm.
    _asyncio.asyncio_setup = asyncio_setup
    config.Config.should_reload = should_reload
    logger.warning("检测到当前为 Windows 系统,已自动注入猴子补丁")

# Lazily created module-wide singletons, managed by init()/shutdown_browser().
_browser: Optional[Browser] = None
_playwright = None


async def init(**kwargs) -> Browser:
    """Start Playwright (once) and launch the shared Chromium browser.

    If the first launch raises a Playwright ``Error``, Chromium is installed
    and the launch is retried once.

    Args:
        **kwargs: Forwarded to ``chromium.launch``.

    Returns:
        The launched browser, also stored in the module-level ``_browser``.
    """
    global _browser
    global _playwright
    # Reuse an already running driver instead of starting (and leaking) a
    # second one when init() is called again.
    if _playwright is None:
        _playwright = await async_playwright().start()
    try:
        _browser = await launch_browser(**kwargs)
    except Error:
        await install_browser()
        _browser = await launch_browser(**kwargs)
    return _browser


async def launch_browser(**kwargs) -> Browser:
    """Launch Chromium through the already started Playwright driver."""
    return await _playwright.chromium.launch(**kwargs)


async def get_browser(**kwargs) -> Browser:
    """Return the shared browser, initialising it on first use.

    ``kwargs`` are only used when the browser has to be launched.
    """
    return _browser or await init(**kwargs)


@asynccontextmanager
async def get_new_page(**kwargs) -> AsyncIterator[Page]:
    """Async context manager yielding a new page that is closed on exit.

    Args:
        **kwargs: Forwarded to ``browser.new_page``.
    """
    browser = await get_browser()
    page = await browser.new_page(**kwargs)
    try:
        yield page
    finally:
        await page.close()


async def shutdown_browser():
    """Close the shared browser and stop Playwright, if they were started.

    Safe to call when nothing was initialised. The module-level handles are
    reset first so that a later ``get_browser()`` launches a fresh browser
    instead of returning the closed one.
    """
    global _browser
    global _playwright
    browser, playwright = _browser, _playwright
    _browser = None
    _playwright = None
    try:
        if browser is not None:
            await browser.close()
    finally:
        # Stop the driver even if closing the browser failed.
        if playwright is not None:
            await playwright.stop()


async def install_browser():
    """Install Chromium by invoking Playwright's command-line entry point."""
    logger.info("正在安装 chromium")
    import sys
    from playwright.__main__ import main
    saved_argv = sys.argv
    sys.argv = ['', 'install', 'chromium']
    try:
        main()
    except SystemExit:
        # Expected here: treated as the CLI entry point finishing.
        pass
    finally:
        # Do not leave the process-wide argv clobbered.
        sys.argv = saved_argv
class cache:
    """A half-open interval ``[start, end)`` of character positions."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.start == other.start and self.end == other.end
        return False

    def __repr__(self):
        return f"{type(self).__name__}({self.start!r}, {self.end!r})"


def merge(intervals: List[cache]):
    """Merge overlapping or touching intervals.

    The input objects are not modified; new ``cache`` objects are returned.

    :param intervals: intervals to merge, in any order
    :returns: merged intervals, sorted by start position in descending order
    """
    if not intervals:
        return []
    ordered = sorted(intervals, key=lambda s: s.start)
    # Copy so that the caller's objects are never mutated.
    outputs = [cache(ordered[0].start, ordered[0].end)]
    for s in ordered[1:]:
        last_interval = outputs[-1]
        if last_interval.end < s.start:
            outputs.append(cache(s.start, s.end))
        else:
            last_interval.end = max(last_interval.end, s.end)
    # Starts are strictly increasing here, so reversing gives descending order.
    return outputs[::-1]


def compare(origin: str, dest: str, sensitive: int):
    """Find the regions of *origin* that also occur in *dest*.

    A region is reported when a common substring of at least ``sensitive``
    characters covers it.

    :param origin: text in which regions are located
    :param dest: text to compare against
    :param sensitive: minimum length of a common substring
    :returns: merged intervals over *origin*, sorted by start descending
    """
    if not origin or not dest:
        return []

    # Longest-common-suffix table, kept one row at a time:
    # previous[j + 1] is the length of the common run ending at
    # origin[i - 1] / dest[j].
    width = len(dest) + 1
    previous = [0] * width
    # start -> end of the longest run seen from that start. ``end`` only
    # grows with the outer loop, so the last write is always the longest.
    longest = {}
    for index_y, y in enumerate(origin):
        current = [0] * width
        for index_x, x in enumerate(dest):
            if x != y:
                continue
            run = previous[index_x] + 1
            current[index_x + 1] = run
            if run >= sensitive:
                longest[index_y - run + 1] = index_y + 1
        previous = current
    return merge([cache(start, end) for start, end in longest.items()])


def render(s: str, flag: List[cache], tag: str):
    """Wrap every interval of *s* in an opening and closing HTML tag.

    :param s: raw text
    :param flag: intervals to wrap (expected not to overlap, e.g. the
        output of ``merge``)
    :param tag: tag name, e.g. ``em`` or ``strong``
    :returns: tagged text
    """
    arr = list(s)
    # Insert from the back so earlier positions are not shifted.
    for i in sorted(flag, key=lambda c: c.start, reverse=True):
        arr.insert(i.end, f'</{tag}>')
        arr.insert(i.start, f'<{tag}>')
    return ''.join(arr)


def diff_text(origin: str, dest: str, sensitive=4, tag='strong'):
    """Tag the parts of *dest* that also occur in *origin*.

    :param origin: text being checked
    :param dest: text returned by the server; this is the text that is tagged
    :param sensitive: minimum length of a common substring
    :param tag: HTML tag name, e.g. ``a``, ``em``
    :returns: *dest* with the repeated parts wrapped in ``tag``
    """
    flag = compare(dest, origin, sensitive)
    return render(dest, flag, tag)
def _command_argument(message: Message) -> str:
    """Return the text a command should operate on.

    The text of the replied-to message wins when the command is a reply;
    otherwise everything after the first space of the command message is
    used. Returns ``""`` when no text is available.
    """
    if message.reply_to_message:
        return message.reply_to_message.text or ""
    # ``text`` is None for media messages, where the command is in the caption.
    raw = message.text or message.caption or ""
    return " ".join(raw.split(" ")[1:])


async def _send_result(message: Message, image, caption):
    """Reply with the picture, the text alone, or a generic error message."""
    if image:
        await message.reply_photo(image, quote=True, caption=caption)
    elif caption:
        await message.reply(caption, quote=True)
    else:
        await message.reply('出错了,请稍后再试', quote=True)


@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["查重", f"查重@{user_me.username}"]))
async def asoulcnki_process(client: Client, message: Message):
    """Handle the duplicate-check command.

    Validates the text length, runs ``check_text`` and replies with the
    result. Always ends by raising ``ContinuePropagation``.
    """
    text = _command_argument(message)
    if not text:
        await message.reply("请输入文本。")
        raise ContinuePropagation
    if len(text) >= 1000:
        await message.reply('文本过长,长度须在10-1000之间', quote=True)
        raise ContinuePropagation
    if len(text) <= 10:
        await message.reply('文本过短,长度须在10-1000之间', quote=True)
        raise ContinuePropagation
    image, report = await check_text(text)
    await _send_result(message, image, report)
    raise ContinuePropagation


@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["小作文", f"小作文@{user_me.username}"]))
async def asoulcnki_random(client: Client, message: Message):
    """Handle the random-article command.

    The optional argument (or the replied-to text) is used as the search
    keyword for ``random_text``. Always ends by raising
    ``ContinuePropagation``.
    """
    keyword = _command_argument(message)
    image, caption = await random_text(keyword)
    await _send_result(message, image, caption)
    raise ContinuePropagation
a/resources/templates/article.html b/resources/templates/article.html new file mode 100644 index 0000000..edcdf01 --- /dev/null +++ b/resources/templates/article.html @@ -0,0 +1,107 @@ + +
+ + + +