支持发病小作文查重、随机生成

This commit is contained in:
xtaodada 2022-03-05 21:41:36 +08:00
parent 571cbd13ef
commit a6eb6ffc3d
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
9 changed files with 480 additions and 3 deletions

114
defs/asoulcnki.py Normal file
View File

@ -0,0 +1,114 @@
import time
from io import BytesIO
from PIL import Image
import httpx
import jinja2
import random
from os import sep
from init import logger
from defs.browser import html_to_pic
from defs.diff import diff_text
# Jinja2 environment with async rendering enabled (render_reply awaits render_async()).
env = jinja2.Environment(enable_async=True)
# Read the article card template source once, at import time.
with open(f"resources{sep}templates{sep}article.html", "r", encoding="utf-8") as f:
    article_data = f.read()
# Compiled template object used by render_reply().
article_tpl = env.from_string(article_data)
async def check_text(text: str):
    """Ask the asoulcnki check API whether *text* duplicates a known reply.

    Returns a ``(image, message)`` pair:

    * ``(None, None)`` when the API reports a non-zero code or anything raises;
    * ``(None, message)`` when no related reply exists or rendering failed;
    * ``(image_path, message)`` on success, where ``message`` is the report text.
    """
    try:
        async with httpx.AsyncClient() as session:
            response = await session.post(
                url='https://asoulcnki.asia/v1/api/check', json={'text': text})
        payload = response.json()
        if payload['code'] != 0:
            return None, None

        body = payload['data']
        if not body['related']:
            return None, '没有找到重复的小作文捏'

        ratio = body['rate']
        # Only the first related entry is reported.
        best_match = body['related'][0]
        link = str(best_match['reply_url']).strip()
        reply = best_match['reply']

        rate_line = '总复制比 {:.2f}%'.format(ratio * 100)
        posted_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(reply["ctime"]))
        report = "\n".join((
            '枝网文本复制检测报告',
            '',
            rate_line,
            f'相似小作文: <a href="{link}">地点</a> - {posted_at}',
        ))

        picture = await render_reply(reply, diff=text)
        # A falsy render result is normalised to None; the report is returned either way.
        return (picture or None), report
    except Exception as e:
        logger.warning(f"Error in check_text: {e}")
        return None, None
async def random_text(keyword: str = ""):
    """Pick a random reply from the asoulcnki ranking API and render it.

    With a *keyword* the first result page filtered by that keyword is used;
    without one, a random page number between 1 and 100 is requested.

    Returns ``(image, message)``: ``(None, None)`` on a non-zero API code or any
    exception, ``(None, message)`` when there are no replies or rendering
    failed, otherwise ``(image_path, link_message)``.
    """
    try:
        query = {'pageSize': 10, 'pageNum': 1, 'timeRangeMode': 0, 'sortMode': 0}
        if not keyword:
            query['pageNum'] = random.randint(1, 100)
        else:
            query['keywords'] = keyword

        async with httpx.AsyncClient() as session:
            response = await session.get(
                url='https://asoulcnki.asia/v1/api/ranking', params=query)
        payload = response.json()
        if payload['code'] != 0:
            return None, None

        candidates = payload['data']['replies']
        if not candidates:
            return None, '没有找到小作文捏'

        reply = random.choice(candidates)
        picture = await render_reply(reply)
        reply_url = f"https://t.bilibili.com/{reply['dynamic_id']}/#reply{reply['rpid']}"
        # The link is returned whether or not an image could be rendered.
        return (picture or None), f'<a href="{reply_url}">转到小作文</a>'
    except Exception as e:
        logger.warning(f"Error in random_text: {e}")
        return None, None
async def render_reply(reply: dict, diff: str = ""):
    """Render a reply as a PNG card and return the path of the written file.

    :param reply: reply record; the keys ``m_name``, ``like_num``,
        ``similar_like_sum``, ``similar_count``, ``content`` and ``ctime`` are read.
    :param diff: when non-empty, ``reply['content']`` is passed through
        ``diff_text(diff, content)`` so repeated parts get tagged.
    :returns: the output path on success, ``None`` if anything raised
        (the error is logged as a warning).
    """
    try:
        content = reply['content']
        article = {
            'username': reply['m_name'],
            'like': reply['like_num'],
            'all_like': reply['similar_like_sum'],
            'quote': reply['similar_count'],
            'text': diff_text(diff, content) if diff else content,
            'time': time.strftime("%Y-%m-%d", time.localtime(reply['ctime'])),
        }
        html = await article_tpl.render_async(article=article)
        img_raw = await html_to_pic(html, wait=0, viewport={"width": 500, "height": 100})

        # Re-encode the screenshot bytes as PNG through Pillow. The format is
        # passed by keyword (the previous code called the builtin format()),
        # and the image is closed once it has been saved.
        png_buffer = BytesIO()
        with Image.open(BytesIO(img_raw)) as img:
            img.save(png_buffer, format="PNG")

        # NOTE: the output path is fixed, so every call overwrites the same file.
        output_path = f"data{sep}asoulcnki.png"
        with open(output_path, 'wb') as out:
            out.write(png_buffer.getvalue())
        return output_path
    except Exception as e:
        logger.warning(f"Error in render_reply: {e}")
        return None

108
defs/browser.py Normal file
View File

@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : yanyongyu
@Date : 2021-03-12 13:42:43
@LastEditors : yanyongyu
@LastEditTime : 2021-11-01 14:05:41
@Description : None
@GitHub : https://github.com/yanyongyu
"""
__author__ = "yanyongyu"
import asyncio
import platform
from contextlib import asynccontextmanager
from os import getcwd
from typing import Optional, AsyncIterator
from playwright.async_api import Page, Browser, async_playwright, Error
from init import logger
from uvicorn.loops import asyncio as _asyncio
from uvicorn import config
def asyncio_setup():
    """Create a fresh event loop and install it as the current event loop."""
    asyncio.set_event_loop(asyncio.new_event_loop())
def _never_reload(self):
    """Getter that always reports reloading as disabled."""
    return False


# Read-only property object; installed on uvicorn's Config by the Windows patch.
should_reload = property(_never_reload)
# On Windows, replace uvicorn's asyncio loop setup with the local asyncio_setup
# and override Config.should_reload with the always-False property defined here.
if platform.system() == "Windows":
    _asyncio.asyncio_setup = asyncio_setup
    config.Config.should_reload = should_reload
    logger.warning("检测到当前为 Windows 系统,已自动注入猴子补丁")
# Shared browser instance; None until init() has launched one.
_browser: Optional[Browser] = None
# Playwright handle assigned by init(); None before that.
_playwright = None
async def init(**kwargs) -> Browser:
    """Start Playwright and launch the shared browser.

    If the first launch raises a Playwright ``Error``, Chromium is installed
    via ``install_browser()`` and the launch is retried once. ``kwargs`` are
    forwarded to ``launch_browser``. The browser is stored in the module-level
    ``_browser`` and returned.
    """
    global _browser, _playwright
    _playwright = await async_playwright().start()
    try:
        launched = await launch_browser(**kwargs)
    except Error:
        await install_browser()
        launched = await launch_browser(**kwargs)
    _browser = launched
    return launched
async def launch_browser(**kwargs) -> Browser:
    """Launch Chromium through the module-level Playwright handle.

    ``kwargs`` are passed unchanged to ``chromium.launch``.
    """
    chromium = _playwright.chromium
    launched = await chromium.launch(**kwargs)
    return launched
async def get_browser(**kwargs) -> Browser:
    """Return the shared browser, initialising it with ``init(**kwargs)`` if unset."""
    if _browser:
        return _browser
    return await init(**kwargs)
@asynccontextmanager
async def get_new_page(**kwargs) -> AsyncIterator[Page]:
    """Async context manager yielding a new page of the shared browser.

    ``kwargs`` are forwarded to ``browser.new_page``; the page is closed when
    the context exits, whether or not the body raised.
    """
    shared_browser = await get_browser()
    fresh_page = await shared_browser.new_page(**kwargs)
    try:
        yield fresh_page
    finally:
        await fresh_page.close()
async def shutdown_browser():
    """Close the shared browser and stop Playwright, if they were started.

    Safe to call when ``init()`` never ran: handles that are still ``None`` are
    skipped instead of raising ``AttributeError``. The module-level handles are
    reset so a later ``get_browser()`` launches a fresh browser rather than
    returning the closed one.
    """
    global _browser, _playwright
    browser, playwright = _browser, _playwright
    # Clear the globals before awaiting so nothing picks up a closing browser.
    _browser = None
    _playwright = None
    try:
        if browser is not None:
            await browser.close()
    finally:
        # Stop Playwright even if closing the browser failed.
        if playwright is not None:
            await playwright.stop()
async def install_browser():
    """Install Chromium by invoking Playwright's command-line entry point.

    The CLI reads its arguments from ``sys.argv``, so ``sys.argv`` is swapped
    for ``['', 'install', 'chromium']`` during the call and restored afterwards
    (previously the process arguments stayed overwritten). ``SystemExit``
    raised by the CLI is swallowed so the caller can continue.
    """
    logger.info("正在安装 chromium")
    import sys
    from playwright.__main__ import main
    saved_argv = sys.argv
    sys.argv = ['', 'install', 'chromium']
    try:
        # main() is called synchronously (not awaited), so it blocks until it returns.
        main()
    except SystemExit:
        pass
    finally:
        sys.argv = saved_argv
async def html_to_pic(
    html: str, wait: int = 0, template_path: str = f"file://{getcwd()}", **kwargs
) -> bytes:
    """Render HTML text to a screenshot.

    Args:
        html (str): HTML text to render.
        wait (int, optional): value passed to ``page.wait_for_timeout`` before
            the screenshot is taken. Defaults to 0.
        template_path (str, optional): location the page navigates to before
            the content is set, e.g. "file:///path/to/template/". Defaults to
            the working directory at import time.
        **kwargs: forwarded to ``get_new_page``.

    Returns:
        bytes: the full-page screenshot data.

    Raises:
        ValueError: if ``template_path`` does not contain "file:".
    """
    # logger.debug(f"html:\n{html}")
    if "file:" not in template_path:
        # Raising a bare string is itself a TypeError in Python 3; raise a real exception.
        raise ValueError("template_path 应该为 file:///path/to/template")
    async with get_new_page(**kwargs) as page:
        await page.goto(template_path)
        await page.set_content(html, wait_until="networkidle")
        await page.wait_for_timeout(wait)
        img_raw = await page.screenshot(full_page=True)
    return img_raw

100
defs/diff.py Normal file
View File

@ -0,0 +1,100 @@
from typing import List
class cache:
    """A ``start``/``end`` index pair marking one repeated region of a text."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __eq__(self, other):
        """Two intervals are equal when both ``start`` and ``end`` match."""
        if not isinstance(other, self.__class__):
            # Let Python try the reflected comparison; ``==`` still ends up
            # False (and ``!=`` True) for unrelated types.
            return NotImplemented
        return self.start == other.start and self.end == other.end

    def __repr__(self):
        return f"{type(self).__name__}(start={self.start!r}, end={self.end!r})"
def merge(intervals: List["cache"]):
    """Merge overlapping or touching intervals.

    :param intervals: intervals to merge (objects with ``start`` and ``end``)
    :returns: merged intervals, ordered by ``start`` descending

    Note: the ``end`` of an input interval is updated in place when a later
    interval extends it.
    """
    if not intervals:
        return []

    ordered = list(intervals)
    ordered.sort(key=lambda item: item.start)

    merged = [ordered[0]]
    for current in ordered:
        tail = merged[-1]
        if tail.end < current.start:
            # Strict gap between the two: start a new merged interval.
            merged.append(current)
        elif current.end > tail.end:
            tail.end = current.end

    merged.sort(key=lambda item: item.start, reverse=True)
    return merged
def compare(origin: str, dest: str, sensitive: int):
    """Mark the regions of ``origin`` that also occur in ``dest``.

    For every pair of positions the length of the common run ending there is
    tracked; each run of at least ``sensitive`` characters contributes the
    interval it covers in ``origin``.

    :param origin: text whose indices the returned intervals refer to
    :param dest: text that ``origin`` is compared against
    :param sensitive: minimum common-run length that counts as a repeat
    :returns: merged ``cache`` intervals (``end`` exclusive), ordered by
        ``start`` descending, as produced by ``merge``

    Fixes over the previous flat-matrix version: empty inputs return ``[]``
    instead of raising ``IndexError``; rows no longer overlap when ``dest`` is
    longer than ``origin``; and the first column no longer reads a
    wrapped-around predecessor cell, which could inflate run lengths. Runs
    ending at ``origin[0]`` are now subject to the same ``sensitive`` test as
    every other position.
    """
    if not origin or not dest:
        return []

    hits: List[cache] = []
    # previous_row[x + 1] is the common-run length ending at
    # (origin[y - 1], dest[x]); slot 0 is a permanent zero border so the
    # first column needs no special case.
    previous_row = [0] * (len(dest) + 1)
    for index_y, char_y in enumerate(origin):
        current_row = [0] * (len(dest) + 1)
        for index_x, char_x in enumerate(dest):
            if char_x != char_y:
                continue
            run = previous_row[index_x] + 1
            current_row[index_x + 1] = run
            if run >= sensitive:
                # The run covers origin[index_y - run + 1 : index_y + 1].
                # Shorter prefixes of the same run were recorded earlier and
                # are absorbed by merge().
                hits.append(cache(index_y - run + 1, index_y + 1))
        previous_row = current_row
    return merge(hits)
def render(s: str, flag: List["cache"], tag: str):
    """Wrap each marked region of ``s`` in ``<tag>…</tag>``.

    :param s: raw text
    :param flag: regions to wrap, in the order they should be applied
    :param tag: tag name to insert
    :returns: the text with tags inserted
    """
    opening = f'<{tag}>'
    closing = f'</{tag}>'
    pieces = list(s)
    for region in flag:
        # Insert the closing tag first so the start index is still valid.
        pieces.insert(region.end, closing)
        pieces.insert(region.start, opening)
    return ''.join(pieces)
def diff_text(origin: str, dest: str, sensitive=4, tag='strong'):
    """Tag the parts of ``dest`` that repeat text from ``origin``.

    :param origin: text being checked
    :param dest: text returned by the server; this is the text that gets tagged
    :param sensitive: minimum repeat length, forwarded to ``compare``
    :param tag: HTML tag name, e.g. a, em
    :returns: ``dest`` with the repeated regions wrapped in the tag
    """
    # compare() is called with dest first, so the regions index into dest.
    return render(dest, compare(dest, origin, sensitive), tag)

View File

@ -1,7 +1,4 @@
import logging
from pyrogram.types import User
from defs.glover import ipv6
from pyrogram import Client
from logging import getLogger, INFO, ERROR, StreamHandler, basicConfig

View File

@ -10,6 +10,8 @@ from defs.ask import how_many, what_time, how_long, hif, handle_pers, who
filters.regex(r"^问"))
async def ask(client: Client, message: Message):
msg = message
if not message.text:
raise ContinuePropagation
message = message.text.strip()[1:]
handled = False
if re.findall("几|多少", message):

45
modules/asoulcnki.py Normal file
View File

@ -0,0 +1,45 @@
from pyrogram import Client, filters, ContinuePropagation
from pyrogram.types import Message
from defs.asoulcnki import check_text, random_text
from init import user_me
@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["查重", f"查重@{user_me.username}"]))
async def asoulcnki_process(client: Client, message: Message):
    """Handle the duplicate-check command.

    The text to check is the replied-to message's text, or otherwise whatever
    follows the command. Empty, too long (>= 1000) or too short (<= 10) input
    is answered with a hint. Otherwise ``check_text`` is called and its image
    and/or report is sent back. ``ContinuePropagation`` is raised on every path.
    """
    replied = message.reply_to_message
    if replied:
        text = replied.text
    else:
        # The command filter also matches captions, in which case message.text
        # is None; fall back to the caption instead of crashing on .split().
        command_line = message.text or message.caption or ""
        text = " ".join(command_line.split(" ")[1:])
    if not text:
        await message.reply("请输入文本。")
        raise ContinuePropagation
    if len(text) >= 1000:
        await message.reply('文本过长,长度须在10-1000之间', quote=True)
        raise ContinuePropagation
    elif len(text) <= 10:
        await message.reply('文本过短,长度须在10-1000之间', quote=True)
        raise ContinuePropagation
    image, caption = await check_text(text)
    if image:
        await message.reply_photo(image, quote=True, caption=caption)
    elif caption:
        await message.reply(caption, quote=True)
    else:
        await message.reply('出错了,请稍后再试', quote=True)
    raise ContinuePropagation
@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["小作文", f"小作文@{user_me.username}"]))
async def asoulcnki_random(client: Client, message: Message):
    """Handle the random-article command.

    The keyword is the replied-to message's text, or otherwise whatever follows
    the command; it may be empty. The image and/or link from ``random_text`` is
    sent back, or an error hint when it returned nothing.
    ``ContinuePropagation`` is raised on every path.
    """
    replied = message.reply_to_message
    if replied:
        keyword = replied.text
    else:
        # The command filter also matches captions, in which case message.text
        # is None; fall back to the caption instead of crashing on .split().
        command_line = message.text or message.caption or ""
        keyword = " ".join(command_line.split(" ")[1:])
    if not keyword:
        keyword = ""
    image, caption = await random_text(keyword)
    if image:
        await message.reply_photo(image, quote=True, caption=caption)
    elif caption:
        await message.reply(caption, quote=True)
    else:
        await message.reply('出错了,请稍后再试', quote=True)
    raise ContinuePropagation

View File

@ -11,6 +11,7 @@ des = """本机器人特性:
我有个朋友
简易版问与答
网易云音乐
查重小作文
"""

View File

@ -7,3 +7,6 @@ coloredlogs
qrcode
pyncm
mutagen
playwright
uvicorn
jinja2

View File

@ -0,0 +1,107 @@
<html>
<head>
<style>
  /* Icon font loaded from a remote URL; the .icon-* rules below map to its glyphs. */
  @font-face {
    font-family: "iconfont";
    src: url("https://at.alicdn.com/t/font_2949364_zcqb63am41o.woff2?t=1637832961945")
      format("woff2");
  }
  /* Base class for every icon span. */
  .iconfont {
    font-family: "iconfont" !important;
    font-size: 16px;
    font-style: normal;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
  }
  /* One glyph code point per icon, used in the header and footer of the card. */
  .icon-like:before {
    content: "\e6bf";
  }
  .icon-all-like:before {
    content: "\e6c1";
  }
  .icon-qoute:before {
    content: "\e620";
  }
  .icon-user:before {
    content: "\e6f4";
  }
  .icon-time:before {
    content: "\e703";
  }
  /* Page background behind the card. */
  body {
    background-color: rgba(243, 244, 246, 1);
  }
  /* The card: header, text and footer stacked vertically. */
  .article {
    display: flex;
    flex-direction: column;
    border-radius: 5px;
    border-color: rgba(229, 231, 235, 1);
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
    background-color: rgba(249, 250, 251, 1);
  }
  /* Header row. */
  .article-header {
    display: flex;
    flex-direction: row;
    align-items: center;
    margin-left: 8px;
    margin-right: 8px;
    padding: 8px;
  }
  /* Footer row: its items are spread across the full width. */
  .article-footer {
    display: flex;
    flex-direction: row;
    justify-content: space-between;
    align-items: center;
    margin-left: 8px;
    margin-right: 8px;
    padding: 8px;
  }
  /* Article body: pre-wrap keeps the text's own line breaks and spaces. */
  .article-text {
    margin-left: 8px;
    margin-right: 8px;
    padding: 8px;
    border-radius: 5px;
    background-color: rgba(229, 231, 235, 1);
    line-height: 1.625;
    word-break: break-all;
    white-space: pre-wrap;
  }
  /* <strong> inside the body is shown as a red highlight at normal weight. */
  .article-text strong {
    font-weight: 400;
    background-color: rgba(243, 244, 246, 1);
    color: rgba(239, 68, 68, 1);
  }
</style>
</head>
<body>
  {#- Card layout: header (user name), article text, footer (counters and date).
      Icon spans are closed explicitly: "<span ... />" is not self-closing in
      HTML, so the text after it used to be parsed inside the icon span. -#}
  <div class="image">
    <div class="article">
      <div class="article-header">
        <div>
          <span class="iconfont icon-user"></span>
          {#- The user name is plain text, so it is HTML-escaped. #}
          {{ article["username"] | e }}
        </div>
      </div>
      {#- Emitted unescaped and kept on one line: the text may carry highlight
          tags, and the container uses white-space: pre-wrap. #}
      <div class="article-text">{{ article["text"] }}</div>
      <div class="article-footer">
        <div>
          <span class="iconfont icon-all-like"></span>
          {{ article["all_like"] }}
        </div>
        <div>
          <span class="iconfont icon-like"></span>
          {{ article["like"] }}
        </div>
        <div>
          <span class="iconfont icon-qoute"></span>
          {{ article["quote"] }}
        </div>
        <div>
          <span class="iconfont icon-time"></span>
          {{ article["time"] }}
        </div>
      </div>
    </div>
  </div>
</body>
</html>