支持发病小作文查重、随机生成

This commit is contained in:
xtaodada 2022-03-05 21:41:36 +08:00
parent 571cbd13ef
commit a6eb6ffc3d
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
9 changed files with 480 additions and 3 deletions

114
defs/asoulcnki.py Normal file
View File

@ -0,0 +1,114 @@
import time
from io import BytesIO
from PIL import Image
import httpx
import jinja2
import random
from os import sep
from init import logger
from defs.browser import html_to_pic
from defs.diff import diff_text
# Read the article card template once, when the module is imported.
with open(f"resources{sep}templates{sep}article.html", "r", encoding="utf-8") as f:
    article_data = f.read()

# The environment is created with async rendering enabled so that the
# compiled template can be rendered with ``render_async``.
env = jinja2.Environment(enable_async=True)
article_tpl = env.from_string(article_data)
async def check_text(text: str):
    """Ask the asoulcnki duplicate-check API about ``text``.

    Returns a ``(image, message)`` pair:

    * ``(None, None)`` when the request fails, the API answers with a
      non-zero ``code``, or anything raises along the way;
    * ``(None, message)`` when nothing similar was found, or when the
      report could not be rendered to an image;
    * ``(image_path, message)`` otherwise, where ``message`` is the
      textual report for the first entry of ``related``.
    """
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                url='https://asoulcnki.asia/v1/api/check', json={'text': text})
        payload = resp.json()
        if payload['code'] != 0:
            return None, None

        data = payload['data']
        if not data['related']:
            return None, '没有找到重复的小作文捏'

        rate = data['rate']
        best_match = data['related'][0]
        reply_url = str(best_match['reply_url']).strip()
        reply = best_match['reply']
        posted_at = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(reply["ctime"]))
        report = "\n".join([
            '枝网文本复制检测报告',
            '',
            f'总复制比 {rate * 100:.2f}%',
            f'相似小作文: <a href="{reply_url}">地点</a> - {posted_at}',
        ])

        image = await render_reply(reply, diff=text)
        # A falsy image means rendering failed; the text report is still sent.
        return image or None, report
    except Exception as e:
        logger.warning(f"Error in check_text: {e}")
        return None, None
async def random_text(keyword: str = ""):
    """Pick a random reply from the asoulcnki ranking API.

    With a ``keyword`` the first result page for that keyword is queried;
    without one, a random page between 1 and 100 is queried instead.

    Returns ``(image, message)``: ``(None, None)`` on any error or non-zero
    API ``code``, ``(None, message)`` when no reply was found or rendering
    failed, and ``(image_path, message)`` otherwise. On success ``message``
    is an HTML link to the chosen reply.
    """
    try:
        query = {
            'pageSize': 10,
            'pageNum': 1,
            'timeRangeMode': 0,
            'sortMode': 0,
        }
        if keyword:
            query['keywords'] = keyword
        else:
            query['pageNum'] = random.randint(1, 100)

        async with httpx.AsyncClient() as client:
            resp = await client.get(
                url='https://asoulcnki.asia/v1/api/ranking', params=query)
        payload = resp.json()
        if payload['code'] != 0:
            return None, None

        candidates = payload['data']['replies']
        if not candidates:
            return None, '没有找到小作文捏'

        reply = random.choice(candidates)
        image = await render_reply(reply)
        reply_url = f"https://t.bilibili.com/{reply['dynamic_id']}/#reply{reply['rpid']}"
        # A falsy image means rendering failed; the link is still returned.
        return image or None, f'<a href="{reply_url}">转到小作文</a>'
    except Exception as e:
        logger.warning(f"Error in random_text: {e}")
        return None, None
async def render_reply(reply: dict, diff: str = ""):
    """Render one reply as a PNG card and return the path of the image.

    Args:
        reply: reply record; the keys ``m_name``, ``like_num``,
            ``similar_like_sum``, ``similar_count``, ``content`` and
            ``ctime`` are read from it.
        diff: optional text to compare the reply against. When given, the
            reply content is passed through ``diff_text`` so the repeated
            parts are wrapped in markup.

    Returns:
        The path ``data/asoulcnki.png`` on success, ``None`` if anything
        raised (the error is logged as a warning).
    """
    # Local import: the name is only needed here.
    from html import escape

    try:
        content = reply['content']
        if diff:
            # diff_text returns markup (it wraps matches in tags), so its
            # result must be handed to the template as-is.
            text = diff_text(diff, content)
        else:
            # The template is rendered without autoescaping and the text comes
            # from a remote service, so neutralise markup characters here.
            text = escape(str(content))

        article = {
            'username': escape(str(reply['m_name'])),
            'like': reply['like_num'],
            'all_like': reply['similar_like_sum'],
            'quote': reply['similar_count'],
            'text': text,
            'time': time.strftime("%Y-%m-%d", time.localtime(reply['ctime'])),
        }
        page_html = await article_tpl.render_async(article=article)
        img_raw = await html_to_pic(
            page_html, wait=0, viewport={"width": 500, "height": 100})

        # Decode the screenshot and re-encode it as PNG before writing it out.
        img = Image.open(BytesIO(img_raw))
        png_buffer = BytesIO()
        img.save(png_buffer, format='PNG')

        # NOTE(review): the output path is fixed, so overlapping calls would
        # overwrite each other's image — confirm whether that can happen.
        output_path = f"data{sep}asoulcnki.png"
        with open(output_path, 'wb') as out:
            out.write(png_buffer.getvalue())
        return output_path
    except Exception as e:
        logger.warning(f"Error in render_reply: {e}")
        return None

108
defs/browser.py Normal file
View File

@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : yanyongyu
@Date : 2021-03-12 13:42:43
@LastEditors : yanyongyu
@LastEditTime : 2021-11-01 14:05:41
@Description : None
@GitHub : https://github.com/yanyongyu
"""
__author__ = "yanyongyu"
import asyncio
import platform
from contextlib import asynccontextmanager
from os import getcwd
from typing import Optional, AsyncIterator
from playwright.async_api import Page, Browser, async_playwright, Error
from init import logger
from uvicorn.loops import asyncio as _asyncio
from uvicorn import config
def asyncio_setup():
    """Create a new event loop and install it as the current one."""
    asyncio.set_event_loop(asyncio.new_event_loop())


@property
def should_reload(self):
    """Property that always evaluates to ``False``."""
    return False


if platform.system() == "Windows":
    # On Windows, replace uvicorn's loop setup and its ``should_reload``
    # property with the two stand-ins defined above.
    config.Config.should_reload = should_reload
    _asyncio.asyncio_setup = asyncio_setup
    logger.warning("检测到当前为 Windows 系统，已自动注入猴子补丁")
# Module-level handles shared by every helper below; both stay ``None``
# until ``init`` has run.
_playwright = None
_browser: Optional[Browser] = None


async def init(**kwargs) -> Browser:
    """Start Playwright and launch the shared browser.

    If launching raises a Playwright ``Error``, the browser is installed
    via ``install_browser`` and the launch is attempted once more.

    Args:
        **kwargs: forwarded to ``launch_browser``.

    Returns:
        The launched browser, which is also stored in ``_browser``.
    """
    global _browser, _playwright
    _playwright = await async_playwright().start()
    try:
        browser = await launch_browser(**kwargs)
    except Error:
        await install_browser()
        browser = await launch_browser(**kwargs)
    _browser = browser
    return browser
async def launch_browser(**kwargs) -> Browser:
    """Launch Chromium through the module-level Playwright handle.

    Args:
        **kwargs: forwarded unchanged to ``chromium.launch``.
    """
    chromium = _playwright.chromium
    browser = await chromium.launch(**kwargs)
    return browser
async def get_browser(**kwargs) -> Browser:
    """Return the shared browser, calling ``init`` when none is stored yet.

    Args:
        **kwargs: forwarded to ``init``; unused if a browser already exists.
    """
    if _browser:
        return _browser
    return await init(**kwargs)
@asynccontextmanager
async def get_new_page(**kwargs) -> AsyncIterator[Page]:
    """Async context manager yielding a fresh page of the shared browser.

    The page is closed when the ``async with`` body exits, whether or not
    it raised.

    Args:
        **kwargs: forwarded to ``browser.new_page``.
    """
    page = await (await get_browser()).new_page(**kwargs)
    try:
        yield page
    finally:
        await page.close()
async def shutdown_browser():
    """Close the shared browser and stop Playwright, if they were started.

    Calling this before anything was started is a no-op. The module-level
    handles are reset to ``None`` first, so a later ``get_browser`` call
    launches a new browser instead of returning the closed one.
    """
    global _browser, _playwright
    browser, playwright = _browser, _playwright
    _browser = None
    _playwright = None
    try:
        if browser is not None:
            await browser.close()
    finally:
        # Stop Playwright even if closing the browser raised.
        if playwright is not None:
            await playwright.stop()
async def install_browser():
    """Install Chromium by running Playwright's command-line entry point.

    The entry point reads its arguments from ``sys.argv``, so ``sys.argv``
    is temporarily replaced with ``['', 'install', 'chromium']`` and put
    back afterwards. A ``SystemExit`` raised by the entry point is swallowed.
    """
    logger.info("正在安装 chromium")
    import sys
    from playwright.__main__ import main

    saved_argv = sys.argv
    sys.argv = ['', 'install', 'chromium']
    try:
        # NOTE(review): main() is a plain synchronous call, so nothing else on
        # the event loop runs until the installation returns.
        main()
    except SystemExit:
        pass
    finally:
        # Restore the real process arguments.
        sys.argv = saved_argv
async def html_to_pic(
    html: str, wait: int = 0, template_path: str = f"file://{getcwd()}", **kwargs
) -> bytes:
    """Render an HTML string in a browser page and screenshot it.

    Args:
        html (str): HTML text to render.
        wait (int, optional): value passed to ``page.wait_for_timeout``
            before the screenshot is taken. Defaults to 0.
        template_path (str, optional): URL the page navigates to before the
            content is set, e.g. "file:///path/to/template/". Must contain
            "file:". Defaults to the working directory at import time.
        **kwargs: forwarded to ``get_new_page``.

    Returns:
        bytes: the full-page screenshot.

    Raises:
        ValueError: if ``template_path`` does not contain "file:".
    """
    if "file:" not in template_path:
        # Raising a bare string is itself a TypeError in Python 3.
        raise ValueError("template_path 应该为 file:///path/to/template")
    async with get_new_page(**kwargs) as page:
        # Navigate first, then replace the document with the given HTML —
        # presumably so relative resources resolve against template_path.
        await page.goto(template_path)
        await page.set_content(html, wait_until="networkidle")
        await page.wait_for_timeout(wait)
        img_raw = await page.screenshot(full_page=True)
    return img_raw

100
defs/diff.py Normal file
View File

@ -0,0 +1,100 @@
from typing import List
class cache:
    """A ``start``/``end`` pair describing one marked region of a text.

    Two instances compare equal when both bounds are equal; comparison with
    any other type is ``False``.
    """

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return (self.start, self.end) == (other.start, other.end)
def merge(intervals: List["cache"]):
    """Merge overlapping or touching intervals.

    Intervals are merged in place: the surviving objects come from the
    input and their ``end`` may be enlarged.

    :param intervals: intervals to de-duplicate
    :returns: merged intervals ordered by descending ``start``
    """
    if not intervals:
        return []

    ordered = sorted(intervals, key=lambda item: item.start)
    merged = [ordered[0]]
    for current in ordered:
        tail = merged[-1]
        if current.start > tail.end:
            # Gap between the two: start a new merged interval.
            merged.append(current)
        else:
            tail.end = max(tail.end, current.end)

    merged.sort(key=lambda item: item.start, reverse=True)
    return merged
def compare(origin: str, dest: str, sensitive: int):
    """Mark the parts of ``origin`` that also occur in ``dest``.

    A position of ``origin`` is reported when it belongs to a run of at
    least ``sensitive`` consecutive characters that appears verbatim
    somewhere in ``dest``.

    :param origin: text whose character positions are reported
    :param dest: text that ``origin`` is matched against
    :param sensitive: minimum run length that counts as a repeat
    :returns: merged ``cache`` intervals indexing into ``origin`` (``end``
        is one past the last matched character), as returned by ``merge``
    """
    if not origin or not dest:
        # Nothing can match; this also avoids indexing into an empty string.
        return []

    cache_array: List[cache] = []
    # Common-substring DP kept one row at a time. Slot ``x + 1`` of a row
    # holds the length of the common run ending at origin[y] / dest[x];
    # slot 0 is a permanent zero border, so the first row and first column
    # need no special casing and no cell is ever read before it is written.
    previous_row = [0] * (len(dest) + 1)
    for index_y, y in enumerate(origin):
        current_row = [0] * (len(dest) + 1)
        for index_x, x in enumerate(dest):
            if x != y:
                continue
            run = previous_row[index_x] + 1
            current_row[index_x + 1] = run
            if run >= sensitive:
                # The run covers origin[index_y - run + 1 : index_y + 1].
                # Shorter prefixes of the same run were recorded earlier and
                # are absorbed when the intervals are merged.
                cache_array.append(cache(index_y - run + 1, index_y + 1))
        previous_row = current_row
    return merge(cache_array)
def render(s: str, flag: List["cache"], tag: str):
    """Wrap the marked regions of ``s`` in an HTML tag.

    The text itself is HTML-escaped character by character, so markup
    characters in ``s`` are shown literally instead of being parsed, while
    the interval indices still refer to positions in the raw text.

    :param s: raw text
    :param flag: regions to mark; must not overlap and must be ordered by
        descending ``start`` so earlier insertions do not shift later ones
    :param tag: tag name to wrap each region in, e.g. ``em``
    :returns: escaped text with ``<tag>``/``</tag>`` inserted
    """
    # Local import: the name is only needed here.
    from html import escape

    arr = [escape(ch) for ch in s]
    for i in flag:
        # Insert the closing tag first so ``i.start`` is still valid.
        arr.insert(i.end, f'</{tag}>')
        arr.insert(i.start, f'<{tag}>')
    return ''.join(arr)
def diff_text(origin: str, dest: str, sensitive=4, tag='strong'):
    """Tag the parts of ``dest`` that repeat text from ``origin``.

    :param origin: text being checked for duplication
    :param dest: text returned by the server; this is the text that is
        marked up and returned
    :param sensitive: minimum length of a repeated run
    :param tag: HTML tag name used for the marks, e.g. ``a`` or ``em``
    :returns: ``dest`` with the repeated regions wrapped in ``tag``
    """
    # The regions index into ``dest``, hence the swapped argument order.
    repeated = compare(dest, origin, sensitive)
    return render(dest, repeated, tag)

View File

@ -1,7 +1,4 @@
import logging import logging
from pyrogram.types import User
from defs.glover import ipv6 from defs.glover import ipv6
from pyrogram import Client from pyrogram import Client
from logging import getLogger, INFO, ERROR, StreamHandler, basicConfig from logging import getLogger, INFO, ERROR, StreamHandler, basicConfig

View File

@ -10,6 +10,8 @@ from defs.ask import how_many, what_time, how_long, hif, handle_pers, who
filters.regex(r"^问")) filters.regex(r"^问"))
async def ask(client: Client, message: Message): async def ask(client: Client, message: Message):
msg = message msg = message
if not message.text:
raise ContinuePropagation
message = message.text.strip()[1:] message = message.text.strip()[1:]
handled = False handled = False
if re.findall("几|多少", message): if re.findall("几|多少", message):

45
modules/asoulcnki.py Normal file
View File

@ -0,0 +1,45 @@
from pyrogram import Client, filters, ContinuePropagation
from pyrogram.types import Message
from defs.asoulcnki import check_text, random_text
from init import user_me
@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["查重", f"查重@{user_me.username}"]))
async def asoulcnki_process(client: Client, message: Message):
    """Handle the duplicate-check command.

    The text to check is the replied-to message's text if the command is a
    reply, otherwise everything after the command word. Texts must be 10 to
    1000 characters long. The result of ``check_text`` is sent back as a
    photo with caption, as plain text, or as a generic error message.
    Every path ends by raising ``ContinuePropagation``.
    """
    if message.reply_to_message:
        text = message.reply_to_message.text
    else:
        # The command filter also matches captions, in which case
        # ``message.text`` is None; fall back instead of crashing.
        raw = message.text or message.caption or ""
        text = " ".join(raw.split(" ")[1:])
    if not text:
        await message.reply("请输入文本。")
        raise ContinuePropagation

    # Bounds are inclusive, matching the range stated in the replies below.
    if len(text) > 1000:
        await message.reply('文本过长，长度须在10-1000之间', quote=True)
        raise ContinuePropagation
    if len(text) < 10:
        await message.reply('文本过短，长度须在10-1000之间', quote=True)
        raise ContinuePropagation

    image, text = await check_text(text)
    if image:
        await message.reply_photo(image, quote=True, caption=text)
    elif text:
        await message.reply(text, quote=True)
    else:
        await message.reply('出错了，请稍后再试', quote=True)
    raise ContinuePropagation
@Client.on_message(filters.incoming & ~filters.edited &
                   filters.command(["小作文", f"小作文@{user_me.username}"]))
async def asoulcnki_random(client: Client, message: Message):
    """Handle the random-article command.

    The optional keyword is the replied-to message's text if the command is
    a reply, otherwise everything after the command word. The result of
    ``random_text`` is sent back as a photo with caption, as plain text, or
    as a generic error message. Always ends by raising
    ``ContinuePropagation``.
    """
    if message.reply_to_message:
        keyword = message.reply_to_message.text
    else:
        # The command filter also matches captions, in which case
        # ``message.text`` is None; fall back instead of crashing.
        raw = message.text or message.caption or ""
        keyword = " ".join(raw.split(" ")[1:])

    image, text = await random_text(keyword or "")
    if image:
        await message.reply_photo(image, quote=True, caption=text)
    elif text:
        await message.reply(text, quote=True)
    else:
        await message.reply('出错了，请稍后再试', quote=True)
    raise ContinuePropagation

View File

@ -11,6 +11,7 @@ des = """本机器人特性:
我有个朋友 我有个朋友
简易版问与答 简易版问与答
网易云音乐 网易云音乐
查重小作文
""" """

View File

@ -7,3 +7,6 @@ coloredlogs
qrcode qrcode
pyncm pyncm
mutagen mutagen
playwright
uvicorn
jinja2

View File

@ -0,0 +1,107 @@
<html>
<head>
<style>
@font-face {
font-family: "iconfont";
src: url("https://at.alicdn.com/t/font_2949364_zcqb63am41o.woff2?t=1637832961945")
format("woff2");
}
.iconfont {
font-family: "iconfont" !important;
font-size: 16px;
font-style: normal;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
.icon-like:before {
content: "\e6bf";
}
.icon-all-like:before {
content: "\e6c1";
}
.icon-qoute:before {
content: "\e620";
}
.icon-user:before {
content: "\e6f4";
}
.icon-time:before {
content: "\e703";
}
body {
background-color: rgba(243, 244, 246, 1);
}
.article {
display: flex;
flex-direction: column;
border-radius: 5px;
border-color: rgba(229, 231, 235, 1);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
background-color: rgba(249, 250, 251, 1);
}
.article-header {
display: flex;
flex-direction: row;
align-items: center;
margin-left: 8px;
margin-right: 8px;
padding: 8px;
}
.article-footer {
display: flex;
flex-direction: row;
justify-content: space-between;
align-items: center;
margin-left: 8px;
margin-right: 8px;
padding: 8px;
}
.article-text {
margin-left: 8px;
margin-right: 8px;
padding: 8px;
border-radius: 5px;
background-color: rgba(229, 231, 235, 1);
line-height: 1.625;
word-break: break-all;
white-space: pre-wrap;
}
.article-text strong {
font-weight: 400;
background-color: rgba(243, 244, 246, 1);
color: rgba(239, 68, 68, 1);
}
</style>
</head>
<body>
<div class="image">
<div class="article">
<div class="article-header">
<div>
<span class="iconfont icon-user"></span>
{{ article["username"] }}
</div>
</div>
<div class="article-text">{{ article["text"] }}</div>
<div class="article-footer">
  <!-- span is not a void element: "<span ... />" is parsed as an opening tag,
       which would pull the counter text inside the icon span. Close each
       icon span explicitly, as the header does. -->
  <div>
    <span class="iconfont icon-all-like"></span>
    {{ article["all_like"] }}
  </div>
  <div>
    <span class="iconfont icon-like"></span>
    {{ article["like"] }}
  </div>
  <div>
    <span class="iconfont icon-qoute"></span>
    {{ article["quote"] }}
  </div>
  <div>
    <span class="iconfont icon-time"></span>
    {{ article["time"] }}
  </div>
</div>
</div>
</div>
</body>
</html>