diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bb5db92 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/ +venv/ +__pycache__/ +config.py \ No newline at end of file diff --git a/config.gen.py b/config.gen.py new file mode 100644 index 0000000..eaee1e3 --- /dev/null +++ b/config.gen.py @@ -0,0 +1,29 @@ +TOKEN = "token here" + +# 频率限制次数,每个群每小时内,只能主动触发2次任务 +LIMIT_COUNT = 2 + +# 私有模式,仅授权群组可用 False:关闭 True:打开 +EXCLUSIVE_MODE = False + +# 配置私有模式群组id列表(不私有请忽略) 例如:[-1001324252532, -100112415423] +EXCLUSIVE_LIST = [] + +# 主动触发命令仅管理员有效 False:否 True:是 +RANK_COMMAND_MODE = True + +# 中文字体路径 +FRONT = 'fonts/ZhuZiAWan-2.ttc' + +# Redis 配置 +REDIS_CONFIG = {'host': '127.0.0.1', 'port': 6379, 'db': 1} + +# 拥有者 id 配置 +OWNER = 0 + +# 日志频道 id 0 为不启用 +CHANNEL = 0 + +# 帮助信息 +HELP = 'Group Word Cloud Bot\n\n/start - 查看此帮助信息\n/ping - 我还活着吗?\n/rank - 手动生成词云(绒布球)\n\n' \ + '此项目开源于:https://git.io/JnrvH' diff --git a/config.py b/config.py deleted file mode 100644 index ca2bc4b..0000000 --- a/config.py +++ /dev/null @@ -1,10 +0,0 @@ -TOKEN = "1749418611:AAGOV2XB5mkMXqX-J_wtNu7KkrkhO_Xylmg" - -# 频率限制次数,每个群每小时内,只能主动触发10次任务 -LIMIT_COUNT = 10 - -# 私有模式,仅授权群组可用 0:关闭 1:打开 -EXCLUSIVE_MODE = 0 - -# 主动触发命令仅管理员有效 0:否 1:是 -RANK_COMMAND_MODE = 1 diff --git a/connector.py b/connector.py index 6740652..c266cb9 100644 --- a/connector.py +++ b/connector.py @@ -1,6 +1,8 @@ import redis +from config import REDIS_CONFIG -pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True) +pool = redis.ConnectionPool(host=REDIS_CONFIG['host'], port=REDIS_CONFIG['port'], db=REDIS_CONFIG['db'], + encoding='utf8', decode_responses=True) def get_connection(): diff --git a/fonts/ZhuZiAWan-2.ttc b/fonts/ZhuZiAWan-2.ttc new file mode 100644 index 0000000..97083bb Binary files /dev/null and b/fonts/ZhuZiAWan-2.ttc differ diff --git a/font.ttf b/fonts/font.ttf similarity index 100% rename from font.ttf rename to fonts/font.ttf diff --git a/func.py b/func.py index 
853055d..853d810 100644 --- a/func.py +++ b/func.py @@ -3,7 +3,7 @@ import time import connector import telegram from telegram.ext import CommandHandler, MessageHandler, Filters -from config import TOKEN, LIMIT_COUNT, EXCLUSIVE_MODE, RANK_COMMAND_MODE +from config import TOKEN, LIMIT_COUNT, EXCLUSIVE_MODE, RANK_COMMAND_MODE, OWNER, EXCLUSIVE_LIST, CHANNEL, HELP import schedule from task import add_task @@ -11,17 +11,37 @@ bot = telegram.Bot(token=TOKEN) def start(update, context): + # 限制不为群组 + chat_type = update.effective_chat.type + if chat_type == "supergroup": + return try: connector.get_connection().keys() - print('进入start函数') + print('进入 start 函数') update.message.reply_text( - '在呢!系统运行正常~', + HELP, + parse_mode='HTML' ) except Exception as e: print(e) print('进入start函数') - update.message.reply_text("系统故障,Redis连接失败,请检查!") - update.message.reply_text("错误信息:" + str(e)) + if update.effective_user.id == OWNER: + update.message.reply_text(f"系统故障,Redis连接失败,错误信息:\n{e}") + + +def ping(update, context): + # 限制不为群组 + chat_type = update.effective_chat.type + if chat_type == "supergroup": + return + try: + connector.get_connection().keys() + print('进入 ping 函数') + update.message.reply_text( + 'pong~', + ) + except Exception as e: + print(e) def rank(update, context): @@ -36,9 +56,9 @@ def rank(update, context): username = update.effective_user.id # 限制为群组 if chat_type != "supergroup": - update.message.reply_text("此命令只有在群组中有效") + update.message.reply_text("此命令只有在群组中有效!") return - if RANK_COMMAND_MODE == 1: + if RANK_COMMAND_MODE: try: chat_member = bot.get_chat_member(chat_id, user_id) status = chat_member["status"] @@ -46,7 +66,6 @@ def rank(update, context): if status == "creator" or status == "administrator": print("用户权限正确") else: - update.message.reply_text("此命令仅对管理员开放") return except Exception as e: print(e) @@ -71,6 +90,12 @@ def rank(update, context): return add_task(chat_id) print("群组: {},用户: {}|{} 发起了主动触发请求".format(chat_id, username, user_id, )) + if not CHANNEL == 0: 
+ ctext = f'#WORDCLOUD #APPLY \n' \ + f'群组 ID:`{chat_id}`\n' \ + f'用户 ID:`{user_id}`\n' \ + f'执行操作:`主动生成词云`' + bot.send_message(chat_id=CHANNEL, text=ctext, parse_mode="Markdown") update.message.reply_text("统计数据将在分析完毕后发送到当前群组,请稍等~") except Exception as e: print("主动触发任务失败,请检查") @@ -91,30 +116,19 @@ def chat_content_exec(update, context): if len(text) > 80: return # 独享模式(仅授权群组可用) - if EXCLUSIVE_MODE == 1 and chat_id not in ["1231242141"]: + if EXCLUSIVE_MODE and chat_id not in EXCLUSIVE_LIST: print(chat_id + " 为未认证群组,取消入库") return - try: - username = update.effective_user.username - except Exception as e: - username = update.effective_user.id user = update.message.from_user firstname = str(user["first_name"]) - lastname = str(user["last_name"]) - name = "" - if firstname != "None": - name = firstname + " " - if lastname != "None": - name += lastname - if len(name) == 0: - name = username + name = firstname print("\n---------------------------") print("内容: " + text[:10]) print("群组类型: " + str(chat_type)) print("用户ID: " + str(user_id)) print("chat_id: " + str(chat_id)) - if "/" in text: - print("这是一条指令信息,跳过") + if text.startswith('/') or '//' in text: + print("这是一条指令或者链接信息,跳过") return else: if text[-1] not in [",", "。", "!", ":", "?", "!", "?", ",", ":", "."]: @@ -136,5 +150,6 @@ def check_schedule(): start_handler = CommandHandler('start', start) +ping_handler = CommandHandler('ping', ping) rank_handler = CommandHandler('rank', rank) chat_content_handler = MessageHandler(Filters.text, chat_content_exec) diff --git a/images/README.md b/images/README.md new file mode 100644 index 0000000..e665438 --- /dev/null +++ b/images/README.md @@ -0,0 +1,126 @@ +## 当前版本 + +v2.3 + +## 更新维护日志 + +[更新维护日志](https://github.com/devourbots/word_cloud_bot/wiki/%E6%9B%B4%E6%96%B0%E7%BB%B4%E6%8A%A4%E6%97%A5%E5%BF%97) + +## 有问题请加群组反馈 +Telegram 交流反馈群组 [点击加入](https://t.me/devourbots) + +## 演示 + +![xq9iR.png](https://s3.jpg.cm/2021/05/05/xq9iR.png) + +## 配置要求 + +内存:1G以上 + +## 安装方法 + +### 使用 
Docker 安装 +Docker官方安装地址:[点击访问](https://docs.docker.com/engine/install/) + +```angular2html +cd /root + +# 拉取Redis镜像 +docker pull redis + +# 创建 entrypoint.sh 入口文件 +echo '#! /bin/sh \ +cd /root/word_cloud_bot && python3 main.py >> output 2>&1 & +tail -f /dev/null' > /root/entrypoint.sh + +# 创建 Dockerfile +wget -O /root/Dockerfile https://github.com/devourbots/word_cloud_bot/raw/master/Dockerfile + +# 使用命令查看所有时区 +timedatectl list-timezones + +找到您所在的时区,例如: +上海 Asia/Shanghai +纽约 America/New_York + +# 编辑Dockerfile +vi /root/Dockerfile + +# 在第7行修改服务器所属时区,原文件为: +RUN ln -s /usr/share/zoneinfo/Asia/Shanghai /etc/localtime +修改为纽约当地时间,修改后: +RUN ln -s /usr/share/zoneinfo/America/New_York /etc/localtime + +# 在第10行修改你的机器人TOKEN +修改后(请将 <YOUR_BOT_TOKEN> 替换为你自己的 TOKEN,切勿公开真实 TOKEN): +RUN sed -i '1c TOKEN = "<YOUR_BOT_TOKEN>"' /root/word_cloud_bot/config.py + + +# 根据 Dockerfile 创建镜像 +docker build . -t world_cloud_bot:latest + +# 运行 Redis 镜像,此步在前 +docker run -d -p 6379:6379 redis:latest + +# 注意!!! +请关闭服务器 6379 端口的外网访问权限!!!如果您的主机提供商提供了安全组策略(阿里云、腾讯云、AWS等等),可以在控制台关闭6379端口。 +如果您的主机商不支持自定义安全组,请根据您的发行版系统自行搜索防火墙关闭端口的方式,检测方式在下方。 +不要抱有侥幸心理!不要抱有侥幸心理!不要抱有侥幸心理! + +# 运行 机器人,此步在后 +docker run -d --net=host world_cloud_bot:latest +``` + +[端口检测工具](http://tool.chinaz.com/port/), 请确保 6379 是关闭状态 + +![xlu8H.png](https://s3.jpg.cm/2021/05/06/xlu8H.png) + +## 使用方法 + +使用 `/start` 指令查看帮助信息(同时会检查机器人与 Redis 数据库的连通情况),使用 `/ping` 指令确认机器人是否在线;这两个指令在超级群组(supergroup)中不会响应 + +使用 `/rank` 指令主动触发词云任务,在 config.py 里可以设置每个群组每小时主动触发次数的限制 + +将机器人拉入群组,设置为管理员(受机器人API所限,只有授予管理员权限后,机器人才能接收到所有用户的普通聊天文本,此机器人不需要其他权限,您可以将所有权限关闭) + +所有聊天内容每天定时清理,仅用于本地分词,无其他任何用途 + +![xqyvt.png](https://s3.jpg.cm/2021/05/05/xqyvt.png) + +### 将机器人设置为仅自己群组可用 + +如何编辑 Docker 容器中的文件请自行 Google + +如果您不想让别人使用你的机器人,那么可以将 `config.py` 文件中的 `EXCLUSIVE_MODE = False` 改为 `EXCLUSIVE_MODE = True` + +![DGbSy.png](https://s3.jpg.cm/2021/05/09/DGbSy.png) + +然后继续编辑 `/root/word_cloud_bot/config.py`,将自己的群组 ID 加入到 `EXCLUSIVE_LIST` 列表中。 +不需要再修改 `func.py`,其中的判断会直接读取 `EXCLUSIVE_MODE` 和 `EXCLUSIVE_LIST` 这两项配置。 
+ + +例如我的两个群组 ID 分别为:-127892174935、-471892571924 + +那么修改后为: +```angular2html +# config.py +EXCLUSIVE_MODE = True +EXCLUSIVE_LIST = [-127892174935, -471892571924] +``` + +![DGHR5.png](https://s3.jpg.cm/2021/05/09/DGHR5.png) + +### 设置 /rank 指令对普通用户开放 + +编辑 `/root/word_cloud_bot/config.py`, 将 `RANK_COMMAND_MODE = True` 改为 `RANK_COMMAND_MODE = False` + +![DGJuC.png](https://s3.jpg.cm/2021/05/09/DGJuC.png) + + +### 信息推送密度 + +![xW3jh.png](https://s3.jpg.cm/2021/05/05/xW3jh.png) + +默认分别会在当地时间 12:00、18:00、23:30 推送三次数据统计报告,并会在 23:59 清空当日统计数据, +如需更密集的数据推送,可以编辑 /root/word_cloud_bot/main.py ,按照示例格式自行增加,相关的 docker 技术操作不再赘述 + diff --git a/images/default.png b/images/default.png new file mode 100644 index 0000000..eac74fa Binary files /dev/null and b/images/default.png differ diff --git a/main.py b/main.py index 9c5381f..90b30c5 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,12 @@ from telegram.ext import Updater from config import TOKEN -from func import start_handler, chat_content_handler, check_schedule, rank_handler +from func import start_handler, ping_handler, chat_content_handler, check_schedule, rank_handler import schedule from task import schedule_task, flush_redis, do_task import threading -schedule.every().day.at('11:00').do(schedule_task) +# 开始定时任务 - 群组分析 +schedule.every().day.at('12:00').do(schedule_task) schedule.every().day.at('18:00').do(schedule_task) schedule.every().day.at('23:30').do(schedule_task) schedule.every().day.at('23:59').do(flush_redis) @@ -22,6 +23,7 @@ updater = Updater(token=TOKEN, use_context=True) dispatcher = updater.dispatcher dispatcher.add_handler(start_handler) +dispatcher.add_handler(ping_handler) dispatcher.add_handler(rank_handler) dispatcher.add_handler(chat_content_handler) diff --git a/task.py b/task.py index 022f2bd..13fd1bf 100644 --- a/task.py +++ b/task.py @@ -8,7 +8,7 @@ import telegram import time import os import connector -from config import TOKEN +from config import TOKEN, FRONT, CHANNEL bot = 
telegram.Bot(token=TOKEN) @@ -24,10 +24,10 @@ def schedule_task(): if "chat_content" in i: group_list.append(i[:i.find("_")]) # print(group_list) - print("运行定时任务,让任务队列中添加任务,任务数量:{}".format(len(group_list))) + print("运行定时任务,向任务队列中添加任务,任务数量:{}".format(len(group_list))) for group in group_list: try: - # 网任务队列中添加任务 + # 向任务队列中添加任务 task_queue.put(group) except Exception as e: print("群组:{} | 词云数据分析生成失败,请查看报错信息".format(group)) @@ -49,9 +49,20 @@ def do_task(): stop_time = float(time.time()) print("当前群组处理耗时:" + str(stop_time - start_time)) print("---------------------------") + ctext = f'#WORDCLOUD \n' \ + f'群组 ID:`{group}`\n' \ + f'执行操作:`生成词云`\n' \ + f'结果:`成功`\n' \ + f'处理耗时:`{str(stop_time - start_time)[:5]}`' except Exception as e: print("群组: {} | 处理失败,可能是机器人已经被移出群组,请检查报错!".format(group)) print(e) + ctext = f'#WORDCLOUD #SCHEDULE \n' \ + f'群组 ID:`{group}`\n' \ + f'执行操作:`生成词云`\n' \ + f'结果:`失败`\n' + if not CHANNEL == 0: + bot.send_message(chat_id=CHANNEL, text=ctext, parse_mode="Markdown") time.sleep(1) @@ -61,12 +72,12 @@ def add_task(group): # 核心函数,分词统计 def generate(group): - mk = imageio.imread("/root/word_cloud_bot/circle.png") + mk = imageio.imread("circle.png") # 构建并配置词云对象w,注意要加scale参数,提高清晰度 w = wordcloud.WordCloud(width=800, height=800, background_color='white', - font_path='/root/word_cloud_bot/font.ttf', + font_path=FRONT, mask=mk, scale=5) r = connector.get_connection() @@ -76,15 +87,7 @@ def generate(group): chat_content = r.get("{}_chat_content".format(group)) if chat_content is None: - print("数据库中不存在此群组数据") - try: - time.sleep(1) - bot.send_message( - chat_id=group, - text="数据库中不存在群组数据,请检查是否授予机器人管理员权限,并通过聊天添加数据量,嗨起来吧~\n" - ) - except Exception as e: - print("群组: {} | 机器人发送信息失败".format(group)) + print("数据库中不存在此群组 {} 数据".format(group)) return word_list = [] words = pseg.cut(chat_content, use_paddle=True) # paddle模式 @@ -107,8 +110,11 @@ def generate(group): user_message_amount = r.hgetall("{}_user_message_amount".format(group)) user_message_amount = 
sorted(user_message_amount.items(), key=lambda kv: (int(kv[1])), reverse=True) + # 截至时间 + date = time.strftime("%Y年%m月%d日", time.localtime()) + ' ⏱ ' + time.strftime("%H:%M", time.localtime()) + text = f'📅 截至 {date}\n' + # 分析高频词 if len(word_list) > 0: - # 分析高频词 word_amount = {} # print(word_amount) for word in word_list: @@ -128,77 +134,63 @@ def generate(group): hot_word_string = "" # 默认展示前5位,少于5个则全部展示 for i in range(min(5, len(word_amount))): - hot_word_string += "\t\t\t\t\t\t\t\t" + "`" + str(word_amount[i][0]) + "`" + ": " + str( + hot_word_string += "\t\t\t\t\t\t\t\t" + "👥 `" + str(word_amount[i][0]) + "`" + ":" + str( word_amount[i][1]) + "\n" # print(hot_word_string) - bot.send_message( - chat_id=group, - text="🎤 今日话题榜 🎤\n" - "📅 {}\n" - "⏱ 截至今天{}\n" - "🗣️ 本群{}位朋友共产生{}条发言\n" - "🤹‍ 大家今天讨论最多的是:\n\n" - "{}\n" - "看下有没有你感兴趣的话题? 👏".format( - time.strftime("%Y年%m月%d日", time.localtime()), - time.strftime("%H:%M", time.localtime()), - user_amount, - total_message_amount, - hot_word_string), - parse_mode="Markdown" - ) + text += f"🗣️ 本群 {user_amount} 位成员共产生 {total_message_amount} 条发言\n" \ + f"🤹‍ 大家今天讨论最多的是:\n\n{hot_word_string}\n" + else: + text += '无法分析出当前群组的热词列表,可能是数据量过小,嗨起来吧~\n' else: - bot.send_message( - chat_id=group, - text="当前聊天数据量过小,嗨起来吧~" - ) + text += '无法分析出当前群组的热词列表,可能是数据量过小,嗨起来吧~\n' + # 分析活跃用户 if len(user_message_amount) > 0: # print("排序后的用户:" + str(user_message_amount)) top_5_user = "" # 默认展示前5位,少于5个则全部展示 for i in range(min(5, len(user_message_amount))): dis_name = str(user_message_amount[i][0]) - top_5_user += "\t\t\t\t\t\t\t\t" + "🎖`" + dis_name[:min(8, len(dis_name))] + "`" + " 贡献: " + str( + top_5_user += "\t\t\t\t\t\t\t\t" + "🎖 `" + dis_name[:min(10, len(dis_name))] + "`" + " 贡献:" + str( user_message_amount[i][1]) + "\n" # print(top_5_user) - bot.send_message( - chat_id=group, - text="🏵 今日活跃用户排行榜 🏵\n" - "📅 {}\n" - "⏱ 截至今天{}\n\n" - "{}\n" - "感谢这些朋友今天的分享! 
👏 \n" - "遇到问题,向他们请教说不定有惊喜😃".format( - time.strftime("%Y年%m月%d日", time.localtime()), - time.strftime("%H:%M", time.localtime()), - top_5_user), - parse_mode="Markdown" - ) + text += f"🏵 今日活跃用户排行榜:\n\n{top_5_user}" else: - bot.send_message( - chat_id=group, - text="当前聊天数据量过小,嗨起来吧~" - ) + text = '无法分析出当前群组的活跃用户列表,可能是数据量过小,嗨起来吧~' + # 开始创建词云 + img_path = 'images/default.png' try: string = " ".join(word_list) # 将string变量传入w的generate()方法,给词云输入文字 w.generate(string) - # 将词云图片导出到当前文件夹 - w.to_file('{}_chat_word_cloud.png'.format(group)) - bot.send_photo( - chat_id=group, - photo=open("{}_chat_word_cloud.png".format(group), "rb") - ) - os.remove("{}_chat_word_cloud.png".format(group)) + # 将词云图片导出到 images 文件夹 + w.to_file('images/{}_chat_word_cloud.png'.format(group)) + img_path = 'images/{}_chat_word_cloud.png'.format(group) except Exception as e: print(e) print("词云图片生成失败") - # bot.send_message( - # chat_id=group, - # text="当前聊天数据量过小,嗨起来吧~" - # ) + + # 发送结果 + try: + bot.send_photo( + chat_id=group, + photo=open(img_path, "rb"), + caption=text, + parse_mode='Markdown', + disable_notification=True + ) + except Exception as e: + print(e) + r.delete('{}_chat_content'.format(group)) + print("发送结果失败") + + # 删除图片 + try: + os.remove("images/{}_chat_word_cloud.png".format(group)) + except Exception as e: + print(e) + print("删除图片失败") def flush_redis(): diff --git a/test/__pycache__/connector.cpython-37.pyc b/test/__pycache__/connector.cpython-37.pyc deleted file mode 100644 index 2e4ee2f..0000000 Binary files a/test/__pycache__/connector.cpython-37.pyc and /dev/null differ