v2.0版本

2021-05-08 10:18:24 +08:00 · 2021-05-08 10:18:24 +08:00 · 01850f5fba
commit 01850f5fba
parent c5730887f6
4 changed files with 110 additions and 22 deletions
--- a/config.py
+++ b/config.py
@ -1 +1,4 @@
 TOKEN = "1749418611:AAGOV2XB5mkMXqX-J_wtNu7KkrkhO_Xylmg"
+
+# 频率限制次数，每个群每小时内，只能主动触发10次任务
+LIMIT_COUNT = 10
--- a/func.py
+++ b/func.py
@ -1,9 +1,11 @@
+import datetime
 import time
 import connector
 import telegram
 from telegram.ext import CommandHandler, MessageHandler, Filters
-from config import TOKEN
+from config import TOKEN, LIMIT_COUNT, EX_TIME
 import schedule
+from task import add_task

 bot = telegram.Bot(token=TOKEN)

@ -22,6 +24,45 @@ def start(update, context):
        update.message.reply_text("错误信息：" + str(e))


+def rank(update, context):
+    try:
+        r = connector.get_connection()
+        chat_type = update.effective_chat.type
+        user_id = update.effective_user.id
+        chat_id = update.effective_message.chat_id
+        try:
+            username = update.effective_user.username
+        except Exception as e:
+            username = update.effective_user.id
+        # 限制为群组
+        if chat_type != "supergroup":
+            update.message.reply_text("此命令只有在群组中有效")
+        if r.exists("{}_frequency_limit".format(chat_id)):
+            r.setrange("{}_frequency_limit".format(chat_id), 0, int(r.get("{}_frequency_limit".format(chat_id))) + 1)
+        else:
+            struct_time = time.localtime(time.time())
+            # 数据过期时间为当前小时的 59 分
+            ex_time = datetime.datetime(
+                struct_time.tm_year,
+                struct_time.tm_mon,
+                struct_time.tm_mday,
+                struct_time.tm_hour,
+                59
+            )
+            r.set("{}_frequency_limit".format(chat_id), 1)
+            r.expireat("{}_frequency_limit".format(chat_id), ex_time)
+        count = int(r.get("{}_frequency_limit".format(chat_id)))
+        if count > LIMIT_COUNT:
+            update.message.reply_text("该群组在这个小时内的生成配额已经用完，请稍后再试~")
+            return
+        add_task(chat_id)
+        print("群组: {}，用户: {}|{} 发起了主动触发请求".format(username, user_id, chat_id))
+        update.message.reply_text("统计数据将在分析完毕后发送到当前群组，请稍等~")
+    except Exception as e:
+        print("主动触发任务失败，请检查")
+        print(e)
+
+
 def chat_content_exec(update, context):
    try:
        r = connector.get_connection()
@ -80,4 +121,5 @@ def check_schedule():


 start_handler = CommandHandler('start', start)
+rank_handler = CommandHandler('rank', rank)
 chat_content_handler = MessageHandler(Filters.text, chat_content_exec)
--- a/main.py
+++ b/main.py
@ -1,22 +1,28 @@
 from telegram.ext import Updater
 from config import TOKEN
-from func import start_handler, chat_content_handler, check_schedule
+from func import start_handler, chat_content_handler, check_schedule, rank_handler
 import schedule
-from task import do_task, flush_redis
+from task import schedule_task, flush_redis, do_task
 import threading

-schedule.every().day.at('11:00').do(do_task)
-schedule.every().day.at('18:00').do(do_task)
-schedule.every().day.at('23:30').do(do_task)
+schedule.every().day.at('11:00').do(schedule_task)
+schedule.every().day.at('18:00').do(schedule_task)
+schedule.every().day.at('23:30').do(schedule_task)
 schedule.every().day.at('23:59').do(flush_redis)
+
+# 测试代码，每分钟推送数据，非测试目的不要取消注释下一行
 # schedule.every(1).minutes.do(do_task)

+# 开启分析线程，当队列中由任务时，会取出任务分析生成数据
+threading.Thread(target=do_task).start()
+
 threading.Thread(target=check_schedule).start()

 updater = Updater(token=TOKEN, use_context=True)
 dispatcher = updater.dispatcher

 dispatcher.add_handler(start_handler)
+dispatcher.add_handler(rank_handler)
 dispatcher.add_handler(chat_content_handler)

 updater.start_polling()
--- a/task.py
+++ b/task.py
@ -1,4 +1,6 @@
 import re
+import queue
+import threading
 import jieba
 import jieba.posseg as pseg
 import wordcloud
@ -11,17 +13,10 @@ from config import TOKEN

 bot = telegram.Bot(token=TOKEN)

-mk = imageio.imread("/root/word_cloud_bot/circle.png")
-# 构建并配置词云对象w，注意要加scale参数，提高清晰度
-w = wordcloud.WordCloud(width=800,
-                        height=800,
-                        background_color='white',
-                        font_path='/root/word_cloud_bot/font.ttf',
-                        mask=mk,
-                        scale=5)
+task_queue = queue.Queue()


-def do_task():
+def schedule_task():
    try:
        r = connector.get_connection()
        key_list = r.keys()
@ -32,7 +27,10 @@ def do_task():
        # print(group_list)
        for group in group_list:
            try:
-                generate(group)
+                # 网任务队列中添加任务
+                task_queue.put(group)
+                # threading.Thread(target=generate, args=(group,)).start()
+                # time.sleep(0.5)
            except Exception as e:
                print("群组：{} | 词云数据分析生成失败，请查看报错信息".format(group))
                print(e)
@ -42,13 +40,53 @@ def do_task():
        print(e)


+def do_task():
+    while True:
+        group = task_queue.get()
+        try:
+            print("---------------------------")
+            print("群组: {} | 分析处理中...".format(group))
+            start_time = float(time.time())
+            generate(group)
+            stop_time = float(time.time())
+            print("当前群组处理耗时：" + str(stop_time - start_time))
+            print("---------------------------")
+        except Exception as e:
+            print("群组: {} | 处理失败，请检查报错！".format(group))
+            print(e)
+        time.sleep(1)
+
+
+def add_task(group):
+    task_queue.put(group)
+
+
+# 核心函数，分词统计
 def generate(group):
+    mk = imageio.imread("/root/word_cloud_bot/circle.png")
+    # 构建并配置词云对象w，注意要加scale参数，提高清晰度
+    w = wordcloud.WordCloud(width=800,
+                            height=800,
+                            background_color='white',
+                            font_path='/root/word_cloud_bot/font.ttf',
+                            mask=mk,
+                            scale=5)
    r = connector.get_connection()
    print("当前处理的群组：" + str(group))
-    start_time = float(time.time())
    # 生成词云图片
    jieba.enable_paddle()  # 启动paddle模式。 0.40版之后开始支持，早期版本不支持
-    words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True)  # paddle模式
+    chat_content = r.get("{}_chat_content".format(group))
+    if chat_content is None:
+        print("数据库中不存在此群组数据")
+        try:
+            bot.send_message(
+                chat_id=group,
+                text="数据库中不存在群组数据，请检查是否授予机器人管理员权限\n"
+            )
+        except Exception as e:
+            print("群组: {} | 机器人发送信息失败".format(group))
+        return
+    words = pseg.cut(chat_content, use_paddle=True)  # paddle模式
    word_list = []
    for word, flag in words:
        # print(word + "\t" + flag)
@ -140,11 +178,10 @@ def generate(group):

    os.remove("{}_chat_word_cloud.png".format(group))

-    stop_time = float(time.time())
-    print("当前群组处理耗时：" + str(stop_time - start_time))
-

 def flush_redis():
    r = connector.get_connection()
    r.flushall()
    print("已清空数据库")
+
+