None

2021-05-05 15:35:04 +08:00 · 2021-05-05 15:35:04 +08:00 · 722ecfef28
commit 722ecfef28
parent e81e30fb07
5 changed files with 109 additions and 94 deletions
--- a/connector.py
+++ b/connector.py
@ -1,21 +1,7 @@
 import redis
 # 连接 redis
 # 指定主机地址，port与服务器连接，redis默认数据库有16个，默认db是0
 # r = redis.Redis(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True)  # password='**'
 # print(r.get('foo'))
 # r.set('foo', '[1,2,3]')
 # print(r.get('foo'))
 # print(r.keys())
 # r.delete('foo')
 # print(r.keys())
 pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True)
 # r = redis.Redis(connection_pool=pool)
 # r.set('foo', 'Bar')
 # print(r.get('foo'))
 def get_connection():
    """Return a ``redis.StrictRedis`` client built on the module-level ``pool``."""
    client = redis.StrictRedis(connection_pool=pool)
    return client
--- a/func.py
+++ b/func.py
@ -1,16 +1,7 @@
 import datetime
 import threading
 import connector
 import telegram
 from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
 from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
 from config import TOKEN
 import sqlite3
 import time
 import os
 import importlib
 import requests
 bot = telegram.Bot(token=TOKEN)
@ -28,6 +19,10 @@ def chat_content_exec(update, context):
    chat_type = update.effective_chat.type
    user_id = update.effective_user.id
    chat_id = update.effective_message.chat_id
    # A Telegram user without a public @username has ``username`` set to None;
    # reading the attribute does not raise, so a try/except fallback never
    # triggers. Fall back to the numeric id explicitly so the per-user counter
    # key is never None.
    username = update.effective_user.username or update.effective_user.id
    print("\n---------------------------")
    print("内容: " + text)
    print("群组类型: " + str(chat_type))
@ -41,7 +36,7 @@ def chat_content_exec(update, context):
        else:
            r.append("{}_chat_content".format(chat_id), text)
        r.incrby("{}_total_message_amount".format(chat_id))
-        r.hincrby("{}_user_message_amount".format(chat_id), user_id)
+        r.hincrby("{}_user_message_amount".format(chat_id), username)
    print("---------------------------")
--- a/main.py
+++ b/main.py
@ -1,7 +1,5 @@
 from telegram.ext import Updater
 from config import TOKEN
 from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
 from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
 from func import start_handler, chat_content_handler
 updater = Updater(token=TOKEN, use_context=True)
--- a/test/rediswr.py
+++ b/test/rediswr.py
@ -1,26 +1,14 @@
 # encoding=utf-8
 import re
 import redis
 import jieba
 import jieba.posseg as pseg
 import time  # 引入time模块
 import wordcloud
 # 导入imageio库中的imread函数，并用这个函数读取本地图片，作为词云形状图片
 import imageio
 import telegram
 import time
 import os
-# import datetime
+# Read the bot token from the environment instead of committing it to the
+# repository. Any token that was ever committed here must be revoked.
+bot = telegram.Bot(token=os.environ["TELEGRAM_BOT_TOKEN"])
 # import threading
 # import telegram
 # from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
 # from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
 # from config import TOKEN
 # import sqlite3
 # import time
 # import os
 # import importlib
 # import requests
 #
 # bot = telegram.Bot(token=TOKEN)
 pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True)
@ -31,66 +19,111 @@ group_list = []
 for i in key_list:
    if "chat_content" in i:
        group_list.append(i[:i.find("_")])
-print(group_list)
+# print(group_list)
-# mk = imageio.imread("/root/Jupyter/circle.png")
+mk = imageio.imread("/root/Jupyter/circle.png")
 # w = wordcloud.WordCloud(mask=mk)
 # 构建并配置词云对象w，注意要加scale参数，提高清晰度
 w = wordcloud.WordCloud(width=800,
                        height=800,
                        background_color='white',
                        font_path='/root/Jupyter/hanyiqihei.ttf',
-                        # mask=mk,
+                        mask=mk,
                        scale=5)
 for group in group_list:
-    start_time = float(time.time())
+    try:
-    # 生成词云图片
+        print("当前处理的群组：" + str(group))
-    jieba.enable_paddle()  # 启动paddle模式。 0.40版之后开始支持，早期版本不支持
+        start_time = float(time.time())
-    words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True)  # paddle模式
+        # 生成词云图片
-    word_list = []
+        jieba.enable_paddle()  # 启动paddle模式。 0.40版之后开始支持，早期版本不支持
-    for word, flag in words:
+        words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True)  # paddle模式
-        # print(word + "\t" + flag)
+        word_list = []
-        if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]:
+        for word, flag in words:
-            # 判断该词是否有效，不为空格
+            # print(word + "\t" + flag)
-            if re.match(r"^\s+?$", word) is None:
+            if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]:
-                word_list.append(word)
+                # 判断该词是否有效，不为空格
-    # print(word_list)
+                if re.match(r"^\s+?$", word) is None:
                    word_list.append(word)
        # print(word_list)
-    # 分析高频词
+        # 分析高频词
-    word_amount = {}
+        word_amount = {}
-    print(word_amount)
+        # print(word_amount)
-    for word in word_list:
+        for word in word_list:
-        # 判断该词是否之前已经出现
+            # 判断该词是否之前已经出现
-        if word_amount.get(word) is not None:
+            if word_amount.get(word) is not None:
-            word_amount[word] = word_amount.get(word) + 1
+                word_amount[word] = word_amount.get(word) + 1
-        else:
+            else:
-            word_amount[word] = 1
+                word_amount[word] = 1
-    print(word_amount)
+        # print(word_amount)
-    word_amount = sorted(word_amount.items(), key=lambda kv: (kv[1]), reverse=True)
+        word_amount = sorted(word_amount.items(), key=lambda kv: (kv[1]), reverse=True)
-    print("排序后的热词：" + str(word_amount))
+        # print("排序后的热词：" + str(word_amount))
-    hot_word_string = ""
+        hot_word_string = ""
-    for i in range(min(5, len(word_amount))):
+        for i in range(min(5, len(word_amount))):
-        hot_word_string += str(word_amount[i][0]) + "\t热度: " + str(word_amount[i][1]) + "\n"
+            hot_word_string += "\t\t\t\t\t\t\t\t" + "`" + str(word_amount[i][0]) + "`" + ": " + str(
-    print(hot_word_string)
+                word_amount[i][1]) + "\n"
-    # 获取消息总数
+        # print(hot_word_string)
-    total_message_amount = r.get("{}_total_message_amount".format(group))
+        # 获取消息总数
        total_message_amount = r.get("{}_total_message_amount".format(group))
-    # 获取发言用户数
+        # print("总发言数: " + total_message_amount)
    user_amount = len(r.hkeys("{}_user_message_amount".format(group)))
    # 获取所有用户发言数字典
    user_message_amount = r.hgetall("-1001403536948_user_message_amount")
    user_message_amount = sorted(user_message_amount.items(), key=lambda kv: (kv[1]), reverse=True)
    print("排序后的用户：" + str(user_message_amount))
    top_5_user = ""
    for i in range(min(5, len(user_message_amount))):
        top_5_user += str(user_message_amount[i][0]) + "\t发言数: " + str(user_message_amount[i][1]) + "\n"
    print(top_5_user)
    string = " ".join(word_list)
    # 将string变量传入w的generate()方法，给词云输入文字
    w.generate(string)
    # 将词云图片导出到当前文件夹
    w.to_file('{}_chat_word_cloud.png'.format(group))
-    stop_time = float(time.time())
+        # 获取发言用户数
-    print("当前群组处理耗时：" + str(stop_time - start_time))
+        user_amount = len(r.hkeys("{}_user_message_amount".format(group)))
        # Fetch the per-user message counters for the group being processed.
        # The key used to be hard-coded to a single chat id, which made every
        # group receive that one chat's leaderboard.
        user_message_amount = r.hgetall("{}_user_message_amount".format(group))
        # The counters come back from Redis as text, so rank them numerically;
        # a plain string sort would place "9" above "10".
        user_message_amount = sorted(user_message_amount.items(), key=lambda kv: int(kv[1]), reverse=True)
        # Render at most the five most active users, one per line.
        top_5_user = "".join(
            "\t\t\t\t\t\t\t\t" + "🎖`" + str(name) + "`" + " 贡献: " + str(count) + "\n"
            for name, count in user_message_amount[:5]
        )
        # print(top_5_user)
        string = " ".join(word_list)
        # 将string变量传入w的generate()方法，给词云输入文字
        w.generate(string)
        # 将词云图片导出到当前文件夹
        w.to_file('{}_chat_word_cloud.png'.format(group))
        bot.send_message(
            chat_id=group,
            text="🎤 今日话题榜 🎤\n"
                 "📅 {}\n"
                 "⏱ 截至今天{}\n"
                 "🗣️ 本群{}位朋友共产生{}条发言\n"
                 "🤹‍ 大家今天讨论最多的是：\n\n"
                 "{}\n"
                 "看下有没有你感兴趣的话题? 👏".format(
                time.strftime("%Y年%m月%d日", time.localtime()),
                time.strftime("%H:%M", time.localtime()),
                user_amount,
                total_message_amount,
                hot_word_string),
            parse_mode="Markdown"
        )
        bot.send_message(
            chat_id=group,
            text="🏵 今日活跃用户排行榜 🏵\n"
                 "📅 {}\n"
                 "⏱ 截至今天{}\n\n"
                 "{}\n"
                 "感谢这些朋友今天的分享! 👏 \n"
                 "遇到问题,向他们请教说不定有惊喜😃".format(
                time.strftime("%Y年%m月%d日", time.localtime()),
                time.strftime("%H:%M", time.localtime()),
                top_5_user),
            parse_mode="Markdown"
        )
        # Upload the generated word cloud, then delete the temporary image.
        # The file is opened in a ``with`` block so the handle is closed before
        # the file is removed (the original left the handle open).
        cloud_path = "{}_chat_word_cloud.png".format(group)
        with open(cloud_path, "rb") as cloud_file:
            bot.send_photo(
                chat_id=group,
                photo=cloud_file
            )
        os.remove(cloud_path)
        stop_time = float(time.time())
        print("当前群组处理耗时：" + str(stop_time - start_time))
    except Exception as e:
        print(e)
        continue
--- a/test/test1.py
+++ b/test/test1.py
@ -9,7 +9,6 @@ word_amount['y2222'] = 2
 word_amount['y3333'] = 4
 word_amount['y4444'] = 3
 print(word_amount.get("123"))
 print(word_amount.get("y4444"))
@ -28,3 +27,7 @@ rst = re.match(r"^\s+?$", str)
 print(rst)
 import time
 print(time.strftime("%Y年%m月%d日", time.localtime()))
 print(time.strftime("%H:%M", time.localtime()))