None

2021-05-05 14:50:25 +08:00 · 2021-05-05 14:50:25 +08:00 · e81e30fb07
commit e81e30fb07
parent 2e975d2e8c
4 changed files with 118 additions and 42 deletions
--- a/func.py
+++ b/func.py
@ -30,16 +30,20 @@ def chat_content_exec(update, context):
    chat_id = update.effective_message.chat_id
    print("\n---------------------------")
    print("内容: " + text)
-    if "/" in text:
-        print("这是一条指令信息")
    print("群组类型: " + str(chat_type))
    print("用户ID: " + str(user_id))
    print("chat_id: " + str(chat_id))
-    r.append("{}_chat_content".format(chat_id), text)
-    r.incrby("{}_user_message_amount", user_id)
+    if "/" in text:
+        print("这是一条指令信息，跳过")
+    else:
+        if text[-1] not in ["，", "。", "！", "：", "？", "!", "?", ",", ":", "."]:
+            r.append("{}_chat_content".format(chat_id), text + "。")
+        else:
+            r.append("{}_chat_content".format(chat_id), text)
+        r.incrby("{}_total_message_amount".format(chat_id))
+        r.hincrby("{}_user_message_amount".format(chat_id), user_id)
    print("---------------------------")


-
 start_handler = CommandHandler('start', start)
 chat_content_handler = MessageHandler(Filters.text, chat_content_exec)
--- a/test/redishset.py
+++ b/test/redishset.py
@ -8,3 +8,5 @@ r = redis.StrictRedis(connection_pool=pool)
 # r.hincrby('user', "b")
 print(r.hget("user", "a"))
 print(r.hget("user", "b"))
+r.delete("user")  # DEL needs at least one key; clean up the "user" hash read above
+
--- a/test/rediswr.py
+++ b/test/rediswr.py
@ -1,56 +1,96 @@
-import redis
 # encoding=utf-8
+import re
+import redis
 import jieba
-import wordcloud
 import jieba.posseg as pseg
+import time  # 引入time模块
+import wordcloud
 # 导入imageio库中的imread函数，并用这个函数读取本地图片，作为词云形状图片
 import imageio
-import time  # 引入time模块

-pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True, db=0)
-start_time = float(time.time())
+# import datetime
+# import threading
+# import telegram
+# from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
+# from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
+# from config import TOKEN
+# import sqlite3
+# import time
+# import os
+# import importlib
+# import requests
+#
+# bot = telegram.Bot(token=TOKEN)
+
+pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True)
+
 r = redis.StrictRedis(connection_pool=pool)
-with open("/root/Jupyter/143751443703354.txt", "r") as file:
-    i = 0
-    for line in file.readlines():
-        i += 1
-        r.append("maozedong", line)
-        if i == 10:
-            break
-#     content = file.read()
-# print(content)
-print(r.get("maozedong"))

-mk = imageio.imread("/root/Jupyter/circle.png")
-w = wordcloud.WordCloud(mask=mk)
+key_list = r.keys()
+group_list = []
+for i in key_list:
+    if "chat_content" in i:
+        group_list.append(i[:i.find("_")])
+print(group_list)

+# mk = imageio.imread("/root/Jupyter/circle.png")
+# w = wordcloud.WordCloud(mask=mk)
 # 构建并配置词云对象w，注意要加scale参数，提高清晰度
 w = wordcloud.WordCloud(width=800,
                        height=800,
                        background_color='white',
                        font_path='/root/Jupyter/hanyiqihei.ttf',
-                        mask=mk,
+                        # mask=mk,
                        scale=5)

-# 对来自外部文件的文本进行中文分词，得到string
+jieba.enable_paddle()  # enable paddle mode once (jieba >= 0.40); hoisted out of the per-group loop
+for group in group_list:
+    start_time = float(time.time())
+    # Segment this group's chat log and keep noun-like words for the word cloud
+    words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True)  # paddle模式
+    word_list = []
+    for word, flag in words:
+        # print(word + "\t" + flag)
+        if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]:
+            # 判断该词是否有效，不为空格
+            if re.match(r"^\s+?$", word) is None:
+                word_list.append(word)
+    # print(word_list)

-jieba.enable_paddle()  # 启动paddle模式。 0.40版之后开始支持，早期版本不支持
-words = pseg.cut(r.get("maozedong"), use_paddle=True)  # paddle模式
-word_list = []
-for word, flag in words:
-    # print(word + "\t" + flag)
-    if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]:
-        word_list.append(word)
+    # 分析高频词
+    word_amount = {}
+    print(word_amount)
+    for word in word_list:
+        # 判断该词是否之前已经出现
+        if word_amount.get(word) is not None:
+            word_amount[word] = word_amount.get(word) + 1
+        else:
+            word_amount[word] = 1
+    print(word_amount)
+    word_amount = sorted(word_amount.items(), key=lambda kv: (kv[1]), reverse=True)
+    print("排序后的热词：" + str(word_amount))
+    hot_word_string = ""
+    for i in range(min(5, len(word_amount))):
+        hot_word_string += str(word_amount[i][0]) + "\t热度: " + str(word_amount[i][1]) + "\n"
+    print(hot_word_string)
+    # 获取消息总数
+    total_message_amount = r.get("{}_total_message_amount".format(group))

-string = " ".join(word_list)
+    # 获取发言用户数
+    user_amount = len(r.hkeys("{}_user_message_amount".format(group)))
+    # Per-user message counts for THIS group; values come back as strings, so rank numerically
+    user_message_amount = r.hgetall("{}_user_message_amount".format(group))
+    user_message_amount = sorted(user_message_amount.items(), key=lambda kv: int(kv[1]), reverse=True)
+    print("排序后的用户：" + str(user_message_amount))
+    top_5_user = ""
+    for i in range(min(5, len(user_message_amount))):
+        top_5_user += str(user_message_amount[i][0]) + "\t发言数: " + str(user_message_amount[i][1]) + "\n"
+    print(top_5_user)
+    string = " ".join(word_list)
+    # 将string变量传入w的generate()方法，给词云输入文字
+    w.generate(string)
+    # 将词云图片导出到当前文件夹
+    w.to_file('{}_chat_word_cloud.png'.format(group))

-
-# 将string变量传入w的generate()方法，给词云输入文字
-w.generate(string)
-
-# 将词云图片导出到当前文件夹
-w.to_file('maozedong-3.png')
-
-
-stop_time = float(time.time())
-print(stop_time - start_time)
+    stop_time = float(time.time())
+    print("当前群组处理耗时：" + str(stop_time - start_time))
--- a/test/test1.py
+++ b/test/test1.py
@ -0,0 +1,30 @@
+group_name = '-1001403536948_chat_content'
+
+print(group_name[:group_name.find("_")])
+
+word_amount = {}
+
+word_amount['y1111'] = 1
+word_amount['y2222'] = 2
+word_amount['y3333'] = 4
+word_amount['y4444'] = 3
+
+
+print(word_amount.get("123"))
+print(word_amount.get("y4444"))
+
+print(word_amount)
+word_amount = sorted(word_amount.items(), key=lambda word: (word[1]))
+print(word_amount)
+
+print("--------------")
+import re
+
+sample = '''
+   23
+'''
+
+rst = re.match(r"^\s+?$", sample)  # renamed from `str` to avoid shadowing the builtin
+
+print(rst)
+