From e81e30fb07fe0619bd54b8422237cb3e24917b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=BA=E5=99=A8=E4=BA=BA=E6=80=BB=E5=8A=A8=E5=91=98?= <73592731+devourbots@users.noreply.github.com> Date: Wed, 5 May 2021 14:50:25 +0800 Subject: [PATCH] None --- func.py | 14 ++++-- test/redishset.py | 2 + test/rediswr.py | 114 +++++++++++++++++++++++++++++++--------------- test/test1.py | 30 ++++++++++++ 4 files changed, 118 insertions(+), 42 deletions(-) create mode 100644 test/test1.py diff --git a/func.py b/func.py index b8150ab..79146d3 100644 --- a/func.py +++ b/func.py @@ -30,16 +30,20 @@ def chat_content_exec(update, context): chat_id = update.effective_message.chat_id print("\n---------------------------") print("内容: " + text) - if "/" in text: - print("这是一条指令信息") print("群组类型: " + str(chat_type)) print("用户ID: " + str(user_id)) print("chat_id: " + str(chat_id)) - r.append("{}_chat_content".format(chat_id), text) - r.incrby("{}_user_message_amount", user_id) + if "/" in text: + print("这是一条指令信息,跳过") + else: + if text[-1] not in [",", "。", "!", ":", "?", "!", "?", ",", ":", "."]: + r.append("{}_chat_content".format(chat_id), text + "。") + else: + r.append("{}_chat_content".format(chat_id), text) + r.incrby("{}_total_message_amount".format(chat_id)) + r.hincrby("{}_user_message_amount".format(chat_id), user_id) print("---------------------------") - start_handler = CommandHandler('start', start) chat_content_handler = MessageHandler(Filters.text, chat_content_exec) diff --git a/test/redishset.py b/test/redishset.py index 7c3145c..27b2493 100644 --- a/test/redishset.py +++ b/test/redishset.py @@ -8,3 +8,5 @@ r = redis.StrictRedis(connection_pool=pool) # r.hincrby('user', "b") print(r.hget("user", "a")) print(r.hget("user", "b")) +r.delete() + diff --git a/test/rediswr.py b/test/rediswr.py index 5d0c79a..ee0268d 100644 --- a/test/rediswr.py +++ b/test/rediswr.py @@ -1,56 +1,96 @@ -import redis # encoding=utf-8 +import re +import redis import jieba -import wordcloud import jieba.posseg as pseg +import time # 引入time模块 +import wordcloud # 导入imageio库中的imread函数,并用这个函数读取本地图片,作为词云形状图片 import imageio -import time # 引入time模块 -pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True, db=0) -start_time = float(time.time()) +# import datetime +# import threading +# import telegram +# from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply +# from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler +# from config import TOKEN +# import sqlite3 +# import time +# import os +# import importlib +# import requests +# +# bot = telegram.Bot(token=TOKEN) + +pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True) + r = redis.StrictRedis(connection_pool=pool) -with open("/root/Jupyter/143751443703354.txt", "r") as file: - i = 0 - for line in file.readlines(): - i += 1 - r.append("maozedong", line) - if i == 10: - break -# content = file.read() -# print(content) -print(r.get("maozedong")) -mk = imageio.imread("/root/Jupyter/circle.png") -w = wordcloud.WordCloud(mask=mk) +key_list = r.keys() +group_list = [] +for i in key_list: + if "chat_content" in i: + group_list.append(i[:i.find("_")]) +print(group_list) +# mk = imageio.imread("/root/Jupyter/circle.png") +# w = wordcloud.WordCloud(mask=mk) # 构建并配置词云对象w,注意要加scale参数,提高清晰度 w = wordcloud.WordCloud(width=800, height=800, background_color='white', font_path='/root/Jupyter/hanyiqihei.ttf', - mask=mk, + # mask=mk, scale=5) -# 对来自外部文件的文本进行中文分词,得到string +for group in group_list: + start_time = float(time.time()) + # 生成词云图片 + jieba.enable_paddle() # 启动paddle模式。 0.40版之后开始支持,早期版本不支持 + words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True) # paddle模式 + word_list = [] + for word, flag in words: + # print(word + "\t" + flag) + if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]: + # 判断该词是否有效,不为空格 + if re.match(r"^\s+?$", word) is None: + word_list.append(word) + # print(word_list) -jieba.enable_paddle() # 启动paddle模式。 0.40版之后开始支持,早期版本不支持 -words = pseg.cut(r.get("maozedong"), use_paddle=True) # paddle模式 -word_list = [] -for word, flag in words: - # print(word + "\t" + flag) - if flag in ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"]: - word_list.append(word) + # 分析高频词 + word_amount = {} + print(word_amount) + for word in word_list: + # 判断该词是否之前已经出现 + if word_amount.get(word) is not None: + word_amount[word] = word_amount.get(word) + 1 + else: + word_amount[word] = 1 + print(word_amount) + word_amount = sorted(word_amount.items(), key=lambda kv: (kv[1]), reverse=True) + print("排序后的热词:" + str(word_amount)) + hot_word_string = "" + for i in range(min(5, len(word_amount))): + hot_word_string += str(word_amount[i][0]) + "\t热度: " + str(word_amount[i][1]) + "\n" + print(hot_word_string) + # 获取消息总数 + total_message_amount = r.get("{}_total_message_amount".format(group)) -string = " ".join(word_list) + # 获取发言用户数 + user_amount = len(r.hkeys("{}_user_message_amount".format(group))) + # 获取所有用户发言数字典 + user_message_amount = r.hgetall("-1001403536948_user_message_amount") + user_message_amount = sorted(user_message_amount.items(), key=lambda kv: (kv[1]), reverse=True) + print("排序后的用户:" + str(user_message_amount)) + top_5_user = "" + for i in range(min(5, len(user_message_amount))): + top_5_user += str(user_message_amount[i][0]) + "\t发言数: " + str(user_message_amount[i][1]) + "\n" + print(top_5_user) + string = " ".join(word_list) + # 将string变量传入w的generate()方法,给词云输入文字 + w.generate(string) + # 将词云图片导出到当前文件夹 + w.to_file('{}_chat_word_cloud.png'.format(group)) - -# 将string变量传入w的generate()方法,给词云输入文字 -w.generate(string) - -# 将词云图片导出到当前文件夹 -w.to_file('maozedong-3.png') - - -stop_time = float(time.time()) -print(stop_time - start_time) + stop_time = float(time.time()) + print("当前群组处理耗时:" + str(stop_time - start_time)) diff --git a/test/test1.py b/test/test1.py new file mode 100644 index 0000000..8d20539 --- /dev/null +++ b/test/test1.py @@ -0,0 +1,30 @@ +group_name = '-1001403536948_chat_content' + +print(group_name[:group_name.find("_")]) + +word_amount = {} + +word_amount['y1111'] = 1 +word_amount['y2222'] = 2 +word_amount['y3333'] = 4 +word_amount['y4444'] = 3 + + +print(word_amount.get("123")) +print(word_amount.get("y4444")) + +print(word_amount) +word_amount = sorted(word_amount.items(), key=lambda word: (word[1])) +print(word_amount) + +print("--------------") +import re + +str = ''' + 23 +''' + +rst = re.match(r"^\s+?$", str) + +print(rst) +