This commit is contained in:
机器人总动员 2021-05-05 15:35:04 +08:00
parent e81e30fb07
commit 722ecfef28
5 changed files with 109 additions and 94 deletions

View File

@ -1,21 +1,7 @@
import redis
# 连接 redis
# 指定主机地址和端口(port)与服务器连接;redis 默认有 16 个数据库,默认使用的 db 是 0
# r = redis.Redis(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True) # password='**'
# print(r.get('foo'))
# r.set('foo', '[1,2,3]')
# print(r.get('foo'))
# print(r.keys())
# r.delete('foo')
# print(r.keys())
# Module-level connection pool for the Redis server on 127.0.0.1:6379.
# decode_responses=True is passed so replies are decoded using the given
# encoding rather than returned as raw bytes.
pool = redis.ConnectionPool(
    host='127.0.0.1',
    port=6379,
    encoding='utf8',
    decode_responses=True,
)


# Example of using the pool directly:
#   r = redis.Redis(connection_pool=pool)
#   r.set('foo', 'Bar')
#   print(r.get('foo'))
def get_connection():
    """Return a StrictRedis client that draws connections from ``pool``."""
    client = redis.StrictRedis(connection_pool=pool)
    return client

15
func.py
View File

@ -1,16 +1,7 @@
import datetime
import threading
import connector
import telegram
from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
from config import TOKEN
import sqlite3
import time
import os
import importlib
import requests
bot = telegram.Bot(token=TOKEN)
@ -28,6 +19,10 @@ def chat_content_exec(update, context):
chat_type = update.effective_chat.type
user_id = update.effective_user.id
chat_id = update.effective_message.chat_id
try:
username = update.effective_user.username
except Exception as e:
username = update.effective_user.id
print("\n---------------------------")
print("内容: " + text)
print("群组类型: " + str(chat_type))
@ -41,7 +36,7 @@ def chat_content_exec(update, context):
else:
r.append("{}_chat_content".format(chat_id), text)
r.incrby("{}_total_message_amount".format(chat_id))
r.hincrby("{}_user_message_amount".format(chat_id), user_id)
r.hincrby("{}_user_message_amount".format(chat_id), username)
print("---------------------------")

View File

@ -1,7 +1,5 @@
from telegram.ext import Updater
from config import TOKEN
from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
from func import start_handler, chat_content_handler
updater = Updater(token=TOKEN, use_context=True)

View File

@ -1,26 +1,14 @@
# encoding=utf-8
import re
import redis
import jieba
import jieba.posseg as pseg
import time # 引入time模块
import wordcloud
# 导入imageio库中的imread函数并用这个函数读取本地图片作为词云形状图片
import imageio
import telegram
import time
import os
# import datetime
# import threading
# import telegram
# from telegram import InlineKeyboardMarkup, InlineKeyboardButton, ForceReply
# from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler, CallbackQueryHandler
# from config import TOKEN
# import sqlite3
# import time
# import os
# import importlib
# import requests
#
# bot = telegram.Bot(token=TOKEN)
# The bot token is a credential and must not be embedded in source control.
# Load it from the project's config module instead.
# NOTE(review): the token that was previously committed here should be
# revoked and regenerated, since it remains visible in the repository history.
from config import TOKEN

bot = telegram.Bot(token=TOKEN)
# Connection pool for the Redis server on 127.0.0.1:6379; decode_responses=True
# is passed so replies are decoded using the given encoding.
pool = redis.ConnectionPool(host='127.0.0.1', port=6379, encoding='utf8', decode_responses=True)
@ -31,66 +19,111 @@ group_list = []
# Collect the chat ids that have stored chat content. The content keys are
# read below as "<chat_id>_chat_content", so the id is the text before the
# first underscore.
for key in key_list:
    if "chat_content" in key:
        group_list.append(key[:key.find("_")])

# Image used as the mask (shape) of the word cloud.
mk = imageio.imread("/root/Jupyter/circle.png")
# Build and configure the word-cloud object; scale is raised to improve the
# sharpness of the rendered image.
w = wordcloud.WordCloud(width=800,
                        height=800,
                        background_color='white',
                        font_path='/root/Jupyter/hanyiqihei.ttf',
                        mask=mk,
                        scale=5)

# Part-of-speech / entity tags whose words are kept for the report.
TOPIC_FLAGS = frozenset(
    ["n", "nr", "nz", "PER", "f", "ns", "LOC", "s", "nt", "ORG", "nw"])
# Indentation prepended to every ranking line in the outgoing messages.
LINE_INDENT = "\t" * 8
# Number of entries shown in each ranking.
TOP_N = 5

for group in group_list:
    image_path = '{}_chat_word_cloud.png'.format(group)
    try:
        print("当前处理的群组:" + str(group))
        start_time = float(time.time())

        # Enable paddle mode (per the original note: supported since jieba
        # 0.40). Kept inside the try so a failure only skips this group.
        jieba.enable_paddle()
        words = pseg.cut(r.get("{}_chat_content".format(group)), use_paddle=True)
        # Keep words with a wanted tag that are not whitespace-only.
        word_list = [
            word for word, flag in words
            if flag in TOPIC_FLAGS and re.match(r"^\s+?$", word) is None
        ]

        # Count occurrences and rank the words, most frequent first.
        word_amount = {}
        for word in word_list:
            word_amount[word] = word_amount.get(word, 0) + 1
        word_amount = sorted(word_amount.items(), key=lambda kv: kv[1], reverse=True)
        hot_word_string = "".join(
            "{}`{}`: {}\n".format(LINE_INDENT, word, count)
            for word, count in word_amount[:TOP_N]
        )

        # Total number of messages recorded for this group.
        total_message_amount = r.get("{}_total_message_amount".format(group))

        # Per-user message counts for THIS group (previously a single
        # hard-coded chat id was read for every group).
        user_message_amount = r.hgetall("{}_user_message_amount".format(group))
        user_amount = len(user_message_amount)
        # The counts come back from Redis as text, so compare them as
        # integers; a plain string sort would rank "9" above "10".
        user_message_amount = sorted(user_message_amount.items(),
                                     key=lambda kv: int(kv[1]),
                                     reverse=True)
        top_5_user = "".join(
            "{}🎖`{}` 贡献: {}\n".format(LINE_INDENT, user, count)
            for user, count in user_message_amount[:TOP_N]
        )

        # Feed the space-joined words to the word cloud and write the image
        # into the current directory.
        string = " ".join(word_list)
        w.generate(string)
        w.to_file(image_path)

        # Take the timestamp once so both messages show the same date/time.
        # NOTE(review): the date format has no trailing "日"; left unchanged
        # here because it is user-facing text - confirm whether intended.
        now = time.localtime()
        date_text = time.strftime("%Y年%m月%d", now)
        clock_text = time.strftime("%H:%M", now)

        bot.send_message(
            chat_id=group,
            text="🎤 今日话题榜 🎤\n"
                 "📅 {}\n"
                 "⏱ 截至今天{}\n"
                 "🗣️ 本群{}位朋友共产生{}条发言\n"
                 "🤹‍ 大家今天讨论最多的是:\n\n"
                 "{}\n"
                 "看下有没有你感兴趣的话题? 👏".format(
                     date_text,
                     clock_text,
                     user_amount,
                     total_message_amount,
                     hot_word_string),
            parse_mode="Markdown"
        )
        bot.send_message(
            chat_id=group,
            text="🏵 今日活跃用户排行榜 🏵\n"
                 "📅 {}\n"
                 "⏱ 截至今天{}\n\n"
                 "{}\n"
                 "感谢这些朋友今天的分享! 👏 \n"
                 "遇到问题,向他们请教说不定有惊喜😃".format(
                     date_text,
                     clock_text,
                     top_5_user),
            parse_mode="Markdown"
        )
        # Close the image file once the upload call returns.
        with open(image_path, "rb") as photo:
            bot.send_photo(chat_id=group, photo=photo)

        stop_time = float(time.time())
        print("当前群组处理耗时:" + str(stop_time - start_time))
    except Exception as e:
        # Best effort: report the failure and move on to the next group.
        print(e)
    finally:
        # Remove the temporary image even when sending failed part-way.
        if os.path.exists(image_path):
            os.remove(image_path)

View File

@ -9,7 +9,6 @@ word_amount['y2222'] = 2
word_amount['y3333'] = 4
word_amount['y4444'] = 3
print(word_amount.get("123"))
print(word_amount.get("y4444"))
@ -28,3 +27,7 @@ rst = re.match(r"^\s+?$", str)
print(rst)
import time
print(time.strftime("%Y年%m月%d", time.localtime()))
print(time.strftime("%H:%M", time.localtime()))