Word cloud generation and sentiment analysis of the comments under Lei Jun's Douyin video, built on DrissionPage and matplotlib.pyplot.
Only a small amount of scraping and visualization knowledge is involved; I'm still a beginner, so please go easy on me.
Everything is explained in the code comments.
An electronic report for this project will be posted later.

*Note: my code still has a small bug: after the site opens, you have to open the comment section manually and scroll it to the bottom. I can't be bothered to fix it; it's good enough for the final exam.
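
The manual step could probably be automated by clicking the comment icon with DrissionPage right after the page opens, before scrolling. A rough sketch of the idea (the CSS selector below is a placeholder; Douyin's markup changes often, so it would need to be checked in DevTools first):

import time

# Hypothetical workaround: open the comment panel, then scroll a few times so
# more comment-list packets fire ('driver' is the ChromiumPage from the script)
comment_btn = driver.ele('css:[data-e2e="comment-icon"]', timeout=10)
if comment_btn:
    comment_btn.click()
for _ in range(5):
    driver.scroll.to_bottom()
    time.sleep(1)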

# Import the automation, parsing and visualization modules
import datetime

import jieba
import pandas as pd
import pymysql
import wordcloud
import matplotlib.pyplot as plt

from sqlalchemy import create_engine
from DrissionPage import ChromiumPage
from textblob import TextBlob

def analyze_sentiment(text):
    # TextBlob returns a polarity score in [-1, 1]
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    return sentiment
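
# Note: TextBlob's polarity model is trained on English, so Chinese comments
# will mostly score 0. A hedged alternative for Chinese text (assumes the
# third-party snownlp package is installed; it is not part of the original
# script):
def analyze_sentiment_cn(text):
    from snownlp import SnowNLP
    # SnowNLP returns a positivity probability in [0, 1]; rescale to [-1, 1]
    return SnowNLP(text).sentiments * 2 - 1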


def visualize_sentiment(sentiment_score):
    # Draw a single bar showing the overall polarity in [-1, 1]
    plt.bar(['Sentiment'], [sentiment_score], color=['blue'])
    plt.ylim(-1, 1)
    plt.ylabel('Sentiment Score')
    plt.title('Sentiment Analysis Result')
    plt.show()



# Open the browser
driver = ChromiumPage()

# ChromiumPage drives a real Chrome instance that sends its own request
# headers, so assigning this attribute does not change what the browser
# sends; it is kept here for reference only.
driver.headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    # other request headers could be added here
}

# Start listening for the comment-list API packets
driver.listen.start('aweme/v1/web/comment/list/')

# Open the target video page
driver.get('https://www.douyin.com/user/MS4wLjABAAAAompXkPoYOGsA152dqYoytKycjIZ_aCCxHwGmLX5IsDM?from_tab_name=main&modal_id=7481262407885524261')

# Connect to the MySQL database
db = pymysql.connect(host='localhost', port=3306, user='root', passwd='123456', db='douyin', charset='utf8mb4')
cursor = db.cursor()
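
# The douyin_comments table must exist before the INSERT below runs. A minimal
# schema matching the columns this script uses (assumed; the original post
# does not show the DDL):
#   CREATE TABLE IF NOT EXISTS douyin_comments (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       name VARCHAR(100),
#       ip VARCHAR(50),
#       comment_text TEXT
#   ) DEFAULT CHARSET = utf8mb4;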

for page in range(2):
    # Scroll to the bottom so the next batch of comments loads
    driver.scroll.to_bottom()
    # Wait for the comment-list packet to arrive
    resp = driver.listen.wait()
    # Grab the JSON body of the intercepted response
    json_data = resp.response.body

    # Pull the comment list out of the JSON
    comments = json_data['comments']

    # Walk through each comment in the list
    for comment in comments:
        # Extract the fields we care about
        text = comment['text']                  # comment content
        nickname = comment['user']['nickname']  # commenter nickname
        create_time = comment['create_time']    # Unix timestamp of the comment
        date = datetime.datetime.fromtimestamp(create_time)  # human-readable time (not stored)
        ip_label = comment['ip_label']          # commenter region

        # Collect the fields in a dict
        dit = {
            'name': nickname,
            'area': ip_label,
            'comment': text,
        }
        print(dit)
        # Write the record into MySQL
        sql = "INSERT INTO douyin_comments (name, ip, comment_text) VALUES (%s, %s, %s)"
        cursor.execute(sql, (nickname, ip_label, text))
        db.commit()
# Scraping is done; close the raw connection and read everything back
cursor.close()
db.close()

engine = create_engine('mysql+pymysql://root:123456@localhost/douyin')
df = pd.read_sql_table('douyin_comments', con=engine)
all_text = ' '.join([i for i in df['comment_text']])
# Tokenize the Chinese text with jieba
string = ' '.join(jieba.lcut(all_text))
# Word cloud configuration
font_path = r'C:\Windows\Fonts\msyh.ttc'  # a Chinese font is required
wc = wordcloud.WordCloud(
    font_path=font_path,
    width=800,
    height=400,
    stopwords={'了', '的', '我', '你', '是', '把', '都', '能', '就', '这', '有', '也'},
)
# Feed in the tokenized text
wc.generate(string)
# Save the word cloud image
wc.to_file('douyin_wordcloud.png')
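
# Optional quick preview with the matplotlib already imported above (my own
# addition, not in the original script; uncomment to display the image):
# plt.imshow(wc, interpolation='bilinear')
# plt.axis('off')
# plt.show()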


# Score the whole tokenized corpus at once (see the note above: TextBlob
# will score Chinese text close to 0)
sentiment_score = analyze_sentiment(string)

visualize_sentiment(sentiment_score)
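
A more informative variant is to score each comment separately and look at the distribution instead of a single aggregate number. A minimal sketch reusing the DataFrame and helper defined above (the histogram styling is my own choice, not from the original post):

scores = [analyze_sentiment(t) for t in df['comment_text']]
plt.hist(scores, bins=20, range=(-1, 1), color='blue')
plt.xlabel('Sentiment Score')
plt.ylabel('Number of Comments')
plt.title('Per-Comment Sentiment Distribution')
plt.show()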