# Import the modules used for scraping, storage, and analysis
import datetime

import jieba
import pandas as pd
import pymysql
import wordcloud
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from DrissionPage import ChromiumPage
from textblob import TextBlob


def analyze_sentiment(text):
    # TextBlob's polarity model targets English text, so Chinese comments
    # will tend to score close to 0 (see the note at the end of the script)
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    return sentiment


def visualize_sentiment(sentiment_score):
    # Plot the overall polarity as a single bar in the range [-1, 1]
    plt.bar(['Sentiment'], [sentiment_score], color=['blue'])
    plt.ylim(-1, 1)
    plt.ylabel('Sentiment Score')
    plt.title('Sentiment Analysis Result')
    plt.show()
# Open the browser
driver = ChromiumPage()
# Note: ChromiumPage drives a real Chrome instance, which sends its own request
# headers, so this assignment most likely has no effect and could be dropped
driver.headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    # add other request headers here if needed
}

# Start listening for the comment-list API packets
driver.listen.start('aweme/v1/web/comment/list/')

# Open the video page
driver.get('https://www.douyin.com/user/MS4wLjABAAAAompXkPoYOGsA152dqYoytKycjIZ_aCCxHwGmLX5IsDM?from_tab_name=main&modal_id=7481262407885524261')

# Connect to the MySQL database
db = pymysql.connect(host='localhost', port=3306, user='root', passwd='123456',
                     db='douyin', charset='utf8mb4')
cursor = db.cursor()
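# Assumption: the douyin_comments table already exists in the douyin database.
# If it does not, a minimal schema matching the INSERT below can be created first
# (the column names come from that INSERT; the column sizes are guesses):
cursor.execute("""
    CREATE TABLE IF NOT EXISTS douyin_comments (
        id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255),
        ip VARCHAR(64),
        comment_text TEXT
    ) DEFAULT CHARSET = utf8mb4
""")
db.commit()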
for page in range(2):
    # Scroll to the bottom of the page to trigger loading of more comments
    driver.scroll.to_bottom()
    # Wait for the next comment-list packet to be captured
    resp = driver.listen.wait()
    # Take the JSON body of the captured response
    json_data = resp.response.body
    # Pull the comment list out of the JSON
    comments = json_data['comments']
    # Walk through each comment in the list
    for item in comments:
        text = item['text']                  # comment content
        nickname = item['user']['nickname']  # commenter nickname
        create_time = item['create_time']    # comment timestamp
        date = datetime.datetime.fromtimestamp(create_time)  # readable time (not stored)
        ip_label = item['ip_label']          # region / IP location

        # Collect the fields in a dict for inspection
        dit = {
            'name': nickname,
            'area': ip_label,
            'comment': text,
        }
        print(dit)

        # Write the comment into MySQL
        sql = "INSERT INTO douyin_comments (name, ip, comment_text) VALUES (%s, %s, %s)"
        cursor.execute(sql, (nickname, ip_label, text))
        db.commit()

# Read all stored comments back out of MySQL
engine = create_engine('mysql+pymysql://root:123456@localhost/douyin')
df = pd.read_sql_table('douyin_comments', con=engine)
all_text = ' '.join(df['comment_text'])

# Tokenize the comments with jieba
string = ' '.join(jieba.lcut(all_text))

# Configure the word cloud (a Chinese font is required to render the characters)
font_path = r'C:\Windows\Fonts\msyh.ttc'
wc = wordcloud.WordCloud(
    font_path=font_path,
    width=800,
    height=400,
    stopwords={'了', '的', '我', '你', '是', '把', '都', '能', '就', '这', '有', '也'},
)
# Feed the tokenized text into the word cloud
wc.generate(string)
# Export the word cloud image
wc.to_file('douyin_wordcloud.png')
sentiment_score = analyze_sentiment(string)
visualize_sentiment(sentiment_score)
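
# TextBlob's polarity model is trained on English text, so the score above will sit
# near zero for Chinese comments. A minimal sketch of a Chinese-oriented alternative,
# assuming the snownlp package is installed (pip install snownlp):
from snownlp import SnowNLP

# SnowNLP returns a positivity probability in [0, 1]; rescale it to [-1, 1] so the
# existing visualize_sentiment() helper can be reused unchanged
snow_scores = [SnowNLP(t).sentiments * 2 - 1 for t in df['comment_text'] if t]
if snow_scores:
    visualize_sentiment(sum(snow_scores) / len(snow_scores))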