Loading...

关于

本网站为基于Python、Flask、Echarts、WordCloud等技术实现的的可视化数据分析的小项目(练手用)
下滑查看代码
下载项目源码:Github

本人博客:https://yzyyz.top

首页 信息 评分 词云 留言

爬虫

from bs4 import BeautifulSoup       #网页解析,获取数据
import urllib.request
import xlwt         #excel操作
import re           #正则,匹配文字
import sqlite3      #数据库操作

def main():
    baseurl="https://movie.douban.com/top250?start="
    datalist=getData(baseurl) #获取数据
    path = ".\\" #存储路径
    dbpath = "movie.db"
    saveData2(datalist,dbpath)
    # saveData(datalist,path) #存储数据


def askUrl(url):  #请求网页 返回html
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWe bKit/537.36 (KHTML, like Gecko) Chrome/93.0.4544.0 Safari/537.36 Edg/93.0.933.1",
    }#头部信息用户代理
    request = urllib.request.Request(url,headers=headers)  #对象
    try:
        response = urllib.request.urlopen(request)  #获取
        html = response.read().decode("utf-8")
    except Exception as err:
        print("出现错误",err)
    return html

findLink=re.compile(r'<a href="(.*?)">')
findImg=re.compile(r'<img.*src="(.*?)"',re.S) #re.S忽略换行符
fingTitle=re.compile(r'<span class="title">(.*?)</span>')
fingRating=re.compile(r'<span class="rating_num" property="v:average">(.*?)</span>')
findCNumber=re.compile(r'<span>(\d*)人评价</span>')
findDics=re.compile(r'<span class="inq">(.*?)</span>',re.S)
findInfo=re.compile(r'<p class="">(.*?)</p>',re.S)

def getData(baseurl):  #获取数据
    datalist=[]
    c=0
    for i in range(0,10):  #10次获取,一页25条
        url=baseurl+str(i*25)
        html=askUrl(url)
        soup = BeautifulSoup(html,"lxml") #BeautifulSoup解析
        data = []#用于保存一部的信息
        for item in soup.find_all('div' , class_="item"):
            #print(item)   #测试
            item=str(item)
            link=re.findall(findLink, item)[0]
            img=re.findall(findImg,item)[0]
            title=re.findall(fingTitle,item)
            ranting=re.findall(fingRating,item)[0]
            cnumber=re.findall(findCNumber,item)[0]
            dic = re.findall(findDics, item)
            info = re.findall(findInfo,item)[0]
            info=re.sub('<br(\s+)?/>(/s+)?',"",info)
            info=re.sub('/',"",info)
            data.append(link)
            data.append(img)
            if len(title) == 2:
                ctitle = title[0]
                etitle = title[1]
                data.append(ctitle)
                data.append(etitle)
            else:
                ctitle = title[0]
                etitle = ''
                data.append(ctitle)
                data.append(etitle)
            data.append(ranting)
            data.append(cnumber)
            if len(dic) == 0:
                data.append('')
            else:
                dic=dic[0].replace("。","")
                data.append(dic)
            data.append(info.strip())
            c+=1
            datalist.append(data)
            data=[]
    for items in datalist:
        print(items)
    return datalist



def saveData(datalist,path):  #储存数据
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('豆瓣电影Top250',cell_overwrite_ok=True)
    print("test")
    col=('链接','图片链接','中文名','外文名','评分','评价人数','描述','信息')
    for i in range(0,8):
        sheet.write(0,i,col[i]) #列名
    for i in range(0,250):
        print("%d条"%i)
        data=datalist[i]
        for j in range(0,8):
            sheet.write(i+1,j,data[j])
    book.save('test1.xls')


def saveData2(datalist,dbpath):
    createdb(dbpath)
    conn = sqlite3.connect(dbpath)
    cur=conn.cursor()

    for data in datalist:
        for index in range(len(data)):
            if index == 4 or index==5:
                continue
            data[index]='"'+data[index]+'"'
        sql='''
            insert into movie250 (info_link,pic_link,name,ename,cnumber,rate,dec,info)
            values(%s)
            '''%",".join(data)
        print(sql)
        cur.execute(sql)
        conn.commit()
    conn.close()

def createdb(dbpath):
    sql='''
    create table movie250
    ( id integer primary key autoincrement,
    info_link text,
    pic_link text,
    name text,
    ename text,
    cnumber numeric,
    rate numeric,
    dec text,
    info text
    )
    '''
    conn = sqlite3.connect(dbpath)
    cur=conn.cursor()
    cur.execute(sql)
    conn.commit()
    conn.close()



if __name__ == "__main__":
    main()
    print('爬取完毕')
          
         

Flask

from flask import Flask,render_template,request
import sqlite3
import os
import time
app = Flask(__name__)

#主页
@app.route('/')
def hello_world():
    return render_template("/index.html")

#主页
@app.route('/index')
def index():
    return render_template("/index.html")

#信息页面
@app.route('/infor')
def infor():
    datalist=[]
    conn=sqlite3.connect("movie.db")
    cur=conn.cursor()
    sql='''
    select * from movie250
    '''
    data=cur.execute(sql)
    for item in data:
        datalist.append(item)
    conn.commit()
    cur.close()
    conn.commit()
    conn.close()
    return render_template("/infor.html",datalist=datalist)

#评分页面
@app.route('/rate')
def rate():
    datalist = []
    conn = sqlite3.connect("movie.db")
    cur = conn.cursor()
    sql = '''
        select cnumber,count(cnumber) from movie250 group by cnumber
        '''
    data = cur.execute(sql)
    for item in data:
        datalist.append(item)
    conn.commit()
    cur.close()
    conn.commit()
    conn.close()
    sc=[]
    count=[]
    for data in datalist:
        sc.append(data[0])
        count.append(data[1])
    return render_template("/rate.html",sc=sc,count=count)


#词云页面
@app.route('/word')
def word():
    return render_template("/word.html")

#关于页面
@app.route('/team')
def team():
    return render_template("/team.html")

#留言页面
@app.route("/msg",methods=['POST','GET'])
def msg():
    if request.method == 'POST':
        msg = request.form
        saveMsg(msg,'msg.db')
        smsg=showmsg()
        return render_template("test1.html",smsg=smsg)
    else:
        smsg=showmsg()
        return render_template("test1.html",smsg=smsg)

#留言页面
@app.route('/inmsg')
def inputmsg():
    smsg=showmsg()
    return render_template('test1.html',smsg=smsg)

#创建留言数据库
def createtable(path):
    sql = '''
       create table msgs
       ( id integer primary key autoincrement,
       username text,
       email text,
       msg text,
       msgtime text
       )
       '''
    conn = sqlite3.connect(path)
    cur = conn.cursor()
    cur.execute(sql)
    conn.commit()
    conn.close()

#储存留言,传入表单信息和数据库路径
def saveMsg(msglist,dbpath):
    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    #os判断数据库是否存在
    #存在则追加数据
    if os.path.exists('msg.db')==True:
        # 连接数据库
        conn = sqlite3.connect(dbpath)
        cur=conn.cursor()
        #建个空列表存数据
        data=[]
        for ii in msglist.items():
            data.append(ii[1])
        data.append(now)
        for i in range(len(data)):
            data[i]='"'+data[i]+'"'
            #加双引号
        sql='''
                insert into msgs (username,email,msg,msgtime)
                values(%s)
                '''%",".join(data)
        #上面用英文逗号连接要写入的内容
        # print(sql)
        #打印一手,测试用
        cur.execute(sql)
        conn.commit()
        conn.close()
    #没数据库时
    else:
        #createtable()创建数据库,其他的同上
        createtable(dbpath)
        conn = sqlite3.connect(dbpath)
        cur=conn.cursor()
        data=[]
        for ii in msglist.items():
            data.append(ii[1])
        data.append(now)
        for i in range(len(data)):
            data[i]='"'+data[i]+'"'
        sql='''
                insert into msgs (username,email,msg,msgtime)
                values(%s)
                '''%",".join(data)
        print(sql)
        cur.execute(sql)
        conn.commit()
        conn.close()

#显示留言
def showmsg():
    if os.path.exists('msg.db') == True:
        #空列表存数据
        datalist = []
        #连接数据库
        conn = sqlite3.connect("msg.db")
        cur = conn.cursor()
        sql = '''
        select * from msgs
        '''
        data = cur.execute(sql)
        #data出来是sqlite的对象,遍历,加入列表datalist
        for item in data:
            datalist.append(item)
        conn.commit()
        cur.close()
        conn.commit()
        conn.close()
        smsgdata=datalist
        #记得用了好几个datalist了,写个smsgdata用一下(ShowMsgData)
        return smsgdata
    else:
        createtable('msg.db')
        datalist = []
        conn = sqlite3.connect("msg.db")
        cur = conn.cursor()
        sql = '''
                select * from msgs
                '''
        data = cur.execute(sql)
        for item in data:
            datalist.append(item)
        conn.commit()
        cur.close()
        conn.commit()
        conn.close()
        smsgdata = datalist
        return smsgdata

if __name__ == '__main__':
    app.run(debug='TRUE')


        

Copyright © All rights reserved | by yzyyz.top