# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import xlrd
import xlwt
import collections
from collections import Counter
# Report statistics for the crawled video sheet: uploader count, video count,
# view-count distribution, most prolific uploaders and total views for a fixed
# list of uploaders.

WORKBOOK_PATH = 'biliroblox.xls'

# Header cell of the view-count column; the row carrying it is not a video.
VIEWS_HEADER = '观看量'

# (exclusive upper bound, message prefix) per view-count bucket, in ascending
# order.  A video falls into the first bucket whose bound exceeds its view
# count; ``None`` marks the open-ended last bucket.
VIEW_BUCKETS = (
    (10000, '播放量一万以下有'),
    (50000, '播放量五万以下有'),
    (100000, '播放量十万以下有'),
    (200000, '播放量二十万以下有'),
    (None, '播放量二十万以上有'),
)

# (uploader name, message prefix) for the per-uploader view totals.  The
# prefixes are kept exactly as the script has always printed them.
TRACKED_UPLOADERS = (
    ('小飞象解说', '小飞象解说视频播放量'),
    ('面面解说实况', '面面解说实况视频播放量'),
    ('吊德斯DioDes', '吊德斯DioDes视频播放量'),
    ('abbey小熙熙', 'abbey小熙熙视频播放量'),
    ('虾扯蛋Nonsense', '虾扯蛋Nonsense视频播放量'),
    ('小格游戏解说', '小格游戏解说视频播放量'),
    ('鲤鱼Ace', '鲤鱼Ace视频播放量'),
    ('KyLin默寒', 'KyLin默寒播放量'),
    ('ANyEo', 'ANyEo播放量'),
    ('RBLX第一精神病院院长', 'RBLX第一精神病院院长播放量'),
)


def load_columns(path=WORKBOOK_PATH):
    """Return the uploader-name column and the view-count column.

    Reads columns 1 and 2 of the first sheet of the workbook at *path*.
    """
    sheet = xlrd.open_workbook(path).sheet_by_index(0)
    return sheet.col_values(1), sheet.col_values(2)


def video_records(names, views):
    """Pair each uploader name with its view count as an ``int``.

    Rows whose view cell equals ``VIEWS_HEADER`` are skipped by value, so the
    result does not depend on where the header row sits or on how many rows
    the sheet has.

    Raises:
        ValueError: if a non-header view cell cannot be converted by ``int``.
    """
    return [
        (name, int(count))
        for name, count in zip(names, views)
        if count != VIEWS_HEADER
    ]


def bucket_counts(view_counts):
    """Return the number of videos per ``VIEW_BUCKETS`` entry, in one pass."""
    counts = [0] * len(VIEW_BUCKETS)
    for views in view_counts:
        for slot, (upper, _prefix) in enumerate(VIEW_BUCKETS):
            if upper is None or views < upper:
                counts[slot] += 1
                break
    return counts


def percentage(part, whole):
    """Format ``part / whole`` as a percentage with two decimals.

    Returns ``'0.00%'`` when *whole* is zero instead of dividing by zero.
    """
    if not whole:
        return '0.00%'
    return '%.2f%%' % (part / whole * 100)


def build_report(records):
    """Return the report lines for *records*, a list of ``(name, views)``.

    The lines are, in order: uploader count, video count, one line per view
    bucket, the ten uploaders with the most videos, and one total-views line
    per entry of ``TRACKED_UPLOADERS``.
    """
    names = [name for name, _views in records]
    total_videos = len(records)

    lines = [
        f"本次爬取{len(set(names))}位up主",
        f"共{total_videos}个视频",
    ]

    # Shares are computed against the real number of videos rather than a
    # fixed constant, so they stay correct when the sheet grows or shrinks.
    tallies = bucket_counts(views for _name, views in records)
    for (_upper, prefix), tally in zip(VIEW_BUCKETS, tallies):
        share = percentage(tally, total_videos)
        lines.append(f"{prefix}{tally}个视频,占比 {share}")

    top_uploaders = Counter(names).most_common(10)
    lines.append(f"视频数量前10的视频up主:{top_uploaders}")

    # Summed with Python ints so totals cannot overflow a fixed-width type.
    totals = {name: 0 for name, _prefix in TRACKED_UPLOADERS}
    for name, views in records:
        if name in totals:
            totals[name] += views
    for name, prefix in TRACKED_UPLOADERS:
        lines.append(f"{prefix}{totals[name]}")

    return lines


def main():
    """Load the workbook and print the report, one line per statistic."""
    names, views = load_columns()
    for line in build_report(video_records(names, views)):
        print(line)


if __name__ == "__main__":
    main()
# 胭惜雨
# 2021-04-21