# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import xlrd
import xlwt
import collections
from collections import Counter

# Load the first worksheet of the crawl results. Column 1 is read as the
# uploader name and column 2 as the view count of each row.
book = xlrd.open_workbook('biliroblox.xls')
sheet1 = book.sheet_by_index(0)

rows = sheet1.col_values(1)  # uploader column, header cell included
guankan = sheet1.col_values(2)  # view-count column, header cell included

# The header row is recognised by the '观看量' title in the view-count column
# (the same marker the rest of the script tests for). It is left out here so
# the column title is not counted as an uploader.
uploader_names = [name for name, views in zip(rows, guankan) if views != '观看量']

# Wrap the list in a Series: recent pandas deprecates passing a plain list
# to pd.unique.
up = pd.unique(pd.Series(uploader_names, dtype=object))
up1 = len(up)
print(f"本次爬取{up1}位up主")

# Number of data rows, i.e. every row except the header.
shipinzongliang = len(uploader_names)
print(f"共{shipinzongliang}个视频")

# Distribution of videos across view-count brackets.
# The header cell ('观看量') is skipped and every remaining cell is converted
# to int once, instead of re-parsing the whole column for each bracket.
view_counts = [int(cell) for cell in guankan if cell != '观看量']
video_total = len(view_counts)

# Each entry: (text printed before the count, inclusive lower bound,
# exclusive upper bound). None means the bracket is unbounded on that side.
view_brackets = (
    ('播放量一万以下有', None, 10000),
    ('播放量五万以下有', 10000, 50000),
    ('播放量十万以下有', 50000, 100000),
    ('播放量二十万以下有', 100000, 200000),
    ('播放量二十万以上有', 200000, None),
)

for label, low, high in view_brackets:
    bracket_size = sum(
        1
        for views in view_counts
        if (low is None or views >= low) and (high is None or views < high)
    )
    # The share is taken over the videos actually present in the sheet rather
    # than a fixed 980, so the percentages stay correct for any sheet size.
    # An empty sheet reports 0.00% instead of dividing by zero.
    share = bracket_size / video_total * 100 if video_total else 0.0
    print(f"{label}{bracket_size}个视频,占比 {share:.2f}%")

# The ten uploaders with the most videos, as (name, video count) pairs.
# Rows whose view-count cell is the '观看量' column title are skipped so the
# header row is not tallied as if it were an uploader.
yonghu = Counter(
    name for name, views in zip(rows, guankan) if views != '观看量'
).most_common(10)

print(f"视频数量前10的视频up主:{yonghu}")

# Pair every uploader name ('index') with the view count of the same row
# ('value').
data = pd.DataFrame()
data['index'] = rows
data['value'] = guankan

# Remove the header row by its content instead of by a hard-coded row label:
# dropping label 980 fails on sheets with fewer rows and may discard a real
# video when the header sits elsewhere.
data1 = data[data['value'] != '观看量']



# Total view count for a fixed list of uploaders.
# Each entry: (uploader name, wording printed between the name and the
# total). The wording differs between entries and is kept exactly so the
# printed output is unchanged.
uploader_reports = (
    ('小飞象解说', '视频播放量'),
    ('面面解说实况', '视频播放量'),
    ('吊德斯DioDes', '视频播放量'),
    ('abbey小熙熙', '视频播放量'),
    ('虾扯蛋Nonsense', '视频播放量'),
    ('小格游戏解说', '视频播放量'),
    ('鲤鱼Ace', '视频播放量'),
    ('KyLin默寒', '播放量'),
    ('ANyEo', '播放量'),
    ('RBLX第一精神病院院长', '播放量'),
)

for uploader, wording in uploader_reports:
    # Select this uploader's view counts, convert them to int and add them
    # up; an uploader with no rows yields 0.
    uploader_views = data1.loc[data1['index'] == uploader, 'value']
    total_views = uploader_views.values.astype(int).sum()
    print(f"{uploader}{wording}{total_views}")

# 胭惜雨
#
# 2021年04月21日
#
# 此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据