python实现统计博客园博文数量、阅读量、评论数
-零 人气:0
如何使用
1.将代码中的headurl替换成你的博客链接即可。
类似: https://www.cnblogs.com/-wenli/default.html?page=
2.爬取的页数上限需要根据自己博文的总页数来修改,对应源码中的循环条件:
while i<55
源码
"""Count the posts, total reads and total comments of a cnblogs blog.

The script walks the paginated post list of a blog, extracts the read and
comment counters from every post footer, and prints the totals.
"""
import re

import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

# Applied to the whitespace-stripped text of a post footer ("postDesc" div).
# Group 1 is the read count, group 2 is the comment count.
POST_DESC_PATTERN = re.compile(r'^.*阅读.(\d+)..*评论.(\d+)..*编辑$')

# Maps post title -> {"阅读量": <reads as str>, "评论量": <comments as str>}.
# Filled by parse_one_page() and consumed by statistics().
# NOTE: posts sharing the same title overwrite each other in this mapping.
item = {}


def get_one_page(url, headers):
    """Download one page and return its HTML text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers sent with the request.

    Returns:
        The response body as text, or None when the request fails or the
        server answers with a status code other than 200.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run;
        # the resulting Timeout error is a RequestException.
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_one_page(html):
    """Extract the read/comment counters of every post on one list page.

    Results are stored in the module-level ``item`` dict, keyed by post title.

    Args:
        html: HTML of a post-list page, or None/empty when the download
            failed (the page is then skipped).
    """
    if not html:
        return

    soup = BeautifulSoup(html, 'lxml')
    for day in soup.find_all('div', class_='day'):
        titles = day.find_all('div', class_='postTitle')
        descriptions = day.find_all('div', class_='postDesc')
        # Title divs and footer divs are paired by their position in the day.
        for title_div, desc_div in zip(titles, descriptions):
            link = title_div.find('a', class_='postTitle2')
            if link is None:
                continue
            title = link.get_text().replace('\n', '').replace(' ', '')

            text = desc_div.get_text().replace('\n', '').replace(' ', '')
            match = POST_DESC_PATTERN.match(text)
            if match is None:
                # Footer does not have the expected layout; skip this post
                # instead of aborting the whole run.
                continue

            # A fresh dict per post: sharing one dict between the posts of a
            # day would make them all report the last post's counters.
            item[title] = {
                '阅读量': match.group(1),
                '评论量': match.group(2),
            }


def statistics():
    """Print the number of posts and the total reads and comments."""
    readtotal = sum(int(counts['阅读量']) for counts in item.values())
    commandtotal = sum(int(counts['评论量']) for counts in item.values())
    blogtotal = len(item)
    print('总博文量:', blogtotal)
    print('总阅读量:', readtotal)
    print('总评论量:', commandtotal)


def main():
    """Crawl every list page of the blog, then print the totals."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; '
                      'rv:11.0) like Gecko'
    }
    # Replace with the paginated list URL of your own cnblogs blog; the page
    # number is appended to it, so it should end with "?page=".
    headurl = ''
    if not headurl:
        raise SystemExit('请先将 headurl 替换成你的博客园博客链接')

    # Pages are numbered from 1; adjust the upper bound to the number of
    # list pages your blog has.
    i = 1
    while i < 55:
        url = headurl + str(i)
        print(url)
        # Fetch the page source, then parse it.
        html = get_one_page(url, headers)
        parse_one_page(html)
        i += 1

    statistics()


if __name__ == '__main__':
    main()
加载全部内容