时间:2021-07-01 10:21:17 帮助过:76人阅读
# encoding=utf8
# author:shell-von
import requests
import re
# Video id (the number after "av") whose counters are requested.
aid = '3210612'
# Despite its name this is the full request URL template; %s receives the aid.
api_key = "http://interface.bilibili.com/count?key=27f582250563d5d6b11d6833&aid=%s"
# A timeout keeps the script from hanging forever if the endpoint stalls.
data = requests.get(api_key % aid, timeout=10).content
# Matches fragments shaped like ('#name').html('123') or ('.name').html(123)
# and captures (name, number).  The selector prefix is restricted to a
# literal '.' or '#': the previous "(?:.|#)" used an unescaped dot, which
# matched any character and made the '#' alternative unreachable.
regex = r"\('[.#](\w+)'\)\.html\('?(\d+)'?\)"
# Print the counters as a {name: number} mapping (both kept as strings).
print(dict(re.findall(regex, data)))
以前写过一个 MATLAB 版本:
aid = 3295561;
% Counter endpoint; the aid is supplied separately as a query parameter below.
api = 'http://interface.bilibili.com/count?key=b9415053057bb00966665eaa';
% Fetch the response with aid as a query parameter and collect, for every
% '#'-prefixed name, the (name, number) tokens matched by the pattern.
% NOTE(review): the quantifiers sit outside the capture groups -- (\w)+ and
% (\d)+ -- so each token may hold only the last repetition; confirm whether
% (\w+) and (\d+) were intended.
data = regexp(webread(api,'aid',aid),'#(\w)+\D*(\d)+','tokens');
% Flatten the per-match token cells into one cell array; the missing
% semicolon makes MATLAB display the result.
data = [data{:}]
下面说下大概的思路(Python 2 代码):
import urllib2
import re
from StringIO import StringIO
import gzip
def find_cid_aid(html):
    """Return the ``(cid, aid)`` pair embedded in a video page.

    The page is expected to contain a call such as
    ``EmbedPlayer('player', "...play.swf", "cid=1511100&aid=1044050")``.
    Every ``EmbedPlayer(...)`` occurrence is inspected in order and the first
    one whose argument list carries ``cid=...&aid=...`` wins.

    Args:
        html: Decoded page source as a string.

    Returns:
        A tuple ``(cid, aid)`` of strings, e.g. ``('1511100', '1044050')``.

    Raises:
        ValueError: If no ``EmbedPlayer(...)`` call with both ids is found.
    """
    # The named group must be spelled (?P<args>...); a bare "(?P" is not a
    # valid pattern.  DOTALL lets the argument list span several lines.
    for call in re.finditer(r'EmbedPlayer\((?P<args>.*?)\)', html, re.DOTALL):
        # Each id ends at the next '&' or quote, so the scan can never run
        # past the end of the text the way a manual search for '"' can.
        ids = re.search(r'cid=(?P<cid>[^&"\']*)&aid=(?P<aid>[^&"\']*)',
                        call.group('args'))
        if ids is not None:
            return (ids.group('cid'), ids.group('aid'))
    raise ValueError('no EmbedPlayer(...) call with cid/aid found in page')
def find_how_many(cid_aid):
    """Query the player interface for a video and return its counter text.

    Args:
        cid_aid: Sequence whose first item is the cid and second item the
            aid, both as strings (the shape returned by ``find_cid_aid``).

    Returns:
        The text captured by the ``result`` group of the response pattern.

    Raises:
        ValueError: If the response does not contain the expected element.
        urllib2.URLError: If the request itself fails.
    """
    cid, aid = cid_aid[0], cid_aid[1]
    addr = r'http://interface.bilibili.com/player?id=cid:' + cid + '&aid=' + aid
    f = urllib2.urlopen(addr)
    try:
        res = f.read()
    finally:
        # Release the connection even if reading fails.
        f.close()
    # The named group must be spelled (?P<result>...); a bare "(?P" is not a
    # valid pattern.
    # NOTE(review): the element name around the group was lost from this
    # snippet; <click> is presumed here -- confirm against a real response.
    found = re.search(r'<click>(?P<result>.*?)</click>', res, re.DOTALL)
    if found is None:
        raise ValueError('no <click> element in response from ' + addr)
    return found.group('result')
# Browser-like request headers sent with the page request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:28.0) Gecko/20100101 Firefox/28.0',
    'Host': 'www.bilibili.com',
    # Only gzip is advertised because it is the only encoding decoded below;
    # offering deflate as well could yield a body this script cannot read.
    'Accept-Encoding': 'gzip',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}
request = urllib2.Request(r'http://www.bilibili.com/video/av2046145/', headers=headers)
response = urllib2.urlopen(request)
try:
    # Always read the body: previously an uncompressed reply left ``html``
    # bound to the response object instead of the page text.
    html = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        html = gzip.GzipFile(fileobj=StringIO(html)).read()
finally:
    response.close()
# Extract the ids from the page, then look up and print the counter.
cid_aid = find_cid_aid(html)
print(find_how_many(cid_aid))
获取 cid、aid(cid=1511100&aid=1044050),再请求 http://interface.bilibili.com/player 接口:
http://interface.bilibili.com/player?id=cid:1511100&aid=1044050
4611
你在电脑屏幕上面看到的一切都是数据来着啊。B站的网页也只不过是一堆代码而已。稍微获取一下源代码,解gzip压缩,转换一下编码,正则表达式搜索一下,就能出来了,很简单的。