时间:2021-07-01 10:21:17 帮助过:106人阅读
1、抓取煎蛋网上的图片。
2、代码如下:
import os
import re
import urllib.parse
import urllib.request

# User-Agent header sent with every request so the fetch presents itself as a
# desktop browser instead of urllib's default agent.
_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) '
               'Gecko/20100101 Firefox/51.0')

# Text that precedes the current page number in the board's HTML.
_PAGE_MARKER = 'current-comment-page'

# Distance from the start of _PAGE_MARKER to the first digit of the page
# number: the 20-character marker plus 3 more characters (presumably `">[`;
# the original author's note says the number appears as e.g. "[2356]").
_PAGE_NUMBER_OFFSET = 23

# Captures the value of an `img src=` attribute up to and including the first
# ".jpg".  The character class stops at quotes, whitespace and angle brackets
# so a match can never run past the end of the attribute into later markup.
_IMG_SRC_RE = re.compile(r'''img src=["']([^"'<>\s]*?\.jpg)''')


def url_open(url: str) -> bytes:
    """Fetch *url* and return the raw response body.

    Raises:
        urllib.error.URLError: if the resource cannot be opened.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', _USER_AGENT)
    # Open the Request object (not the bare URL) so the header is actually
    # sent, and close the response deterministically.
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_page(url: str) -> str:
    """Return the current page number shown on the page at *url*, as text.

    The number is read from between the ``current-comment-page`` marker and
    the following ``]``.

    Raises:
        ValueError: if the marker or the closing ``]`` is not in the page.
    """
    html = url_open(url).decode('utf-8')
    marker = html.find(_PAGE_MARKER)
    if marker == -1:
        raise ValueError("'" + _PAGE_MARKER + "' marker not found in " + url)
    start = marker + _PAGE_NUMBER_OFFSET
    end = html.find(']', start)
    if end == -1:
        raise ValueError("no closing ']' after page marker in " + url)
    return html[start:end]


def find_imgs(url: str) -> list:
    """Return the addresses of all ``.jpg`` images referenced at *url*.

    Protocol-relative sources (``//host/path.jpg``) get an ``http:`` scheme;
    any other source is resolved against *url*, which leaves absolute
    addresses untouched.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []
    for match in _IMG_SRC_RE.finditer(html):
        src = match.group(1)
        if src.startswith('//'):
            src = 'http:' + src
        else:
            src = urllib.parse.urljoin(url, src)
        img_addrs.append(src)
    return img_addrs


def save_imgs(folder: str, img_addrs) -> None:
    """Download every address in *img_addrs* into *folder*.

    Each file is named after the last ``/``-separated segment of its address.
    *folder* is created if it does not exist.
    """
    os.makedirs(folder, exist_ok=True)
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind.
        img = url_open(each)
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(img)


def download_mm(folder='mm', pages=10, url='http://jandan.net/ooxx/'):
    """Download the images from the newest *pages* pages of the board.

    Args:
        folder: directory the images are written to; created when missing.
        pages: how many pages to walk, newest first.
        url: base address of the board; page addresses are built as
            ``url + 'page-<n>#comments'``.
    """
    os.makedirs(folder, exist_ok=True)
    newest = int(get_page(url))
    for offset in range(pages):
        # Step back exactly one page per iteration (newest, newest-1, ...).
        page_num = newest - offset
        if page_num < 1:
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        save_imgs(folder, find_imgs(page_url))


if __name__ == '__main__':
    download_mm()
相关推荐:
如何用Python爬虫获取那些价值博文
Python爬虫获取美剧的网站
以上就是Python爬虫获取图片并下载保存至本地的详细内容,更多请关注Gxl网其它相关文章!