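# Baidu image results are loaded dynamically; this example only fetches the JS source embedded in the page and extracts the image URLs with a regular expression.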
#encoding=utf-8
from __future__ import print_function

import os
import re
import urllib
import urllib2
# Baidu image-search results page to scrape.
# NOTE(review): this literal had no closing quote and the query string looks
# truncated (no search-term parameter is visible) -- confirm the full URL
# before running.
url = r'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1492068395730_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&'
# Directory the downloaded images are written to.
imgPath = r'/home/lhy/PycharmProjects/images/imgs/fish'

# Fetch the results page once and pull every "objURL" value out of its source.
imgHtml = urllib2.urlopen(url).read().decode('utf-8')
# Debugging aid: uncomment to inspect the fetched page.
# print(imgHtml)
urls = re.findall(r'"objURL":"(.*?)"', imgHtml)

# makedirs (rather than mkdir) so missing parent directories are created too.
if not os.path.isdir(imgPath):
    os.makedirs(imgPath)

# `index` numbers the next file to write; it only advances after a successful
# download, so `index - 1` is the number of images saved.
# print() is called with several arguments below; under Python 2 this relies
# on `from __future__ import print_function`.
index = 1
for img_url in urls:
    print("下載下傳:", img_url)
    # Download the whole image into memory first, so that a failed request
    # neither creates an empty file nor reuses a previous response.
    data = None
    try:
        res = urllib2.urlopen(img_url)
        try:
            # urllib2 responses expose the HTTP status through getcode().
            if res.getcode() == 200:
                data = res.read()
        finally:
            res.close()
    except Exception:
        # Best effort: any failure on one image just skips that image.
        data = None
    if data is None:
        print('未下載下傳成功:', img_url)
        continue
    filename = os.path.join(imgPath, str(index) + '.jpg')
    with open(filename, 'wb') as f:
        f.write(data)
    print('下載下傳完成\n')
    index += 1
print("下載下傳結束,一共下載下傳了 %s 張圖檔" % (index - 1))