天天看点

Python 下载百度图片

百度图片是动态加载的,本例只抓取网页源码中的 JS 数据,并用正则表达式从中匹配出图片地址。

#encoding=utf-8

import urllib, urllib2

import os

import re

# Baidu image-search results page to scrape.
# NOTE(review): the original literal had no closing quote and the query string
# ends with a dangling '&', so the URL looks truncated in this copy
# (presumably the search-keyword parameter is missing) -- confirm and append
# it before running.
url = r'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1492068395730_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&'

# Directory that receives the downloaded images.
imgPath = r'/home/lhy/PycharmProjects/images/imgs/fish'

# Fetch the results page once and always release the connection.
page = urllib2.urlopen(url)
try:
    imgHtml = page.read().decode('utf-8')
finally:
    page.close()

# The page loads its images dynamically, so the image addresses are pulled
# out of the "objURL" fields embedded in the page source with a regex.
urls = re.findall(r'"objURL":"(.*?)"', imgHtml)

# makedirs (rather than mkdir) also creates any missing parent directories.
if not os.path.isdir(imgPath):
    os.makedirs(imgPath)

# 1-based sequence number of the next file to write; only advanced after a
# successful save, so (index - 1) is the number of images downloaded.
index = 1

# A dedicated loop variable keeps the search-page ``url`` above intact.
for img_url in urls:
    print("下载:", img_url)

    # Any failure while fetching one image is reported and skipped so that a
    # single bad link does not abort the whole run.
    try:
        res = urllib2.urlopen(img_url)
        try:
            # urllib2 responses expose the HTTP status through getcode();
            # they have no ``status`` attribute.
            status = res.getcode()
            data = res.read() if status == 200 else None
        finally:
            res.close()
    except Exception as e:
        print('未下载成功:', img_url, e)
        continue

    if data is None:
        print('未下载成功:', img_url)
        continue

    filename = os.path.join(imgPath, str(index) + '.jpg')
    with open(filename, 'wb') as f:
        f.write(data)

    print('下载完成\n')
    index += 1

print("下载结束,一共下载了 %s 张图片" % (index - 1))