天天看點

Python 爬取天堂圖片網(www.ivsky.com)腳本

import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree
# Entry page of the crawl: the wallpaper index of www.ivsky.com.
url = 'https://www.ivsky.com/bizhi/'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
# Directory the downloaded images are written to (created on demand).
OUTPUT_DIR = './img_list/'
# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 15

# Path separators, characters Windows rejects in file names, and control characters.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Unsafe characters are replaced by ``_`` and leading/trailing spaces and
    dots are stripped. An empty result falls back to ``'untitled'``.
    """
    cleaned = UNSAFE_FILENAME_CHARS.sub('_', name).strip(' .')
    return cleaned or 'untitled'


def image_path(index, name, directory=OUTPUT_DIR):
    """Return the destination path ``<directory>/<index>_<name>.jpg``."""
    return os.path.join(directory, '%s_%s.jpg' % (index, sanitize_filename(name)))


def first(values):
    """Return the first element of *values*, or ``None`` when it is empty."""
    return values[0] if values else None


def fetch(session, target_url):
    """GET *target_url* through *session* and return the response.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx status (via ``raise_for_status``).
    """
    response = session.get(url=target_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def download_album(session, album_url):
    """Download every image listed on the album page at *album_url*.

    Files are numbered by their 1-based position in the listing, so a skipped
    entry leaves a gap instead of shifting the numbers of later files.

    Raises:
        requests.RequestException: if the album page itself cannot be fetched.
    """
    album_page = etree.HTML(fetch(session, album_url).text)
    items = album_page.xpath('//div[@class="left"]/ul[@class="pli"]/li')
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for index, item in enumerate(items, start=1):
        src = first(item.xpath('.//img/@src'))
        if src is None:
            # List entry without an image: nothing to download.
            continue
        img_name = first(item.xpath('.//img/@alt')) or ''
        # urljoin resolves protocol-relative ("//host/x"), root-relative and
        # absolute image URLs alike.
        img_url = urljoin(album_url, src)
        try:
            data = fetch(session, img_url).content
        except requests.RequestException as exc:
            print('Skipping image %s: %s' % (img_url, exc))
            continue
        print('正在下載下傳:%s_%s' % (index, img_name))
        with open(image_path(index, img_name), 'wb') as f:
            f.write(data)


def main():
    """Crawl the index page at ``url`` and download the images of every album linked from it.

    Raises:
        requests.RequestException: if the index page cannot be fetched.
            Failures on individual albums or images are reported and skipped.
    """
    # One session reuses the underlying connection across all requests.
    with requests.Session() as session:
        index_page = etree.HTML(fetch(session, url).text)
        for entry in index_page.xpath('//div[@class="left"]/ul[@class="ali"]/li'):
            href = first(entry.xpath('.//a/@href'))
            if href is None:
                continue
            print(href)
            album_url = urljoin(url, href)
            print(album_url)
            try:
                download_album(session, album_url)
            except requests.RequestException as exc:
                print('Skipping album %s: %s' % (album_url, exc))


if __name__ == '__main__':
    main()