import os
import requests
from lxml import etree
import urllib.parse
def scarch(url):
    """Prompt for a keyword, search the site, and download a range of result pages.

    The user is asked on stdin for a search keyword and for the first and
    last result page to fetch. A directory named after the keyword is created
    in the current working directory (if it does not exist yet), and every
    requested result page is handed to ``get_taotu`` for downloading.

    Request headers are read from the module-level ``headers`` variable.

    Args:
        url: Base search URL ending in ``/``; the URL-quoted keyword is
            appended to it.

    Raises:
        ValueError: If a page number typed by the user is not an integer.
    """
    word = input('請輸入你想要的妹子類型……')
    keyword = urllib.parse.quote(word)
    search_url = url + keyword + "/"

    # First request: the search landing page, used only to read the page count.
    response = requests.get(url=search_url, headers=headers)
    etrees = etree.HTML(response.text)
    all_pageNum = "".join(
        etrees.xpath('//div[@class="nav-links"]/a[4]/text()')
    )
    # No matching pager anchor yields an empty string, which is treated as a
    # single page of results. (The previous `len(...) < 0` test could never
    # be true, so this branch was unreachable.)
    if not all_pageNum:
        print(f"你找的{word}類型的妹子圖檔共有1頁")
    else:
        print(f"你找的{word}類型的妹子圖檔共有{all_pageNum}頁")

    start_page = int(input("請輸入開始頁碼:"))
    end_page = int(input("請輸入結束頁碼:"))

    # Download root; the trailing slash matters because get_taotu builds
    # gallery paths by plain string concatenation.
    f = f"./{word}/"
    if not os.path.exists(f):
        os.mkdir(f)
        print(f"已為您預設建立目錄,目錄名稱為{word}")

    # Result pages live under <search_url>page/<n>/. Deriving the prefix from
    # the search URL keeps both requests on the base URL the caller passed in.
    page_base = search_url + "page/"
    for i in range(start_page, end_page + 1):
        # Second request: one page of search results,
        # e.g. https://www.mzitu.com/search/%E7%BE%8E%E5%A5%B3/page/5/
        img_url = f"{page_base}{i}/"
        response = requests.get(url=img_url, headers=headers)
        etrees_page = etree.HTML(response.text)
        img_detail_li = etrees_page.xpath('//ul[@id="pins"]/li')
        get_taotu(img_detail_li, f)
# Fetch the details of each gallery (photo set)
def get_taotu(img_detail_li, f):
    """Download every gallery listed on one search-result page.

    Each gallery is processed independently: a failure in one gallery is
    reported and the loop moves on to the next one.

    Request headers are read from the module-level ``headers`` variable.

    Args:
        img_detail_li: Iterable of ``<li>`` elements from the result list.
            Each one is queried for ``./a/@href`` (gallery URL) and
            ``./a/img/@alt`` (gallery title).
        f: Directory prefix, ending in a path separator, under which one
            sub-directory per gallery is created.
    """
    for item in img_detail_li:
        try:
            _download_taotu(item, f)
        except (
            IndexError,                 # an XPath query matched nothing
            ValueError,                 # image count was not an integer
            AttributeError,             # page could not be parsed into a tree
            OSError,                    # directory / file could not be written
            requests.RequestException,  # network or HTTP failure
            etree.LxmlError,            # parser error raised by lxml
        ) as err:
            print(f"這一套套圖下載失敗,已跳過:{err!r}")
        else:
            print("這一套套圖下載下傳完畢。。。")


def _download_taotu(item, f):
    """Download all images of a single gallery into ``f + <gallery title>``."""
    all_href = item.xpath('./a/@href')[0]
    all_title = str(item.xpath('./a/img/@alt')[0])
    print(all_href, type(all_href))
    print(all_title)
    print(type(all_title))
    print(f"開始下載下傳套圖{all_title}")

    filepath = f + all_title
    os.makedirs(filepath, exist_ok=True)

    # Gallery landing page: only used to read how many images it contains.
    response = requests.get(url=all_href, headers=headers)
    etrees_page = etree.HTML(response.text)
    img_allNum = int(
        etrees_page.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0]
    )
    print(f"目前頁共有{img_allNum}張圖檔")

    for v in range(1, img_allNum + 1):
        # Each image has its own viewer page at <gallery url>/<index>.
        img_bigurl = all_href + "/" + str(v)
        response = requests.get(url=img_bigurl, headers=headers)
        etrees_page = etree.HTML(response.text)
        # Image caption shown on the viewer page.
        img_name = str(
            etrees_page.xpath('/html/body/div[2]/div[1]/h2/text()')[0]
        )
        # Direct URL of the full-size image.
        img_url = str(
            etrees_page.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img/@src')[0]
        )

        response = requests.get(url=img_url, headers=headers)
        # Do not save an HTTP error body to disk as if it were a .jpg.
        response.raise_for_status()
        print(f"開始下載下傳高清大圖------{img_name}")

        filename = filepath + "/" + str(v) + ".jpg"
        # The handle must not be named `f`: rebinding the directory prefix to
        # a file object made `f + all_title` fail for every later gallery.
        with open(filename, "wb") as img_file:
            img_file.write(response.content)
if __name__ == '__main__':
    # Base search endpoint; scarch() appends the URL-quoted keyword to it.
    url = "https://www.mzitu.com/search/"

    # Headers sent with every request. scarch() and get_taotu() read this
    # name as a global, so it has to be bound before scarch() is called.
    headers = {}
    headers['user-agent'] = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    headers['Referer'] = url

    scarch(url)