天天看點

下載圖檔的 Python 程式優化:異步處理

作者:缥缈峰靈鹫宮大護法
import requests
import re
import os
from bs4 import BeautifulSoup 
import asyncio
import aiohttp

# HTTP request headers: identify the client as a desktop Chrome browser.
head = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.116 Safari/537.36'
    ),
}

async def download_wallpaper(wallpaperUrl, img_path):
    """Fetch one wallpaper image and save it to disk.

    Args:
        wallpaperUrl: Absolute URL of the image to request.
        img_path: Destination file path; opened in binary write mode.

    Returns:
        None. When the server answers with a status other than 200,
        nothing is written and a message is printed instead, so an HTTP
        error body is never stored on disk under an image file name.
    """
    async with aiohttp.ClientSession() as session:
        # NOTE(review): the request is sent as POST, kept from the original
        # code; confirm the server really needs POST rather than GET.
        async with session.post(wallpaperUrl, headers=head) as response:
            if response.status != 200:
                print(f'skip {img_path}: HTTP {response.status}')
                return
            bigimg_content = await response.read()

    # The body is fully read at this point, so the file can be written after
    # the HTTP session has been closed.
    with open(img_path, 'wb') as f:
        f.write(bigimg_content)
    print(f'save {img_path}')
     
async def waillpaper():
    """Download the 2048x1152 version of each wallpaper on the latest page.

    Steps:
      1. Fetch the "latest wallpapers" listing and extract every thumbnail
         image URL with a regular expression.
      2. For each thumbnail, derive the local file name (last URL path
         segment) and the detail-page URL (the part of the file name before
         the first '-' plus '-wallpapers.html').
      3. Skip wallpapers whose target file already exists in ``./img/``.
      4. Otherwise fetch the detail page, locate the link titled
         'HD 16:9 2048 x 1152 wallpaper' and hand its URL to
         ``download_wallpaper``.

    Returns:
        None. Progress is reported with ``print``.
    """
    url = 'http://wallpaperswide.com/latest_wallpapers.html'
    imgpath = './img/'
    # aiohttp expects a ClientTimeout object; a bare number is the
    # deprecated spelling of the same 100-second total timeout.
    timeout = aiohttp.ClientTimeout(total=100)
    thumb_pattern = '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'

    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, timeout=timeout) as response:
            page_txt = await response.read()

        img_list = re.findall(thumb_pattern, page_txt.decode('utf-8'), re.S)

        # exist_ok avoids the separate existence check before creating the
        # output directory.
        os.makedirs(imgpath, exist_ok=True)

        for img in img_list:
            img_name = img.split('/')[-1]
            img_path = imgpath + img_name

            # Check first: an already-saved wallpaper needs no further
            # network requests at all.
            if os.path.exists(img_path):
                print(f'{img_path} already exists')
                continue

            big_image = 'http://wallpaperswide.com/' + img_name.split('-')[0] + '-wallpapers' + '.html'
            print("bigimage: " + big_image)

            # "async with" guarantees the response is released even if
            # reading the body fails.
            async with session.get(big_image, timeout=timeout) as img_response:
                page_text = await img_response.read()

            soup = BeautifulSoup(page_text, 'html.parser')

            # Expected match looks like:
            # <a href="/download/<name>-wallpaper-2048x1152.jpg"
            #    target="_self" title="HD 16:9 2048 x 1152 wallpaper">
            links = soup.find_all('a', title='HD 16:9 2048 x 1152 wallpaper')
            href = links[0].get('href') if links else None
            if not href:
                # Not every page is guaranteed to offer this resolution;
                # skip it instead of aborting the whole run.
                print('no 2048x1152 download link on ' + big_image)
                continue

            print(href)
            wallpaperUrl = 'http://wallpaperswide.com' + href  # full image URL
            await download_wallpaper(wallpaperUrl, img_path)


if __name__ == '__main__':
    # asyncio.run() creates a new event loop, runs the coroutine to
    # completion and closes the loop afterwards. It replaces the manual
    # get_event_loop()/run_until_complete()/close() sequence, which is
    # deprecated when no event loop is running.
    asyncio.run(waillpaper())