天天看點

aiohttp 異步 HTTP 請求-3.異步批量下載圖檔

前言

當我們需要批量下載圖檔的時候,用 requests 庫會比較慢:如果一個個依序下載,一旦某個請求阻塞,後面的請求都會跟著卡住,呈現假死狀態。當然,用多線程也能提高效率。

這裡介紹用 aiohttp 異步批量下載圖檔。

異步批量下載圖檔

話不多說,直接看代碼

import aiohttp
import asyncio
from pathlib import Path


async def down_img(session, url):
    """Download one image and save it under ``./down_img/``.

    The file name is the last ``/``-separated segment of ``url``.

    Args:
        session: An open ``aiohttp.ClientSession`` (or compatible object)
            used to issue the GET request.
        url: Address of the image to download.

    Returns:
        ``url`` as a string, so the caller can tell which download finished.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or above.
        OSError: If the target file cannot be written, e.g. because the
            ``./down_img`` directory does not exist.
    """
    name = url.split('/')[-1]  # last path segment is used as the file name
    # Using the response as a context manager guarantees the connection is
    # released even if reading the body fails or the task is cancelled.
    async with session.get(url) as img:
        # Fail loudly on 4xx/5xx instead of saving an error page as an image.
        img.raise_for_status()
        # Awaiting here lets the other download tasks run while the body
        # is being received.
        content = await img.read()
    with open('./down_img/' + str(name), 'wb') as f:
        f.write(content)
    print(f'{name} 下載下傳完成!')
    return str(url)


async def main(URL):
    """Download every image in ``URL`` concurrently and print the results.

    One ``down_img`` task is created per URL; all tasks share a single
    ``aiohttp.ClientSession``. When every task has finished, the values they
    returned are printed after the prefix ``ALL RESULT:``, in the same order
    as ``URL``.

    Args:
        URL: Iterable of image URLs. May be empty, in which case an empty
            result list is printed.

    Raises:
        Exception: Whatever a download task raised is re-raised when its
            result is collected.
    """
    # One session for all requests so connections can be pooled.
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(down_img(session, img_url)) for img_url in URL]
        # asyncio.wait() raises ValueError when given no tasks, so only wait
        # when there is something to wait for.
        if tasks:
            await asyncio.wait(tasks)
        # Collect from the task list rather than the unordered "done" set so
        # the results line up with the input URLs.
        all_results = [task.result() for task in tasks]
        print("ALL RESULT:"+str(all_results))


# Image URLs to download.
URL = [
    'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
    'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
    'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
    'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
    'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
    'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
]

# Make sure the output directory exists before any download starts.
# exist_ok=True avoids the check-then-create race of exists() + mkdir();
# it still raises FileExistsError if the path exists but is not a directory.
fp = Path('./down_img')
fp.mkdir(exist_ok=True)


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling asyncio.get_event_loop() without a
    # running loop is deprecated.
    asyncio.run(main(URL))

執行結果

mountain-477832_960_720.jpg 下載下傳完成!
railroad-163518_960_720.jpg 下載下傳完成!
maldives-3220702_960_720.jpg 下載下傳完成!
dolomites-2580866_960_720.jpg 下載下傳完成!
pier-1467984_960_720.jpg 下載下傳完成!
plane-513641_960_720.jpg 下載下傳完成!
iceberg-404966_960_720.jpg 下載下傳完成!
sea-1014710_960_720.jpg 下載下傳完成!
ALL RESULT:['https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg', 'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg', 'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg', 'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg', 'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg', 'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg', 'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg', 'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg']      
aiohttp 異步 HTTP 請求-3.異步批量下載圖檔

Semaphore 控制並發

上面的代碼是把 8 個 url 一次全部加入到並發任務中。當 url 數量很多的時候,我們希望可以控制並發量,這時可以用 Semaphore 來控制並發。

semaphore = asyncio.Semaphore(2)  # limit the concurrency to 2

優化後的代碼

import aiohttp
import asyncio
from pathlib import Path


async def down_img(session, url, semaphore):
    """Download one image and save it under ``./down_img/``.

    The whole download (request, body read and file write) runs while
    holding ``semaphore``, so the semaphore's initial value bounds how many
    downloads are in progress at once. The file name is the last
    ``/``-separated segment of ``url``.

    Args:
        session: An open ``aiohttp.ClientSession`` (or compatible object)
            used to issue the GET request.
        url: Address of the image to download.
        semaphore: An ``asyncio.Semaphore`` shared by all download tasks.

    Returns:
        ``url`` as a string, so the caller can tell which download finished.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or above.
        OSError: If the target file cannot be written, e.g. because the
            ``./down_img`` directory does not exist.
    """
    async with semaphore:
        name = url.split('/')[-1]  # last path segment is used as the file name
        # Using the response as a context manager guarantees the connection
        # is released even if reading the body fails or the task is cancelled.
        async with session.get(url) as img:
            # Fail loudly on 4xx/5xx instead of saving an error page as an image.
            img.raise_for_status()
            # Awaiting here lets the other download tasks run while the body
            # is being received.
            content = await img.read()
        with open('./down_img/' + str(name), 'wb') as f:
            f.write(content)
        print(f'{name} 下載下傳完成!')
        return str(url)


async def main(URL):
    """Download every image in ``URL`` with at most two downloads at a time.

    One ``down_img`` task is created per URL; all tasks share a single
    ``aiohttp.ClientSession`` and a single ``asyncio.Semaphore(2)``. When
    every task has finished, the values they returned are printed after the
    prefix ``ALL RESULT:``, in the same order as ``URL``.

    Args:
        URL: Iterable of image URLs. May be empty, in which case an empty
            result list is printed.

    Raises:
        Exception: Whatever a download task raised is re-raised when its
            result is collected.
    """
    semaphore = asyncio.Semaphore(2)  # limit the concurrency to 2
    # One session for all requests so connections can be pooled.
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(down_img(session, img_url, semaphore)) for img_url in URL]
        # asyncio.wait() raises ValueError when given no tasks, so only wait
        # when there is something to wait for.
        if tasks:
            await asyncio.wait(tasks)
        # Collect from the task list rather than the unordered "done" set so
        # the results line up with the input URLs.
        all_results = [task.result() for task in tasks]
        print("ALL RESULT:"+str(all_results))


# Image URLs to download.
URL = [
    'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
    'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
    'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
    'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
    'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
    'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
]

# Make sure the output directory exists before any download starts.
# exist_ok=True avoids the check-then-create race of exists() + mkdir();
# it still raises FileExistsError if the path exists but is not a directory.
fp = Path('./down_img')
fp.mkdir(exist_ok=True)

if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling asyncio.get_event_loop() without a
    # running loop is deprecated.
    asyncio.run(main(URL))

使用 TCPConnector 控制並發

TCPConnector 使用 limit 參數限制同時開啟的連線數,進而控制並發數

conn = aiohttp.TCPConnector(limit=2)
    # 建立會話session
    async with aiohttp.ClientSession(connector=conn) as session:      

完整代碼如下

import aiohttp
import asyncio
from pathlib import Path


async def down_img(session, url):
    """Download one image and save it under ``./down_img/``.

    The file name is the last ``/``-separated segment of ``url``.

    Args:
        session: An open ``aiohttp.ClientSession`` (or compatible object)
            used to issue the GET request.
        url: Address of the image to download.

    Returns:
        ``url`` as a string, so the caller can tell which download finished.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or above.
        OSError: If the target file cannot be written, e.g. because the
            ``./down_img`` directory does not exist.
    """
    name = url.split('/')[-1]  # last path segment is used as the file name
    # Using the response as a context manager guarantees the connection is
    # released even if reading the body fails or the task is cancelled.
    async with session.get(url) as img:
        # Fail loudly on 4xx/5xx instead of saving an error page as an image.
        img.raise_for_status()
        # Awaiting here lets the other download tasks run while the body
        # is being received.
        content = await img.read()
    with open('./down_img/' + str(name), 'wb') as f:
        f.write(content)
    print(f'{name} 下載下傳完成!')
    return str(url)


async def main(URL):
    """Download every image in ``URL`` over at most two connections.

    One ``down_img`` task is created per URL; all tasks share a single
    ``aiohttp.ClientSession`` whose ``TCPConnector`` is created with
    ``limit=2``. When every task has finished, the values they returned are
    printed after the prefix ``ALL RESULT:``, in the same order as ``URL``.

    Args:
        URL: Iterable of image URLs. May be empty, in which case an empty
            result list is printed.

    Raises:
        Exception: Whatever a download task raised is re-raised when its
            result is collected.
    """
    # The connector caps the number of simultaneous connections at 2.
    conn = aiohttp.TCPConnector(limit=2)
    # One session for all requests, backed by the limited connector.
    async with aiohttp.ClientSession(connector=conn) as session:
        tasks = [asyncio.create_task(down_img(session, img_url)) for img_url in URL]
        # asyncio.wait() raises ValueError when given no tasks, so only wait
        # when there is something to wait for.
        if tasks:
            await asyncio.wait(tasks)
        # Collect from the task list rather than the unordered "done" set so
        # the results line up with the input URLs.
        all_results = [task.result() for task in tasks]
        print("ALL RESULT:"+str(all_results))


# Image URLs to download.
URL = [
    'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
    'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
    'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
    'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
    'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
    'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
    'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
]

# Make sure the output directory exists before any download starts.
# exist_ok=True avoids the check-then-create race of exists() + mkdir();
# it still raises FileExistsError if the path exists but is not a directory.
fp = Path('./down_img')
fp.mkdir(exist_ok=True)

if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling asyncio.get_event_loop() without a
    # running loop is deprecated.
    asyncio.run(main(URL))