An example of high-efficiency image crawling with aiohttp and asyncio in Python 3, saving to local disk

import asyncio
import aiohttp
import time
import random
import os
from lxml import etree
path = 'F:\\wuso\\'      # file save path

targe_url = []
for i in range(0, 200):  # total 178 pages; build the list of listing-page URLs
    targe_url.append('https://wuso.me/forum-photos-{}.html'.format(i))
async def run(url):
    path = 'F:\\wuso\\'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gb2312,utf-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'Keep-alive'
    }
    try:
        async with aiohttp.ClientSession() as session:
            # fetch one listing page (5-second timeout per request)
            async with session.get(url, headers=headers, timeout=5) as response:
                res = await response.text()
                res = etree.HTML(res)
                fName = res.xpath("//div[@class='c cl']/a/@title")   # album titles
                # print(fName)
                fLink = res.xpath("//div[@class='c cl']/a/@href")    # album links
                for i in range(len(fName)):
                    # open each album page and collect its image URLs
                    async with session.get(fLink[i], headers=headers) as imgres:
                        t = await imgres.text()
                        t = etree.HTML(t)
                        imgLinks = t.xpath('//img/@zoomfile')
                        try:
                            if not os.path.exists(path + fName[i]):
                                os.makedirs(path + fName[i])
                            for link in imgLinks:
                                # download each image and write it to disk
                                async with session.get('https://wuso.me/' + link, headers=headers) as img:
                                    try:
                                        imgcode = await img.read()
                                        try:
                                            with open(path + fName[i] + '\\' + link.split('/')[-1], 'wb') as f:
                                                f.write(imgcode)
                                            print(link.split('/')[-1], 'Saved')
                                        except:
                                            print('failed to create file')
                                    except:
                                        print('failed to read binary data')
                        except:
                            print('failed to save image')
    except:
        print('request failed *******************************************')
start = time.time()
loop = asyncio.get_event_loop()
tasks = []
for u in targe_url:
    tasks.append(asyncio.ensure_future(run(u)))
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
print('total {} pages, time cost: {}'.format(len(tasks), time.time() - start))
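
For readers on a recent aiohttp (3.x) and Python 3.7 or later, the same pattern can be written a bit more tightly: one shared ClientSession configured with a ClientTimeout, concurrency bounded by an asyncio.Semaphore, and asyncio.run instead of managing the event loop by hand. The sketch below is only a minimal illustration under those assumptions; fetch_page, main and MAX_CONCURRENCY are illustrative names, not part of the original script, and it only fetches the listing pages, leaving the album/image parsing from run() unchanged.

import asyncio
import aiohttp

MAX_CONCURRENCY = 20  # illustrative cap on simultaneous requests (assumption, not from the original script)

async def fetch_page(session, semaphore, url):
    # hypothetical helper: fetch one listing page while holding a semaphore slot
    async with semaphore:
        try:
            async with session.get(url) as response:
                return await response.text()
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            print('request failed:', url, exc)
            return None

async def main(urls):
    timeout = aiohttp.ClientTimeout(total=5)        # per-request timeout configuration in aiohttp 3.x
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
    # one session shared by all requests, so connections are pooled and reused
    async with aiohttp.ClientSession(timeout=timeout) as session:
        return await asyncio.gather(*(fetch_page(session, semaphore, u) for u in urls))

if __name__ == '__main__':
    urls = ['https://wuso.me/forum-photos-{}.html'.format(i) for i in range(200)]
    pages = asyncio.run(main(urls))
    print(len([p for p in pages if p is not None]), 'pages fetched')

Bounding the number of in-flight requests this way keeps the script from opening hundreds of sockets at once, which in practice matters for "high efficiency" when the target site throttles aggressive clients.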