请问这个 asyncio 异步访问页面怎么写可以更加快?
資深大佬 : yagamil 0
代码如下:先在主页获取子页面(大概 64 个),然后并发访问这些子页面,总用时大概 36 秒,感觉还是有点慢。
# Endpoints used by the crawler: the index page that lists every industry,
# and the site root that the relative detail links are appended to.
URL_MAP = {
    'home_page': 'https://xxx/stocks/industry',
    'base': 'https://xxx.com',
}


class App(BaseService):
    """Fetch the industry index page, then request every industry page concurrently."""

    def __init__(self):
        super(App, self).__init__()

    async def home_page(self):
        """Download the index page and fan out one request per industry link.

        Every anchor under ``ul.list-unstyled`` becomes one ``detail_list``
        call; all calls share a single ``aiohttp`` session and are awaited
        together. The elapsed wall-clock time is printed at the end.
        """
        start = time.time()
        async with aiohttp.ClientSession() as session:
            async with session.get(url=URL_MAP['home_page'], headers=headers) as response:
                # NOTE: the original author flagged this await as the step
                # that "blocks" (i.e. where the time is spent).
                html = await response.text()

            # The index response is released above, before the fan-out, so it
            # is not held open while the detail requests are running.
            industries = Selector(text=html).xpath('//ul[@class="list-unstyled"]/a')

            task_list = []
            for industry in industries:
                industry_url = industry.xpath('.//@href').extract_first()
                if industry_url is None:
                    # extract_first() yields None for an anchor without href;
                    # concatenating that onto the base URL would raise.
                    continue
                json_data = {
                    'industry_url': industry_url,
                    'industry_name': industry.xpath('.//li/text()').extract_first(),
                }
                task_list.append(
                    asyncio.ensure_future(self.detail_list(session, industry_url, json_data))
                )

            await asyncio.gather(*task_list)

        end = time.time()
        print(f'time used {end-start}')

    async def detail_list(self, session, url, json_data):
        """Fetch one industry page and pass its HTML to ``parse_detail``.

        Args:
            session: the shared ``aiohttp.ClientSession`` to issue the request on.
            url: site-relative link, appended to ``URL_MAP['base']``.
            json_data: dict describing the industry; forwarded unchanged.
        """
        async with session.get(URL_MAP['base'] + url, headers=headers) as response:
            html = await response.text()
        self.parse_detail(html, json_data)

    def parse_detail(self, html, json_data=None):
        """Print the ``<title>`` text of *html*.

        ``json_data`` is accepted for the caller's convenience but is not
        used here.
        """
        title = Selector(text=html).xpath('//title/text()').extract_first()
        print(title)


if __name__ == '__main__':
    # The original instantiated an undefined name (``Holdle``); the class
    # defined above is ``App``.
    app = App()
    asyncio.run(app.home_page())
大佬有話說 (7)