求助一个优化代码的方法——爬取一个网站的资源下载链接
Question:
- 执行代码感觉跑的速度非常慢
希望得到一些改进优化的建议:
Code :
import requests
import re
'''
第一页:https://www.5438.com.cn/list/11.html?page=1
第二页:https://www.5438.com.cn/list/11.html?page=2
'''
'''
获取分页链接
'''
def get_page_url():
urls_list = []
for i in range(25):
url = 'https://www.5438.com.cn/list/11.html?page={}'.format(i+1)
urls_list.append(url)
return urls_list
'''
获取文章链接
'''
def from_airticle_get_download_url():
aireticle_urls = []
for x in get_page_url():
#构造文章链接
req = requests.get(x).text
pat = '<a href="(.*?)" rel="bookmark" target="_blank">'
lite_urls = re.findall(pat, req)
for j in lite_urls:
url = 'https://www.5438.com.cn{}'\
.format(j)
aireticle_urls.append(url)
return aireticle_urls
'''
获取资源下载链接
'''
def get_download_urls():
download_urls_list = []
for j in from_airticle_get_download_url():
req_download_urls = requests.get(j).text
pat = 'download-popup btn_down" href="(.*?)"'
a = re.findall(pat, req_download_urls)
download_urls_list.append(a)
print(download_urls_list)
get_download_urls()
推荐文章: