关于class类内函数调用的问题——附上代码请求指点
代码
import requests
from lxml import etree
class Chaidanzhuanjia(object):
    """Fetch Douban review pages for movie 30171424 and parse them with lxml.

    Methods of the same instance call each other through ``self``: that is
    how the HTML returned by ``request_page`` reaches ``wash_data``.
    """

    # Page URL rule: the ``start`` query offset grows by 20 for each page.
    #   page 1: https://movie.douban.com/subject/30171424/reviews
    #   page 2: https://movie.douban.com/subject/30171424/reviews?start=20
    #   page 3: https://movie.douban.com/subject/30171424/reviews?start=40

    def __init__(self, times):
        """Store the page count and the request headers.

        Args:
            times: Number of review pages to fetch; anything ``int()`` accepts.
        """
        self.times = int(times)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'
        }

    def request_page(self):
        """Request ``self.times`` review pages and print each page's HTML.

        Returns:
            The HTML text of the last page requested, or an empty string when
            ``self.times`` is 0 (or negative) and nothing was requested.
        """
        # Pre-bind so a zero-page run returns '' instead of hitting an
        # unbound local at the return statement.
        response = ''
        for i in range(self.times):
            # Build the page URL according to the pagination rule.
            url = 'https://movie.douban.com/subject/30171424/reviews?start={}'.format(i * 20)
            # A timeout keeps an unresponsive server from blocking forever.
            response = requests.get(url=url, headers=self.headers, timeout=10).text
            print(response)
        return response

    def wash_data(self, data=None):
        """Parse page HTML with lxml and serialize the resulting tree.

        Args:
            data: HTML text to clean. When omitted, the pages are fetched by
                calling ``self.request_page()``.

        Returns:
            The value produced by ``etree.tostring`` for the parsed document,
            or ``None`` when there is no HTML to parse.
        """
        if data is None:
            # The sibling method must be reached through ``self``; a bare
            # ``request_page()`` would be looked up as a global name.
            data = self.request_page()
        if not data:
            return None
        tree = etree.HTML(data)
        html = etree.tostring(tree)
        print(type(html))
        return html
if __name__ == '__main__':
    # Script entry point: ask how many review pages to crawl, then clean them.
    page = int(input('输入爬取页数:'))
    chaidanzhuanjia = Chaidanzhuanjia(page)
    # wash_data() obtains the pages from request_page() on its own, so a
    # separate request_page() call here would download every page twice.
    chaidanzhuanjia.wash_data()
问题
- 在 Chaidanzhuanjia 类内,如何将 request_page() 通过 return 返回的 html 数据传到 wash_data() 函数里面进行清洗呢?
推荐文章: