requests 获取中国统计信息网,内容中文乱码
问题代码
#!/usr/bin/python3
# -*- coding: utf-8 -*-
def getData():
    """Download the statistics bulletin page and save its text to a file.

    This is the failing example: the response body is decoded as UTF-8,
    which is reported to produce garbled Chinese text in the saved file.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened for writing (for
            example when the ``./data`` directory does not exist).
    """
    url = "http://www.tjcn.org/tjgb/23sc/35900.html"
    # The header contents were elided in the original snippet; a browser-like
    # User-Agent is used here as a minimal stand-in.
    headers = {
        "User-Agent": "Mozilla/5.0",
    }
    # Without a timeout, requests can wait on an unresponsive server forever.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    # Deliberately wrong: forcing UTF-8 here is the mistake being
    # demonstrated. r.text is decoded with this codec, so a page served in
    # a different encoding comes out garbled.
    r.encoding = 'utf-8'
    # NOTE(review): '**' looks like a redacted placeholder name - confirm.
    filename = './data/**.txt'
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(r.text)
错误结果
修正代码
#!/usr/bin/python3
# -*- coding: utf-8 -*-
def getData():
    """Download the statistics bulletin page and save its text as UTF-8.

    The response body is decoded as GB2312 before it is written out, so
    the Chinese text in the saved file is readable.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened for writing (for
            example when the ``./data`` directory does not exist).
    """
    url = "http://www.tjcn.org/tjgb/23sc/35900.html"
    # The header contents were elided in the original snippet; a browser-like
    # User-Agent is used here as a minimal stand-in.
    headers = {
        "User-Agent": "Mozilla/5.0",
    }
    # Without a timeout, requests can wait on an unresponsive server forever.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    # Set the codec before reading r.text: r.text decodes the raw bytes
    # with r.encoding, and this page needs GB2312 rather than UTF-8.
    r.encoding = 'gb2312'
    # NOTE(review): '**' looks like a redacted placeholder name - confirm.
    filename = './data/**.txt'
    # The file itself is written as UTF-8; only the decoding step above
    # depends on the site's encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(r.text)
输出正确。乱码主要是编码方式不同造成的:该网页使用 GB2312 编码,而问题代码按 UTF-8 解码,因此需要将 r.encoding 设置为 'gb2312'。
推荐文章: