Python 3 运行如下代码,前面几条结果正常,后面的结果却出现格式不统一的情况,这是怎么回事?求大神告知。

# CrowTaobaoPrice.py
import requests
import re
from requests.exceptions import RequestException
import sys
import json

# Base listing URL; the caller appends the page number to it.
url = 'https://2018.cn/m/index.php?mod=category&catid=1&page='

# Request headers sent with every page download.
# The Cookie value is the bare "name=value; name=value" list: the header name
# ("Cookie: ") must not be repeated inside the value, and the value has to be
# a single line, so it is assembled from adjacent string literals, one cookie
# per literal.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Mobile Safari/537.36'
    ),
    'Cookie': (
        'EMARTICLE1=null; '
        'EMARTICLE2=null; '
        'EMARTICLE0=null; '
        'EMARTICLE3=05-14%2013%3A27%3A33@%23%24%u5168%u53A6%u95E8%u4E0A%u95E8%u56DE%u6536%u9EC4%u91D1%20%u6E56%u91CC%20%u601D%u660E%20%u6D77%u6CA7%20%u7FD4%u5B89%20%u540C%u5B89%20%u96C6%u7F8E%20%u674F%u6797%20-%20%u53A6%u95E8%u7269%u54C1%u56DE%u6536%20-%20%u53A6%u95E82018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4405759; '
        'EMARTICLE4=05-14%2013%3A28%3A33@%23%24%u4E8C%u624B%u949B%u5408%u91D1%u5C55%u67DC%u56DE%u6536%u5DE5%u827A%u54C1%u6536%u8D2D%u73E0%u5B9D%u5C55%u793A%u67DC%u4ED3%u5E93%u5C55%u793A%u67DC%u56DE%u6536%u5B9E%u6728%u67DC%u53F0%20-%20%u5317%u4EAC%u7269%u54C1%u56DE%u6536%20-%20%u5317%u4EAC2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4448010; '
        'EMARTICLE5=05-14%2013%3A29%3A19@%23%24%u5168%u5317%u4EAC%u9AD8%u4EF7%u4E0A%u95E8%u56DE%u6536%u529E%u516C%u5BB6%u5177%u4E2D%u592E%u7A7A%u8C03%u56DE%u6536%u529E%u516C%u7528%u54C1%20-%20%u5317%u4EAC%u5BB6%u5177/%u529E%u516C%u5BB6%u5177%20-%20%u5317%u4EAC2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4448013; '
        'EMARTICLE7=05-14%2019%3A39%3A05@%23%24%u5E93%u535A%u58EB%u71C3%u6C14%u7076%u79E6%u7687%u5C9B%u7EF4%u4FEE%u7535%u8BDD%u591A%u5C11%uFF1F%u7EF4%u4FEE%u5E08%u508524%u5C0F%u65F6%u4E0A%u95E8%u7EF4%u4FEE%20-%20%u79E6%u7687%u5C9B%u5176%u4ED6%u54C1%u724C%20-%20%u79E6%u7687%u5C9B2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456631; '
        'EMARTICLE8=05-14%2013%3A34%3A58@%23%24%u65E0%u9521%u535A%u4E16%u6EDA%u7B52%u6D17%u8863%u673A%u5168%u5E02%u7EDF%u4E00%u670D%u52A1%u7F51%u70B924%u5C0F%u65F6%u62A5%u4FEE%u7535%u8BDD%20-%20%u65E0%u9521%u6D17%u8863%u673A%u7EF4%u4FEE%20-%20%u65E0%u95212018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456634; '
        'EMARTICLE9=05-14 19:39:11@#$%u4E1C%u839E%u4E1C%u57CE%u5E9F%u94C1%u5E9F%u94A2%u56DE%u6536%u516C%u53F8%u9AD8%u4EF7%u56DE%u6536%u5851%u80F6%u5E9F%u6A21%u5177%u62A5%u4EF7%20-%20%u4E1C%u839E%u7269%u54C1%u56DE%u6536%20-%20%u4E1C%u839E2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456318; '
        'EMARTICLE6=05-15 20:31:48@#$%u5409%u6797%u5E02%u5404%u79CD%u9AD8%u4F4E%u7AEF%u914D%u7F6E%u7535%u8111%u56DE%u6536%u8BC4%u4F30%u4E0A%u95E8%u53D6%u8D27%u4EE5%u4E00%u6761%u9F99%20-%20%u5409%u6797%u7269%u54C1%u56DE%u6536%20-%20%u5409%u67972018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456630; '
        'UM_distinctid=17208040fa497-05e8cc9963ce1-d373666-1fa400-17208040fa5564; '
        'CNZZDATA1253450280=2048944961-1589270953-https%253A%252F%252Fwww.baidu.com%252F%7C1589545383'
    ),
}

def grt_one_page(url, timeout=10):
    """Download one page and return its HTML text.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    # Keep the try body down to the single call that can raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.text
def parse_one_page(html, limit=3):
    """Yield one dict per listing item found in a page's HTML.

    Args:
        html: Page markup as a string. ``None`` or an empty string (for
            example a failed download) yields nothing instead of raising.
        limit: Maximum number of items to yield per page, expected to be a
            non-negative int. The default of 3 matches the original
            hard-coded cut-off; pass ``None`` to yield every match.

    Yields:
        Dicts with the keys ``link``, ``goods_id``, ``good_small_log``,
        ``goods_name`` and ``goods_price``; all values are strings taken
        from the regex capture groups.
    """
    if not html:
        return
    # NOTE(review): the alt group only accepts CJK characters and every
    # ``.*?`` can cross ``<li>`` boundaries because of re.S, so an item whose
    # alt text contains other characters may be skipped or merged with a
    # later item. This looks like a candidate cause for inconsistent
    # results -- confirm against the real page markup.
    pattern = re.compile(r'<li class="">.*?<a\shref="(.*?)(\d{7})">.*?<img\sclass="thumbnail"\ssrc="(.*?)"\salt="([\u4E00-\u9FA5]+)</strong>">.*?price">.*?(\d+).*?</li>',re.S)
    # Slicing with None keeps every match, so no separate unlimited branch.
    for link_prefix, goods_id, thumbnail, name, price in pattern.findall(html)[:limit]:
        yield {
            'link': link_prefix + goods_id,
            'goods_id': goods_id,
            'good_small_log': thumbnail,
            'goods_name': name.strip(),
            'goods_price': price,
        }
def write_to_file(content, path='F://result.json'):
    """Append one record to the result file as a single JSON line.

    Each call writes one JSON document followed by a newline, so the file
    can be read back line by line. Without the separator, consecutive
    records run together and cannot be parsed.

    Args:
        content: JSON-serialisable object to store.
        path: Destination file, opened in append mode as UTF-8. Defaults to
            the original hard-coded location.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')

if __name__ == '__main__':
    # Crawl listing pages 1 to 4, store every parsed item and echo it.
    for page in range(1, 5):
        page_url = url + str(page)
        html = grt_one_page(page_url)
        if html is None:
            # grt_one_page returns None for a non-200 status or a request
            # error; there is nothing to parse, so report it and move on.
            print('failed to download ' + page_url, file=sys.stderr)
            continue
        for result in parse_one_page(html):
            write_to_file(result)
            print(result)
本作品采用《CC 协议》,转载必须注明作者和本文链接
讨论数量: 1
Jason990420

我是连不上 www.2018.cn, 只能这么猜

  • 可能你的连结中, 出现异常
  • 指的是 html 内容因服务器觉得你的连线异常, 所以给你不一样的内容
  • 也许是要求登入, 也许是要求验证, 也许是....
  • 所以当解析结果不对时, 你要先去检查一下, 实际下载到的 HTML 内容是否符合预期.
2个月前 评论

请勿发布不友善或者负能量的内容。与人为善,比聪明更重要!