请问为什么 find 函数无法找到正确的匹配

请问为什么这里找不到正确的匹配
代码：

import requests, bs4
print('\n欢迎使用python翻译器,python翻译器支持任何您能想象到的语言，都能翻译成中文，甚者连古文也可以，分隔单词请用%20')
a=input('\n请输入翻译内容：') 
b='https://translate.google.cn/#view=home&op=translate&sl=auto&tl=zh-CN&text={}'.format(a)
res = requests.get(b)
novel = res.text
bs = bs4.BeautifulSoup(res.text, 'html.parser')
w = bs.find(class_="tlid-translation translation").text
print(w)

谢谢大家的帮忙！
思路：
我认为是find函数错了，我不知道这里应该放什么参数才能找到链接在下方
google翻译

爬虫网络

Lele1357

35 声望

爱编程的小屁孩

0 人点赞

推荐文章：

更多推荐...

博客

收集了一些各大网站 python 的登陆方式,希望对学习 python 的小白，和想写爬虫的你们有所帮助,,本项目用于研究和分享各大网站的模拟登陆方式 17 / 5 |

joeyun

见习助教 75 声望

最佳答案

import requests
import re
import json
import time


class GoogleTranslator():
    _host = 'translate.google.cn'

    _headers = {
        'Host': _host,
        'User-Agent':
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Mobile Safari/537.36',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Referer': 'https://' + _host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0'
    }

    _language = {
        'afrikaans': 'af',
        'arabic': 'ar',
        'belarusian': 'be',
        'bulgarian': 'bg',
        'catalan': 'ca',
        'czech': 'cs',
        'welsh': 'cy',
        'danish': 'da',
        'german': 'de',
        'greek': 'el',
        'english': 'en',
        'esperanto': 'eo',
        'spanish': 'es',
        'estonian': 'et',
        'persian': 'fa',
        'finnish': 'fi',
        'french': 'fr',
        'irish': 'ga',
        'galician': 'gl',
        'hindi': 'hi',
        'croatian': 'hr',
        'hungarian': 'hu',
        'indonesian': 'id',
        'icelandic': 'is',
        'italian': 'it',
        'hebrew': 'iw',
        'japanese': 'ja',
        'korean': 'ko',
        'latin': 'la',
        'lithuanian': 'lt',
        'latvian': 'lv',
        'macedonian': 'mk',
        'malay': 'ms',
        'maltese': 'mt',
        'dutch': 'nl',
        'norwegian': 'no',
        'polish': 'pl',
        'portuguese': 'pt',
        'romanian': 'ro',
        'russian': 'ru',
        'slovak': 'sk',
        'slovenian': 'sl',
        'albanian': 'sq',
        'serbian': 'sr',
        'swedish': 'sv',
        'swahili': 'sw',
        'thai': 'th',
        'filipino': 'tl',
        'turkish': 'tr',
        'ukrainian': 'uk',
        'vietnamese': 'vi',
        'yiddish': 'yi',
        'chinese_simplified': 'zh-CN',
        'chinese_traditional': 'zh-TW',
        'auto': 'auto'
    }
    _url = 'https://' + _host + '/translate_a/single'
    _params = {
        'client': 'webapp',
        'sl': 'en',
        'tl': 'zh-CN',
        'hl': 'zh-CN',
        'dt': 'at',
        'dt': 'bd',
        'dt': 'ex',
        'dt': 'ld',
        'dt': 'md',
        'dt': 'qca',
        'dt': 'rw',
        'dt': 'rm',
        'dt': 'ss',
        'dt': 't',
        'otf': '1',
        'ssel': '0',
        'tsel': '0',
        'kc': '1'
    }

    __cookies = None

    __googleTokenKey = '376032.257956'
    __googleTokenKeyUpdataTime = 600.0
    __googleTokenKeyRetireTime = time.time() + 600.0

    def __init__(self, src='en', dest='zh-CN', tkkUpdataTime=600.0):
        if src not in self._language and src not in self._language.values():
            src = 'auto'
        if dest not in self._language and dest not in self._language.values():
            dest = 'auto'
        self._params['sl'] = src
        self._params['tl'] = dest
        self.googleTokenKeyUpdataTime = tkkUpdataTime
        self.__updateGoogleTokenKey()

    def __updateGoogleTokenKey(self):
        self.__googleTokenKey = self.__getGoogleTokenKey()
        self.__googleTokenKeyRetireTime = time.time(
        ) + self.__googleTokenKeyUpdataTime

    def __getGoogleTokenKey(self):
        """Get the Google TKK from https://translate.google.cn"""
        # TKK example: '435075.3634891900'
        result = ''
        try:
            res = requests.get('https://' + self._host, timeout=3)
            res.raise_for_status()
            self.__cookies = res.cookies
            result = re.search(r'tkk\:\'(\d+\.\d+)?\'', res.text).group(1)
        except requests.exceptions.ReadTimeout as ex:
            print('ERROR: ' + str(ex))
            time.sleep(1)
        return result

    def __getGoogleToken(self, a, TKK):
        """Calculate Google tk from TKK """

        # https://www.cnblogs.com/chicsky/p/7443830.html
        # if text = 'Tablet Developer' and TKK = '435102.3120524463', then tk = '315066.159012'

        def RL(a, b):
            for d in range(0, len(b) - 2, 3):
                c = b[d + 2]
                c = ord(c[0]) - 87 if 'a' <= c else int(c)
                c = a >> c if '+' == b[d + 1] else a << c
                a = a + c & 4294967295 if '+' == b[d] else a ^ c
            return a

        g = []
        f = 0
        while f < len(a):
            c = ord(a[f])
            if 128 > c:
                g.append(c)
            else:
                if 2048 > c:
                    g.append((c >> 6) | 192)
                else:
                    if (55296 == (c & 64512)) and (f + 1 < len(a)) and (
                            56320 == (ord(a[f + 1]) & 64512)):
                        f += 1
                        c = 65536 + ((c & 1023) << 10) + (ord(a[f]) & 1023)
                        g.append((c >> 18) | 240)
                        g.append((c >> 12) & 63 | 128)
                    else:
                        g.append((c >> 12) | 224)
                        g.append((c >> 6) & 63 | 128)
                g.append((c & 63) | 128)
            f += 1

        e = TKK.split('.')
        h = int(e[0]) or 0
        t = h
        for item in g:
            t += item
            t = RL(t, '+-a^+6')
        t = RL(t, '+-3^+b+-f')
        t ^= int(e[1]) or 0
        if 0 > t:
            t = (t & 2147483647) + 2147483648
        result = t % 1000000
        return str(result) + '.' + str(result ^ h)

5年前评论

讨论数量: 8

Coolest

见习助教 395 声望

卧槽，你会requests了？

5年前评论

Jason990420

1.9k 声望 / 個人 @ 個人

这网页不是简单的requests.get就能爬下来的, 你没爬到正确的网页內容.

错误的内容

<body> <script>(function(){ ...

正确的内容

<body class="displaying-homepage show-result"> <script>(function(){ ...

所以找不到tlid-translation translation, 也就没有翻译的结果

別怪 find 函数, 它是无辜的.

5年前评论

Coolest

233

Coolest

那google翻译的网页要怎么爬取正确的内容‘？’

Jason990420

1.9k 声望 / 個人 @ 個人

很难...

主要内容大都是由js传回, 再建立网页HTML内容, 和一般爬虫所对应的HTML不同, 所以变得很复杂.
google本身很注重它的智慧财产权, 它会利用一些方法来确认是网页还是爬虫, 比如请求的内容涵盖的范围, 比如请求与响应之间的授权, 加密等等.
一旦它认为不是正常网页存取, 会封锁中断连线.
还有存取量上也有所限制, 太多了, 也会封锁中断连线. 所以一般都是采用google的api, 免费的也会有限制.

5年前评论

joeyun

见习助教 75 声望

我有一个运用google翻译的代码段

5年前评论

Jason990420

1.9k 声望 / 個人 @ 個人

贴上来看看啊

5年前评论

joeyun

见习助教 75 声望

import requests
import re
import json
import time


class GoogleTranslator():
    _host = 'translate.google.cn'

    _headers = {
        'Host': _host,
        'User-Agent':
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Mobile Safari/537.36',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Referer': 'https://' + _host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0'
    }

    _language = {
        'afrikaans': 'af',
        'arabic': 'ar',
        'belarusian': 'be',
        'bulgarian': 'bg',
        'catalan': 'ca',
        'czech': 'cs',
        'welsh': 'cy',
        'danish': 'da',
        'german': 'de',
        'greek': 'el',
        'english': 'en',
        'esperanto': 'eo',
        'spanish': 'es',
        'estonian': 'et',
        'persian': 'fa',
        'finnish': 'fi',
        'french': 'fr',
        'irish': 'ga',
        'galician': 'gl',
        'hindi': 'hi',
        'croatian': 'hr',
        'hungarian': 'hu',
        'indonesian': 'id',
        'icelandic': 'is',
        'italian': 'it',
        'hebrew': 'iw',
        'japanese': 'ja',
        'korean': 'ko',
        'latin': 'la',
        'lithuanian': 'lt',
        'latvian': 'lv',
        'macedonian': 'mk',
        'malay': 'ms',
        'maltese': 'mt',
        'dutch': 'nl',
        'norwegian': 'no',
        'polish': 'pl',
        'portuguese': 'pt',
        'romanian': 'ro',
        'russian': 'ru',
        'slovak': 'sk',
        'slovenian': 'sl',
        'albanian': 'sq',
        'serbian': 'sr',
        'swedish': 'sv',
        'swahili': 'sw',
        'thai': 'th',
        'filipino': 'tl',
        'turkish': 'tr',
        'ukrainian': 'uk',
        'vietnamese': 'vi',
        'yiddish': 'yi',
        'chinese_simplified': 'zh-CN',
        'chinese_traditional': 'zh-TW',
        'auto': 'auto'
    }
    _url = 'https://' + _host + '/translate_a/single'
    _params = {
        'client': 'webapp',
        'sl': 'en',
        'tl': 'zh-CN',
        'hl': 'zh-CN',
        'dt': 'at',
        'dt': 'bd',
        'dt': 'ex',
        'dt': 'ld',
        'dt': 'md',
        'dt': 'qca',
        'dt': 'rw',
        'dt': 'rm',
        'dt': 'ss',
        'dt': 't',
        'otf': '1',
        'ssel': '0',
        'tsel': '0',
        'kc': '1'
    }

    __cookies = None

    __googleTokenKey = '376032.257956'
    __googleTokenKeyUpdataTime = 600.0
    __googleTokenKeyRetireTime = time.time() + 600.0

    def __init__(self, src='en', dest='zh-CN', tkkUpdataTime=600.0):
        if src not in self._language and src not in self._language.values():
            src = 'auto'
        if dest not in self._language and dest not in self._language.values():
            dest = 'auto'
        self._params['sl'] = src
        self._params['tl'] = dest
        self.googleTokenKeyUpdataTime = tkkUpdataTime
        self.__updateGoogleTokenKey()

    def __updateGoogleTokenKey(self):
        self.__googleTokenKey = self.__getGoogleTokenKey()
        self.__googleTokenKeyRetireTime = time.time(
        ) + self.__googleTokenKeyUpdataTime

    def __getGoogleTokenKey(self):
        """Get the Google TKK from https://translate.google.cn"""
        # TKK example: '435075.3634891900'
        result = ''
        try:
            res = requests.get('https://' + self._host, timeout=3)
            res.raise_for_status()
            self.__cookies = res.cookies
            result = re.search(r'tkk\:\'(\d+\.\d+)?\'', res.text).group(1)
        except requests.exceptions.ReadTimeout as ex:
            print('ERROR: ' + str(ex))
            time.sleep(1)
        return result

    def __getGoogleToken(self, a, TKK):
        """Calculate Google tk from TKK """

        # https://www.cnblogs.com/chicsky/p/7443830.html
        # if text = 'Tablet Developer' and TKK = '435102.3120524463', then tk = '315066.159012'

        def RL(a, b):
            for d in range(0, len(b) - 2, 3):
                c = b[d + 2]
                c = ord(c[0]) - 87 if 'a' <= c else int(c)
                c = a >> c if '+' == b[d + 1] else a << c
                a = a + c & 4294967295 if '+' == b[d] else a ^ c
            return a

        g = []
        f = 0
        while f < len(a):
            c = ord(a[f])
            if 128 > c:
                g.append(c)
            else:
                if 2048 > c:
                    g.append((c >> 6) | 192)
                else:
                    if (55296 == (c & 64512)) and (f + 1 < len(a)) and (
                            56320 == (ord(a[f + 1]) & 64512)):
                        f += 1
                        c = 65536 + ((c & 1023) << 10) + (ord(a[f]) & 1023)
                        g.append((c >> 18) | 240)
                        g.append((c >> 12) & 63 | 128)
                    else:
                        g.append((c >> 12) | 224)
                        g.append((c >> 6) & 63 | 128)
                g.append((c & 63) | 128)
            f += 1

        e = TKK.split('.')
        h = int(e[0]) or 0
        t = h
        for item in g:
            t += item
            t = RL(t, '+-a^+6')
        t = RL(t, '+-3^+b+-f')
        t ^= int(e[1]) or 0
        if 0 > t:
            t = (t & 2147483647) + 2147483648
        result = t % 1000000
        return str(result) + '.' + str(result ^ h)

5年前评论

fsfs

11 声望

很有思路

5年前评论

Jason990420

1.9k 声望 / 個人 @ 個人

嗯, 原来以为是旧的google翻译, 确实可行

5年前评论

讨论应以学习和精进为目的。请勿发布不友善或者负能量的内容，与人为善，比聪明更重要！

帮助

请问为什么 find 函数无法找到正确的匹配

推荐文章：

社区赞助商

关于 LearnKu

资源推荐

服务提供商

其他信息

请问为什么 find 函数无法找到正确的匹配

推荐文章：

社区赞助商

关于 LearnKu

资源推荐

服务提供商

其他信息

请登录