想请教一下各位大佬CURL的问题
以下就是我要请求的目标链接的php代码和python代码,php返回的是403,但是我使用简单的python请求有时正常有时403,但是用php的curl直接请求就是403了,请问各位大佬怎么解决,帮忙调试一下,谢谢
$url = 'https://hstock.org/ru';
$headers = [
"Content-type:application/json;",
'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language:ja,en-US;q=0.9,en;q=0.8',
'Accept-Encoding:gzip',
'Cache-Control:max-age=0',
'Sec-Ch-Ua-Mobile:?0',
'Sec-Ch-Ua-Platform:"Windows"',
'Sec-Fetch-Dest:document',
'Sec-Fetch-Mode:navigate',
'Sec-Fetch-Site:none',
'Sec-Fetch-User:?1',
'Upgrade-Insecure-Requests:1',
'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31',
];
// 初始化cURL会话
$ch = curl_init();
// 设置cURL选项
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); // 不自动处理重定向
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); //这个是重点,规避ssl的证书检查。
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_HEADER, true); // 获取响应头信息
curl_setopt($ch, CURLOPT_NOBODY, false); // 获取响应体
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// 执行请求并获取响应
$response = curl_exec($ch);
// 获取HTTP状态码
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// 关闭cURL会话
curl_close($ch);
halt($response);
// 分离响应头和响应体
list($header, $body) = explode("\r\n\r\n", $response, 1);
// 提取重定向URL(如果有)
$redirectUrl = null;
if (preg_match('/Location: (.+?)\r\n/', $header, $matches)) {
$redirectUrl = trim($matches[1]);
}
// 处理响应
if ($httpCode == 200) {
echo '页面请求成功!' . PHP_EOL;
echo '状态码:' . $httpCode . PHP_EOL;
echo '请求URL:' . $url . PHP_EOL;
// 解析HTML
$html = str_get_html($body);
echo $html; // 这里简单输出整个HTML对象,你可能需要更具体的操作
} elseif ($httpCode == 302) {
echo '页面已重定向到新的URL地址:' . $redirectUrl . PHP_EOL;
} else {
echo '页面请求失败,状态码为:' . $httpCode . PHP_EOL;
}
import requests
from bs4 import BeautifulSoup
url = 'https://hstock.org/ru'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'ja,en-US;q=0.9,en;q=0.8',
'Cache-Control': 'max-age=0',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31',
}
response = requests.get(url, headers=headers)
if response.status_code == 200:
print('页面请求成功!')
print(response.status_code)
print(response.url)
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)
elif response.status_code == 302:
redirect_url = response.headers.get('Location')
print('页面已重定向到新的URL地址:', redirect_url)
else:
print('页面请求失败,状态码为:', response.status_code)
python 测试header 缺少了 "Content-type:application/json;" 加上看看
访问频率过高、访问量过大或者网站设置了访问限制,相应的增大对应的操作间隔
用tcpdump抓包详细比对下header头
我是想知道PHP的代码为什么不能请求,一请求就是403