import random
import re
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
# First run of digits in the result banner, allowing ASCII (",") and
# full-width (",") thousands separators inside the number.
_RESULT_COUNT_RE = re.compile(r"\d[\d,,]*")


def _parse_result_count(text):
    """Return the first integer embedded in *text*, or None if it has no digits."""
    match = _RESULT_COUNT_RE.search(text)
    if match is None:
        return None
    return int(match.group().replace(",", "").replace(",", ""))


def _failure(status, message, keyword):
    """Build the result dict for a non-success outcome (count is always 0)."""
    return {
        "status": status,
        "message": message,
        "keyword": keyword,
        "count": 0,
    }


def get_baidu_search_count(keyword: str, use_proxy: bool = False) -> dict:
    """Fetch the number of results Baidu reports for a search keyword.

    Args:
        keyword: The search keyword.
        use_proxy: Whether to route the request through a proxy. The proxy
            addresses in this function are placeholders and must be replaced
            with real ones before this flag is useful.

    Returns:
        A dict with the keys ``status`` ("success", "warning" or "error"),
        ``message``, ``keyword`` and ``count``. ``count`` is 0 whenever
        ``status`` is not "success".
    """
    # 1. Build the search URL; the keyword is percent-encoded for the query.
    encoded_keyword = requests.utils.quote(keyword)
    url = f"https://www.baidu.com/s?wd={encoded_keyword}&rsv_spt=1&rsv_iqid=0x8d8a3b5a00007d8f&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_sug3=8&rsv_sug1=7&rsv_sug7=100&rsv_sug2=0&inputT=0&rsv_sug4=0"

    # 2. Request headers imitating a real browser.
    headers = {
        "User-Agent": UserAgent().random,  # random User-Agent per call
        "Referer": "https://www.baidu.com/",  # pretend to come from the home page
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "keep-alive",
    }

    # Optional proxy. These are placeholder values, not working addresses.
    proxies = None
    if use_proxy:
        proxies = {
            "http": "http://your_proxy_ip:port",
            "https": "https://your_proxy_ip:port",
        }

    # 3. Send the request. This is a single attempt (no retry); any transport
    #    or HTTP-status failure is reported in the returned dict, not raised.
    try:
        response = requests.get(
            url=url,
            headers=headers,
            proxies=proxies,
            timeout=10,  # seconds
        )
        response.raise_for_status()  # turn 4xx/5xx responses into exceptions
    except requests.exceptions.RequestException as e:
        return _failure("error", f"请求失败: {e}", keyword)

    # 4. Locate the result-count banner and pull the number out of it.
    #    Expected markup (may change when Baidu revises the page):
    #    <div class="nums">找到约 1,234,567 条结果 (用时 0.123 秒)</div>
    try:
        soup = BeautifulSoup(response.text, "html.parser")
        banner = soup.find("div", class_="nums")
        if banner is None:
            # Fallback selector for a different page layout.
            banner = soup.find("span", class_="nums_total")
        if banner is None:
            return _failure("warning", "未找到结果数量元素", keyword)
        num_text = banner.get_text().strip()
        # The banner starts with non-numeric text, so search for the first
        # digit run instead of assuming the first token is the number.
        count = _parse_result_count(num_text)
    except Exception as e:
        # Keep the contract that parse problems come back as a dict.
        return _failure("error", f"解析失败: {e}", keyword)

    if count is None:
        return _failure(
            "error", f"解析失败: 结果文本中未找到数字: {num_text!r}", keyword
        )

    # 5. Success.
    return {
        "status": "success",
        "keyword": keyword,
        "count": count,
        "message": f"成功获取 {keyword} 的搜索结果数量",
    }
# ------------------- 测试用例 -------------------
if __name__ == "__main__":
    # Manual smoke test: query a few sample keywords and print each outcome.
    keywords = ["Python 教程", "人工智能 发展趋势", "爬虫 技术"]
    for keyword in keywords:
        print(f"正在查询关键词: {keyword}")
        # Random 3-8 second pause before each request, imitating a human user
        # to reduce the chance of triggering anti-scraping measures.
        time.sleep(random.uniform(3, 8))
        result = get_baidu_search_count(keyword)
        print(f"关键词: {keyword}")
        if result["status"] == "success":
            # ":," renders the count with thousands separators (1,234,567).
            print(f"结果数量: {result['count']:,}")
            print(f"提示: {result['message']}\n")
        else:
            print(f"错误: {result['message']}\n")

# Article URL: http://www.hhu99.com/article/4.html -- please credit the source
# when reposting. The content is the author's original or compiled work and
# does not represent the site's position; for infringement concerns contact
# a5b5_su@163.com.