import random
import re
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# First run of digits in the counter text; ASCII and full-width commas are
# accepted as thousands separators (e.g. "1,234,567").
_COUNT_PATTERN = re.compile(r"\d[\d,，]*")


def _extract_result_count(text: str) -> int:
    """Return the first integer found in *text*, ignoring thousands separators.

    Args:
        text: Text of the result-counter element, e.g.
            "找到约 1,234,567 条结果 (用时 0.123 秒)".

    Returns:
        The first digit run in *text* as an int (1234567 for the example).

    Raises:
        ValueError: If *text* contains no digits.
    """
    match = _COUNT_PATTERN.search(text)
    if match is None:
        raise ValueError(f"no number found in {text!r}")
    return int(match.group().replace(",", "").replace("，", ""))


def get_baidu_search_count(keyword: str, use_proxy: bool = False) -> dict:
    """Fetch the number of results Baidu reports for a search keyword.

    Args:
        keyword: The search keyword.
        use_proxy: Whether to route the request through a proxy. The proxy
            addresses in the body are placeholders and must be configured
            before this is enabled.

    Returns:
        A dict with the keys ``status`` ("success", "warning" or "error"),
        ``message``, ``keyword`` and ``count``. ``count`` is 0 whenever the
        status is not "success". Request and parsing failures are reported
        through the dict rather than raised.
    """
    # 1. Build the search URL; the keyword is percent-encoded.
    encoded_keyword = requests.utils.quote(keyword)
    url = f"https://www.baidu.com/s?wd={encoded_keyword}&rsv_spt=1&rsv_iqid=0x8d8a3b5a00007d8f&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_sug3=8&rsv_sug1=7&rsv_sug7=100&rsv_sug2=0&inputT=0&rsv_sug4=0"

    # 2. Build browser-like request headers.
    headers = {
        "User-Agent": UserAgent().random,  # random User-Agent per call
        "Referer": "https://www.baidu.com/",  # pretend we came from the home page
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "keep-alive",
    }

    # 3. Send the request. There is no retry: any request failure is
    #    returned to the caller as an "error" result.
    proxies = None
    if use_proxy:
        # NOTE(review): placeholder addresses -- replace with a real proxy
        # before enabling use_proxy. Many proxies expect an "http://" URL
        # for the "https" key as well; confirm for the proxy in use.
        proxies = {
            "http": "http://your_proxy_ip:port",
            "https": "https://your_proxy_ip:port",
        }
    try:
        response = requests.get(
            url=url,
            headers=headers,
            proxies=proxies,
            timeout=10,  # seconds
        )
        response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
    except requests.exceptions.RequestException as e:
        return {
            "status": "error",
            "message": f"请求失败: {str(e)}",
            "keyword": keyword,
            "count": 0,
        }

    # 4. Parse the result count out of the page.
    try:
        soup = BeautifulSoup(response.text, "html.parser")
        # The counter markup may change when Baidu revises the page.
        # Typical form:
        #   <div class="nums">找到约 1,234,567 条结果 (用时 0.123 秒)</div>
        result_num_div = soup.find("div", class_="nums")
        if not result_num_div:
            # Fallback selector for an alternative page layout.
            result_num_div = soup.find("span", class_="nums_total")
        if not result_num_div:
            return {
                "status": "warning",
                "message": "未找到结果数量元素",
                "keyword": keyword,
                "count": 0,
            }
        # Extract the first number from the text. Taking the first
        # whitespace-separated token (the old approach) yields "找到约",
        # which int() cannot parse.
        count = _extract_result_count(result_num_div.get_text().strip())
    except Exception as e:
        # Deliberately broad: any parsing problem is reported as an error
        # result instead of propagating to the caller.
        return {
            "status": "error",
            "message": f"解析失败: {str(e)}",
            "keyword": keyword,
            "count": 0,
        }

    # 5. Success.
    return {
        "status": "success",
        "keyword": keyword,
        "count": count,
        "message": f"成功获取 {keyword} 的搜索结果数量",
    }


# ------------------- Demo -------------------
if __name__ == "__main__":
    keywords = ["Python 教程", "人工智能 发展趋势", "爬虫 技术"]
    for keyword in keywords:
        print(f"正在查询关键词: {keyword}")
        # Random 3-8 second pause before each query to look less like a bot.
        time.sleep(random.uniform(3, 8))
        result = get_baidu_search_count(keyword)
        if result["status"] == "success":
            print(f"关键词: {keyword}")
            print(f"结果数量: {result['count']:,}")  # e.g. 1,234,567
            print(f"提示: {result['message']} ")
        else:
            print(f"关键词: {keyword}")
            print(f"错误: {result['message']} ")
# 本文网址: http://www.hhu99.com/article/4.html 。转载请注明出处!文章内容为作者原创或者采编,不代表本站立场;如有侵权,请联系 a5b5_su@163.com。