import requests import redis import random import time import threading import json from typing import Dict, List, Optional, Union from datetime import datetime, timedelta from concurrent.futures import ThreadPoolExecutor, as_completed class EnhancedProxyManager: def __init__( self, proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json', valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c', redis_host: str = '192.168.18.123', redis_port: int = 6379, redis_db: int = 7, redis_password: str = 'wlkj2018', redis_key: str = 'proxy_pool', update_interval: int = 3600, max_retries: int = 3, proxy_timeout: int = 10, auto_refresh: bool = False, max_pool_size: int = 50, enable_api_validation: bool = True # 新增:是否启用API验证开关 ): """ 增强版代理管理器 - 支持多IP池和手动代理管理 :param proxy_api_url: 获取代理的API地址 :param redis_host: Redis主机地址 :param redis_port: Redis端口 :param redis_db: Redis数据库 :param redis_password: Redis密码 :param redis_key: Redis中存储代理的键前缀 :param update_interval: 代理更新间隔(秒) :param max_retries: 最大重试次数 :param proxy_timeout: 代理连接超时时间(秒) :param auto_refresh: 是否自动从API获取代理 :param max_pool_size: 代理池最大容量 """ self.proxy_api_url = proxy_api_url self.redis_key_prefix = redis_key self.update_interval = update_interval self.max_retries = max_retries self.proxy_timeout = proxy_timeout self.auto_refresh = auto_refresh self.max_pool_size = max_pool_size self.valid_check_url = valid_check_url self.enable_api_validation = enable_api_validation # Redis连接 self.redis_conn = redis.StrictRedis( host=redis_host, port=redis_port, db=redis_db, password=redis_password, decode_responses=True ) # 线程安全控制 self.lock = threading.Lock() self.condition = threading.Condition() # 启动维护线程 if self.auto_refresh: self._start_maintenance_thread() def _start_maintenance_thread(self): """启动后台维护线程""" def maintenance_loop(): while True: with self.condition: self._refresh_api_proxies() self.condition.notify_all() time.sleep(self.update_interval) thread = threading.Thread(target=maintenance_loop, daemon=True) thread.start() def _get_redis_key(self, proxy_type: str) -> str: """获取Redis键名""" return f"{self.redis_key_prefix}:{proxy_type}" def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]: """通过API检查代理是否有效""" if not self.enable_api_validation or not proxy_list: return {} try: # 拼接代理参数:proxy=ip1:port1,ip2:port2 proxy_param = '&proxy=' + ','.join(proxy_list) response = requests.get(self.valid_check_url + proxy_param, timeout=10) if response.status_code == 200: data = response.json() if data.get('code') == 0: return data.get('data', {}) except Exception as e: print(f"API验证代理有效性失败: {e}") return {} def _refresh_api_proxies(self) -> bool: """从API获取最新代理并存入Redis""" api_key = self._get_redis_key('api') # 前置检查:如果已有足够数量且未过期的代理,则不需要刷新 existing_proxies = self.redis_conn.hgetall(api_key) # 检查代理数量是否足够(大于等于4)且未过期 if len(existing_proxies) >= 4: # 检查代理是否过期(假设代理有效期为24小时) current_time = datetime.now() need_refresh = False for proxy_json in existing_proxies.values(): proxy = json.loads(proxy_json) last_checked = datetime.strptime(proxy['last_checked'], "%Y-%m-%d %H:%M:%S") if (current_time - last_checked) > timedelta(hours=1): need_refresh = True break if not need_refresh: print("当前有足够数量且未过期的代理,无需刷新") return False try: response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout) if response.status_code == 200: data = response.json() if data.get('code') == 0 and data.get('data'): # 清空旧API代理 api_key = self._get_redis_key('api') # self.redis_conn.delete(api_key) # 添加新代理 for proxy_data in data['data']['proxy_list']: proxy = { 'http': f"http://{proxy_data}", 'https': f"http://{proxy_data}", 'server': proxy_data, 'source': 'api', 'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'status': 'active' } self.redis_conn.hset( api_key, proxy_data, json.dumps(proxy) ) return True except Exception as e: print(f"更新API代理失败: {e}") return False def add_manual_proxies(self, proxies: Union[List[str], str]) -> int: """ 手动添加代理到池中 :param proxies: 代理列表(格式: ["ip:port", ...] 或 "ip:port") :return: 成功添加的代理数量 """ if isinstance(proxies, str): proxies = [proxies] added = 0 manual_key = self._get_redis_key('manual') # 验证并添加代理 def _check_and_add(proxy): nonlocal added proxy_dict = { 'http': f"http://{proxy}", 'https': f"http://{proxy}", 'server': proxy, 'source': 'manual', 'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'status': 'active' } if self._is_proxy_working(proxy_dict): with self.lock: # 检查是否已存在 if not self.redis_conn.hexists(manual_key, proxy): self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict)) added += 1 # 使用线程池并行验证 with ThreadPoolExecutor(max_workers=10) as executor: executor.map(_check_and_add, proxies) print(f"成功添加 {added} 个手动代理") return added def _is_proxy_working(self, proxy: Dict) -> bool: """检查代理是否可用""" test_urls = [ "http://httpbin.org/ip", "http://www.google.com/gen_204" ] proxies = { 'http': proxy['http'], 'https': proxy['https'] } for url in test_urls: try: response = requests.get( url, proxies=proxies, timeout=self.proxy_timeout, allow_redirects=False ) if 200 <= response.status_code < 500: return True except: continue return False def get_random_proxy(self) -> Optional[Dict]: """ 随机获取一个可用代理 :return: 代理字典或None(如果无可用代理) """ # 优先从API代理获取 # api_key = self._get_redis_key('api') manual_key = self._get_redis_key('manual') # 获取所有活跃代理 proxies = [] # 先检查API代理 # api_proxies = self.redis_conn.hgetall(api_key) # for proxy_json in api_proxies.values(): # proxy = json.loads(proxy_json) # if proxy.get('status') == 'active': # proxies.append(proxy) # 如果API代理不可用或auto_refresh关闭,检查手动代理 if not proxies or not self.auto_refresh: manual_proxies = self.redis_conn.hgetall(manual_key) for proxy_json in manual_proxies.values(): proxy = json.loads(proxy_json) if proxy.get('status') == 'active': proxies.append(proxy) if not proxies: if self.auto_refresh: print("代理池为空,尝试从API获取...") self._refresh_api_proxies() return self.get_random_proxy() else: print("代理池为空且自动刷新已关闭") return None # 随机选择一个代理 selected = random.choice(proxies) selected['_redis_key'] = self._get_redis_key(selected['source']) return selected def mark_proxy_failed(self, proxy: Dict): """标记代理为失败并从池中移除""" if '_redis_key' not in proxy: return # 如果是API代理且启用验证,先检查是否真的失效 if proxy.get('source') == 'api' and self.enable_api_validation: valid_status = self._check_proxy_valid([proxy['server']]) is_valid = valid_status.get(proxy['server'], False) if is_valid: print(f"代理 {proxy['server']} API验证仍有效,暂不移除") return with self.lock: self.redis_conn.hdel(proxy['_redis_key'], proxy['server']) print(f"代理 {proxy['server']} 已被移除") def request_with_proxy( self, method: str, url: str, retry_count: int = 0, **kwargs ) -> requests.Response: """ 使用代理发送请求 :param method: HTTP方法 :param url: 请求URL :param retry_count: 内部重试计数 :param kwargs: 其他requests参数 :return: Response对象 """ if retry_count >= self.max_retries: raise requests.exceptions.RequestException(f"达到最大重试次数 {self.max_retries}") proxy = self.get_random_proxy() if not proxy: raise requests.exceptions.RequestException("无可用代理") try: response = requests.request( method, url, proxies={ 'http': proxy['http'], 'https': proxy['https'] }, timeout=self.proxy_timeout, **kwargs ) if response.status_code >= 400: raise requests.exceptions.HTTPError( f"HTTP错误: {response.status_code}", response=response ) return response except requests.exceptions.RequestException as e: print(f"代理 {proxy['server']} 请求失败: {e}") self.mark_proxy_failed(proxy) return self.request_with_proxy(method, url, retry_count + 1, **kwargs) def get_pool_status(self) -> Dict: """获取代理池状态""" api_key = self._get_redis_key('api') manual_key = self._get_redis_key('manual') return { 'api_proxies': self.redis_conn.hlen(api_key), 'manual_proxies': self.redis_conn.hlen(manual_key), 'auto_refresh': self.auto_refresh, 'last_update': datetime.now().strftime("%Y-%m-%d %H:%M:%S") }