342 lines
12 KiB
Python
342 lines
12 KiB
Python
import requests
|
||
import redis
|
||
import random
|
||
import time
|
||
import threading
|
||
import json
|
||
from typing import Dict, List, Optional, Union
|
||
from datetime import datetime, timedelta
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
|
||
|
||
class EnhancedProxyManager:
|
||
def __init__(
|
||
self,
|
||
proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json',
|
||
valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c',
|
||
redis_host: str = '192.168.18.123',
|
||
redis_port: int = 6379,
|
||
redis_db: int = 7,
|
||
redis_password: str = 'wlkj2018',
|
||
redis_key: str = 'proxy_pool',
|
||
update_interval: int = 3600,
|
||
max_retries: int = 3,
|
||
proxy_timeout: int = 10,
|
||
auto_refresh: bool = False,
|
||
max_pool_size: int = 50,
|
||
enable_api_validation: bool = True # 新增:是否启用API验证开关
|
||
):
|
||
"""
|
||
增强版代理管理器 - 支持多IP池和手动代理管理
|
||
|
||
:param proxy_api_url: 获取代理的API地址
|
||
:param redis_host: Redis主机地址
|
||
:param redis_port: Redis端口
|
||
:param redis_db: Redis数据库
|
||
:param redis_password: Redis密码
|
||
:param redis_key: Redis中存储代理的键前缀
|
||
:param update_interval: 代理更新间隔(秒)
|
||
:param max_retries: 最大重试次数
|
||
:param proxy_timeout: 代理连接超时时间(秒)
|
||
:param auto_refresh: 是否自动从API获取代理
|
||
:param max_pool_size: 代理池最大容量
|
||
"""
|
||
self.proxy_api_url = proxy_api_url
|
||
self.redis_key_prefix = redis_key
|
||
self.update_interval = update_interval
|
||
self.max_retries = max_retries
|
||
self.proxy_timeout = proxy_timeout
|
||
self.auto_refresh = auto_refresh
|
||
self.max_pool_size = max_pool_size
|
||
self.valid_check_url = valid_check_url
|
||
self.enable_api_validation = enable_api_validation
|
||
|
||
# Redis连接
|
||
self.redis_conn = redis.StrictRedis(
|
||
host=redis_host,
|
||
port=redis_port,
|
||
db=redis_db,
|
||
password=redis_password,
|
||
decode_responses=True
|
||
)
|
||
|
||
# 线程安全控制
|
||
self.lock = threading.Lock()
|
||
self.condition = threading.Condition()
|
||
|
||
# 启动维护线程
|
||
if self.auto_refresh:
|
||
self._start_maintenance_thread()
|
||
|
||
def _start_maintenance_thread(self):
|
||
"""启动后台维护线程"""
|
||
|
||
def maintenance_loop():
|
||
while True:
|
||
with self.condition:
|
||
self._refresh_api_proxies()
|
||
self.condition.notify_all()
|
||
time.sleep(self.update_interval)
|
||
|
||
thread = threading.Thread(target=maintenance_loop, daemon=True)
|
||
thread.start()
|
||
|
||
def _get_redis_key(self, proxy_type: str) -> str:
|
||
"""获取Redis键名"""
|
||
return f"{self.redis_key_prefix}:{proxy_type}"
|
||
|
||
def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]:
|
||
"""通过API检查代理是否有效"""
|
||
if not self.enable_api_validation or not proxy_list:
|
||
return {}
|
||
|
||
try:
|
||
# 拼接代理参数:proxy=ip1:port1,ip2:port2
|
||
proxy_param = '&proxy=' + ','.join(proxy_list)
|
||
response = requests.get(self.valid_check_url + proxy_param, timeout=10)
|
||
|
||
if response.status_code == 200:
|
||
data = response.json()
|
||
if data.get('code') == 0:
|
||
return data.get('data', {})
|
||
except Exception as e:
|
||
print(f"API验证代理有效性失败: {e}")
|
||
return {}
|
||
|
||
def _refresh_api_proxies(self) -> bool:
|
||
"""从API获取最新代理并存入Redis"""
|
||
api_key = self._get_redis_key('api')
|
||
|
||
# 前置检查:如果已有足够数量且未过期的代理,则不需要刷新
|
||
existing_proxies = self.redis_conn.hgetall(api_key)
|
||
|
||
# 检查代理数量是否足够(大于等于4)且未过期
|
||
if len(existing_proxies) >= 4:
|
||
# 检查代理是否过期(假设代理有效期为24小时)
|
||
current_time = datetime.now()
|
||
need_refresh = False
|
||
|
||
for proxy_json in existing_proxies.values():
|
||
proxy = json.loads(proxy_json)
|
||
last_checked = datetime.strptime(proxy['last_checked'], "%Y-%m-%d %H:%M:%S")
|
||
if (current_time - last_checked) > timedelta(hours=1):
|
||
need_refresh = True
|
||
break
|
||
|
||
if not need_refresh:
|
||
print("当前有足够数量且未过期的代理,无需刷新")
|
||
return False
|
||
|
||
try:
|
||
response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout)
|
||
if response.status_code == 200:
|
||
data = response.json()
|
||
if data.get('code') == 0 and data.get('data'):
|
||
# 清空旧API代理
|
||
api_key = self._get_redis_key('api')
|
||
# self.redis_conn.delete(api_key)
|
||
|
||
# 添加新代理
|
||
for proxy_data in data['data']['proxy_list']:
|
||
proxy = {
|
||
'http': f"http://{proxy_data}",
|
||
'https': f"http://{proxy_data}",
|
||
'server': proxy_data,
|
||
'source': 'api',
|
||
'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||
'status': 'active'
|
||
}
|
||
self.redis_conn.hset(
|
||
api_key,
|
||
proxy_data,
|
||
json.dumps(proxy)
|
||
)
|
||
return True
|
||
except Exception as e:
|
||
print(f"更新API代理失败: {e}")
|
||
return False
|
||
|
||
def add_manual_proxies(self, proxies: Union[List[str], str]) -> int:
|
||
"""
|
||
手动添加代理到池中
|
||
|
||
:param proxies: 代理列表(格式: ["ip:port", ...] 或 "ip:port")
|
||
:return: 成功添加的代理数量
|
||
"""
|
||
if isinstance(proxies, str):
|
||
proxies = [proxies]
|
||
|
||
added = 0
|
||
manual_key = self._get_redis_key('manual')
|
||
|
||
# 验证并添加代理
|
||
def _check_and_add(proxy):
|
||
nonlocal added
|
||
proxy_dict = {
|
||
'http': f"http://{proxy}",
|
||
'https': f"http://{proxy}",
|
||
'server': proxy,
|
||
'source': 'manual',
|
||
'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||
'status': 'active'
|
||
}
|
||
|
||
if self._is_proxy_working(proxy_dict):
|
||
with self.lock:
|
||
# 检查是否已存在
|
||
if not self.redis_conn.hexists(manual_key, proxy):
|
||
self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict))
|
||
added += 1
|
||
|
||
# 使用线程池并行验证
|
||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||
executor.map(_check_and_add, proxies)
|
||
|
||
print(f"成功添加 {added} 个手动代理")
|
||
return added
|
||
|
||
def _is_proxy_working(self, proxy: Dict) -> bool:
|
||
"""检查代理是否可用"""
|
||
test_urls = [
|
||
"http://httpbin.org/ip",
|
||
"http://www.google.com/gen_204"
|
||
]
|
||
|
||
proxies = {
|
||
'http': proxy['http'],
|
||
'https': proxy['https']
|
||
}
|
||
|
||
for url in test_urls:
|
||
try:
|
||
response = requests.get(
|
||
url,
|
||
proxies=proxies,
|
||
timeout=self.proxy_timeout,
|
||
allow_redirects=False
|
||
)
|
||
if 200 <= response.status_code < 500:
|
||
return True
|
||
except:
|
||
continue
|
||
return False
|
||
|
||
def get_random_proxy(self) -> Optional[Dict]:
|
||
"""
|
||
随机获取一个可用代理
|
||
|
||
:return: 代理字典或None(如果无可用代理)
|
||
"""
|
||
# 优先从API代理获取
|
||
# api_key = self._get_redis_key('api')
|
||
manual_key = self._get_redis_key('manual')
|
||
|
||
# 获取所有活跃代理
|
||
proxies = []
|
||
|
||
# 先检查API代理
|
||
# api_proxies = self.redis_conn.hgetall(api_key)
|
||
# for proxy_json in api_proxies.values():
|
||
# proxy = json.loads(proxy_json)
|
||
# if proxy.get('status') == 'active':
|
||
# proxies.append(proxy)
|
||
|
||
# 如果API代理不可用或auto_refresh关闭,检查手动代理
|
||
if not proxies or not self.auto_refresh:
|
||
manual_proxies = self.redis_conn.hgetall(manual_key)
|
||
for proxy_json in manual_proxies.values():
|
||
proxy = json.loads(proxy_json)
|
||
if proxy.get('status') == 'active':
|
||
proxies.append(proxy)
|
||
|
||
if not proxies:
|
||
if self.auto_refresh:
|
||
print("代理池为空,尝试从API获取...")
|
||
self._refresh_api_proxies()
|
||
return self.get_random_proxy()
|
||
else:
|
||
print("代理池为空且自动刷新已关闭")
|
||
return None
|
||
|
||
# 随机选择一个代理
|
||
selected = random.choice(proxies)
|
||
selected['_redis_key'] = self._get_redis_key(selected['source'])
|
||
return selected
|
||
|
||
def mark_proxy_failed(self, proxy: Dict):
|
||
"""标记代理为失败并从池中移除"""
|
||
if '_redis_key' not in proxy:
|
||
return
|
||
|
||
# 如果是API代理且启用验证,先检查是否真的失效
|
||
if proxy.get('source') == 'api' and self.enable_api_validation:
|
||
valid_status = self._check_proxy_valid([proxy['server']])
|
||
is_valid = valid_status.get(proxy['server'], False)
|
||
|
||
if is_valid:
|
||
print(f"代理 {proxy['server']} API验证仍有效,暂不移除")
|
||
return
|
||
|
||
with self.lock:
|
||
self.redis_conn.hdel(proxy['_redis_key'], proxy['server'])
|
||
print(f"代理 {proxy['server']} 已被移除")
|
||
|
||
def request_with_proxy(
|
||
self,
|
||
method: str,
|
||
url: str,
|
||
retry_count: int = 0,
|
||
**kwargs
|
||
) -> requests.Response:
|
||
"""
|
||
使用代理发送请求
|
||
|
||
:param method: HTTP方法
|
||
:param url: 请求URL
|
||
:param retry_count: 内部重试计数
|
||
:param kwargs: 其他requests参数
|
||
:return: Response对象
|
||
"""
|
||
if retry_count >= self.max_retries:
|
||
raise requests.exceptions.RequestException(f"达到最大重试次数 {self.max_retries}")
|
||
|
||
proxy = self.get_random_proxy()
|
||
if not proxy:
|
||
raise requests.exceptions.RequestException("无可用代理")
|
||
|
||
try:
|
||
response = requests.request(
|
||
method,
|
||
url,
|
||
proxies={
|
||
'http': proxy['http'],
|
||
'https': proxy['https']
|
||
},
|
||
timeout=self.proxy_timeout,
|
||
**kwargs
|
||
)
|
||
|
||
if response.status_code >= 400:
|
||
raise requests.exceptions.HTTPError(
|
||
f"HTTP错误: {response.status_code}",
|
||
response=response
|
||
)
|
||
|
||
return response
|
||
|
||
except requests.exceptions.RequestException as e:
|
||
print(f"代理 {proxy['server']} 请求失败: {e}")
|
||
self.mark_proxy_failed(proxy)
|
||
return self.request_with_proxy(method, url, retry_count + 1, **kwargs)
|
||
|
||
def get_pool_status(self) -> Dict:
|
||
"""获取代理池状态"""
|
||
api_key = self._get_redis_key('api')
|
||
manual_key = self._get_redis_key('manual')
|
||
|
||
return {
|
||
'api_proxies': self.redis_conn.hlen(api_key),
|
||
'manual_proxies': self.redis_conn.hlen(manual_key),
|
||
'auto_refresh': self.auto_refresh,
|
||
'last_update': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||
} |