# stock_fundamentals/src/scripts/ProxyIP.py
import requests
import redis
import random
import time
import threading
import json
from typing import Dict, List, Optional, Union
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
class EnhancedProxyManager:
    """Redis-backed proxy pool manager with two pools: API-sourced and manual.

    Proxies are stored as Redis hashes under ``<redis_key>:api`` and
    ``<redis_key>:manual``; each hash field is ``ip:port`` and each value is a
    JSON blob with ``http``/``https`` URLs, source, last-checked timestamp and
    status. An optional daemon thread periodically refreshes the API pool.
    """

    def __init__(
        self,
        proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json',
        valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c',
        redis_host: str = '192.168.18.123',
        redis_port: int = 6379,
        redis_db: int = 7,
        redis_password: str = 'wlkj2018',
        redis_key: str = 'proxy_pool',
        update_interval: int = 3600,
        max_retries: int = 3,
        proxy_timeout: int = 10,
        auto_refresh: bool = False,
        max_pool_size: int = 50,
        enable_api_validation: bool = True
    ):
        """Enhanced proxy manager supporting multiple pools and manual proxies.

        :param proxy_api_url: API endpoint that returns fresh proxies (JSON).
        :param valid_check_url: API endpoint used to re-validate proxies.
        :param redis_host: Redis host address.
        :param redis_port: Redis port.
        :param redis_db: Redis database index.
        :param redis_password: Redis password.
        :param redis_key: key PREFIX under which proxy pools are stored.
        :param update_interval: seconds between automatic API refreshes.
        :param max_retries: max retries for request_with_proxy.
        :param proxy_timeout: per-request/proxy connect timeout in seconds.
        :param auto_refresh: whether to fetch proxies from the API automatically.
        :param max_pool_size: intended pool capacity
            (NOTE(review): currently stored but never enforced).
        :param enable_api_validation: re-check a proxy against the provider's
            validity API before evicting it in mark_proxy_failed.

        NOTE(review): API secret/signature and the Redis password are
        hard-coded in the defaults above — they should be moved to
        environment variables or a config file.
        """
        self.proxy_api_url = proxy_api_url
        self.redis_key_prefix = redis_key
        self.update_interval = update_interval
        self.max_retries = max_retries
        self.proxy_timeout = proxy_timeout
        self.auto_refresh = auto_refresh
        self.max_pool_size = max_pool_size
        self.valid_check_url = valid_check_url
        self.enable_api_validation = enable_api_validation
        # Redis connection (decode_responses=True so hash values come back as str).
        self.redis_conn = redis.StrictRedis(
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            decode_responses=True
        )
        # Thread-safety primitives: lock guards pool mutations, condition lets
        # waiters be notified after a background refresh completes.
        self.lock = threading.Lock()
        self.condition = threading.Condition()
        # Start the background maintenance thread only when auto-refresh is on.
        if self.auto_refresh:
            self._start_maintenance_thread()

    def _start_maintenance_thread(self):
        """Start the daemon thread that periodically refreshes API proxies."""
        def maintenance_loop():
            while True:
                with self.condition:
                    try:
                        # A failed refresh must not kill the daemon thread:
                        # swallow and log, then retry next interval.
                        self._refresh_api_proxies()
                    except Exception as e:
                        print(f"更新API代理失败: {e}")
                    self.condition.notify_all()
                # Sleep OUTSIDE the condition so waiters are not blocked for
                # the whole interval.
                time.sleep(self.update_interval)
        thread = threading.Thread(target=maintenance_loop, daemon=True)
        thread.start()

    def _get_redis_key(self, proxy_type: str) -> str:
        """Return the Redis hash key for a pool ('api' or 'manual')."""
        return f"{self.redis_key_prefix}:{proxy_type}"

    def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]:
        """Ask the provider's validity API whether the given proxies still work.

        :param proxy_list: proxies as ``["ip:port", ...]``.
        :return: mapping of ``ip:port`` -> bool; empty dict when validation is
            disabled, the list is empty, or the API call fails.
        """
        if not self.enable_api_validation or not proxy_list:
            return {}
        try:
            # Provider expects: ...&proxy=ip1:port1,ip2:port2
            proxy_param = '&proxy=' + ','.join(proxy_list)
            response = requests.get(self.valid_check_url + proxy_param, timeout=10)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0:
                    return data.get('data', {})
        except Exception as e:
            print(f"API验证代理有效性失败: {e}")
        return {}

    def _refresh_api_proxies(self) -> bool:
        """Fetch fresh proxies from the API and store them in Redis.

        Skips the network call when the pool already holds >= 4 entries that
        were all checked within the last hour.

        :return: True when new proxies were written, False otherwise.
        """
        api_key = self._get_redis_key('api')
        existing_proxies = self.redis_conn.hgetall(api_key)
        if len(existing_proxies) >= 4:
            current_time = datetime.now()
            need_refresh = False
            for proxy_json in existing_proxies.values():
                try:
                    proxy = json.loads(proxy_json)
                    last_checked = datetime.strptime(
                        proxy['last_checked'], "%Y-%m-%d %H:%M:%S"
                    )
                except (ValueError, KeyError, TypeError):
                    # A malformed entry must trigger a refresh rather than
                    # raise and kill the caller (the maintenance thread).
                    need_refresh = True
                    break
                if (current_time - last_checked) > timedelta(hours=1):
                    need_refresh = True
                    break
            if not need_refresh:
                print("当前有足够数量且未过期的代理,无需刷新")
                return False
        try:
            response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0 and data.get('data'):
                    # NOTE(review): stale entries are intentionally NOT deleted
                    # here (hset overwrites duplicates); mark_proxy_failed is
                    # what evicts dead proxies.
                    for proxy_data in data['data']['proxy_list']:
                        proxy = {
                            'http': f"http://{proxy_data}",
                            'https': f"http://{proxy_data}",
                            'server': proxy_data,
                            'source': 'api',
                            'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                            'status': 'active'
                        }
                        self.redis_conn.hset(
                            api_key,
                            proxy_data,
                            json.dumps(proxy)
                        )
                    return True
        except Exception as e:
            print(f"更新API代理失败: {e}")
        return False

    def add_manual_proxies(self, proxies: Union[List[str], str]) -> int:
        """Validate and add proxies to the manual pool.

        :param proxies: ``"ip:port"`` or a list of them.
        :return: number of proxies that validated and were newly added.
        """
        if isinstance(proxies, str):
            proxies = [proxies]
        added = 0
        manual_key = self._get_redis_key('manual')

        def _check_and_add(proxy):
            # Runs in a worker thread; `added` is guarded by self.lock.
            nonlocal added
            proxy_dict = {
                'http': f"http://{proxy}",
                'https': f"http://{proxy}",
                'server': proxy,
                'source': 'manual',
                'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'status': 'active'
            }
            if self._is_proxy_working(proxy_dict):
                with self.lock:
                    # Only count proxies that were not already in the pool.
                    if not self.redis_conn.hexists(manual_key, proxy):
                        self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict))
                        added += 1

        # Validate candidates in parallel; the `with` block waits for all.
        with ThreadPoolExecutor(max_workers=10) as executor:
            executor.map(_check_and_add, proxies)
        print(f"成功添加 {added} 个手动代理")
        return added

    def _is_proxy_working(self, proxy: Dict) -> bool:
        """Return True if the proxy answers any probe URL with a status < 500."""
        test_urls = [
            "http://httpbin.org/ip",
            "http://www.google.com/gen_204"
        ]
        proxies = {
            'http': proxy['http'],
            'https': proxy['https']
        }
        for url in test_urls:
            try:
                response = requests.get(
                    url,
                    proxies=proxies,
                    timeout=self.proxy_timeout,
                    allow_redirects=False
                )
                # Any response (even 4xx) proves the proxy relays traffic.
                if 200 <= response.status_code < 500:
                    return True
            except requests.exceptions.RequestException:
                continue
        return False

    def get_random_proxy(self, _allow_refresh: bool = True) -> Optional[Dict]:
        """Pick a random active proxy from the pools.

        Prefers API proxies; falls back to manual ones. When both pools are
        empty and auto_refresh is on, triggers at most ONE API refresh before
        giving up (prevents unbounded recursion when the API yields nothing).

        :param _allow_refresh: internal guard — callers should not pass it.
        :return: proxy dict (with ``_redis_key`` attached for eviction) or None.
        """
        api_key = self._get_redis_key('api')
        manual_key = self._get_redis_key('manual')
        proxies = []
        # API pool first — these are the proxies _refresh_api_proxies fills.
        for proxy_json in self.redis_conn.hgetall(api_key).values():
            try:
                proxy = json.loads(proxy_json)
            except ValueError:
                continue
            if proxy.get('status') == 'active':
                proxies.append(proxy)
        # Fall back to (or merge in) manual proxies.
        if not proxies or not self.auto_refresh:
            for proxy_json in self.redis_conn.hgetall(manual_key).values():
                try:
                    proxy = json.loads(proxy_json)
                except ValueError:
                    continue
                if proxy.get('status') == 'active':
                    proxies.append(proxy)
        if not proxies:
            if self.auto_refresh:
                if _allow_refresh:
                    print("代理池为空尝试从API获取...")
                    self._refresh_api_proxies()
                    return self.get_random_proxy(_allow_refresh=False)
                # Already refreshed once and still empty — give up.
                return None
            print("代理池为空且自动刷新已关闭")
            return None
        selected = random.choice(proxies)
        # Remember which pool it came from so mark_proxy_failed can evict it.
        selected['_redis_key'] = self._get_redis_key(selected['source'])
        return selected

    def mark_proxy_failed(self, proxy: Dict):
        """Evict a failed proxy from its pool (after optional API re-check)."""
        if '_redis_key' not in proxy:
            return
        # For API proxies, double-check with the provider before evicting.
        if proxy.get('source') == 'api' and self.enable_api_validation:
            valid_status = self._check_proxy_valid([proxy['server']])
            is_valid = valid_status.get(proxy['server'], False)
            if is_valid:
                print(f"代理 {proxy['server']} API验证仍有效暂不移除")
                return
        with self.lock:
            self.redis_conn.hdel(proxy['_redis_key'], proxy['server'])
            print(f"代理 {proxy['server']} 已被移除")

    def request_with_proxy(
        self,
        method: str,
        url: str,
        retry_count: int = 0,
        **kwargs
    ) -> requests.Response:
        """Send an HTTP request through a randomly chosen proxy, with retries.

        On any request failure (including HTTP >= 400) the proxy is marked
        failed and a different proxy is tried, up to max_retries attempts.

        :param method: HTTP method name.
        :param url: target URL.
        :param retry_count: internal retry counter — callers should not pass it.
        :param kwargs: forwarded to requests.request.
        :return: the Response object.
        :raises requests.exceptions.RequestException: no proxy available or
            retries exhausted.
        """
        if retry_count >= self.max_retries:
            raise requests.exceptions.RequestException(f"达到最大重试次数 {self.max_retries}")
        proxy = self.get_random_proxy()
        if not proxy:
            raise requests.exceptions.RequestException("无可用代理")
        try:
            response = requests.request(
                method,
                url,
                proxies={
                    'http': proxy['http'],
                    'https': proxy['https']
                },
                timeout=self.proxy_timeout,
                **kwargs
            )
            # NOTE(review): a 4xx from the TARGET also evicts the proxy — this
            # matches the original behavior but may discard healthy proxies.
            if response.status_code >= 400:
                raise requests.exceptions.HTTPError(
                    f"HTTP错误: {response.status_code}",
                    response=response
                )
            return response
        except requests.exceptions.RequestException as e:
            print(f"代理 {proxy['server']} 请求失败: {e}")
            self.mark_proxy_failed(proxy)
            return self.request_with_proxy(method, url, retry_count + 1, **kwargs)

    def get_pool_status(self) -> Dict:
        """Return pool sizes and configuration for monitoring."""
        api_key = self._get_redis_key('api')
        manual_key = self._get_redis_key('manual')
        return {
            'api_proxies': self.redis_conn.hlen(api_key),
            'manual_proxies': self.redis_conn.hlen(manual_key),
            'auto_refresh': self.auto_refresh,
            'last_update': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }