stock_fundamentals/src/scripts/ProxyIP.py

342 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import redis
import random
import time
import threading
import json
from typing import Dict, List, Optional, Union
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
class EnhancedProxyManager:
def __init__(
    self,
    proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json',
    valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c',
    redis_host: str = '192.168.18.123',
    redis_port: int = 6379,
    redis_db: int = 7,
    redis_password: str = 'wlkj2018',
    redis_key: str = 'proxy_pool',
    update_interval: int = 3600,
    max_retries: int = 3,
    proxy_timeout: int = 10,
    auto_refresh: bool = False,
    max_pool_size: int = 50,
    enable_api_validation: bool = True,  # switch for the API-based validity check
):
    """
    Enhanced proxy manager with separate API-sourced and manually added pools.

    NOTE(review): the default URLs and the default Redis password embed
    credentials in source code; callers should pass their own values and the
    defaults should be moved to configuration.

    :param proxy_api_url: URL of the API that returns fresh proxies
    :param valid_check_url: URL of the API that reports whether proxies are still valid
    :param redis_host: Redis host
    :param redis_port: Redis port
    :param redis_db: Redis database index
    :param redis_password: Redis password
    :param redis_key: prefix of the Redis keys that hold the proxy pools
    :param update_interval: seconds between background refreshes
    :param max_retries: maximum number of request attempts
    :param proxy_timeout: timeout in seconds for requests made through a proxy
    :param auto_refresh: whether to start the background thread that pulls proxies from the API
    :param max_pool_size: intended maximum pool size (stored on the instance)
    :param enable_api_validation: whether proxies are re-checked through ``valid_check_url``
    """
    # Thread-safety primitives.
    self.lock = threading.Lock()
    self.condition = threading.Condition()

    # Remote endpoints.
    self.proxy_api_url = proxy_api_url
    self.valid_check_url = valid_check_url
    self.enable_api_validation = enable_api_validation

    # Pool behaviour.
    self.redis_key_prefix = redis_key
    self.update_interval = update_interval
    self.max_retries = max_retries
    self.proxy_timeout = proxy_timeout
    self.max_pool_size = max_pool_size
    self.auto_refresh = auto_refresh

    # Redis client; responses are decoded to str.
    connection_options = {
        'host': redis_host,
        'port': redis_port,
        'db': redis_db,
        'password': redis_password,
        'decode_responses': True,
    }
    self.redis_conn = redis.StrictRedis(**connection_options)

    # The maintenance thread is started last so every attribute it reads exists.
    if self.auto_refresh:
        self._start_maintenance_thread()
def _start_maintenance_thread(self):
    """Start the daemon thread that periodically refreshes the API proxy pool.

    Each cycle calls ``_refresh_api_proxies`` while holding ``self.condition``,
    notifies all waiters, releases the condition and then sleeps for
    ``self.update_interval`` seconds. An exception raised by a refresh is
    reported and the loop continues, so a single failure does not end all
    future refreshes.
    """
    def maintenance_loop():
        while True:
            try:
                with self.condition:
                    self._refresh_api_proxies()
                    self.condition.notify_all()
            except Exception as e:
                # Keep the thread alive; the next cycle retries the refresh.
                print(f"代理维护线程刷新失败: {e}")
            # Sleep without holding the condition so waiters are not blocked.
            time.sleep(self.update_interval)

    thread = threading.Thread(target=maintenance_loop, daemon=True)
    thread.start()
def _get_redis_key(self, proxy_type: str) -> str:
    """Return the Redis key for a pool: the configured prefix, a colon, then ``proxy_type``."""
    key_template = "{}:{}"
    return key_template.format(self.redis_key_prefix, proxy_type)
def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]:
    """Ask the validation API which of the given proxies are still valid.

    :param proxy_list: proxies as ``"ip:port"`` strings
    :return: the ``data`` mapping from the API response, or ``{}`` when
        validation is disabled, the list is empty, the request fails, the
        response code is not 0, or ``data`` is not a mapping
    """
    if not self.enable_api_validation or not proxy_list:
        return {}
    try:
        # Appended as "&proxy=ip1:port1,ip2:port2" to the configured URL.
        proxy_param = '&proxy=' + ','.join(proxy_list)
        response = requests.get(self.valid_check_url + proxy_param, timeout=10)
        if response.status_code == 200:
            data = response.json()
            if data.get('code') == 0:
                result = data.get('data')
                # A null or non-mapping "data" must not leak to callers,
                # which call .get() on the return value.
                if isinstance(result, dict):
                    return result
    except Exception as e:
        # Best-effort check: any failure is reported and treated as "unknown".
        print(f"API验证代理有效性失败: {e}")
    return {}
def _refresh_api_proxies(self) -> bool:
    """Fetch proxies from the API and store them in the ``api`` Redis hash.

    The fetch is skipped when the hash already holds at least 4 entries and
    every entry was checked within the last hour. An entry that cannot be
    parsed (invalid JSON, missing or malformed ``last_checked``) counts as
    stale, so it triggers a refresh instead of raising.

    NOTE: existing entries are never deleted here, so once any stored entry
    is older than one hour every call performs a fetch.

    :return: True when the API answered successfully and its proxies were
        written; False when no refresh was needed or the fetch failed
    """
    api_key = self._get_redis_key('api')
    existing_proxies = self.redis_conn.hgetall(api_key)

    # Pre-check: enough proxies that are all younger than one hour?
    if len(existing_proxies) >= 4:
        current_time = datetime.now()
        max_age = timedelta(hours=1)
        need_refresh = False
        for proxy_json in existing_proxies.values():
            try:
                proxy = json.loads(proxy_json)
                last_checked = datetime.strptime(
                    proxy['last_checked'], "%Y-%m-%d %H:%M:%S"
                )
            except (ValueError, KeyError, TypeError):
                # Corrupt record: treat it like an expired one.
                need_refresh = True
                break
            if (current_time - last_checked) > max_age:
                need_refresh = True
                break
        if not need_refresh:
            print("当前有足够数量且未过期的代理,无需刷新")
            return False

    try:
        response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout)
        if response.status_code != 200:
            return False
        data = response.json()
        if data.get('code') != 0 or not data.get('data'):
            return False
        for proxy_data in data['data']['proxy_list']:
            proxy = {
                'http': f"http://{proxy_data}",
                'https': f"http://{proxy_data}",
                'server': proxy_data,
                'source': 'api',
                'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'status': 'active'
            }
            # Keyed by "ip:port"; an existing entry for the same proxy is overwritten.
            self.redis_conn.hset(api_key, proxy_data, json.dumps(proxy))
        return True
    except Exception as e:
        # Best-effort refresh: report the failure and signal it via the return value.
        print(f"更新API代理失败: {e}")
    return False
def add_manual_proxies(self, proxies: Union[List[str], str]) -> int:
    """
    Validate proxies in parallel and add the working ones to the manual pool.

    A proxy is stored only if ``_is_proxy_working`` accepts it and it is not
    already present in the manual Redis hash. A failure while processing one
    proxy (for example a Redis error) is reported and that proxy is counted
    as not added; the remaining proxies are still processed.

    :param proxies: a list of ``"ip:port"`` strings, or a single such string
    :return: number of proxies newly added
    """
    if isinstance(proxies, str):
        proxies = [proxies]
    manual_key = self._get_redis_key('manual')

    def _check_and_add(proxy: str) -> bool:
        """Probe one proxy and store it; return True only if it was newly stored."""
        proxy_dict = {
            'http': f"http://{proxy}",
            'https': f"http://{proxy}",
            'server': proxy,
            'source': 'manual',
            'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'status': 'active'
        }
        if not self._is_proxy_working(proxy_dict):
            return False
        # The exists-check and the write happen under one lock so two workers
        # handling the same proxy cannot both count it.
        with self.lock:
            if self.redis_conn.hexists(manual_key, proxy):
                return False
            self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict))
        return True

    added = 0
    with ThreadPoolExecutor(max_workers=10) as executor:
        pending = {executor.submit(_check_and_add, proxy): proxy for proxy in proxies}
        for future in as_completed(pending):
            try:
                if future.result():
                    added += 1
            except Exception as e:
                # Surface worker failures instead of dropping them silently.
                print(f"添加代理 {pending[future]} 失败: {e}")
    print(f"成功添加 {added} 个手动代理")
    return added
def _is_proxy_working(self, proxy: Dict) -> bool:
    """Probe a proxy by requesting known URLs through it.

    :param proxy: mapping with ``'http'`` and ``'https'`` proxy URLs
    :return: True as soon as one test URL answers with a status in
        [200, 500); False when every attempt fails
    """
    test_urls = (
        "http://httpbin.org/ip",
        "http://www.google.com/gen_204",
    )
    proxies = {
        'http': proxy['http'],
        'https': proxy['https']
    }
    for url in test_urls:
        try:
            response = requests.get(
                url,
                proxies=proxies,
                timeout=self.proxy_timeout,
                allow_redirects=False
            )
        except Exception:
            # A failed probe just means "try the next URL". Unlike a bare
            # except, this lets KeyboardInterrupt/SystemExit propagate.
            continue
        if 200 <= response.status_code < 500:
            return True
    return False
def get_random_proxy(self) -> Optional[Dict]:
    """
    Pick a random active proxy.

    Active manual proxies are used first. When there are none and
    ``auto_refresh`` is enabled, the API pool is refreshed once and the
    manual pool is read again; if it is still empty, active API proxies are
    used. The refresh is attempted a single time, so an empty pool yields
    ``None`` instead of recursing without bound.

    :return: the proxy dict with an added ``'_redis_key'`` entry naming the
        Redis hash it came from, or None when no active proxy is available
    """
    def _active_proxies(kind: str) -> List[Dict]:
        """Load one pool from Redis and keep the entries whose status is 'active'."""
        stored = self.redis_conn.hgetall(self._get_redis_key(kind))
        decoded = (json.loads(raw) for raw in stored.values())
        return [entry for entry in decoded if entry.get('status') == 'active']

    proxies = _active_proxies('manual')
    if not proxies:
        if not self.auto_refresh:
            print("代理池为空且自动刷新已关闭")
            return None
        print("代理池为空尝试从API获取...")
        self._refresh_api_proxies()
        # Manual proxies may have been added meanwhile; otherwise fall back to
        # whatever the API pool holds after the refresh.
        proxies = _active_proxies('manual') or _active_proxies('api')
        if not proxies:
            print("代理池为空且API未提供可用代理")
            return None

    selected = random.choice(proxies)
    # Remember the source hash so mark_proxy_failed can remove the entry.
    selected['_redis_key'] = self._get_redis_key(selected['source'])
    return selected
def mark_proxy_failed(self, proxy: Dict):
    """Remove a failed proxy from the Redis hash it was taken from.

    Proxies without a ``'_redis_key'`` entry are ignored. An API-sourced proxy
    is kept when API validation is enabled and the validation API still
    reports it as valid.
    """
    if '_redis_key' in proxy:
        came_from_api = proxy.get('source') == 'api'
        if came_from_api and self.enable_api_validation:
            # Double-check with the provider before discarding an API proxy.
            verdicts = self._check_proxy_valid([proxy['server']])
            if verdicts.get(proxy['server'], False):
                print(f"代理 {proxy['server']} API验证仍有效暂不移除")
                return
        with self.lock:
            self.redis_conn.hdel(proxy['_redis_key'], proxy['server'])
        print(f"代理 {proxy['server']} 已被移除")
def request_with_proxy(
    self,
    method: str,
    url: str,
    retry_count: int = 0,
    **kwargs
) -> requests.Response:
    """
    Send a request through a randomly chosen proxy, retrying on failure.

    Each failed attempt (a transport error or an HTTP status >= 400) reports
    the proxy via ``mark_proxy_failed`` and tries again with a newly picked
    proxy until ``max_retries`` attempts have been used.

    :param method: HTTP method
    :param url: request URL
    :param retry_count: number of attempts already used (normally left at 0)
    :param kwargs: extra arguments for ``requests.request``; a ``timeout``
        given here overrides ``proxy_timeout``
    :return: the first response with a status below 400
    :raises requests.exceptions.RequestException: when no proxy is available
        or the retry limit is reached; the last underlying error, if any, is
        attached as ``__cause__``
    """
    # Take timeout out of kwargs so a caller-supplied value does not collide
    # with the explicit timeout keyword below.
    timeout = kwargs.pop('timeout', self.proxy_timeout)
    last_error = None
    attempt = retry_count

    while attempt < self.max_retries:
        proxy = self.get_random_proxy()
        if not proxy:
            raise requests.exceptions.RequestException("无可用代理") from last_error
        try:
            response = requests.request(
                method,
                url,
                proxies={
                    'http': proxy['http'],
                    'https': proxy['https']
                },
                timeout=timeout,
                **kwargs
            )
            if response.status_code >= 400:
                raise requests.exceptions.HTTPError(
                    f"HTTP错误: {response.status_code}",
                    response=response
                )
            return response
        except requests.exceptions.RequestException as e:
            print(f"代理 {proxy['server']} 请求失败: {e}")
            self.mark_proxy_failed(proxy)
            last_error = e
        attempt += 1

    raise requests.exceptions.RequestException(
        f"达到最大重试次数 {self.max_retries}"
    ) from last_error
def get_pool_status(self) -> Dict:
    """Return a snapshot of the pools: entry counts of the API and manual
    hashes, the ``auto_refresh`` flag, and the time the snapshot was taken."""
    api_count = self.redis_conn.hlen(self._get_redis_key('api'))
    manual_count = self.redis_conn.hlen(self._get_redis_key('manual'))
    snapshot_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    status = {}
    status['api_proxies'] = api_count
    status['manual_proxies'] = manual_count
    status['auto_refresh'] = self.auto_refresh
    status['last_update'] = snapshot_time
    return status