stock_fundamentals/src/quantitative_analysis/batch_stock_price_collector.py

196 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import pandas as pd
from datetime import datetime
import sys
import os
import redis
import json
# Add the directory two levels above this file to sys.path so that project
# modules (e.g. scripts.config) can be imported.
# NOTE(review): the imports below use the `src.` prefix, which requires the
# directory that CONTAINS the `src` package to be on sys.path — confirm that
# this two-level dirname actually yields that directory.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
# Import the proxy manager
from src.scripts.ProxyIP import EnhancedProxyManager
# Load the Xueqiu request headers and the Redis connection settings.
# Each setting is imported in its own try block so that a failure to import
# one of them does not discard a successfully imported value of the other.
try:
    from src.scripts.config import XUEQIU_HEADERS
except ImportError:
    # Fallback headers used when the project config is not importable.
    XUEQIU_HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Cookie': '',  # a Xueqiu cookie needs to be filled in here
    }
try:
    from src.valuation_analysis.config import REDIS_CONFIG
except ImportError:
    # Fallback Redis settings used when the project config is not importable.
    REDIS_CONFIG = {
        'host': 'localhost',
        'port': 6379,
        'db': 0,
        'password': None,
    }
REDIS_KEY = 'xq_stock_changes_latest' # Redis key under which the quote data is stored
# Create the module-level proxy manager instance
proxy_manager = EnhancedProxyManager()
# Connection pool shared by every client handed out by get_redis_conn();
# created lazily on the first call.
_REDIS_POOL = None


def get_redis_conn():
    """Return a Redis client backed by a shared connection pool.

    The pool is built once from ``REDIS_CONFIG`` (``host`` and ``port`` are
    required; ``db`` defaults to 0 and ``password`` to None) and reused by
    later calls, instead of allocating a brand-new pool on every call.
    Responses are decoded to ``str`` (``decode_responses=True``).

    Because the pool is cached, changes made to ``REDIS_CONFIG`` after the
    first call are not picked up.

    :return: a ``redis.Redis`` client bound to the shared pool
    """
    global _REDIS_POOL
    if _REDIS_POOL is None:
        _REDIS_POOL = redis.ConnectionPool(
            host=REDIS_CONFIG['host'],
            port=REDIS_CONFIG['port'],
            db=REDIS_CONFIG.get('db', 0),
            password=REDIS_CONFIG.get('password', None),
            decode_responses=True,
        )
    return redis.Redis(connection_pool=_REDIS_POOL)
def fetch_and_store_stock_data(page_size=90):
    """Collect the latest Xueqiu quotes for A-share boards and store them in Redis.

    For each board type (``sha``, ``sza``, ``kcb``) the screener list endpoint
    is paged through via ``proxy_manager``. The first response supplies both
    the total row count and the first page of rows, so page 1 is requested
    only once. Pages that do not answer with HTTP 200 are reported and
    skipped.

    All collected rows are stamped with a ``fetch_time`` column and written to
    the Redis hash ``REDIS_KEY`` (field = ``symbol``, value = the row as a
    JSON string). The previous content of the hash is removed in the same
    pipeline as the new writes.

    :param page_size: number of rows requested per page
    :return: ``pandas.DataFrame`` with every collected row plus ``fetch_time``;
        an empty DataFrame when nothing was collected
    """
    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    board_types = ['sha', 'sza', 'kcb']  # Shanghai, Shenzhen, STAR Market
    headers = XUEQIU_HEADERS
    all_data = []

    for stock_type in board_types:
        params = {
            'page': 1,
            'size': page_size,
            'order': 'desc',
            'order_by': 'dividend_yield',
            'market': 'CN',
            'type': stock_type
        }
        # The first request yields the total count AND the rows of page 1.
        response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
        if response.status_code != 200:
            print(f"请求 {stock_type} 数据失败,状态码:{response.status_code}")
            continue

        first_page = response.json()['data']
        total_count = first_page['count']
        # Ceiling division: an exact multiple of page_size must not produce a
        # surplus empty page. At least one page (the one just fetched) exists.
        total_pages = max(1, -(-total_count // page_size))

        page = 1
        all_data.extend(first_page['list'])
        print(f"成功采集第 {page}/{total_pages} 页数据")

        for page in range(2, total_pages + 1):
            params['page'] = page
            response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                all_data.extend(data['data']['list'])
                print(f"成功采集第 {page}/{total_pages} 页数据")
            else:
                print(f"请求 {stock_type} 数据第 {page} 页失败,状态码:{response.status_code}")

    # Convert to a DataFrame
    df = pd.DataFrame(all_data)
    if df.empty:
        print("未获取到任何数据。")
        return pd.DataFrame()

    df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Store in a Redis hash: field is the symbol, value is a JSON string.
    r = get_redis_conn()
    pipe = r.pipeline()
    # Queue the delete in the pipeline rather than running it immediately, so
    # the old data is only dropped together with the new writes (redis-py
    # pipelines are transactional by default) and readers never observe an
    # empty hash in between.
    pipe.delete(REDIS_KEY)
    for _, row in df.iterrows():
        symbol = row.get('symbol')
        if not symbol:
            continue
        # The whole row is stored; it could be trimmed to the needed fields.
        value = row.to_dict()
        pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
    pipe.execute()
    print(f"成功将数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
    # Return the DataFrame so other scripts can use it
    return df
def format_stock_code(stock_code):
    """Normalise a stock code to the exchange-prefixed form.

    Accepts suffix style (``600001.SH``, ``000001.sz``) as well as prefix
    style (``SH600001``). Matching is case-insensitive and surrounding
    whitespace is ignored. A code is rewritten only when it has exactly one
    dot followed by ``SH``, ``SZ`` or ``BJ``; anything else (already prefixed
    codes, unknown market suffixes, several dots, bare numbers) is returned
    as-is after stripping and upper-casing.

    :param stock_code: stock code string
    :return: tuple ``(xueqiu_code, redis_code)``, e.g.
        ``('SH600001', 'SH600001')``; both elements are the same string
    """
    normalized = stock_code.strip().upper()
    # partition() never raises on multiple dots: the remainder (e.g. "B.C")
    # simply fails the market check below and the code is returned unchanged.
    code, dot, market = normalized.partition('.')
    if dot and market in ('SH', 'SZ', 'BJ'):
        prefixed = f'{market}{code}'
        return prefixed, prefixed
    return normalized, normalized
def get_stock_realtime_info_from_redis(stock_code):
    """Look up a stock's stored quote in Redis and map it to the output structure.

    The code is normalised with ``format_stock_code`` and used as the field
    name in the ``REDIS_KEY`` hash. The stored JSON object is then mapped onto
    a fixed set of output keys; source fields that are absent yield ``None``.

    :param stock_code: e.g. ``600001.SH``, ``SH600001``, ``000001.SZ``,
        ``SZ000001``
    :return: dict with the keys ``code``, ``crawlDate``, ``marketValue``,
        ``maxPrice``, ``minPrice``, ``nowPrice``, ``pbRate``,
        ``rangeRiseAndFall``, ``shortName``, ``todayStartPrice``, ``ttm``,
        ``turnoverRate`` and ``yesterdayEndPrice``; ``None`` when the hash has
        no entry for the code or the stored value is not a JSON object
    """
    _, redis_code = format_stock_code(stock_code)
    raw = get_redis_conn().hget(REDIS_KEY, redis_code)
    if not raw:
        return None
    try:
        data = json.loads(raw)
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        return None
    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list or number): nothing to map.
        return None

    def first_present(*keys):
        """Return the value of the first of *keys* that exists in data, else None."""
        for key in keys:
            if key in data:
                return data[key]
        return None

    # Map the stored fields onto the output structure.
    return {
        "code": data.get("symbol"),
        "crawlDate": data.get("fetch_time"),
        "marketValue": data.get("market_capital"),
        # Use the "high"/"low" fields when present, otherwise the 52-week ones.
        "maxPrice": first_present("high", "high52w"),
        "minPrice": first_present("low", "low52w"),
        "nowPrice": data.get("current"),
        "pbRate": data.get("pb"),
        "rangeRiseAndFall": data.get("percent"),
        "shortName": data.get("name"),
        "todayStartPrice": data.get("open"),
        "ttm": data.get("pe_ttm"),
        "turnoverRate": data.get("turnover_rate"),
        "yesterdayEndPrice": first_present("last_close", "pre_close"),
    }
# When executed as a script, run one collection pass with the default page size.
if __name__ == '__main__':
    fetch_and_store_stock_data()