# stock_fundamentals/src/quantitative_analysis/batch_stock_price_collector.py
#
# Repository web-view metadata: 183 lines, 6.0 KiB, Python;
# last change shown for these lines: 2025-06-17 15:52:59 +08:00.

import requests
import pandas as pd
from datetime import datetime
import sys
import os
import redis
import json
# Put the directory two levels above this file on sys.path so that the
# configuration modules below can be imported.
# NOTE(review): the imports below are `src.`-prefixed, which needs the
# directory *containing* `src` to be importable — confirm that this depth is
# the intended one for where the file lives.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

# Load the Xueqiu request headers and the Redis settings. Each import has its
# own fallback so that a failure of one does not discard the other.
try:
    from src.scripts.config import XUEQIU_HEADERS
except ImportError:
    XUEQIU_HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Cookie': '',  # a Xueqiu cookie has to be filled in here
    }

try:
    from src.valuation_analysis.config import REDIS_CONFIG
except ImportError:
    REDIS_CONFIG = {
        'host': 'localhost',
        'port': 6379,
        'db': 0,
        'password': None,
    }

REDIS_KEY = 'xq_stock_changes_latest'  # Redis hash holding the latest quotes
_REDIS_POOL = None  # created on first use, then shared by every client


def get_redis_conn():
    """Return a Redis client backed by one shared connection pool.

    The pool is built from REDIS_CONFIG on the first call and reused
    afterwards; building a new pool per call would defeat pooling and leave
    the sockets of every previous pool behind.

    :return: ``redis.Redis`` client with ``decode_responses=True``
    """
    global _REDIS_POOL
    if _REDIS_POOL is None:
        _REDIS_POOL = redis.ConnectionPool(
            host=REDIS_CONFIG['host'],
            port=REDIS_CONFIG['port'],
            db=REDIS_CONFIG.get('db', 0),
            password=REDIS_CONFIG.get('password', None),
            decode_responses=True,
        )
    return redis.Redis(connection_pool=_REDIS_POOL)
def fetch_and_store_stock_data(page_size=90):
    """
    Collect the latest quotes of Shanghai, Shenzhen and STAR-market A-shares
    from the Xueqiu screener API and store them in Redis.

    All pages of every market type are downloaded, gathered into one
    DataFrame, stamped with a ``fetch_time`` column and written to the hash
    REDIS_KEY (field = symbol, value = the row as a JSON string). The old
    hash is replaced in the same pipeline that writes the new one.

    :param page_size: number of records requested per page
    :raises requests.RequestException: on a network error or timeout
    """
    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    types = ['sha', 'sza', 'kcb']  # Shanghai, Shenzhen, STAR market
    headers = XUEQIU_HEADERS
    # Without a timeout a stalled connection would block the job forever.
    request_timeout = 15
    all_data = []

    for stock_type in types:
        params = {
            'page': 1,
            'size': page_size,
            'order': 'desc',
            'order_by': 'dividend_yield',
            'market': 'CN',
            'type': stock_type,
        }
        # The page count is only known once the first page has arrived, so
        # start with one page and correct the bound after that response.
        total_pages = 1
        page = 1
        while page <= total_pages:
            params['page'] = page
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=request_timeout,
            )
            if response.status_code != 200:
                if page == 1:
                    # No first page means no page count: give up on this type.
                    print(f"请求 {stock_type} 数据失败,状态码:{response.status_code}")
                    break
                print(f"请求 {stock_type} 数据第 {page} 页失败,状态码:{response.status_code}")
                page += 1
                continue

            # Tolerate a missing/null 'data', 'count' or 'list' in the reply.
            payload = response.json().get('data') or {}
            if page == 1:
                total_count = payload.get('count') or 0
                # Ceiling division: an exact multiple needs no extra page.
                total_pages = max(1, (total_count + page_size - 1) // page_size)
            all_data.extend(payload.get('list') or [])
            page += 1

    # Convert to a DataFrame so that every row carries the same columns.
    df = pd.DataFrame(all_data)
    if df.empty:
        print("未获取到任何数据。")
        return

    df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Store as a hash: field is the symbol, value is the row as JSON.
    r = get_redis_conn()
    pipe = r.pipeline()
    # Queue the delete in the pipeline as well, so the old data is dropped
    # together with the new writes instead of leaving the hash empty while
    # the rows are being prepared.
    pipe.delete(REDIS_KEY)
    stored = 0
    for _, row in df.iterrows():
        symbol = row.get('symbol')
        if not symbol or pd.isna(symbol):
            continue
        # The complete row is stored; trim the fields here if ever needed.
        value = row.to_dict()
        pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
        stored += 1
    pipe.execute()
    print(f"成功将数据写入Redis哈希 {REDIS_KEY},共{stored}条记录。")
def format_stock_code(stock_code):
    """
    Normalize a stock code to the exchange-prefixed form.

    Both ``600001.SH`` / ``000001.SZ`` (code, dot, exchange) and
    ``SH600001`` / ``SZ000001`` (exchange prefix) are accepted, in any
    letter case and with surrounding whitespace. The exchanges recognised
    in the dotted form are SH, SZ and BJ. Anything else, including input
    with more than one dot, is returned upper-cased but otherwise
    unchanged instead of raising.

    :param stock_code: stock code string
    :return: tuple ``(xueqiu_code, redis_code)``, e.g.
        ``('SH600001', 'SH600001')``; both elements are the same string
    """
    normalized = stock_code.strip().upper()
    parts = normalized.split('.')
    if len(parts) == 2 and parts[1] in ('SH', 'SZ', 'BJ'):
        code, market = parts
        prefixed = f'{market}{code}'
        return prefixed, prefixed
    # Already prefixed, or a format that is not recognised: keep as is.
    return normalized, normalized
def get_stock_realtime_info_from_redis(stock_code):
    """
    Look up the stored quote of one stock in Redis and map it to the
    result structure used by callers.

    :param stock_code: code in any form accepted by ``format_stock_code``,
        e.g. ``600001.SH``, ``SH600001``, ``000001.SZ``, ``SZ000001``
    :return: dict with the keys code, crawlDate, marketValue, maxPrice,
        minPrice, nowPrice, pbRate, rangeRiseAndFall, shortName,
        todayStartPrice, ttm, turnoverRate and yesterdayEndPrice, or
        ``None`` when nothing is stored for the code or the stored value
        is not a JSON object
    """
    _, redis_code = format_stock_code(stock_code)
    r = get_redis_conn()
    value = r.hget(REDIS_KEY, redis_code)
    if not value:
        return None
    try:
        data = json.loads(value)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError
        return None
    if not isinstance(data, dict):
        return None

    def pick(primary, fallback):
        # The fallback field is used only when the primary key is absent;
        # a primary key that is present wins even if its value is None.
        return data.get(primary) if primary in data else data.get(fallback)

    # Map the stored field names to the result structure.
    return {
        "code": data.get("symbol"),
        "crawlDate": data.get("fetch_time"),
        "marketValue": data.get("market_capital"),
        "maxPrice": pick("high", "high52w"),
        "minPrice": pick("low", "low52w"),
        "nowPrice": data.get("current"),
        "pbRate": data.get("pb"),
        "rangeRiseAndFall": data.get("percent"),
        "shortName": data.get("name"),
        "todayStartPrice": data.get("open"),
        "ttm": data.get("pe_ttm"),
        "turnoverRate": data.get("turnover_rate"),
        "yesterdayEndPrice": pick("last_close", "pre_close"),
    }
# When executed as a script, run one full collection with the default page size.
if __name__ == '__main__':
    fetch_and_store_stock_data()