"""Collect the latest Xueqiu A-share quotes and cache them in a Redis hash.

The module exposes two entry points:

* ``fetch_and_store_stock_data`` crawls the Xueqiu screener for the Shanghai,
  Shenzhen and STAR boards and stores one JSON document per symbol in the
  Redis hash named by ``REDIS_KEY``.
* ``get_stock_realtime_info_from_redis`` reads one symbol back from that hash
  and maps it onto the field names expected by callers.
"""
import json
import os
import sys
from datetime import datetime

import pandas as pd
import redis
import requests  # kept for direct (non-proxy) requests while debugging

# Put the project root on sys.path so the ``src.*`` packages can be imported.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)

# Proxy manager (must be imported after the sys.path tweak above).
from src.scripts.ProxyIP import EnhancedProxyManager

# Load the Xueqiu request headers and the Redis settings; fall back to local
# defaults when the project configuration modules are not importable.
try:
    from src.scripts.config import XUEQIU_HEADERS
    from src.valuation_analysis.config import REDIS_CONFIG
except ImportError:
    XUEQIU_HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Cookie': '',  # a valid Xueqiu cookie has to be filled in here
    }
    REDIS_CONFIG = {
        'host': 'localhost',
        'port': 6379,
        'db': 0,
        'password': None
    }

REDIS_KEY = 'xq_stock_changes_latest'  # name of the hash that holds the quotes

# Module-wide proxy manager instance.
proxy_manager = EnhancedProxyManager()

# Connection pool shared by every client returned from get_redis_conn().
_redis_pool = None

# Market suffixes that format_stock_code() moves in front of the numeric code.
_KNOWN_MARKETS = ('SH', 'SZ', 'BJ')

# Output field -> candidate keys in the stored quote, in priority order.
# The first key that is *present* in the stored document wins, even when its
# value is null; later keys are only used when the earlier ones are absent.
_REALTIME_FIELD_MAP = (
    ("code", ("symbol",)),
    ("crawlDate", ("fetch_time",)),
    ("marketValue", ("market_capital",)),
    ("maxPrice", ("high", "high52w")),
    ("minPrice", ("low", "low52w")),
    ("nowPrice", ("current",)),
    ("pbRate", ("pb",)),
    ("rangeRiseAndFall", ("percent",)),
    ("shortName", ("name",)),
    ("todayStartPrice", ("open",)),
    ("ttm", ("pe_ttm",)),
    ("turnoverRate", ("turnover_rate",)),
    ("yesterdayEndPrice", ("last_close", "pre_close")),
)


def get_redis_conn():
    """Return a Redis client backed by a shared connection pool.

    The pool is created on first use from ``REDIS_CONFIG`` and then reused, so
    repeated calls do not open a fresh pool (and fresh sockets) every time.
    Responses are decoded to ``str`` (``decode_responses=True``).
    """
    global _redis_pool
    if _redis_pool is None:
        _redis_pool = redis.ConnectionPool(
            host=REDIS_CONFIG['host'],
            port=REDIS_CONFIG['port'],
            db=REDIS_CONFIG.get('db', 0),
            password=REDIS_CONFIG.get('password', None),
            decode_responses=True
        )
    return redis.Redis(connection_pool=_redis_pool)


def _fetch_quote_page(base_url, headers, params):
    """Request one screener page through the proxy manager.

    :return: ``(payload, status_code)`` where ``payload`` is the ``data``
        object of the JSON response, or ``None`` when the request failed or
        the body did not have the expected shape.
    """
    response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
    # NOTE(review): request_with_proxy is defined elsewhere; a missing response
    # object is treated like any other non-200 result here.
    status_code = getattr(response, 'status_code', None)
    if status_code != 200:
        return None, status_code
    try:
        payload = response.json().get('data')
    except (ValueError, AttributeError):
        # Body is not JSON, or the top-level JSON value is not an object.
        return None, status_code
    if not isinstance(payload, dict):
        return None, status_code
    return payload, status_code


def _store_quotes(df):
    """Replace the contents of the ``REDIS_KEY`` hash with the rows of ``df``.

    Each row is stored under its ``symbol`` as a JSON string; rows without a
    symbol are skipped.
    """
    pipe = get_redis_conn().pipeline()
    # Queue the delete on the pipeline instead of running it immediately:
    # redis-py pipelines are transactional by default, so the old hash is
    # swapped for the new one in a single step and is never observed empty.
    pipe.delete(REDIS_KEY)
    for _, row in df.iterrows():
        symbol = row.get('symbol')
        if not symbol:
            continue
        # NOTE(review): json.dumps writes NaN cells as the non-standard token
        # ``NaN``; strict JSON parsers in other consumers may reject it.
        pipe.hset(REDIS_KEY, symbol, json.dumps(row.to_dict(), ensure_ascii=False))
    pipe.execute()


def fetch_and_store_stock_data(page_size=90):
    """Crawl the latest Xueqiu A-share quotes and save them to Redis.

    The Shanghai (``sha``), Shenzhen (``sza``) and STAR (``kcb``) boards are
    fetched page by page. A board or page that cannot be fetched is reported
    on stdout and skipped; the remaining data is still stored.

    :param page_size: number of records requested per page; must be positive
    :return: DataFrame with every collected row plus a ``fetch_time`` column,
        or an empty DataFrame when nothing was collected
    :raises ValueError: if ``page_size`` is not positive
    """
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    types = ['sha', 'sza', 'kcb']  # Shanghai, Shenzhen, STAR Market
    headers = XUEQIU_HEADERS
    all_data = []

    for stock_type in types:
        params = {
            'page': 1,
            'size': page_size,
            'order': 'desc',
            'order_by': 'dividend_yield',
            'market': 'CN',
            'type': stock_type
        }
        # The first page also carries the total record count.
        payload, status_code = _fetch_quote_page(base_url, headers, params)
        if payload is None:
            print(f"请求 {stock_type} 数据失败,状态码:{status_code}")
            continue

        total_count = payload.get('count') or 0
        # Ceiling division: an exact multiple of page_size must not produce
        # an extra, empty trailing page.
        total_pages = max(1, -(-total_count // page_size))

        for page in range(1, total_pages + 1):
            if page > 1:
                # Page 1 is already in hand from the count request above.
                params['page'] = page
                payload, status_code = _fetch_quote_page(base_url, headers, params)
            if payload is None:
                print(f"请求 {stock_type} 数据第 {page} 页失败,状态码:{status_code}")
                continue
            all_data.extend(payload.get('list') or [])
            print(f"成功采集第 {page}/{total_pages} 页数据")

    df = pd.DataFrame(all_data)
    if df.empty:
        print("未获取到任何数据。")
        return pd.DataFrame()

    df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    _store_quotes(df)
    print(f"成功将数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
    # Hand the DataFrame back so other scripts can use it directly.
    return df


def format_stock_code(stock_code):
    """Normalize a stock code to the prefixed form used by Xueqiu and Redis.

    Accepts suffix style (``600001.SH``, ``000001.SZ``) as well as prefix
    style (``SH600001``, ``SZ000001``), case-insensitively and ignoring
    surrounding whitespace. Codes that match neither style are returned
    upper-cased but otherwise unchanged.

    :param stock_code: stock code string
    :return: tuple ``(xueqiu_code, redis_code)``, e.g.
        ``('SZ000978', 'SZ000978')``
    """
    normalized = stock_code.strip().upper()
    code, _, market = normalized.partition('.')
    if market in _KNOWN_MARKETS:
        prefixed = f'{market}{code}'
        return prefixed, prefixed
    # Already prefixed, or an unknown format: pass through as is.
    return normalized, normalized


def get_stock_realtime_info_from_redis(stock_code):
    """Look up the cached quote of one stock and map it to the output schema.

    :param stock_code: code in any form accepted by ``format_stock_code``
        (``600001.SH``, ``SH600001``, ``000001.SZ``, ``SZ000001``, ...)
    :return: dict with the keys listed in ``_REALTIME_FIELD_MAP`` (missing
        source fields become ``None``), or ``None`` when the symbol is not
        cached or the cached value is not a JSON object
    """
    _, redis_code = format_stock_code(stock_code)
    value = get_redis_conn().hget(REDIS_KEY, redis_code)
    if not value:
        return None
    try:
        data = json.loads(value)
    except (TypeError, ValueError):
        return None
    if not isinstance(data, dict):
        return None

    result = {}
    for field, source_keys in _REALTIME_FIELD_MAP:
        # Take the first source key that exists in the cached document.
        result[field] = next((data[key] for key in source_keys if key in data), None)
    return result


if __name__ == '__main__':
    fetch_and_store_stock_data()