stock_fundamentals/src/quantitative_analysis/batch_stock_price_collector.py

import requests
import pandas as pd
from datetime import datetime
import sys
import os
import redis
import json

# Append the parent of this file's directory to sys.path so the `src.*`
# imports below can be resolved.
# NOTE(review): confirm that this directory is the one that actually contains
# the `src` package; otherwise the imports rely on the caller's sys.path.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)

# Import the proxy manager
from src.scripts.ProxyIP import EnhancedProxyManager

# Load the Xueqiu request headers and the Redis configuration; fall back to
# the local defaults below when the project config modules cannot be imported.
try:
    from src.scripts.config import XUEQIU_HEADERS
    from src.valuation_analysis.config import REDIS_CONFIG
except ImportError:
    XUEQIU_HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Cookie': '',  # a Xueqiu cookie needs to be filled in here
    }
    REDIS_CONFIG = {
        'host': 'localhost',
        'port': 6379,
        'db': 0,
        'password': None
    }

REDIS_KEY = 'xq_stock_changes_latest'  # Redis key of the hash that stores the quotes

# Create the module-wide proxy manager instance
proxy_manager = EnhancedProxyManager()


def get_redis_conn():
    """
    Return a Redis client backed by a shared, lazily created connection pool.

    The pool is built from ``REDIS_CONFIG`` on the first call and cached on the
    function object, so later calls reuse its connections instead of creating
    (and abandoning) a brand-new pool every time.

    :return: ``redis.Redis`` client; the pool is created with
        ``decode_responses=True``, so replies come back as ``str``.
    """
    pool = getattr(get_redis_conn, '_pool', None)
    if pool is None:
        pool = redis.ConnectionPool(
            host=REDIS_CONFIG['host'],
            port=REDIS_CONFIG['port'],
            db=REDIS_CONFIG.get('db', 0),
            password=REDIS_CONFIG.get('password', None),
            decode_responses=True
        )
        # Cache on the function itself to avoid introducing a new module global.
        get_redis_conn._pool = pool
    return redis.Redis(connection_pool=pool)


def fetch_and_store_stock_data(page_size=90):
    """
    Collect the latest quotes of Xueqiu A-share listings and store them in Redis.

    For each screener type ('sha', 'sza', 'kcb') the quote list is paged
    through the proxy manager. All rows are gathered into a DataFrame, stamped
    with a ``fetch_time`` column and written to the Redis hash ``REDIS_KEY``
    (field = symbol, value = JSON dump of the row).

    :param page_size: number of records requested per page
    :return: DataFrame with every collected row (including ``fetch_time``),
        or an empty DataFrame when nothing was collected
    """
    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    types = ['sha', 'sza', 'kcb']  # Shanghai, Shenzhen, STAR Market
    headers = XUEQIU_HEADERS

    all_data = []

    for stock_type in types:
        params = {
            'page': 1,
            'size': page_size,
            'order': 'desc',
            'order_by': 'dividend_yield',
            'market': 'CN',
            'type': stock_type
        }

        # The first request reports the total count AND already carries page 1,
        # so its rows are consumed below instead of requesting page 1 twice.
        response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
        if response.status_code != 200:
            print(f"请求 {stock_type} 数据失败，状态码：{response.status_code}")
            continue

        # Tolerate a 200 response without a usable "data" object rather than
        # aborting the whole run with a KeyError/TypeError.
        payload = response.json().get('data') or {}
        total_count = payload.get('count') or 0
        # Ceiling division: an exact multiple of page_size must not add an
        # extra (empty) trailing page.
        total_pages = -(-total_count // page_size)
        if total_pages <= 0:
            print(f"请求 {stock_type} 数据失败，响应中没有可用数据")
            continue

        all_data.extend(payload.get('list') or [])
        print(f"成功采集第 1/{total_pages} 页数据")

        for page in range(2, total_pages + 1):
            params['page'] = page
            response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
            if response.status_code == 200:
                page_payload = response.json().get('data') or {}
                all_data.extend(page_payload.get('list') or [])
                print(f"成功采集第 {page}/{total_pages} 页数据")
            else:
                print(f"请求 {stock_type} 数据第 {page} 页失败，状态码：{response.status_code}")

    # Convert to a DataFrame
    df = pd.DataFrame(all_data)

    if df.empty:
        print("未获取到任何数据。")
        return pd.DataFrame()

    df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Store in a Redis hash: field is the symbol, value is the row as JSON.
    r = get_redis_conn()
    pipe = r.pipeline()
    # Queue the delete inside the pipeline (redis-py pipelines are wrapped in
    # MULTI/EXEC by default) so readers never observe an emptied hash between
    # the wipe and the rewrite.
    pipe.delete(REDIS_KEY)
    stored = 0
    for _, row in df.iterrows():
        symbol = row.get('symbol')
        # Rows lacking a symbol show up as None/NaN; neither is a usable field.
        if not isinstance(symbol, str) or not symbol:
            continue
        pipe.hset(REDIS_KEY, symbol, json.dumps(row.to_dict(), ensure_ascii=False))
        stored += 1
    pipe.execute()
    print(f"成功将数据写入Redis哈希 {REDIS_KEY}，共{stored}条记录。")

    # Return the DataFrame so other scripts can use it
    return df


def format_stock_code(stock_code):
    """
    Normalize a stock code to the exchange-prefixed form (e.g. SH600001).

    Accepts inputs such as ``600001.SH``, ``SH600001``, ``000001.SZ`` and
    ``SZ000001``; matching is case-insensitive and surrounding whitespace is
    ignored. A single ``CODE.MARKET`` pair whose market is SH, SZ or BJ becomes
    ``MARKETCODE``. Anything else (already prefixed, unknown market, no dot,
    more than one dot) is returned upper-cased but otherwise unchanged.

    :param stock_code: stock code string
    :return: tuple ``(xueqiu_code, redis_code)``; both elements are the same
        string in the current implementation
    """
    normalized = stock_code.strip().upper()
    parts = normalized.split('.')
    # Only an unambiguous two-part "CODE.MARKET" input is converted; checking
    # the part count first avoids the unpacking ValueError on extra dots.
    if len(parts) == 2 and parts[1] in ('SH', 'SZ', 'BJ'):
        code, market = parts
        prefixed = f'{market}{code}'
        return prefixed, prefixed
    return normalized, normalized


def get_stock_realtime_info_from_redis(stock_code):
    """
    Look up one stock's cached quote in Redis and reshape it into the
    fixed response structure.

    :param stock_code: accepts 600001.SH, SH600001, 000001.SZ, SZ000001, etc.
    :return: dict with the keys listed below, or None when the symbol is not
        in the hash or its stored value cannot be parsed as JSON
    """
    redis_code = format_stock_code(stock_code)[1]
    raw = get_redis_conn().hget(REDIS_KEY, redis_code)
    if not raw:
        return None
    try:
        quote = json.loads(raw)
    except Exception:
        return None

    def pick(primary, fallback):
        # The fallback key is consulted only when the primary key is absent;
        # a primary key that is present with a null value still yields None.
        if primary in quote:
            return quote.get(primary)
        return quote.get(fallback)

    # Map the stored quote fields onto the response field names.
    return {
        "code": quote.get("symbol"),
        "crawlDate": quote.get("fetch_time"),
        "marketValue": quote.get("market_capital"),
        "maxPrice": pick("high", "high52w"),
        "minPrice": pick("low", "low52w"),
        "nowPrice": quote.get("current"),
        "pbRate": quote.get("pb"),
        "rangeRiseAndFall": quote.get("percent"),
        "shortName": quote.get("name"),
        "todayStartPrice": quote.get("open"),
        "ttm": quote.get("pe_ttm"),
        "turnoverRate": quote.get("turnover_rate"),
        "yesterdayEndPrice": pick("last_close", "pre_close"),
    }


# Script entry point: run one collection pass with the default page size.
if __name__ == '__main__':
    fetch_and_store_stock_data()