commit;
This commit is contained in:
		
							parent
							
								
									4e4f4c8e4a
								
							
						
					
					
						commit
						8203b5dd74
					
				|  | @ -11,7 +11,8 @@ show_help() { | |||
|     echo "  start [实例ID]        启动指定实例或所有实例" | ||||
|     echo "  stop [实例ID]         停止指定实例或所有实例" | ||||
|     echo "  restart [实例ID]      重启指定实例或所有实例" | ||||
|     echo "  logs [实例ID]         实时查看指定实例的日志 (Ctrl+C 退出)" | ||||
|     echo "  logs [实例ID]         查看最新10条日志并实时跟踪新日志" | ||||
|     echo "  logs-follow [实例ID]  实时查看指定实例的日志 (Ctrl+C 退出)" | ||||
|     echo "  status               显示实例状态概览" | ||||
|     echo "  remove [实例ID]       删除指定实例或所有实例" | ||||
|     echo "  rebuild [数量]        重新构建镜像并部署指定数量的实例" | ||||
|  | @ -21,7 +22,8 @@ show_help() { | |||
|     echo "  $0 list              列出所有实例" | ||||
|     echo "  $0 start 2           启动实例2" | ||||
|     echo "  $0 stop all          停止所有实例" | ||||
|     echo "  $0 logs 1            查看实例1的日志" | ||||
|     echo "  $0 logs 1            查看实例1最新10条日志并实时跟踪" | ||||
|     echo "  $0 logs-follow 1     实时跟踪实例1的日志" | ||||
|     echo "  $0 rebuild 2         重新构建并部署2个实例" | ||||
|     echo "  $0 update            热更新所有实例的代码" | ||||
|     exit 1 | ||||
|  | @ -68,6 +70,13 @@ restart_instance() { | |||
| 
 | ||||
| # 函数:查看实例日志 | ||||
view_logs() {
    # Show the latest 10 log lines of instance $1, then keep following new
    # output until the user interrupts with Ctrl+C.
    echo "显示实例 $1 的最新10条日志,然后实时跟踪新日志 (按 Ctrl+C 退出):"
    echo "----------------------------------------"
    # Quote the container name so an instance id containing whitespace or
    # glob characters is passed to docker as a single, literal argument.
    docker logs --tail 10 -f "stock-app-$1"
}
| 
 | ||||
| # 函数:实时查看实例日志 | ||||
| view_logs_follow() { | ||||
|     echo "正在实时显示实例 $1 的日志 (按 Ctrl+C 退出):" | ||||
|     echo "----------------------------------------" | ||||
|     docker logs -f stock-app-$1 | ||||
|  | @ -187,6 +196,13 @@ case "$1" in | |||
|         fi | ||||
|         view_logs $2 | ||||
|         ;; | ||||
|     logs-follow) | ||||
|         if [ "$#" -lt 2 ]; then | ||||
|             echo "错误: 缺少实例ID参数" | ||||
|             show_help | ||||
|         fi | ||||
|         view_logs_follow $2 | ||||
|         ;; | ||||
|     status) | ||||
|         show_status | ||||
|         ;; | ||||
|  |  | |||
|  | @ -245,7 +245,7 @@ def create_buy_strategy_callback(xt_trader, acc, buy_amount, logger): | |||
|                      | ||||
|                     # 检查是否有在途订单(Redis) | ||||
|                     if is_stock_pending_order(stock_code): | ||||
|                         logger.info(f"{stock_code} 有在途订单,跳过买入") | ||||
|                         # logger.info(f"{stock_code} 有在途订单,跳过买入") | ||||
|                         continue | ||||
| 
 | ||||
|                     # 集合竞价时段:只观察,不下单 | ||||
|  | @ -339,7 +339,7 @@ def create_sell_strategy_callback(xt_trader, acc, logger): | |||
|                      | ||||
|                     # 检查是否有在途订单 | ||||
|                     if is_stock_pending_order(stock_code): | ||||
|                         logger.info(f"{stock_code} 有在途订单,跳过卖出") | ||||
|                         # logger.info(f"{stock_code} 有在途订单,跳过卖出") | ||||
|                         continue | ||||
| 
 | ||||
|                     # 集合竞价时段:只观察,不下单 | ||||
|  |  | |||
							
								
								
									
										68
									
								
								src/app.py
								
								
								
								
							
							
						
						
									
										68
									
								
								src/app.py
								
								
								
								
							|  | @ -3404,6 +3404,74 @@ def analyze_valuation_indicator(): | |||
|             "message": f"估值指标分析失败: {str(e)}" | ||||
|         }), 500 | ||||
| 
 | ||||
| 
 | ||||
@app.route('/api/overlap/analysis', methods=['GET'])
def analyze_stock_overlap():
    """Analyze board overlap and lagging performance for one stock.

    Query parameters:
    - stock_code: stock code (e.g. 300661.SZ); required
    - days: number of trading days to analyze; positive integer, default 3

    Response format on success:
    {
        "status": "success",
        "data": {
            "target_stock": "300661.SZ",
            "target_stock_name": "圣邦股份",
            "analysis_date": "2025-09-28 15:30:42",
            "analysis_period_days": 3,
            "industries": [...],
            "concepts": [...],
            "overlap_threshold": 5,
            "top5_stocks": [...],
            "lag_analysis": {...}
        }
    }

    Returns HTTP 400 when a parameter is missing or malformed and HTTP 500
    when the analysis itself reports an error or raises.
    """
    try:
        # Validate request parameters before touching the database.
        stock_code = request.args.get('stock_code')
        if not stock_code:
            return jsonify({
                "status": "error",
                "message": "缺少必需参数: stock_code"
            }), 400

        # A malformed or non-positive `days` is a client error, not a server
        # failure, so it is rejected here instead of falling into the generic
        # 500 handler below (or reaching the SQL LIMIT clause).
        try:
            days = int(request.args.get('days', 3))
        except (TypeError, ValueError):
            days = None
        if days is None or days < 1:
            return jsonify({
                "status": "error",
                "message": "参数 days 必须是正整数"
            }), 400

        # Imported lazily so the analyzer module is only loaded when this
        # endpoint is actually used.
        from src.quantitative_analysis.overlap_analyzer import OverlapAnalyzer

        analyzer = OverlapAnalyzer()
        try:
            result = analyzer.analyze_stock_overlap(stock_code, days)
        finally:
            # Always release the analyzer's database engine.
            analyzer.close_connection()

        if 'error' in result:
            return jsonify({
                "status": "error",
                "message": result['error']
            }), 500

        return jsonify({
            "status": "success",
            "data": result
        })

    except Exception as e:
        logger.error(f"重叠度分析失败: {str(e)}")
        return jsonify({
            "status": "error",
            "message": f"重叠度分析失败: {str(e)}"
        }), 500
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
| 
 | ||||
|     # 启动Web服务器 | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							|  | @ -289,10 +289,10 @@ def main(): | |||
|     analyzer = AverageDistanceFactor(db_url) | ||||
|      | ||||
|     # 示例1: 分析特定行业 | ||||
|     # result = analyzer.analyze_industry(industry_name="旅游") | ||||
|     result = analyzer.analyze_industry(industry_name="军工电子") | ||||
|      | ||||
|     # 示例2: 分析特定概念 | ||||
|     result = analyzer.analyze_industry(concept_name="人形机器人") | ||||
|     # result = analyzer.analyze_industry(concept_name="固态电池") | ||||
|      | ||||
|     # 示例3: 查看可用的行业列表 | ||||
|     # industries = analyzer.get_available_industries() | ||||
|  |  | |||
|  | @ -0,0 +1,668 @@ | |||
| #!/usr/bin/env python3 | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| """ | ||||
| 行业重叠度和概念重叠度分析工具 | ||||
| 用于分析某股票与相似股票的重叠度,识别滞涨投资机会 | ||||
| """ | ||||
| 
 | ||||
| import sys | ||||
| import os | ||||
| import logging | ||||
| from typing import Dict, List, Optional, Tuple | ||||
| from pathlib import Path | ||||
| from sqlalchemy import create_engine, text | ||||
| import pandas as pd | ||||
| from datetime import datetime, timedelta | ||||
| 
 | ||||
| # 添加项目根路径到Python路径 | ||||
| project_root = Path(__file__).parent.parent.parent | ||||
| sys.path.append(str(project_root)) | ||||
| 
 | ||||
| # 导入配置 | ||||
| try: | ||||
|     from src.valuation_analysis.config import DB_URL | ||||
| except ImportError: | ||||
|     # 如果上面的导入失败,尝试直接导入 | ||||
|     import importlib.util | ||||
|     config_path = os.path.join(project_root, 'src', 'valuation_analysis', 'config.py') | ||||
|     spec = importlib.util.spec_from_file_location("config", config_path) | ||||
|     config_module = importlib.util.module_from_spec(spec) | ||||
|     spec.loader.exec_module(config_module) | ||||
|     DB_URL = config_module.DB_URL | ||||
| 
 | ||||
| # 导入股票代码格式转换工具 | ||||
| try: | ||||
|     from tools.stock_code_formatter import StockCodeFormatter | ||||
| except ImportError: | ||||
|     # 如果上面的导入失败,尝试直接导入 | ||||
|     import importlib.util | ||||
|     formatter_path = os.path.join(project_root, 'tools', 'stock_code_formatter.py') | ||||
|     spec = importlib.util.spec_from_file_location("stock_code_formatter", formatter_path) | ||||
|     formatter_module = importlib.util.module_from_spec(spec) | ||||
|     spec.loader.exec_module(formatter_module) | ||||
|     StockCodeFormatter = formatter_module.StockCodeFormatter | ||||
| 
 | ||||
| # 设置日志 | ||||
| logging.basicConfig( | ||||
|     level=logging.INFO, | ||||
|     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | ||||
| ) | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
class OverlapAnalyzer:
    """Industry-board and concept-board overlap analyzer.

    For a target stock the analyzer collects every stock that shares an
    industry or concept board with it, scores each one by the number of
    shared boards, keeps the five highest-scoring stocks above a threshold
    and compares the target's recent price change against theirs to tell
    whether the target is lagging behind its peers.
    """

    # Scoring configuration. Kept at class level so the values are shared
    # constants; instances may still override them by plain assignment.
    INDUSTRY_WEIGHT = 3      # points per shared industry board
    CONCEPT_WEIGHT = 1       # points per shared concept board
    MIN_OVERLAP_SCORE = 5    # a stock must score strictly above this

    # Board-membership tables. These are the only values ever interpolated
    # into SQL text; all user-supplied values go through bind parameters.
    _INDUSTRY_TABLE = "gp_hybk"
    _CONCEPT_TABLE = "gp_gnbk"

    def __init__(self):
        """Create the stock-code formatter and connect to MySQL.

        Raises:
            Exception: whatever ``connect_mysql`` raises when the database
                cannot be reached.
        """
        # MySQL engine, created by connect_mysql()
        self.mysql_engine = None

        # Converter between the different stock-code notations
        self.code_formatter = StockCodeFormatter()

        self.connect_mysql()

    def connect_mysql(self):
        """Create the SQLAlchemy engine and verify it with a test query.

        Raises:
            Exception: re-raised (after logging) when the engine cannot be
                created or the test query fails.
        """
        try:
            self.mysql_engine = create_engine(
                DB_URL,
                pool_size=5,
                max_overflow=10,
                pool_recycle=3600
            )

            # Fail fast if the database is unreachable
            with self.mysql_engine.connect() as conn:
                conn.execute(text("SELECT 1"))

            logger.info("MySQL数据库连接成功")

        except Exception as e:
            logger.error(f"MySQL数据库连接失败: {str(e)}")
            raise

    def normalize_stock_code(self, stock_code: str) -> str:
        """Convert a stock code to the prefix format used in the database.

        Args:
            stock_code: stock code in any notation the formatter accepts

        Returns:
            str: the code as produced by ``to_prefix_format``
        """
        return self.code_formatter.to_prefix_format(stock_code)

    def _fetch_boards(self, table: str, stock_code: str) -> List[Tuple[str, str]]:
        """Return the distinct (board code, board name) pairs listing a stock.

        ``table`` must be one of the class-level table constants.
        """
        query = text(f"""
            SELECT DISTINCT bk_code, bk_name
            FROM {table}
            WHERE gp_code = :stock_code
            AND bk_code IS NOT NULL
            AND bk_name IS NOT NULL
        """)
        params = {"stock_code": self.normalize_stock_code(stock_code)}

        with self.mysql_engine.connect() as conn:
            rows = conn.execute(query, params).fetchall()

        return [(str(row[0]), str(row[1])) for row in rows]

    def _fetch_board_members(self, table: str, boards: List[Tuple[str, str]], label: str) -> Dict[str, List[str]]:
        """Return {board code: [member stock codes]} for every given board.

        ``table`` must be one of the class-level table constants; ``label``
        is the board-type prefix used in the per-board log line.
        """
        if not boards:
            return {}

        query = text(f"""
            SELECT DISTINCT gp_code
            FROM {table}
            WHERE bk_code = :bk_code
            AND gp_code IS NOT NULL
        """)

        members = {}
        # One connection serves all boards instead of one checkout per board
        with self.mysql_engine.connect() as conn:
            for bk_code, bk_name in boards:
                rows = conn.execute(query, {"bk_code": bk_code}).fetchall()
                stock_codes = [row[0] for row in rows if row[0]]
                members[bk_code] = stock_codes
                logger.info(f"{label} {bk_name}({bk_code}) 包含 {len(stock_codes)} 只股票")

        return members

    def get_stock_industries(self, stock_code: str) -> List[Tuple[str, str]]:
        """Get the industry boards a stock belongs to.

        Args:
            stock_code: stock code

        Returns:
            List[Tuple[str, str]]: [(board code, board name)]; empty when the
            stock has no industry board or the query fails (the failure is
            logged, not raised)
        """
        try:
            industries = self._fetch_boards(self._INDUSTRY_TABLE, stock_code)
            logger.info(f"股票 {stock_code} 找到 {len(industries)} 个行业板块")
            return industries

        except Exception as e:
            logger.error(f"获取股票 {stock_code} 行业板块失败: {str(e)}")
            return []

    def get_stock_concepts(self, stock_code: str) -> List[Tuple[str, str]]:
        """Get the concept boards a stock belongs to.

        Args:
            stock_code: stock code

        Returns:
            List[Tuple[str, str]]: [(board code, board name)]; empty when the
            stock has no concept board or the query fails (the failure is
            logged, not raised)
        """
        try:
            concepts = self._fetch_boards(self._CONCEPT_TABLE, stock_code)
            logger.info(f"股票 {stock_code} 找到 {len(concepts)} 个概念板块")
            return concepts

        except Exception as e:
            logger.error(f"获取股票 {stock_code} 概念板块失败: {str(e)}")
            return []

    def get_similar_stocks_by_industry(self, industries: List[Tuple[str, str]]) -> Dict[str, List[str]]:
        """Get the member stocks of each industry board.

        Args:
            industries: industry boards as [(board code, board name)]

        Returns:
            Dict[str, List[str]]: {board code: [stock codes]}; empty when the
            query fails (the failure is logged, not raised)
        """
        try:
            return self._fetch_board_members(self._INDUSTRY_TABLE, industries, "行业板块")

        except Exception as e:
            logger.error(f"获取行业相似股票失败: {str(e)}")
            return {}

    def get_similar_stocks_by_concept(self, concepts: List[Tuple[str, str]]) -> Dict[str, List[str]]:
        """Get the member stocks of each concept board.

        Args:
            concepts: concept boards as [(board code, board name)]

        Returns:
            Dict[str, List[str]]: {board code: [stock codes]}; empty when the
            query fails (the failure is logged, not raised)
        """
        try:
            return self._fetch_board_members(self._CONCEPT_TABLE, concepts, "概念板块")

        except Exception as e:
            logger.error(f"获取概念相似股票失败: {str(e)}")
            return {}

    def calculate_overlap_scores(self, target_stock: str, industry_stocks: Dict[str, List[str]], concept_stocks: Dict[str, List[str]]) -> Dict[str, float]:
        """Score every peer stock by the boards it shares with the target.

        Each shared industry board adds ``INDUSTRY_WEIGHT`` and each shared
        concept board adds ``CONCEPT_WEIGHT``. The target itself is excluded.

        Args:
            target_stock: target stock code
            industry_stocks: {industry board code: [stock codes]}
            concept_stocks: {concept board code: [stock codes]}

        Returns:
            Dict[str, float]: {stock code: overlap score}; empty on failure
        """
        try:
            target_code = self.normalize_stock_code(target_stock)
            overlap_scores = {}

            weighted_groups = (
                (self.INDUSTRY_WEIGHT, industry_stocks),
                (self.CONCEPT_WEIGHT, concept_stocks),
            )
            for weight, boards in weighted_groups:
                for stock_list in boards.values():
                    for stock_code in stock_list:
                        if stock_code == target_code:
                            continue  # never compare the target with itself
                        overlap_scores[stock_code] = overlap_scores.get(stock_code, 0) + weight

            logger.info(f"计算出 {len(overlap_scores)} 只股票的重叠分数")
            return overlap_scores

        except Exception as e:
            logger.error(f"计算重叠分数失败: {str(e)}")
            return {}

    def filter_high_overlap_stocks(self, overlap_scores: Dict[str, float]) -> List[Tuple[str, float]]:
        """Keep the stocks scoring strictly above ``MIN_OVERLAP_SCORE``.

        Args:
            overlap_scores: {stock code: overlap score}

        Returns:
            List[Tuple[str, float]]: [(stock code, score)] sorted by score,
            highest first; ties keep their input order. Empty on failure.
        """
        try:
            high_overlap_stocks = sorted(
                (
                    (stock_code, score)
                    for stock_code, score in overlap_scores.items()
                    if score > self.MIN_OVERLAP_SCORE
                ),
                key=lambda item: item[1],
                reverse=True,
            )

            logger.info(f"筛选出 {len(high_overlap_stocks)} 只高重叠度股票(分数>{self.MIN_OVERLAP_SCORE})")
            return high_overlap_stocks

        except Exception as e:
            logger.error(f"筛选高重叠度股票失败: {str(e)}")
            return []

    def get_recent_price_changes(self, stock_codes: List[str], days: int = 3) -> Dict[str, Dict]:
        """Get each stock's price change over the last ``days`` trading days.

        The change is measured from the close ``days`` sessions back to the
        latest close, which requires ``days + 1`` daily rows. When fewer rows
        exist the oldest available close is used as the baseline and
        ``trading_days`` reports how many sessions were actually covered.

        Args:
            stock_codes: stock codes in database (prefix) format
            days: number of trading days to cover, default 3

        Returns:
            Dict[str, Dict]: {stock code: {latest_price, price_change,
            change_pct, latest_change_pct, trading_days}}. Stocks without at
            least two usable rows are omitted. Empty on failure.
        """
        try:
            price_changes = {}

            if days < 1:
                logger.warning(f"获取近期涨跌幅失败: days 必须为正整数, 实际为 {days}")
                return price_changes
            if not stock_codes:
                return price_changes

            query = text("""
                SELECT close, percent, timestamp
                FROM gp_day_data
                WHERE symbol = :stock_code
                ORDER BY timestamp DESC
                LIMIT :row_limit
            """)
            # N sessions of change need N + 1 closes (the extra one is the baseline)
            row_limit = days + 1

            # One connection serves all stocks instead of one checkout per stock
            with self.mysql_engine.connect() as conn:
                for stock_code in stock_codes:
                    rows = conn.execute(
                        query, {"stock_code": stock_code, "row_limit": row_limit}
                    ).fetchall()

                    if len(rows) < 2:  # need a latest close and a baseline close
                        continue

                    latest_close, latest_percent = rows[0][0], rows[0][1]
                    baseline_close = rows[-1][0]

                    # Skip missing prices; a zero baseline would also divide by zero
                    if not latest_close or not baseline_close:
                        continue

                    latest_price = float(latest_close)
                    historical_price = float(baseline_close)
                    price_change = latest_price - historical_price

                    price_changes[stock_code] = {
                        'latest_price': latest_price,
                        'price_change': price_change,
                        'change_pct': (price_change / historical_price) * 100,
                        # 0 is a valid daily change (flat day), only NULL means "no data"
                        'latest_change_pct': float(latest_percent) if latest_percent is not None else None,
                        'trading_days': len(rows) - 1
                    }

            logger.info(f"获取到 {len(price_changes)} 只股票的近{days}个交易日涨跌幅数据")
            return price_changes

        except Exception as e:
            logger.error(f"获取近期涨跌幅失败: {str(e)}")
            return {}

    @staticmethod
    def _empty_lag_result() -> Dict:
        """Return the lag-analysis result used when no analysis is possible."""
        return {
            'target_lag_vs_top5': None,
            'target_lag_vs_top1': None,
            'top5_avg_change': None,
            'top1_change': None
        }

    @staticmethod
    def _lag_level(lag_score: Optional[float]) -> str:
        """Map a lag score (target change minus peer change, in percentage points) to a label."""
        if lag_score is None:
            return "无数据"
        if lag_score < -8:
            return "严重滞涨"
        if lag_score < -4:
            return "明显滞涨"
        if lag_score < -1.5:
            return "轻微滞涨"
        if lag_score > 4:
            return "跑赢同行"
        if lag_score > 1.5:
            return "略胜同行"
        return "正常水平"

    def analyze_lag_performance(self, target_change: Dict, top5_stocks: List[Tuple[str, float]], similar_changes: Dict[str, Dict]) -> Dict:
        """Compare the target's price change with its highest-overlap peers.

        Args:
            target_change: price data of the target stock (needs 'change_pct')
            top5_stocks: highest-overlap stocks as [(stock code, overlap
                score)], best first
            similar_changes: {stock code: price data} of the peer stocks

        Returns:
            Dict: 'target_lag_vs_top5' and 'target_lag_vs_top1' (each a dict
            with the lag score, its level label and the inputs),
            'top5_avg_change' and 'top1_change'. All four values are None
            when the target has no price data, no peer has price data, or the
            analysis fails. 'top1_change' alone is None when only the
            top-ranked peer lacks price data.
        """
        try:
            if not target_change or 'change_pct' not in target_change:
                logger.warning("目标股票涨跌幅数据不完整,无法进行滞涨分析")
                return self._empty_lag_result()

            target_change_pct = target_change['change_pct']

            def change_of(stock_code):
                # Percentage change of one peer, or None when it has no data
                return (similar_changes.get(stock_code) or {}).get('change_pct')

            # Only peers that actually have price data take part in the average
            top5_changes = [
                change_pct
                for change_pct in (change_of(stock_code) for stock_code, _ in top5_stocks)
                if change_pct is not None
            ]

            if not top5_changes:
                logger.warning("没有有效的前5只股票涨跌幅数据")
                return self._empty_lag_result()

            top5_avg_change = sum(top5_changes) / len(top5_changes)

            # Look the top-ranked peer up by its own code. Taking the first
            # entry of top5_changes would silently report the second-ranked
            # stock whenever the top one has no price data.
            top1_change = change_of(top5_stocks[0][0])

            lag_vs_top5 = target_change_pct - top5_avg_change
            lag_vs_top1 = target_change_pct - top1_change if top1_change is not None else None

            result = {
                'target_lag_vs_top5': {
                    'lag_score': lag_vs_top5,
                    'lag_level': self._lag_level(lag_vs_top5),
                    'target_change_pct': target_change_pct,
                    'top5_avg_change': top5_avg_change
                },
                'target_lag_vs_top1': {
                    'lag_score': lag_vs_top1,
                    'lag_level': self._lag_level(lag_vs_top1),
                    'target_change_pct': target_change_pct,
                    'top1_change': top1_change
                },
                'top5_avg_change': top5_avg_change,
                'top1_change': top1_change
            }

            top1_text = f"{top1_change:.2f}%" if top1_change is not None else "无数据"
            logger.info(f"滞涨分析完成 - 目标股票: {target_change_pct:.2f}%, 前5平均: {top5_avg_change:.2f}%, 最高1只: {top1_text}")
            return result

        except Exception as e:
            logger.error(f"滞涨分析失败: {str(e)}")
            return self._empty_lag_result()

    def get_stock_name(self, stock_code: str) -> Optional[str]:
        """Look up the display name of a stock.

        Args:
            stock_code: stock code

        Returns:
            Optional[str]: the stock name, or None when it is unknown, empty
            or the query fails (the failure is logged, not raised)
        """
        try:
            query = text("""
                SELECT gp_name
                FROM gp_code_all
                WHERE gp_code = :stock_code
                LIMIT 1
            """)
            params = {"stock_code": self.normalize_stock_code(stock_code)}

            with self.mysql_engine.connect() as conn:
                row = conn.execute(query, params).fetchone()

            if row and row[0]:
                return row[0]
            return None

        except Exception as e:
            logger.error(f"获取股票名称失败 {stock_code}: {str(e)}")
            return None

    def analyze_stock_overlap(self, stock_code: str, days: int = 20) -> Dict:
        """Run the complete overlap and lag analysis for one stock.

        Args:
            stock_code: target stock code
            days: number of trading days used for the price-change comparison

        Returns:
            Dict: the analysis result. It contains 'target_stock' plus either
            an 'error' message (no board data, or an unexpected failure) or
            the full result: 'target_stock_name', 'target_price_data',
            'analysis_date', 'analysis_period_days', 'industries',
            'concepts', 'overlap_threshold', 'top5_stocks' and
            'lag_analysis'. When no peer exceeds the overlap threshold the
            full result is still returned, with an empty 'top5_stocks' and
            the additional keys 'similar_stocks_count' (0) and 'message'.
        """
        try:
            logger.info(f"开始分析股票 {stock_code} 的重叠度")

            # 1. Boards of the target stock
            industries = self.get_stock_industries(stock_code)
            concepts = self.get_stock_concepts(stock_code)

            if not industries and not concepts:
                logger.warning(f"股票 {stock_code} 没有找到行业或概念板块数据")
                return {
                    'target_stock': stock_code,
                    'error': '未找到行业或概念板块数据'
                }

            # 2. Members of those boards
            industry_stocks = self.get_similar_stocks_by_industry(industries)
            concept_stocks = self.get_similar_stocks_by_concept(concepts)

            # 3./4. Score the peers and keep the ones above the threshold
            overlap_scores = self.calculate_overlap_scores(stock_code, industry_stocks, concept_stocks)
            high_overlap_stocks = self.filter_high_overlap_stocks(overlap_scores)

            # Keys shared by every non-error result, so callers can rely on
            # them whether or not any peer was found
            result = {
                'target_stock': stock_code,
                'target_stock_name': self.get_stock_name(stock_code),
                'target_price_data': None,
                'analysis_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'analysis_period_days': days,
                'industries': [{'code': code, 'name': name} for code, name in industries],
                'concepts': [{'code': code, 'name': name} for code, name in concepts],
                'overlap_threshold': self.MIN_OVERLAP_SCORE,
                'top5_stocks': [],
                'lag_analysis': self._empty_lag_result()
            }

            if not high_overlap_stocks:
                logger.info(f"股票 {stock_code} 没有找到高重叠度股票(分数>{self.MIN_OVERLAP_SCORE})")
                result['similar_stocks_count'] = 0
                result['message'] = f'没有找到重叠度超过{self.MIN_OVERLAP_SCORE}分的股票'
                return result

            # 5. Price changes of the target and its five closest peers
            target_code = self.normalize_stock_code(stock_code)
            top5_stocks = high_overlap_stocks[:5]
            all_stock_codes = [target_code] + [code for code, _ in top5_stocks]

            price_changes = self.get_recent_price_changes(all_stock_codes, days)

            # 6. Lag analysis
            target_change = price_changes.get(target_code)
            similar_changes = {
                code: data for code, data in price_changes.items()
                if code != target_code
            }
            lag_analysis = self.analyze_lag_performance(target_change, top5_stocks, similar_changes)

            # 7. Per-peer details
            top5_results = []
            for similar_stock, overlap_score in top5_stocks:
                price_data = price_changes.get(similar_stock, {})
                top5_results.append({
                    'stock_code': similar_stock,
                    'stock_name': self.get_stock_name(similar_stock),
                    'overlap_score': overlap_score,
                    'latest_price': price_data.get('latest_price'),
                    'price_change': price_data.get('price_change'),
                    'change_pct': price_data.get('change_pct')
                })

            result['target_price_data'] = target_change
            result['top5_stocks'] = top5_results
            result['lag_analysis'] = lag_analysis
            return result

        except Exception as e:
            logger.error(f"分析股票重叠度失败 {stock_code}: {str(e)}")
            return {
                'target_stock': stock_code,
                'error': str(e)
            }

    def close_connection(self):
        """Dispose of the MySQL engine; failures are logged, not raised."""
        try:
            if self.mysql_engine:
                self.mysql_engine.dispose()
                logger.info("MySQL连接已关闭")
        except Exception as e:
            logger.error(f"关闭MySQL连接失败: {str(e)}")
| 
 | ||||
| 
 | ||||
def main():
    """Demo entry point: analyse one sample stock and print a text report.

    Creates an ``OverlapAnalyzer``, runs ``analyze_stock_overlap`` for a
    hard-coded ticker over a 3-day window and prints the target stock, up to
    three industry and concept boards, the most-overlapping peers and the lag
    analysis. If the result carries an ``'error'`` key, only the error is
    printed. Any exception is logged instead of propagated, and the analyzer's
    connection is closed in all cases.

    Numeric report fields that come back as ``None`` are printed as 0 for the
    price-change and lag figures, matching what the peer table already did.
    """

    def _num(value):
        # ``dict.get(key, 0)`` only covers a missing key; an explicit None
        # would still break the ``:.2f`` format spec, so coerce it to 0.
        return value or 0

    analyzer = None
    try:
        analyzer = OverlapAnalyzer()

        stock_code = "300661.SZ"  # sample ticker used for the demo run

        print(f"=== 股票重叠度分析:{stock_code} ===")

        result = analyzer.analyze_stock_overlap(stock_code, days=3)

        # The analyzer reports failures through an 'error' key, not an exception.
        if 'error' in result:
            print(f"分析失败: {result['error']}")
            return

        print(f"\n目标股票: {result['target_stock_name']} ({result['target_stock']})")
        print(f"分析日期: {result['analysis_date']}")
        print(f"统计周期: {result['analysis_period_days']}天")

        target_data = result.get('target_price_data')
        if target_data:
            print(f"目标股票涨跌幅: {_num(target_data.get('change_pct')):.2f}%")

        print(f"\n行业板块: {len(result['industries'])}个")
        for industry in result['industries'][:3]:  # show at most three
            print(f"  - {industry['name']} ({industry['code']})")

        print(f"\n概念板块: {len(result['concepts'])}个")
        for concept in result['concepts'][:3]:  # show at most three
            print(f"  - {concept['name']} ({concept['code']})")

        print("\n重叠度最高的5只股票:")
        print("-" * 80)
        print(f"{'股票代码':<12} {'股票名称':<15} {'重叠分数':<8} {'最新价格':<10} {'涨跌幅':<10}")
        print("-" * 80)

        for stock in result['top5_stocks']:
            print(f"{stock['stock_code']:<12} "
                  f"{stock['stock_name'] or '未知':<15} "
                  f"{stock['overlap_score']:<8.1f} "
                  f"{_num(stock['latest_price']):<10.2f} "
                  f"{_num(stock['change_pct']):<10.2f}%")

        # Lag analysis: target stock versus the peer average and the top peer.
        lag_analysis = result.get('lag_analysis', {})
        if lag_analysis:
            print("\n滞涨分析结果:")
            print("-" * 50)

            vs_top5 = lag_analysis.get('target_lag_vs_top5', {})
            if vs_top5:
                print(f"vs 前5平均: {_num(vs_top5.get('lag_score')):.2f}% ({vs_top5.get('lag_level', '未知')})")
                print(f"  目标股票: {_num(vs_top5.get('target_change_pct')):.2f}%")
                print(f"  前5平均: {_num(vs_top5.get('top5_avg_change')):.2f}%")

            vs_top1 = lag_analysis.get('target_lag_vs_top1', {})
            if vs_top1:
                print(f"vs 最高1只: {_num(vs_top1.get('lag_score')):.2f}% ({vs_top1.get('lag_level', '未知')})")
                print(f"  目标股票: {_num(vs_top1.get('target_change_pct')):.2f}%")
                print(f"  最高1只: {_num(vs_top1.get('top1_change')):.2f}%")

    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
    finally:
        # Release the database connection even when the report failed midway.
        if analyzer:
            analyzer.close_connection()
| 
 | ||||
| 
 | ||||
# Run the demo report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|  | @ -713,6 +713,28 @@ def main(): | |||
|                 print(f"  最高总分: {df['total_score'].max():.2f}") | ||||
|                 print(f"  最低总分: {df['total_score'].min():.2f}") | ||||
|          | ||||
|         # 生成成长+成熟合并的简表(仅三列:股票代码、总分、排名) | ||||
|         combined_parts = [] | ||||
|         for stage, df in results.items(): | ||||
|             if isinstance(df, pd.DataFrame) and not df.empty: | ||||
|                 if 'stock_code' in df.columns and 'total_score' in df.columns: | ||||
|                     combined_parts.append(df[['stock_code', 'total_score']].copy()) | ||||
|          | ||||
|         if combined_parts: | ||||
|             combined_df = pd.concat(combined_parts, ignore_index=True) | ||||
|             # 去除总分为空的数据 | ||||
|             combined_df = combined_df.dropna(subset=['total_score']) | ||||
|             # 按总分降序并重新排名 | ||||
|             combined_df = combined_df.sort_values('total_score', ascending=False).reset_index(drop=True) | ||||
|             combined_df['rank'] = range(1, len(combined_df) + 1) | ||||
|             # 保存文件 | ||||
|             combined_file = f"tech_fundamental_factor_all_{datetime.now().strftime('%Y%m%d_%H%M')}.csv" | ||||
|             combined_df.to_csv(combined_file, index=False, encoding='utf-8-sig') | ||||
|             print(f"\n=== 合并结果(成长+成熟) ===") | ||||
|             print(f"总股票数量: {len(combined_df)}") | ||||
|             print(combined_df.head(10).to_string(index=False)) | ||||
|             print(f"\n合并简表已保存到: {combined_file}") | ||||
|          | ||||
|         print(f"\n=== 策略运行完成 ===") | ||||
|          | ||||
|     except Exception as e: | ||||
|  |  | |||
|  | @ -0,0 +1,757 @@ | |||
| #!/usr/bin/env python3 | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| """ | ||||
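Stock screening for four tracks: solid-state batteries, humanoid robots,
communication equipment and computing-power leasing.
Combines each stock's historical fundamentals with its price volatility; the
volatility indicator used here scores higher the more volatile the stock is.
NOTE: the "v2" script is NOT the tech-theme stock picker. This file is the
entry point - run this file.
Integrates company-lifecycle, financial-indicator and average-distance factor
analysis.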
| """ | ||||
| 
 | ||||
| import sys | ||||
| import pandas as pd | ||||
| import numpy as np | ||||
| import logging | ||||
| from typing import Dict, List, Tuple | ||||
| from pathlib import Path | ||||
| from sqlalchemy import create_engine, text | ||||
| from datetime import datetime | ||||
| import math | ||||
| 
 | ||||
| # 添加项目根路径到Python路径 | ||||
| project_root = Path(__file__).parent.parent.parent | ||||
| sys.path.append(str(project_root)) | ||||
| 
 | ||||
| # 导入依赖的工具类 | ||||
| from src.quantitative_analysis.company_lifecycle_factor import CompanyLifecycleFactor | ||||
| from src.quantitative_analysis.financial_indicator_analyzer import FinancialIndicatorAnalyzer | ||||
| from src.quantitative_analysis.average_distance_factor import AverageDistanceFactor | ||||
| from src.valuation_analysis.config import MONGO_CONFIG2, DB_URL | ||||
| 
 | ||||
| # 设置日志 | ||||
| logging.basicConfig( | ||||
|     level=logging.INFO, | ||||
|     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | ||||
| ) | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
| class TechFundamentalFactorStrategy: | ||||
|     """科技主题基本面因子选股策略""" | ||||
|      | ||||
|     def __init__(self): | ||||
|         """初始化策略""" | ||||
|         self.lifecycle_calculator = CompanyLifecycleFactor() | ||||
|         self.financial_analyzer = FinancialIndicatorAnalyzer() | ||||
|         self.distance_calculator = AverageDistanceFactor(DB_URL) | ||||
|          | ||||
|         # MySQL连接 | ||||
|         self.mysql_engine = create_engine( | ||||
|             DB_URL, | ||||
|             pool_size=5, | ||||
|             max_overflow=10, | ||||
|             pool_recycle=3600 | ||||
|         ) | ||||
|          | ||||
|         # 科技概念板块列表 | ||||
|         self.tech_concepts = [ | ||||
|             "固态电池", "人形机器人", "通信设备", "算力租赁" | ||||
|         ] | ||||
|         logger.info("科技主题基本面因子选股策略初始化完成") | ||||
|      | ||||
|     def get_tech_stocks(self) -> pd.DataFrame: | ||||
|         """ | ||||
|         获取科技概念板块的股票列表 | ||||
|          | ||||
|         Returns: | ||||
|             pd.DataFrame: 包含股票代码和名称的DataFrame | ||||
|         """ | ||||
|         try: | ||||
|             # 构建查询条件 | ||||
|             concepts_str = "', '".join(self.tech_concepts) | ||||
|             query = text(f""" | ||||
|                 SELECT DISTINCT gp_code as stock_code, gp_name as stock_name, bk_name as concept_name | ||||
|                 FROM gp_gnbk  | ||||
|                 WHERE bk_name IN ('{concepts_str}') | ||||
|                 UNION | ||||
|                 SELECT DISTINCT gp_code as stock_code, gp_name as stock_name, bk_name as concept_name | ||||
|                 FROM gp_hybk | ||||
|                 WHERE bk_name = '通信设备' | ||||
|                 ORDER BY stock_code | ||||
|             """) | ||||
| 
 | ||||
|             with self.mysql_engine.connect() as conn: | ||||
|                 df = pd.read_sql(query, conn) | ||||
|              | ||||
|             logger.info(f"获取到 {len(df)} 只科技概念股票") | ||||
|             return df | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"获取科技概念股票失败: {str(e)}") | ||||
|             return pd.DataFrame() | ||||
|      | ||||
|     def filter_by_lifecycle(self, stock_codes: List[str], year: int = 2024) -> Dict[str, List[str]]: | ||||
|         """ | ||||
|         根据企业生命周期筛选股票 | ||||
|          | ||||
|         Args: | ||||
|             stock_codes: 股票代码列表 | ||||
|             year: 分析年份 | ||||
|              | ||||
|         Returns: | ||||
|             Dict: 包含成长期和成熟期股票的字典 | ||||
|         """ | ||||
|         try: | ||||
|             logger.info(f"开始分析 {len(stock_codes)} 只股票的企业生命周期") | ||||
|              | ||||
|             # 批量计算生命周期 | ||||
|             lifecycle_df = self.lifecycle_calculator.batch_calculate_lifecycle_factors(stock_codes, year) | ||||
|              | ||||
|             # 筛选目标阶段的股票 | ||||
|             # 引入期(1)和成长期(2)合并为成长期,成熟期(3)保持不变 | ||||
|             growth_stage_stocks = lifecycle_df[ | ||||
|                 lifecycle_df['stage_id'].isin([1, 2]) | ||||
|             ]['stock_code'].tolist() | ||||
|              | ||||
|             mature_stage_stocks = lifecycle_df[ | ||||
|                 lifecycle_df['stage_id'] == 3 | ||||
|             ]['stock_code'].tolist() | ||||
|              | ||||
|             result = { | ||||
|                 'growth': growth_stage_stocks, | ||||
|                 'mature': mature_stage_stocks | ||||
|             } | ||||
|              | ||||
|             logger.info(f"成长期股票: {len(growth_stage_stocks)} 只") | ||||
|             logger.info(f"成熟期股票: {len(mature_stage_stocks)} 只") | ||||
|              | ||||
|             return result | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"生命周期筛选失败: {str(e)}") | ||||
|             return {'growth': [], 'mature': []} | ||||
|      | ||||
|     def calculate_distance_factors(self, growth_stocks: List[str], mature_stocks: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]: | ||||
|         """ | ||||
|         分别计算成长期和成熟期股票的平均距离因子 | ||||
|          | ||||
|         Args: | ||||
|             growth_stocks: 成长期股票列表 | ||||
|             mature_stocks: 成熟期股票列表 | ||||
|              | ||||
|         Returns: | ||||
|             Tuple: (成长期距离因子DataFrame, 成熟期距离因子DataFrame) | ||||
|         """ | ||||
|         try: | ||||
|             growth_distance_df = pd.DataFrame() | ||||
|             mature_distance_df = pd.DataFrame() | ||||
|              | ||||
|             # 计算成长期股票距离因子 | ||||
|             if growth_stocks: | ||||
|                 logger.info(f"计算 {len(growth_stocks)} 只成长期股票的距离因子") | ||||
|                 growth_data = self.distance_calculator.get_stock_data(growth_stocks) | ||||
|                 if not growth_data.empty: | ||||
|                     growth_indicators = self.distance_calculator.calculate_technical_indicators(growth_data) | ||||
|                     growth_distance_df = self.distance_calculator.calculate_distance_factor(growth_indicators) | ||||
|              | ||||
|             # 计算成熟期股票距离因子 | ||||
|             if mature_stocks: | ||||
|                 logger.info(f"计算 {len(mature_stocks)} 只成熟期股票的距离因子") | ||||
|                 mature_data = self.distance_calculator.get_stock_data(mature_stocks) | ||||
|                 if not mature_data.empty: | ||||
|                     mature_indicators = self.distance_calculator.calculate_technical_indicators(mature_data) | ||||
|                     mature_distance_df = self.distance_calculator.calculate_distance_factor(mature_indicators) | ||||
|              | ||||
|             return growth_distance_df, mature_distance_df | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"计算距离因子失败: {str(e)}") | ||||
|             return pd.DataFrame(), pd.DataFrame() | ||||
|      | ||||
|     def calculate_common_factors(self, stock_codes: List[str]) -> pd.DataFrame: | ||||
|         """ | ||||
|         计算通用因子 | ||||
|          | ||||
|         Args: | ||||
|             stock_codes: 股票代码列表 | ||||
|              | ||||
|         Returns: | ||||
|             pd.DataFrame: 包含通用因子的DataFrame | ||||
|         """ | ||||
|         try: | ||||
|             logger.info(f"计算 {len(stock_codes)} 只股票的通用因子") | ||||
|              | ||||
|             results = [] | ||||
|             latest_date = "2025-06-30"  # 最新季度数据 | ||||
|             annual_date = "2024-12-31"  # 年报数据 | ||||
|              | ||||
|             for stock_code in stock_codes: | ||||
|                 try: | ||||
| 
 | ||||
|                     factor_data = {'stock_code': stock_code} | ||||
|                      | ||||
|                     # 1. 毛利率(使用最新数据) | ||||
|                     gross_margin = self.financial_analyzer.analyze_gross_profit_margin(stock_code, latest_date) | ||||
|                     factor_data['gross_profit_margin'] = gross_margin | ||||
|                      | ||||
|                     # 2. 成长能力指标 | ||||
|                     growth_capability = self.financial_analyzer.analyze_growth_capability(stock_code) | ||||
|                     if growth_capability is not None: | ||||
|                         # 成长能力越高越好,使用sigmoid函数映射到0-1 | ||||
|                         growth_score = 1 / (1 + math.exp(-growth_capability)) | ||||
|                     else: | ||||
|                         growth_score = 0.5  # 默认中性评分 | ||||
|                     factor_data['growth_score'] = growth_score | ||||
|                      | ||||
|                     # 3. 前五大供应商占比(使用年报数据) | ||||
|                     supplier_conc = self.financial_analyzer.analyze_supplier_concentration(stock_code, annual_date) | ||||
|                     factor_data['supplier_concentration'] = supplier_conc | ||||
| 
 | ||||
|                     # 4. 前五大客户占比(使用年报数据) | ||||
|                     customer_conc = self.financial_analyzer.analyze_customer_concentration(stock_code, annual_date) | ||||
|                     factor_data['customer_concentration'] = customer_conc | ||||
|                      | ||||
|                     results.append(factor_data) | ||||
|                      | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"计算股票 {stock_code} 通用因子失败: {str(e)}") | ||||
|                     continue | ||||
|              | ||||
|             df = pd.DataFrame(results) | ||||
|             logger.info(f"成功计算 {len(df)} 只股票的通用因子") | ||||
|             return df | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"计算通用因子失败: {str(e)}") | ||||
|             return pd.DataFrame() | ||||
|      | ||||
|     def calculate_growth_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame: | ||||
|         """ | ||||
|         计算成长期特色因子 | ||||
|          | ||||
|         Args: | ||||
|             stock_codes: 成长期股票代码列表 | ||||
|              | ||||
|         Returns: | ||||
|             pd.DataFrame: 包含成长期特色因子的DataFrame | ||||
|         """ | ||||
|         try: | ||||
|             logger.info(f"计算 {len(stock_codes)} 只成长期股票的特色因子") | ||||
|              | ||||
|             results = [] | ||||
|             latest_date = "2025-06-30"  # 使用最新数据 | ||||
|             annual_date = "2024-12-31"  # 使用年度数据 | ||||
| 
 | ||||
|             for stock_code in stock_codes: | ||||
|                 try: | ||||
| 
 | ||||
|                     factor_data = {'stock_code': stock_code} | ||||
|                      | ||||
|                     # 1. 管理费用率(使用最新数据) | ||||
|                     admin_ratio = self.financial_analyzer.analyze_admin_expense_ratio(stock_code, latest_date) | ||||
|                     factor_data['admin_expense_ratio'] = admin_ratio | ||||
|                      | ||||
|                     # 2. 研发费用折旧摊销占比(使用年度数据) | ||||
|                     # financial_data = self.financial_analyzer.get_financial_data(stock_code, latest_date) | ||||
|                     financial_data = self.financial_analyzer.get_financial_data(stock_code, annual_date) | ||||
|                     if financial_data: | ||||
|                         intangible_amortize = financial_data.get('cash_flow_statement', {}).get('IA_AMORTIZE', 0) | ||||
|                         rd_expense = financial_data.get('profit_statement', {}).get('RESEARCH_EXPENSE', 0) | ||||
|                          | ||||
|                         if rd_expense and rd_expense != 0: | ||||
|                             rd_amortize_ratio = intangible_amortize / rd_expense if intangible_amortize else 0 | ||||
|                         else: | ||||
|                             rd_amortize_ratio = None  # 使用None而不是0,避免这些股票获得最高分 | ||||
|                          | ||||
|                         factor_data['rd_amortize_ratio'] = rd_amortize_ratio | ||||
|                     else: | ||||
|                         factor_data['rd_amortize_ratio'] = None | ||||
|                      | ||||
|                     # 3. 资产负债率(使用最新数据) | ||||
|                     asset_liability_ratio = self.financial_analyzer.analyze_asset_liability_ratio(stock_code, latest_date) | ||||
|                     factor_data['asset_liability_ratio'] = asset_liability_ratio | ||||
|                      | ||||
|                     results.append(factor_data) | ||||
|                      | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"计算股票 {stock_code} 成长期特色因子失败: {str(e)}") | ||||
|                     continue | ||||
|              | ||||
|             df = pd.DataFrame(results) | ||||
|             logger.info(f"成功计算 {len(df)} 只成长期股票的特色因子") | ||||
|             return df | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"计算成长期特色因子失败: {str(e)}") | ||||
|             return pd.DataFrame() | ||||
|      | ||||
|     def calculate_mature_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame: | ||||
|         """ | ||||
|         计算成熟期特色因子 | ||||
|          | ||||
|         Args: | ||||
|             stock_codes: 成熟期股票代码列表 | ||||
|              | ||||
|         Returns: | ||||
|             pd.DataFrame: 包含成熟期特色因子的DataFrame | ||||
|         """ | ||||
|         try: | ||||
|             logger.info(f"计算 {len(stock_codes)} 只成熟期股票的特色因子") | ||||
|              | ||||
|             latest_date = "2025-06-30"  # 使用最新数据 | ||||
|              | ||||
|             # 在循环外获取全A股PB和ROE数据,避免重复查询 | ||||
|             logger.info("获取全A股PB数据...") | ||||
|             all_pb_data = self.financial_analyzer.get_all_stocks_pb_data() | ||||
|              | ||||
|             logger.info("获取全A股ROE数据...") | ||||
|             all_roe_data = self.financial_analyzer.get_all_stocks_roe_data(latest_date) | ||||
|              | ||||
|             results = [] | ||||
|              | ||||
|             for stock_code in stock_codes: | ||||
|                 try: | ||||
|                     factor_data = {'stock_code': stock_code} | ||||
|                      | ||||
|                     # 1. 应收账款周转率(使用最新数据) | ||||
|                     formatted_stock_code = self.financial_analyzer.code_formatter.to_dot_format(stock_code) | ||||
|                     financial_data = self.financial_analyzer.get_financial_data(formatted_stock_code, latest_date) | ||||
|                     if financial_data: | ||||
|                         revenue = financial_data.get('profit_statement', {}).get('OPERATE_INCOME', 0) | ||||
|                         accounts_rece = financial_data.get('balance_sheet', {}).get('ACCOUNTS_RECE', 0) | ||||
|                          | ||||
|                         if accounts_rece and accounts_rece != 0: | ||||
|                             turnover_ratio = revenue / accounts_rece if revenue else 0 | ||||
|                         else: | ||||
|                             turnover_ratio = None  # 使用None而不是0 | ||||
|                          | ||||
|                         factor_data['accounts_receivable_turnover'] = turnover_ratio | ||||
|                     else: | ||||
|                         factor_data['accounts_receivable_turnover'] = None | ||||
|                      | ||||
|                     # 2. 研发强度(使用最新数据) | ||||
|                     rd_intensity = self.financial_analyzer.analyze_rd_expense_ratio(stock_code, latest_date) | ||||
|                     factor_data['rd_intensity'] = rd_intensity | ||||
|                      | ||||
|                     # 3. PB-ROE排名因子:使用预获取的全A股数据 | ||||
|                     if all_pb_data and all_roe_data: | ||||
|                         pb_roe_rank_factor = self.financial_analyzer.calculate_pb_roe_rank_factor( | ||||
|                             stock_code, all_pb_data, all_roe_data | ||||
|                         ) | ||||
|                         factor_data['pb_roe_rank_factor'] = pb_roe_rank_factor | ||||
|                     else: | ||||
|                         factor_data['pb_roe_rank_factor'] = None | ||||
|                      | ||||
|                     results.append(factor_data) | ||||
|                      | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"计算股票 {stock_code} 成熟期特色因子失败: {str(e)}") | ||||
|                     continue | ||||
|              | ||||
|             df = pd.DataFrame(results) | ||||
|             logger.info(f"成功计算 {len(df)} 只成熟期股票的特色因子") | ||||
|             return df | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"计算成熟期特色因子失败: {str(e)}") | ||||
|             return pd.DataFrame() | ||||
|      | ||||
|     def run_strategy(self, year: int = 2024) -> Dict[str, pd.DataFrame]: | ||||
|         """ | ||||
|         运行完整的选股策略 | ||||
|          | ||||
|         Args: | ||||
|             year: 分析年份 | ||||
|              | ||||
|         Returns: | ||||
|             Dict: 包含成长期和成熟期股票分析结果的字典 | ||||
|         """ | ||||
|         try: | ||||
|             logger.info("开始运行科技主题基本面因子选股策略") | ||||
|              | ||||
|             # 1. 获取科技概念股票 | ||||
|             tech_stocks_df = self.get_tech_stocks() | ||||
|             if tech_stocks_df.empty: | ||||
|                 logger.error("未获取到科技概念股票") | ||||
|                 return {} | ||||
|              | ||||
|             stock_codes = tech_stocks_df['stock_code'].unique().tolist() | ||||
|             logger.info(f"共获取到 {len(stock_codes)} 只科技概念股票") | ||||
|              | ||||
|             # 2. 按企业生命周期筛选 | ||||
|             lifecycle_result = self.filter_by_lifecycle(stock_codes, year) | ||||
|             growth_stocks = lifecycle_result['growth'] | ||||
|             mature_stocks = lifecycle_result['mature'] | ||||
|              | ||||
|             if not growth_stocks and not mature_stocks: | ||||
|                 logger.warning("未找到符合条件的成长期或成熟期股票") | ||||
|                 return {} | ||||
|              | ||||
|             # 3. 计算平均距离因子 | ||||
|             growth_distance_df, mature_distance_df = self.calculate_distance_factors(growth_stocks, mature_stocks) | ||||
|              | ||||
|             # 4. 计算通用因子 | ||||
|             all_qualified_stocks = growth_stocks + mature_stocks | ||||
|             common_factors_df = self.calculate_common_factors(all_qualified_stocks) | ||||
|              | ||||
|             # 5. 计算特色因子 | ||||
|             growth_specific_df = self.calculate_growth_specific_factors(growth_stocks) if growth_stocks else pd.DataFrame() | ||||
|             mature_specific_df = self.calculate_mature_specific_factors(mature_stocks) if mature_stocks else pd.DataFrame() | ||||
|              | ||||
|             # 6. 合并结果并计算分数 | ||||
|             result = {} | ||||
|              | ||||
|             # 处理成长期股票 | ||||
|             if not growth_specific_df.empty: | ||||
|                 # 成长期结果合并 | ||||
|                 growth_result = growth_specific_df.copy() | ||||
|                  | ||||
|                 # 合并距离因子 | ||||
|                 if not growth_distance_df.empty: | ||||
|                     growth_result = growth_result.merge( | ||||
|                         growth_distance_df[['symbol', 'avg_distance_factor']],  | ||||
|                         left_on='stock_code', right_on='symbol', how='left' | ||||
|                     ).drop('symbol', axis=1) | ||||
|                  | ||||
|                 # 合并通用因子 | ||||
|                 if not common_factors_df.empty: | ||||
|                     growth_result = growth_result.merge( | ||||
|                         common_factors_df, on='stock_code', how='left' | ||||
|                     ) | ||||
|                  | ||||
|                 # 计算因子分数 | ||||
|                 growth_result = self.calculate_factor_scores(growth_result, 'growth') | ||||
|                  | ||||
|                 # 计算总分并排序 | ||||
|                 growth_result = self.calculate_total_score(growth_result, 'growth') | ||||
|                  | ||||
|                 result['growth'] = growth_result | ||||
|                 logger.info(f"成长期结果: {len(growth_result)} 只股票") | ||||
|              | ||||
|             # 处理成熟期股票 | ||||
|             if not mature_specific_df.empty: | ||||
|                 # 成熟期结果合并 | ||||
|                 mature_result = mature_specific_df.copy() | ||||
|                  | ||||
|                 # 合并距离因子 | ||||
|                 if not mature_distance_df.empty: | ||||
|                     mature_result = mature_result.merge( | ||||
|                         mature_distance_df[['symbol', 'avg_distance_factor']],  | ||||
|                         left_on='stock_code', right_on='symbol', how='left' | ||||
|                     ).drop('symbol', axis=1) | ||||
|                  | ||||
|                 # 合并通用因子 | ||||
|                 if not common_factors_df.empty: | ||||
|                     mature_result = mature_result.merge( | ||||
|                         common_factors_df, on='stock_code', how='left' | ||||
|                     ) | ||||
|                  | ||||
|                 # 计算因子分数 | ||||
|                 mature_result = self.calculate_factor_scores(mature_result, 'mature') | ||||
|                  | ||||
|                 # 计算总分并排序 | ||||
|                 mature_result = self.calculate_total_score(mature_result, 'mature') | ||||
|                  | ||||
|                 result['mature'] = mature_result | ||||
|                 logger.info(f"成熟期结果: {len(mature_result)} 只股票") | ||||
|              | ||||
|             logger.info("科技主题基本面因子选股策略运行完成") | ||||
|             return result | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"策略运行失败: {str(e)}") | ||||
|             return {} | ||||
|      | ||||
|     def calculate_factor_scores(self, df: pd.DataFrame, stage: str) -> pd.DataFrame: | ||||
|         """ | ||||
|         计算单因子打分(0-100分位数) | ||||
|          | ||||
|         Args: | ||||
|             df: 包含因子数据的DataFrame | ||||
|             stage: 阶段类型 ('growth' 或 'mature') | ||||
|              | ||||
|         Returns: | ||||
|             pd.DataFrame: 包含因子分数的DataFrame | ||||
|         """ | ||||
|         try: | ||||
|             if df.empty: | ||||
|                 return df | ||||
|              | ||||
|             df_scored = df.copy() | ||||
|              | ||||
|             # 定义因子方向(正向为True,负向为False) | ||||
|             factor_directions = { | ||||
|                 # 通用因子 | ||||
|                 'gross_profit_margin': True,  # 毛利率_环比增量 - 正向 | ||||
|                 'growth_score': True,       # 成长能力 - 正向 | ||||
|                 'supplier_concentration': False,  # 前5大供应商金额占比合计 - 负向 | ||||
|                 'customer_concentration': False,  # 前5大客户收入金额占比合计 - 负向 | ||||
|                 'avg_distance_factor': True,     # 平均距离因子 - 负向 | ||||
|                  | ||||
|                 # 成长期特色因子 | ||||
|                 'admin_expense_ratio': False,     # 管理费用/营业总收入_环比增量 - 负向 | ||||
|                 'rd_amortize_ratio': False,       # 研发费用折旧摊销占比_环比增量 - 负向 | ||||
|                 'asset_liability_ratio': True,    # 资产负债率 - 正向 | ||||
|                  | ||||
|                 # 成熟期特色因子 | ||||
|                 'accounts_receivable_turnover': True,  # 应收账款周转率 - 正向 | ||||
|                 'rd_intensity': True,                  # 研发费用直接投入占比_环比增量 - 正向 | ||||
|                 'pb_roe_rank_factor': False            # PB-ROE排名因子 - 负向(越小越好) | ||||
|             } | ||||
|              | ||||
|             # 为每个因子计算分位数分数 | ||||
|             for column in df.columns: | ||||
|                 if column == 'stock_code': | ||||
|                     continue | ||||
|                  | ||||
|                 # 只对有效值进行排名计算 | ||||
|                 values = df_scored[column].dropna() | ||||
|                 if len(values) <= 1: | ||||
|                     # 如果只有一个值或没有值,所有股票都得50分或0分 | ||||
|                     if len(values) == 1: | ||||
|                         df_scored[f'{column}_score'] = df_scored[column].apply(lambda x: 50 if pd.notna(x) else 0) | ||||
|                     else: | ||||
|                         df_scored[f'{column}_score'] = 0 | ||||
|                     continue | ||||
|                  | ||||
|                 # 根据因子方向确定排序方式 | ||||
|                 is_positive = factor_directions.get(column, True) | ||||
|                  | ||||
|                 # 计算排名分数 | ||||
|                 if is_positive: | ||||
|                     # 正向因子:值越大分数越高 | ||||
|                     ranked_values = values.rank(pct=True) * 100 | ||||
|                 else: | ||||
|                     # 负向因子:值越小分数越高 | ||||
|                     ranked_values = (1 - values.rank(pct=True)) * 100 | ||||
|                  | ||||
|                 # 初始化分数列 | ||||
|                 df_scored[f'{column}_score'] = 0.0 | ||||
|                  | ||||
|                 # 将分数赋值给对应的行 | ||||
|                 for idx in ranked_values.index: | ||||
|                     df_scored.loc[idx, f'{column}_score'] = ranked_values[idx] | ||||
|              | ||||
|             logger.info(f"完成 {stage} 阶段 {len(df_scored)} 只股票的因子打分") | ||||
|             return df_scored | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"计算因子分数失败: {str(e)}") | ||||
|             import traceback | ||||
|             traceback.print_exc() | ||||
|             return df | ||||
|      | ||||
def calculate_total_score(self, df: pd.DataFrame, stage: str) -> pd.DataFrame:
    """
    Compute a composite score for every stock and rank the stocks by it.

    For each row, the factor scores configured for ``stage`` are collected,
    skipping columns that are absent and values that are NaN or not strictly
    positive. With the remaining scores ``Si`` the total is::

        total = 1/8 * WeightedMean(Si) + Mean(Si) / Std(Si)

    ``WeightedMean`` uses the stage weights re-normalised over the factors
    that are actually usable for that row. The second term uses the plain
    (unweighted) mean and the population standard deviation, and is 0 when
    fewer than two scores are usable or when they are all equal. A row with
    no usable score gets a total of 0.

    Args:
        df: DataFrame containing the ``*_score`` factor columns.
        stage: ``'growth'`` selects the growth-stage factor set; any other
            value selects the mature-stage factor set.

    Returns:
        pd.DataFrame: A copy of ``df`` sorted by ``total_score`` in
        descending order with a fresh 0..n-1 index and two added columns,
        ``total_score`` and ``rank`` (1-based). Tied totals keep their
        input order. An empty ``df`` is returned unchanged. If anything
        fails, the error is logged and the original ``df`` is returned
        without the new columns.
    """
    try:
        if df.empty:
            return df

        df_result = df.copy()

        # Keys are score-column names ("<factor>_score"), not raw factor
        # names. 'growth_score_score' is intentional: it is the score column
        # of the 'growth_score' factor.
        common_weights = {
            'gross_profit_margin_score': 1/8,
            'growth_score_score': 1/8,
            'supplier_concentration_score': 1/8,
            'customer_concentration_score': 1/8,
            'avg_distance_factor_score': 2/8,
        }
        if stage == 'growth':
            # Growth-stage specific factors.
            stage_weights = {
                'admin_expense_ratio_score': 1/8,
                'rd_amortize_ratio_score': 1/8,
                'asset_liability_ratio_score': 1/8,
            }
        else:
            # Mature-stage specific factors.
            stage_weights = {
                'accounts_receivable_turnover_score': 1/8,
                'rd_intensity_score': 1/8,
                'pb_roe_rank_factor_score': 1/8,
            }
        # The weights sum to 9/8, which is harmless: they are re-normalised
        # per row below, so only their ratios matter.
        factor_weights = {**common_weights, **stage_weights}

        total_scores = []
        for _, row in df_result.iterrows():
            usable_scores = []
            usable_weights = []

            for factor, weight in factor_weights.items():
                # NOTE(review): a score of exactly 0 is dropped together with
                # NaN. 0 appears to double as a "no data" marker, but it can
                # also be a genuine worst-rank score - confirm this is intended.
                if factor in row and pd.notna(row[factor]) and row[factor] > 0:
                    usable_scores.append(row[factor])
                    usable_weights.append(weight)

            if not usable_scores:
                total_scores.append(0)
                continue

            scores = np.array(usable_scores)
            weights = np.array(usable_weights)

            # Re-normalise so the weights of the usable factors sum to 1.
            weights = weights / weights.sum()
            mean_score = np.average(scores, weights=weights)

            # Consistency bonus Mean(Si)/Std(Si); undefined for a single
            # score or zero spread, in which case it contributes nothing.
            spread = np.std(scores)
            if len(scores) > 1 and spread > 0:
                adjustment = np.mean(scores) / spread
            else:
                adjustment = 0

            total_scores.append((1/8) * mean_score + adjustment)

        df_result['total_score'] = total_scores

        # A stable sort keeps tied totals in input order, so the ranking is
        # reproducible from run to run.
        df_result = df_result.sort_values(
            'total_score', ascending=False, kind='mergesort'
        ).reset_index(drop=True)
        df_result['rank'] = range(1, len(df_result) + 1)

        logger.info(f"完成 {stage} 阶段 {len(df_result)} 只股票的总分计算")
        return df_result

    except Exception as e:
        logger.error(f"计算总分失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return df
|      | ||||
def close_connections(self):
    """
    Release every helper object and database resource held by the instance.

    Each resource is released independently: a failure in one step is logged
    and the remaining steps still run, so an error while closing
    ``financial_analyzer`` can no longer leave ``mysql_engine`` undisposed.
    Attributes that were never set are skipped. The success message is
    logged only when every attempted step succeeded. Exceptions raised by
    the release steps are never propagated to the caller.
    """
    # (attribute name, action that releases it), in the original order.
    release_steps = (
        ('lifecycle_calculator', lambda: delattr(self, 'lifecycle_calculator')),
        ('financial_analyzer', lambda: self.financial_analyzer.close_connection()),
        ('distance_calculator', lambda: delattr(self, 'distance_calculator')),
        ('mysql_engine', lambda: self.mysql_engine.dispose()),
    )

    all_released = True
    for attr_name, release in release_steps:
        try:
            if hasattr(self, attr_name):
                release()
        except Exception as e:
            # Keep going: the other resources must still be released.
            all_released = False
            logger.error(f"关闭连接失败: {str(e)}")

    if all_released:
        logger.info("数据库连接已关闭")
| 
 | ||||
| 
 | ||||
def main():
    """
    Run the tech-theme fundamental factor stock-selection strategy.

    Creates the strategy, runs it for year 2024, and for every returned
    stage prints a short summary and writes the full result to a CSV file.
    Afterwards the stages are merged into one table (stock code, total
    score, overall rank, concepts) that is printed and written to a second
    CSV file. All files of one run share the same timestamp suffix.

    Any exception is logged and its traceback printed; the strategy's
    connections are closed in every case.
    """
    strategy = None
    try:
        print("=== 科技主题基本面因子选股策略 ===")
        print("数据说明:")
        print("- 毛利率、净利润增长率等:使用最新数据 (2025-06-30)")
        print("- 供应商客户集中度、折旧摊销、研发费用:使用年报数据 (2024-12-31)")
        print()

        # Create the strategy instance.
        strategy = TechFundamentalFactorStrategy()
        logger.info("策略实例创建成功")

        # Run the strategy.
        results = strategy.run_strategy(year=2024)

        if not results:
            print("未获得分析结果")
            return

        # One timestamp per run, so the per-stage files and the combined
        # file carry the same suffix even if the run crosses a minute.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M')

        for stage, df in results.items():
            print(f"\n=== {stage.upper()} 阶段股票分析结果 ===")
            print(f"股票数量: {len(df)}")

            if df.empty:
                continue

            # Debug aid: show every column name.
            print(f"数据列: {list(df.columns)}")

            # Key metrics of the first five stocks.
            print("\n前5只股票:")
            display_columns = [
                'stock_code', 'gross_profit_margin', 'growth_score',
                'supplier_concentration', 'customer_concentration',
                'total_score', 'rank'
            ]
            available_columns = [col for col in display_columns if col in df.columns]
            print(df[available_columns].head(5).to_string(index=False))

            # Save the complete result of this stage.
            output_file = f"tech_fundamental_factor_{stage}_{timestamp}.csv"
            df.to_csv(output_file, index=False, encoding='utf-8-sig')
            print(f"\n完整结果已保存到: {output_file}")

            # Summary statistics. The column is missing when the scoring of
            # this stage failed; skip the statistics then instead of letting
            # a KeyError abort the remaining stages and the combined export.
            if 'total_score' in df.columns:
                print("\n统计信息:")
                print(f"  平均总分: {df['total_score'].mean():.2f}")
                print(f"  最高总分: {df['total_score'].max():.2f}")
                print(f"  最低总分: {df['total_score'].min():.2f}")

        # Build the combined growth + mature table (code, total score, rank).
        combined_parts = []
        for stage, df in results.items():
            if isinstance(df, pd.DataFrame) and not df.empty:
                if 'stock_code' in df.columns and 'total_score' in df.columns:
                    combined_parts.append(df[['stock_code', 'total_score']].copy())

        if combined_parts:
            combined_df = pd.concat(combined_parts, ignore_index=True)
            # Drop rows without a total score.
            combined_df = combined_df.dropna(subset=['total_score'])
            # Sort by total score, descending, and re-rank across stages.
            combined_df = combined_df.sort_values('total_score', ascending=False).reset_index(drop=True)
            combined_df['rank'] = range(1, len(combined_df) + 1)

            # Append the concepts each stock belongs to (comma separated).
            # This is best-effort: on failure the column is left empty, but
            # the reason is logged rather than silently discarded.
            try:
                concept_df = strategy.get_tech_stocks()
                if not concept_df.empty and 'stock_code' in concept_df.columns and 'concept_name' in concept_df.columns:
                    concept_map = (
                        concept_df.groupby('stock_code')['concept_name']
                        .apply(lambda s: ','.join(sorted({str(x) for x in s if pd.notna(x)})))
                        .to_dict()
                    )
                    combined_df['concepts'] = combined_df['stock_code'].map(concept_map)
                else:
                    combined_df['concepts'] = None
            except Exception as e:
                logger.warning(f"获取概念信息失败,概念列留空: {str(e)}")
                combined_df['concepts'] = None

            # Save the combined table.
            combined_file = f"tech_fundamental_factor_all_{timestamp}.csv"
            combined_df.to_csv(combined_file, index=False, encoding='utf-8-sig')
            print("\n=== 合并结果(成长+成熟) ===")
            print(f"总股票数量: {len(combined_df)}")
            print(combined_df.head(10).to_string(index=False))
            print(f"\n合并简表已保存到: {combined_file}")

        print("\n=== 策略运行完成 ===")

    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        if strategy:
            strategy.close_connections()


# Run the strategy only when the file is executed as a script.
if __name__ == "__main__":
    main()
|  | @ -1,3 +1,5 @@ | |||
| #美股行业板块导出,从通达信里面打开美股首页,然后栏目里面有个细分行业。点击导出所有栏目即可 | ||||
| 
 | ||||
| import requests | ||||
| import pandas as pd | ||||
| from datetime import datetime | ||||
|  | @ -291,3 +293,5 @@ if __name__ == '__main__': | |||
|     fetch_and_store_us_stock_data_optimized(use_proxy=False)  # 默认不使用代理 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ XUEQIU_HEADERS = { | |||
|     'Accept-Encoding': 'gzip, deflate, br, zstd', | ||||
|     'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', | ||||
|     'Client-Version': 'v2.44.75', | ||||
|     'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; _c_WBKFRo=dsWgHR8i8KGPbIyhFlN51PHOzVuuNytvUAFppfkD; _nb_ioWEgULi=; aliyungf_tc=00c6b999835b16cea9e4a6aab36cca373a0976bf55ee74770d11f421f7119ad8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1757464900; xq_a_token=975ab9f15a4965b9e557b9bc6f33bc1da20a0f49; xqat=975ab9f15a4965b9e557b9bc6f33bc1da20a0f49; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzYwMjU2NzM4LCJjdG0iOjE3NTc2NjQ3Mzg4MjUsImNpZCI6ImQ5ZDBuNEFadXAifQ.TMx4-TjKx96j5h6-EGiRIM2WKtJm1xctZhYidc40Em0pRcr0UBHAKBGl3No5r1BElYa9qnEDgNYI0Zv137Inx-EMPqm5cd1Z_ZjLdWOSLzT9qqBj8zdfuqJwP2nCYvC6KLjd8BvykS0vSFKqwb-r0WhEA3OzbO8teVNsaemdKAhBoIyP3-RQCfRxJ9RLNha1ZMdg66iZvfz_SOsG41y8IA9yyl-FFFJOq4TnAiywY1yO1QIJJhkh8YQqfnDfQQdSIFgJGToU980Lw1dm4aCDY-kvn-t18KjrL_hZJ_UNN65bgZsSsuWf-VQ7wsjjczNrfBYAHdZ6kES0CGo9g8IZZw; xq_r_token=c209224335327f29fc555d9910b43c0df6d52d5a; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1757774901; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=rJD0qKtipTMjRBkLBVEyXbl0CiVeY7y4AxEZC0Vf6Zkou9cxp0NPsxwSrnOyFyBMr+Ws5/nJDO1NUalRDyAPsA%3D%3D; acw_tc=3ccdc14717578973700474899e2dc1c35b6358f1af81617250f8f00b4cf31c; ssxmod_itna=1-eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0qRHhqDylAkiETFN1t42Y5D/KlYeDZDGFdDqx0Ei6FiYHK1ezjCGbKSAQY5P53Niio89NQ7DEQm6fjL1S4K7s5h8KRDo9n4hiDB3DbqDymgY5qxGGA4GwDGoD34DiDDPDb8rDALeD7qDFnenropTDm4GWneGfDDoDYbT3xiUYDDUvbeG2iET4DDN4bIGYZ2G76=r1doBip29xKiTDjqPD/ShUoiuzZKC4icFL2/amAeGyC5GuY6mWHQ77SWcbscAV70i8hx_Bx3rKqB5YGDRYqK8o2xY9iKR0YRDxeEDW0DWnQ8EwhDDiP46iRiGDYZtgrNMhXiY4MQA7bAilP4nPkFGCmqzBqGYesQGQiT3ihKbm5CexbxxD; 
ssxmod_itna2=1-eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0qRHhqDylAkiETFN1t42YeDA4rYnRItORCitz/D3nyGQigbiD', | ||||
|     'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; _c_WBKFRo=dsWgHR8i8KGPbIyhFlN51PHOzVuuNytvUAFppfkD; _nb_ioWEgULi=; aliyungf_tc=00c6b999835b16cea9e4a6aab36cca373a0976bf55ee74770d11f421f7119ad8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1757464900; xq_a_token=0c9cfffc3d713428b4759d3c4e11fff91b618525; xqat=0c9cfffc3d713428b4759d3c4e11fff91b618525; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzYxNTYzMTU1LCJjdG0iOjE3NTg5NzExNTUyODksImNpZCI6ImQ5ZDBuNEFadXAifQ.KotT161D0bQF-m3HNgQFTknHqUpMUR_KWY7BAjiLKOaQocVghM85sCKUretUK5OpMDXMzGPhzxTWMs4VhGxKUHxHSu3inrbCw7rgGDtK0Lnr_6OXHkEMwDadN37V3AYbxKWaX609pACLK1q2cvRmGg94wAgj9KEqu76gdV4w4tRv4LupUJQgYb6M0GEVe7ejLs2_CdV5PV7j0PSjTtpjaeZWkc9YhW32vOyA6gg3fU9S4BQg7qgciYLI8AxiU-GYgFLyFSK2S7nvAngz8TEhNoQcClKrD4NeqqSj2nA5bh5_5FM8AbdbKGw6K8eIO0wkxCV2iUWBDMz-h160cX0GLw; xq_r_token=e586d0abb8de83a22a5d66be2f908954a50005b2; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=nGvY6/KwbtfD3HgPBzY+S0khnrVCqr4mPyfsZ27X+GjYcRWS61PMogABu5uvj/LAd4TJsSFQiYwdEepZMxwg3A%3D%3D; ssxmod_itna=1-iqmxyQG=KWqGqwD4qeqY5Lor=G2zDuDBP01DpxYK0CDmxjKideDUDtTxR3R27rQA04D7777mqqeCQ_4D/A0ieDZDGKQDqx0or0KBiBUPehSmRxdW9gTBThssUUo8vG8FV4x9WCL5ZG9emiTjxIx_D0aDmKDUx4x_BAYDeWFDCeDQxirDD4DAmPDFxibDimAb4DdkK8EvUOpIDGrDlKDRchok4GWDiPD76gDl6gpbDDCDiyjKV3Dixiaz3eDBomc41iI4xi3kb8_c8b5Yq4EWi2ID7v3DlPqjdnIKy/M4E3Eps/9=agju40ODG6CofPchOBoFO3o6=rlwIlqrDKpY4Kleiix57xhx_eQDrWD57IpDx54_xmmZ7hpY21Bb0nQi/AO4b3mDrnNDt0=a0q345Bi5Drhl54nmeYnAD_mmD_GWq7h1iGq7YzGxA8DqBD=4nz7xNImYnb47xxD; 
ssxmod_itna2=1-iqmxyQG=KWqGqwD4qeqY5Lor=G2zDuDBP01DpxYK0CDmxjKideDUDtTxR3R27rQA04D7777mqqeCQ4DW_QRiAwxKA07vi3WDj4pxkh3pfaDlxTMHwkxxRl_UFFyKFmUR1v7kCiqQHALxyxuseYn8Q79bLg08eBKxkZwGFEy8ZxTGW7YkIrrzM=5YCx_Gem5Qg2HvQz9AhR0FvxfOG7AQwzoPjSpGmtApnKdwC4uUuzI6eDo2Bedxmf8L2fgms1KnurZO0K=m0ifvg8yDYb=qs=GimaBFojOsWf01ibDy6ZP5DLpur3mS//PdQPR0PqEEqiKseLxQQ3iKiC3ijQ5zKLELcAULPdKD6Q86ejjQKAk6K3leiePYGxdGpYpDbO3YPKa_x4RPTxTif5T3mOIGa8TCY44Y4hWn1AQq7G1nDQ7GQ7D5BDxzWw8bz3D; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1759979780', | ||||
|     'Referer': 'https://weibo.com/u/7735765253', | ||||
|     'Sec-Ch-Ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"', | ||||
|     'Sec-Ch-Ua-Mobile': '?0', | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue