This commit is contained in:
liao 2025-07-03 15:57:04 +08:00
parent 34f1cfd6e6
commit aa4be0e64c
20 changed files with 15844 additions and 75 deletions

517
data.sql Normal file
View File

@ -0,0 +1,517 @@
/*
Navicat Premium Dump SQL
Source Server : 192.168.18.199(gpfx)
Source Server Type : MySQL
Source Server Version : 90200 (9.2.0)
Source Host : 192.168.18.199:3306
Source Schema : db_gp_cj
Target Server Type : MySQL
Target Server Version : 90200 (9.2.0)
File Encoding : 65001
Date: 26/06/2025 11:41:15
*/
-- Session setup for the restore: utf8mb4 for full Unicode, and FK checks
-- disabled so tables can be dropped/recreated in any order.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for eastmoney_rzrq_data
-- ----------------------------
-- Daily margin-trading (rongzi/rongquan) metrics from Eastmoney, one row
-- per trading day (trade_date is the primary key). Monetary columns are
-- DECIMAL(20,2); ratio/percent columns are DECIMAL(10,4). The *_3d/_5d/_10d
-- columns are, per their COMMENTs, 3/5/10-day aggregates of the base column.
DROP TABLE IF EXISTS `eastmoney_rzrq_data`;
CREATE TABLE `eastmoney_rzrq_data` (
`trade_date` date NOT NULL,
`index_value` decimal(10, 4) NULL DEFAULT NULL COMMENT '指数',
`change_percent` decimal(10, 4) NULL DEFAULT NULL COMMENT '涨跌幅',
`float_market_value` decimal(20, 2) NULL DEFAULT NULL COMMENT '流通市值',
`change_percent_3d` decimal(10, 4) NULL DEFAULT NULL COMMENT '3日涨跌幅',
`change_percent_5d` decimal(10, 4) NULL DEFAULT NULL COMMENT '5日涨跌幅',
`change_percent_10d` decimal(10, 4) NULL DEFAULT NULL COMMENT '10日涨跌幅',
`financing_balance` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资余额',
`financing_balance_ratio` decimal(10, 4) NULL DEFAULT NULL COMMENT '融资余额占比',
`financing_buy_amount` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资买入额',
`financing_buy_amount_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融资买入额',
`financing_buy_amount_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融资买入额',
`financing_buy_amount_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融资买入额',
`financing_repay_amount` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资偿还额',
`financing_repay_amount_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融资偿还额',
`financing_repay_amount_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融资偿还额',
`financing_repay_amount_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融资偿还额',
`financing_net_amount` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资净额',
`financing_net_amount_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融资净额',
`financing_net_amount_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融资净额',
`financing_net_amount_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融资净额',
`securities_balance` decimal(20, 2) NULL DEFAULT NULL COMMENT '融券余额',
`securities_volume` decimal(20, 2) NULL DEFAULT NULL COMMENT '融券余量',
`securities_repay_volume` decimal(20, 2) NULL DEFAULT NULL COMMENT '融券偿还量',
`securities_repay_volume_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融券偿还量',
`securities_repay_volume_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融券偿还量',
`securities_repay_volume_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融券偿还量',
`securities_sell_volume` decimal(20, 2) NULL DEFAULT NULL COMMENT '融券卖出量',
`securities_sell_volume_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融券卖出量',
`securities_sell_volume_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融券卖出量',
`securities_sell_volume_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融券卖出量',
`securities_net_volume` decimal(20, 2) NULL DEFAULT NULL COMMENT '融券净量',
`securities_net_volume_3d` decimal(20, 2) NULL DEFAULT NULL COMMENT '3日融券净量',
`securities_net_volume_5d` decimal(20, 2) NULL DEFAULT NULL COMMENT '5日融券净量',
`securities_net_volume_10d` decimal(20, 2) NULL DEFAULT NULL COMMENT '10日融券净量',
`total_rzrq_balance` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资融券余额',
`total_rzrq_balance_cz` decimal(20, 2) NULL DEFAULT NULL COMMENT '融资融券余额差值',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`trade_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '东方财富融资融券数据表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for fear_greed_index
-- ----------------------------
-- Market fear & greed index time series: one row per trading day
-- (uk_trading_date enforces uniqueness); index_value ranges 0-100 per its
-- column COMMENT, and update_time auto-refreshes on each row update.
DROP TABLE IF EXISTS `fear_greed_index`;
CREATE TABLE `fear_greed_index` (
`id` int NOT NULL AUTO_INCREMENT,
`index_value` decimal(5, 2) NOT NULL COMMENT '恐贪指数值(0-100)',
`trading_date` date NOT NULL COMMENT '交易日期',
`update_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_trading_date`(`trading_date` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1003 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '市场恐贪指数数据' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for follow_stock
-- ----------------------------
-- User watchlist: one row per followed stock, unique on stock_code.
-- status defaults to 1 (per its COMMENT simply "状态"/state; exact enum
-- values are not defined here — confirm against application code).
DROP TABLE IF EXISTS `follow_stock`;
CREATE TABLE `follow_stock` (
`id` bigint NOT NULL AUTO_INCREMENT,
`stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
`stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票名称',
`add_time` datetime NULL DEFAULT NULL COMMENT '添加时间',
`status` tinyint NULL DEFAULT 1 COMMENT '状态',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_stock_code`(`stock_code` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 22 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '关注的股票' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for fund_cang
-- ----------------------------
-- Fund holdings ("持仓") rows: a fund (fund_id/fund_name) holding a company
-- (company_id/company_name) with an amount (company_money) for a reporting
-- period (fund_data, stored as a varchar(20) — presumably a date/period
-- string; confirm format against the loader).
DROP TABLE IF EXISTS `fund_cang`;
CREATE TABLE `fund_cang` (
`id` int NOT NULL AUTO_INCREMENT,
`fund_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`fund_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`company_money` decimal(20, 2) NULL DEFAULT NULL,
`company_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`company_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`fund_data` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1874595 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for fundamental_analysis
-- ----------------------------
-- AI-generated fundamental-analysis results, one row per (stock, analysis
-- dimension) run; idx_stock_dimension supports lookups by that pair.
-- `references` is a MySQL reserved word — it must stay backtick-quoted in
-- every query that touches this table.
DROP TABLE IF EXISTS `fundamental_analysis`;
CREATE TABLE `fundamental_analysis` (
`id` int NOT NULL AUTO_INCREMENT,
`stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
`stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票名称',
`dimension` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '分析维度',
`ai_response` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'AI分析结果',
`reasoning_process` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '推理过程',
`references` json NULL COMMENT '参考资料',
`update_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`extra_info` json NULL COMMENT '扩展信息',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_stock_dimension`(`stock_code` ASC, `dimension` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 12104 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '基本面分析结果表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_category_industry
-- ----------------------------
-- Small lookup mapping a product category name to the industry it belongs
-- to (both free-text varchars; no uniqueness enforced on category_name).
DROP TABLE IF EXISTS `gp_category_industry`;
CREATE TABLE `gp_category_industry` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'id',
`category_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类名称',
`belong_industry` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '所属行业',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 52 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_code_all
-- ----------------------------
-- Symbol universe tables used by the crawlers. The three tables below
-- (gp_code_all / gp_code_hk / gp_code_zs) share an identical layout:
-- a name plus up to three code variants (gp_code_two/gp_code_three are
-- presumably alternate exchange-prefixed encodings of the same symbol —
-- confirm against the crawler), market cap, and two free-form mark flags.
-- NOTE(review): the table COMMENTs look copy-pasted — gp_code_all says
-- "个股代码" (stock codes) while gp_code_hk and gp_code_zs both say
-- "指数代码" (index codes); gp_code_hk plainly holds HK symbols.
DROP TABLE IF EXISTS `gp_code_all`;
CREATE TABLE `gp_code_all` (
`id` int NOT NULL AUTO_INCREMENT,
`gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`market_cap` decimal(20, 2) NULL DEFAULT NULL,
`mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 6686 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有个股代码-爬取指数用' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_code_hk
-- ----------------------------
-- Hong Kong symbol universe; same layout as gp_code_all (see note above).
DROP TABLE IF EXISTS `gp_code_hk`;
CREATE TABLE `gp_code_hk` (
`id` int NOT NULL AUTO_INCREMENT,
`gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`market_cap` decimal(20, 2) NULL DEFAULT NULL,
`mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2956 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有指数代码-爬取数据使用' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_code_zs
-- ----------------------------
-- Index ("指数") symbol universe; same layout as gp_code_all.
DROP TABLE IF EXISTS `gp_code_zs`;
CREATE TABLE `gp_code_zs` (
`id` int NOT NULL AUTO_INCREMENT,
`gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`market_cap` decimal(20, 2) NULL DEFAULT NULL,
`mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 6686 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有指数代码-爬取数据使用' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_data
-- ----------------------------
-- Backtest bookkeeping per stock: similarity threshold (xiangsidu) and
-- sample count (ii), the return filter (raye_ga_hc), the win-rate rule
-- selector (huice_function_num: 0/1/2 — see its column COMMENT), and
-- yes/no prediction tallies for 1-day, 2-day and 5-day horizons.
DROP TABLE IF EXISTS `gp_data`;
CREATE TABLE `gp_data` (
`id` int NOT NULL AUTO_INCREMENT,
`pg_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`xiangsidu` decimal(10, 2) NULL DEFAULT NULL COMMENT '控制相似度为多少的时候进行回测 和下面的数字是关联的',
`ii` int NULL DEFAULT NULL COMMENT '相似度的条数',
`raye_ga_hc` decimal(10, 2) NULL DEFAULT NULL COMMENT '回测涨跌幅大于多少的数据',
`huice_function_num` int NULL DEFAULT NULL COMMENT '回测判断胜率方法 0是代表相差1%或者同涨同跌都算胜 1是代表相差1%算胜 2是代表同涨同跌算胜',
`yes_yuce` int NULL DEFAULT NULL,
`no_yuce` int NULL DEFAULT NULL,
`yes_yuce_twoday` int NULL DEFAULT NULL,
`no_yuce_twoday` int NULL DEFAULT NULL,
`yes_yuce_fiveday` int NULL DEFAULT NULL,
`no_yuce_fiveday` int NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 42690 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_day_data
-- ----------------------------
-- Daily OHLCV bars per symbol (~28M rows by AUTO_INCREMENT), indexed by
-- symbol, timestamp, and the (symbol, timestamp) pair used by range scans.
-- NOTE(review): open/high/low/close/chg are stored as varchar(255), not
-- DECIMAL — consumers must cast (see the loader side); also nothing here
-- enforces uniqueness of (symbol, timestamp), so dedup is the writer's job.
DROP TABLE IF EXISTS `gp_day_data`;
CREATE TABLE `gp_day_data` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
`symbol` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '个股代码',
`timestamp` timestamp NULL DEFAULT NULL COMMENT '时间戳',
`volume` bigint NULL DEFAULT NULL COMMENT '数量',
`open` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '开始价',
`high` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '最高价',
`low` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '最低价',
`close` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '结束价',
`chg` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '变化数值',
`percent` decimal(10, 2) NULL DEFAULT NULL COMMENT '变化百分比',
`turnoverrate` decimal(10, 2) NULL DEFAULT NULL COMMENT '换手率',
`amount` bigint NULL DEFAULT NULL COMMENT '成交金额',
`pb` decimal(10, 2) NULL DEFAULT NULL COMMENT '当前PB',
`pe` decimal(10, 2) NULL DEFAULT NULL COMMENT '当前PE',
`ps` decimal(10, 2) NULL DEFAULT NULL COMMENT '当前PS',
`create_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_symbol`(`symbol` ASC) USING BTREE,
INDEX `idx_timestamp`(`timestamp` ASC) USING BTREE,
INDEX `idx_symbol_time`(`symbol` ASC, `timestamp` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 28356293 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_ex_rights_log
-- ----------------------------
-- Audit log of ex-rights (除权) price adjustments: before/after close for
-- a stock on change_date, when the fixup script ran (update_time), and
-- whether band optimization completed (optimization_flag = 1 means done).
DROP TABLE IF EXISTS `gp_ex_rights_log`;
CREATE TABLE `gp_ex_rights_log` (
`id` int NOT NULL AUTO_INCREMENT,
`stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '股票代码',
`change_date` date NULL DEFAULT NULL COMMENT '除权变动日期',
`before_price` decimal(10, 3) NULL DEFAULT NULL COMMENT '变动前收盘价(数据库中的价格)',
`after_price` decimal(10, 3) NULL DEFAULT NULL COMMENT '变动后收盘价API获取的价格',
`update_time` datetime NULL DEFAULT NULL COMMENT '脚本执行的更新时间',
`optimization_flag` int NULL DEFAULT NULL COMMENT '波段优化标志1为已经完成优化',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 285 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '股票除权日志表' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for gp_gnbk
-- ----------------------------
-- Board-membership tables: each row links a board (bk_code/bk_name) to a
-- member stock (gp_code/gp_name). gp_gnbk* are concept ("概念") boards,
-- gp_hybk is industry ("行业") boards.
-- NOTE(review): none of these four tables has a primary key and id is
-- nullable; the TEXT columns and gp_hybk's `Unnamed: 5`/`Unnamed: 6`
-- columns look like a raw DataFrame export — confirm the loader and
-- consider whether the spill columns can be dropped.
DROP TABLE IF EXISTS `gp_gnbk`;
CREATE TABLE `gp_gnbk` (
`id` bigint NULL DEFAULT NULL,
`bk_code` bigint NULL DEFAULT NULL,
`bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_gnbk_all
-- ----------------------------
-- Same layout as gp_gnbk (see note above).
DROP TABLE IF EXISTS `gp_gnbk_all`;
CREATE TABLE `gp_gnbk_all` (
`id` bigint NULL DEFAULT NULL,
`bk_code` bigint NULL DEFAULT NULL,
`bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_gnbk_gn
-- ----------------------------
-- Same layout as gp_gnbk (see note above).
DROP TABLE IF EXISTS `gp_gnbk_gn`;
CREATE TABLE `gp_gnbk_gn` (
`id` bigint NULL DEFAULT NULL,
`bk_code` bigint NULL DEFAULT NULL,
`bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_hybk
-- ----------------------------
-- Industry-board membership; carries two unnamed spill columns (doubles).
DROP TABLE IF EXISTS `gp_hybk`;
CREATE TABLE `gp_hybk` (
`id` bigint NULL DEFAULT NULL,
`bk_code` bigint NULL DEFAULT NULL,
`bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`Unnamed: 5` double NULL DEFAULT NULL,
`Unnamed: 6` double NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_main_business
-- ----------------------------
-- Main-business composition per reporting period: up to five ranked
-- products per (stock_code, report_date); the composite primary key
-- (stock_code, report_date, product_rank) makes loads idempotent.
-- report_date is a YYYYMMDD string and product_rank is '1'..'5'.
DROP TABLE IF EXISTS `gp_main_business`;
CREATE TABLE `gp_main_business` (
`stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
`stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票简称',
`report_date` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '报告期(YYYYMMDD)',
`product_rank` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '项目排名(1-5)',
`product_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '项目名称',
`revenue` decimal(20, 2) NULL DEFAULT NULL COMMENT '项目收入',
`cost` decimal(20, 2) NULL DEFAULT NULL COMMENT '项目成本',
`profit` decimal(20, 2) NULL DEFAULT NULL COMMENT '项目毛利',
`profit_margin` decimal(10, 4) NULL DEFAULT NULL COMMENT '项目毛利率',
PRIMARY KEY (`stock_code`, `report_date`, `product_rank`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_min_data
-- ----------------------------
-- Intraday (minute-level) bars per symbol; ~61M rows by AUTO_INCREMENT.
-- Same varchar-price caveat as gp_day_data, and here percent/turnoverrate/
-- amount are varchars too. No secondary indexes are declared — full scans
-- on symbol/timestamp will be expensive at this size.
DROP TABLE IF EXISTS `gp_min_data`;
CREATE TABLE `gp_min_data` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
`symbol` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '个股代码',
`timestamp` timestamp NULL DEFAULT NULL COMMENT '时间戳',
`volume` bigint NULL DEFAULT NULL COMMENT '数量',
`open` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '开始价',
`high` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '最高价',
`low` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '最低价',
`close` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '结束价',
`chg` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '变化数值',
`percent` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '变化百分比',
`turnoverrate` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`amount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '成交金额',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 61116588 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_product_category
-- ----------------------------
-- Mapping of products to categories per stock; indexed for lookups by
-- stock_code and by product_name. No uniqueness constraint, so the same
-- (category, product, stock) triple can appear more than once.
DROP TABLE IF EXISTS `gp_product_category`;
CREATE TABLE `gp_product_category` (
`id` int NOT NULL AUTO_INCREMENT,
`category_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`product_name` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_stock_code`(`stock_code` ASC) USING BTREE,
INDEX `idx_product_name`(`product_name` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 35869 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_zygc
-- ----------------------------
-- Main-business composition by industry segment (主营构成): per stock and
-- reporting period (belong_time), the segment name, its revenue (DECIMAL
-- with 0 scale — whole currency units) and gross margin.
DROP TABLE IF EXISTS `gp_zygc`;
CREATE TABLE `gp_zygc` (
`id` int NOT NULL AUTO_INCREMENT,
`gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票名称',
`gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票代码',
`zygc_xmmc` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '主营构成(按行业)-项目名称',
`zygc_xmsr` decimal(20, 0) NULL DEFAULT NULL COMMENT '主营构成(按行业)-项目收入',
`zygc_xmmlr` decimal(10, 2) NULL DEFAULT NULL COMMENT '主营构成(按行业)-项目毛利率',
`belong_time` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '所属财报期',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 12774 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '个股的主营构成' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for gp_zyyw
-- ----------------------------
-- Main-business revenue share (主营业务占比) per stock and reporting period;
-- companion table to gp_zygc with percentage-of-revenue granularity.
DROP TABLE IF EXISTS `gp_zyyw`;
CREATE TABLE `gp_zyyw` (
`id` int NOT NULL AUTO_INCREMENT,
`gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票名称',
`gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票代码',
`zyyw_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '主营业务名称',
`zyyw_zb` decimal(10, 2) NULL DEFAULT NULL COMMENT '主营业务占比',
`belong_time` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '所属财报期',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 25280 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '上市公司主营业务占比' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for hk_hot_stocks
-- ----------------------------
-- Hot-stock ranking snapshots for HK-listed symbols: rank value and its
-- increment/rank_change, plus price fields (current/percent/chg) at the
-- capture time (add_time). status is a one-char flag (semantics not
-- defined here — confirm against the writer).
-- Fix: the table default charset was `sjis` (Shift-JIS, Japanese), unlike
-- every other table in this schema; aligned to utf8mb4. Per-column
-- charsets were already utf8mb4, so only the default for future columns
-- changes.
DROP TABLE IF EXISTS `hk_hot_stocks`;
CREATE TABLE `hk_hot_stocks` (
`id` int NOT NULL AUTO_INCREMENT,
`symbol` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
`name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
`value` int NULL DEFAULT NULL,
`increment` int NULL DEFAULT NULL,
`rank_change` int NULL DEFAULT NULL,
`percent` float NULL DEFAULT NULL,
`current` float NULL DEFAULT NULL,
`chg` float NULL DEFAULT NULL,
`exchange` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
`stock_type` int NULL DEFAULT NULL,
`add_time` datetime NULL DEFAULT NULL,
`status` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 53981 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for limitup_analysis_stock_changes
-- ----------------------------
-- Wide snapshot of quote/fundamental fields for limit-up analysis; column
-- names mirror an upstream quote API payload (presumably Xueqiu-style
-- fields such as pe_ttm, float_market_capital — confirm the collector).
-- NOTE(review): no primary key, id is a plain nullable bigint, and all
-- text fields are TEXT — this looks like a raw DataFrame dump.
DROP TABLE IF EXISTS `limitup_analysis_stock_changes`;
CREATE TABLE `limitup_analysis_stock_changes` (
`symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`net_profit_cagr` double NULL DEFAULT NULL,
`north_net_inflow` double NULL DEFAULT NULL,
`ps` double NULL DEFAULT NULL,
`type` bigint NULL DEFAULT NULL,
`percent` double NULL DEFAULT NULL,
`has_follow` tinyint(1) NULL DEFAULT NULL,
`tick_size` double NULL DEFAULT NULL,
`pb_ttm` double NULL DEFAULT NULL,
`float_shares` bigint NULL DEFAULT NULL,
`current` double NULL DEFAULT NULL,
`amplitude` double NULL DEFAULT NULL,
`pcf` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`current_year_percent` double NULL DEFAULT NULL,
`float_market_capital` double NULL DEFAULT NULL,
`north_net_inflow_time` double NULL DEFAULT NULL,
`market_capital` double NULL DEFAULT NULL,
`dividend_yield` double NULL DEFAULT NULL,
`lot_size` bigint NULL DEFAULT NULL,
`roe_ttm` double NULL DEFAULT NULL,
`total_percent` double NULL DEFAULT NULL,
`percent5m` double NULL DEFAULT NULL,
`income_cagr` double NULL DEFAULT NULL,
`amount` double NULL DEFAULT NULL,
`chg` double NULL DEFAULT NULL,
`issue_date_ts` bigint NULL DEFAULT NULL,
`eps` double NULL DEFAULT NULL,
`main_net_inflows` double NULL DEFAULT NULL,
`volume` bigint NULL DEFAULT NULL,
`volume_ratio` double NULL DEFAULT NULL,
`pb` double NULL DEFAULT NULL,
`followers` bigint NULL DEFAULT NULL,
`turnover_rate` double NULL DEFAULT NULL,
`mapping_quote_current` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`first_percent` double NULL DEFAULT NULL,
`name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`pe_ttm` double NULL DEFAULT NULL,
`dual_counter_mapping_symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`total_shares` bigint NULL DEFAULT NULL,
`limitup_days` bigint NULL DEFAULT NULL,
`id` bigint NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for rzrq_data
-- ----------------------------
-- Exchange-level margin-trading aggregates, one row per trading day
-- (trade_date is the primary key). Columns follow a fixed pattern:
-- sh_/sz_/bj_ prefixes for Shanghai/Shenzhen/Beijing plus a total_
-- column, for financing balance, financing buys, securities-lending
-- balance, and combined rzrq balance. All values are in 亿元 (100M CNY)
-- per the column COMMENTs.
DROP TABLE IF EXISTS `rzrq_data`;
CREATE TABLE `rzrq_data` (
`trade_date` date NOT NULL COMMENT '交易日期',
`sh_financing_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '上海融资余额(亿元)',
`sz_financing_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '深圳融资余额(亿元)',
`bj_financing_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '北京融资余额(亿元)',
`total_financing_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '融资余额合计(亿元)',
`sh_financing_buy` decimal(12, 2) NULL DEFAULT NULL COMMENT '上海融资买入额(亿元)',
`sz_financing_buy` decimal(12, 2) NULL DEFAULT NULL COMMENT '深圳融资买入额(亿元)',
`bj_financing_buy` decimal(12, 2) NULL DEFAULT NULL COMMENT '北京融资买入额(亿元)',
`total_financing_buy` decimal(12, 2) NULL DEFAULT NULL COMMENT '融资买入额合计(亿元)',
`sh_securities_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '上海融券余量余额(亿元)',
`sz_securities_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '深圳融券余量余额(亿元)',
`bj_securities_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '北京融券余量余额(亿元)',
`total_securities_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '融券余量余额合计(亿元)',
`sh_rzrq_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '上海融资融券余额(亿元)',
`sz_rzrq_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '深圳融资融券余额(亿元)',
`bj_rzrq_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '北京融资融券余额(亿元)',
`total_rzrq_balance` decimal(12, 2) NULL DEFAULT NULL COMMENT '融资融券余额合计(亿元)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`trade_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '融资融券数据表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for stock_price_changes
-- ----------------------------
-- Lightweight price-change capture (symbol, name, price, percent) keyed
-- by nothing — no PK. time_mark and add_time are TEXT, not temporal
-- types; presumably written by a DataFrame export — confirm the writer.
DROP TABLE IF EXISTS `stock_price_changes`;
CREATE TABLE `stock_price_changes` (
`symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`current` double NULL DEFAULT NULL,
`percent` double NULL DEFAULT NULL,
`time_mark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
`add_time` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for stock_price_data
-- ----------------------------
-- Latest real-time quote per stock (stock_code is the primary key, so
-- rows are upserted in place); update_time auto-refreshes on update and
-- created_at records first insertion.
DROP TABLE IF EXISTS `stock_price_data`;
CREATE TABLE `stock_price_data` (
`stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
`stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '股票名称',
`latest_price` decimal(10, 2) NULL DEFAULT NULL COMMENT '最新价',
`change_percent` decimal(10, 2) NULL DEFAULT NULL COMMENT '涨跌幅',
`change_amount` decimal(10, 2) NULL DEFAULT NULL COMMENT '涨跌额',
`volume` bigint NULL DEFAULT NULL COMMENT '成交量(手)',
`amount` decimal(20, 2) NULL DEFAULT NULL COMMENT '成交额',
`amplitude` decimal(10, 2) NULL DEFAULT NULL COMMENT '振幅',
`turnover_rate` decimal(10, 2) NULL DEFAULT NULL COMMENT '换手率',
`pe_ratio` decimal(10, 2) NULL DEFAULT NULL COMMENT '市盈率',
`high_price` decimal(10, 2) NULL DEFAULT NULL COMMENT '最高价',
`low_price` decimal(10, 2) NULL DEFAULT NULL COMMENT '最低价',
`open_price` decimal(10, 2) NULL DEFAULT NULL COMMENT '开盘价',
`pre_close` decimal(10, 2) NULL DEFAULT NULL COMMENT '昨收价',
`total_market_value` decimal(20, 2) NULL DEFAULT NULL COMMENT '总市值',
`float_market_value` decimal(20, 2) NULL DEFAULT NULL COMMENT '流通市值',
`pb_ratio` decimal(10, 2) NULL DEFAULT NULL COMMENT '市净率',
`list_date` date NULL DEFAULT NULL COMMENT '上市日期',
`update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
PRIMARY KEY (`stock_code`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '实时股价数据表' ROW_FORMAT = Dynamic;
-- Re-enable FK enforcement now that all tables exist.
SET FOREIGN_KEY_CHECKS = 1;

View File

@ -17,4 +17,5 @@ google-genai
redis==5.2.1
pandas==2.2.3
apscheduler==3.11.0
pymongo==4.13.0
pymongo==4.13.0
scikit-learn==1.6.1

View File

@ -42,10 +42,12 @@ from src.valuation_analysis.index_analyzer import IndexAnalyzer
# 导入股票日线数据采集器
from src.scripts.stock_daily_data_collector import collect_stock_daily_data
from src.scripts.stock_daily_data_collector_v2 import collect_stock_daily_data_v2
from valuation_analysis.financial_analysis import FinancialAnalyzer
from src.valuation_analysis.stock_price_collector import StockPriceCollector
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data, get_stock_realtime_info_from_redis
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data
from src.quantitative_analysis.hk_stock_price_collector import fetch_and_store_hk_stock_data
from src.quantitative_analysis.momentum_analysis import MomentumAnalyzer
# 设置日志
@ -207,6 +209,24 @@ def run_stock_daily_collection1():
# 获取当天日期
today = datetime.now().strftime('%Y-%m-%d')
# 定义数据库连接地址
db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
# collect_stock_daily_data(db_url, today)
collect_stock_daily_data_v2(db_url)
except Exception as e:
logger.error(f"启动股票日线数据采集任务失败: {str(e)}")
return jsonify({
"status": "success"
}), 200
@app.route('/scheduler/stockDailyHK/collection', methods=['GET'])
def run_stock_daily_collection2():
"""执行股票日线数据采集任务 下午4点开始"""
try:
logger.info("开始执行股票日线数据采集")
# 获取当天日期
today = datetime.now().strftime('%Y-%m-%d')
# 定义数据库连接地址
db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
collect_stock_daily_data(db_url, today)
@ -3002,6 +3022,16 @@ def run_batch_stock_price_collection():
logger.error(f"批量采集A股行情失败: {str(e)}")
return jsonify({"status": "error", "message": str(e)})
@app.route('/scheduler/batch_hk_stock_price/collection', methods=['GET'])
def run_batch_hk_stock_price_collection():
    """批量采集港股行情并保存到数据库

    Triggers the HK batch quote collector. Returns a JSON status payload;
    failures are logged and reported as {"status": "error"} rather than a
    5xx, matching the sibling A-share endpoint.
    """
    try:
        fetch_and_store_hk_stock_data()
        # Fixed copy-pasted messages: this endpoint collects HK ("港股"),
        # not A-share ("A股") quotes.
        return jsonify({"status": "success", "message": "批量采集港股行情并保存到数据库成功"})
    except Exception as e:
        logger.error(f"批量采集港股行情失败: {str(e)}")
        return jsonify({"status": "error", "message": str(e)})
if __name__ == '__main__':
# 启动Web服务器

View File

@ -0,0 +1,310 @@
# coding:utf-8
#计算股价平均距离因子-行业个股列表来计算
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import StandardScaler
from sqlalchemy import bindparam, create_engine, text
warnings.filterwarnings('ignore')
class AverageDistanceFactor:
"""平均距离因子计算器"""
def __init__(self, db_url):
    """Build the pooled SQLAlchemy engine shared by all query methods.

    Args:
        db_url: SQLAlchemy database URL for the MySQL instance.
    """
    # Small pool with hourly recycling so long-running jobs do not keep
    # stale MySQL connections around.
    pool_options = {
        "pool_size": 5,
        "max_overflow": 10,
        "pool_recycle": 3600,
    }
    self.engine = create_engine(db_url, **pool_options)
def get_industry_stocks(self, industry_name=None, concept_name=None):
    """Return the stock symbols belonging to an industry or concept board.

    Exactly one of industry_name / concept_name must be given; industry
    boards come from gp_hybk, concept boards from gp_gnbk. Returns a list
    of symbol strings ([] on no match or query failure).
    """
    if industry_name:
        # Industry board lookup. The board name is bound as a parameter.
        stmt = text("""
            SELECT DISTINCT gp_code as symbol
            FROM gp_hybk
            WHERE bk_name = :name
        """)
        bind = {"name": industry_name}
    elif concept_name:
        # Concept board lookup.
        stmt = text("""
            SELECT DISTINCT gp_code as symbol
            FROM gp_gnbk
            WHERE bk_name = :name
        """)
        bind = {"name": concept_name}
    else:
        raise ValueError("必须提供 industry_name 或 concept_name 之一")

    try:
        with self.engine.connect() as conn:
            rows = conn.execute(stmt, bind).fetchall()
        if not rows:
            print(f"未找到{'行业' if industry_name else '概念'} {industry_name or concept_name} 的股票")
            return []
        symbols = [row[0] for row in rows]
        print(f"获取到 {len(symbols)} 只股票")
        return symbols
    except Exception as e:
        print(f"获取股票列表失败: {e}")
        return []
def get_stock_data(self, symbols, days=20):
"""获取股票的历史数据"""
if not symbols:
return pd.DataFrame()
# 计算开始日期
end_date = datetime.now()
start_date = end_date - timedelta(days=days * 2) # 多取一些数据以防节假日
# 构建SQL查询
symbols_str = "', '".join(symbols)
query = f"""
SELECT symbol, timestamp, volume, open, high, low, close,
chg, percent, turnoverrate, amount, pb, pe, ps
FROM gp_day_data
WHERE symbol IN ('{symbols_str}')
AND timestamp >= '{start_date.strftime('%Y-%m-%d')}'
ORDER BY symbol, timestamp DESC
"""
try:
df = pd.read_sql(query, self.engine)
print(f"获取到 {len(df)} 条历史数据")
return df
except Exception as e:
print(f"获取历史数据失败: {e}")
return pd.DataFrame()
def calculate_technical_indicators(self, df, days=20):
"""计算技术指标"""
result_data = []
for symbol in df['symbol'].unique():
stock_data = df[df['symbol'] == symbol].copy()
stock_data = stock_data.sort_values('timestamp')
# 只取最近N天的数据
stock_data = stock_data.tail(days)
if len(stock_data) < days:
continue # 数据不足,跳过
# 转换数据类型
for col in ['open', 'high', 'low', 'close', 'chg']:
stock_data[col] = pd.to_numeric(stock_data[col], errors='coerce')
# 计算各种技术指标
indicators = self._compute_indicators(stock_data)
indicators['symbol'] = symbol
result_data.append(indicators)
return pd.DataFrame(result_data)
def _compute_indicators(self, data):
"""计算具体的技术指标"""
indicators = {}
# 1. 收益率指标
data['returns'] = data['close'].pct_change()
indicators['return_5d'] = data['returns'].tail(5).sum() # 5日累计收益率
indicators['return_10d'] = data['returns'].tail(10).sum() # 10日累计收益率
indicators['return_20d'] = data['returns'].tail(20).sum() # 20日累计收益率
# 2. 波动率指标
indicators['volatility_5d'] = data['returns'].tail(5).std() # 5日波动率
indicators['volatility_10d'] = data['returns'].tail(10).std() # 10日波动率
indicators['volatility_20d'] = data['returns'].tail(20).std() # 20日波动率
# 3. 价格相对位置
indicators['price_position_5d'] = (data['close'].iloc[-1] - data['low'].tail(5).min()) / (data['high'].tail(5).max() - data['low'].tail(5).min())
indicators['price_position_10d'] = (data['close'].iloc[-1] - data['low'].tail(10).min()) / (data['high'].tail(10).max() - data['low'].tail(10).min())
indicators['price_position_20d'] = (data['close'].iloc[-1] - data['low'].tail(20).min()) / (data['high'].tail(20).max() - data['low'].tail(20).min())
# 4. 移动平均偏离度
ma_5 = data['close'].tail(5).mean()
ma_10 = data['close'].tail(10).mean()
ma_20 = data['close'].tail(20).mean()
current_price = data['close'].iloc[-1]
indicators['ma_deviation_5d'] = (current_price - ma_5) / ma_5
indicators['ma_deviation_10d'] = (current_price - ma_10) / ma_10
indicators['ma_deviation_20d'] = (current_price - ma_20) / ma_20
# 5. 成交量相关指标
indicators['volume_ratio_5d'] = data['volume'].tail(5).mean() / data['volume'].mean()
indicators['volume_ratio_10d'] = data['volume'].tail(10).mean() / data['volume'].mean()
indicators['turnover_avg_5d'] = data['turnoverrate'].tail(5).mean()
indicators['turnover_avg_10d'] = data['turnoverrate'].tail(10).mean()
# 6. 价格振幅指标
data['amplitude'] = (data['high'] - data['low']) / data['close']
indicators['amplitude_avg_5d'] = data['amplitude'].tail(5).mean()
indicators['amplitude_avg_10d'] = data['amplitude'].tail(10).mean()
# 7. 趋势强度(连续涨跌)
indicators['consecutive_up'] = self._count_consecutive(data['percent'] > 0)
indicators['consecutive_down'] = self._count_consecutive(data['percent'] < 0)
# 8. 估值动量如果有PE、PB、PS数据
if 'pe' in data.columns and not data['pe'].isna().all():
pe_change = data['pe'].pct_change().tail(5).mean()
indicators['pe_momentum'] = pe_change if not np.isnan(pe_change) else 0
else:
indicators['pe_momentum'] = 0
if 'pb' in data.columns and not data['pb'].isna().all():
pb_change = data['pb'].pct_change().tail(5).mean()
indicators['pb_momentum'] = pb_change if not np.isnan(pb_change) else 0
else:
indicators['pb_momentum'] = 0
# 处理NaN值
for key, value in indicators.items():
if np.isnan(value) or np.isinf(value):
indicators[key] = 0
return indicators
def _count_consecutive(self, condition_series):
"""计算连续满足条件的天数"""
if len(condition_series) == 0:
return 0
count = 0
for value in reversed(condition_series.tolist()):
if value:
count += 1
else:
break
return count
def calculate_distance_factor(self, indicators_df):
"""计算平均距离因子"""
if len(indicators_df) < 2:
print("股票数量不足,无法计算距离因子")
return pd.DataFrame()
# 准备特征矩阵
feature_columns = [col for col in indicators_df.columns if col != 'symbol']
X = indicators_df[feature_columns].values
# 标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 计算距离矩阵
distances = euclidean_distances(X_scaled)
# 计算每只股票的平均距离
n_stocks = len(indicators_df)
avg_distances = []
for i in range(n_stocks):
# 排除自己与自己的距离(对角线元素)
other_distances = np.concatenate([distances[i, :i], distances[i, i+1:]])
avg_distance = np.mean(other_distances)
avg_distances.append(avg_distance)
# 创建结果DataFrame
result_df = pd.DataFrame({
'symbol': indicators_df['symbol'],
'avg_distance_factor': avg_distances
})
# 按距离因子降序排列
result_df = result_df.sort_values('avg_distance_factor', ascending=False)
result_df['rank'] = range(1, len(result_df) + 1)
return result_df
def analyze_industry(self, industry_name=None, concept_name=None, days=20):
"""分析指定行业或概念的平均距离因子"""
print(f"开始分析{'行业' if industry_name else '概念'}: {industry_name or concept_name}")
# 1. 获取股票列表
symbols = self.get_industry_stocks(industry_name, concept_name)
if not symbols:
return pd.DataFrame()
# 2. 获取历史数据
stock_data = self.get_stock_data(symbols, days)
if stock_data.empty:
return pd.DataFrame()
# 3. 计算技术指标
print("计算技术指标...")
indicators_df = self.calculate_technical_indicators(stock_data, days)
if indicators_df.empty:
return pd.DataFrame()
print(f"成功计算了 {len(indicators_df)} 只股票的技术指标")
# 4. 计算平均距离因子
print("计算平均距离因子...")
distance_df = self.calculate_distance_factor(indicators_df)
if not distance_df.empty:
print(f"计算完成,共 {len(distance_df)} 只股票")
print(distance_df.to_string(index=False))
return distance_df
def get_available_industries(self):
"""获取可用的行业列表"""
query = "SELECT DISTINCT bk_name FROM gp_hybk ORDER BY bk_name"
try:
df = pd.read_sql(query, self.engine)
return df['bk_name'].tolist()
except Exception as e:
print(f"获取行业列表失败: {e}")
return []
def get_available_concepts(self):
"""获取可用的概念列表"""
query = "SELECT DISTINCT bk_name FROM gp_gnbk ORDER BY bk_name"
try:
df = pd.read_sql(query, self.engine)
return df['bk_name'].tolist()
except Exception as e:
print(f"获取概念列表失败: {e}")
return []
def __del__(self):
if hasattr(self, 'engine'):
self.engine.dispose()
def main():
    """Example driver: score the '证券' industry with the distance factor.

    Other usages (concept boards, listing available boards) are shown in the
    AverageDistanceFactor methods: analyze_industry(concept_name=...),
    get_available_industries(), get_available_concepts().
    """
    db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
    analyzer = AverageDistanceFactor(db_url)
    analyzer.analyze_industry(industry_name="证券")


if __name__ == "__main__":
    main()

View File

@ -10,6 +10,9 @@ import json
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
# 导入代理管理器
from src.scripts.ProxyIP import EnhancedProxyManager
# 读取雪球headers和Redis配置
try:
from src.scripts.config import XUEQIU_HEADERS
@ -28,6 +31,9 @@ except ImportError:
REDIS_KEY = 'xq_stock_changes_latest' # 存放行情的主键
# 创建全局代理管理器实例
proxy_manager = EnhancedProxyManager()
def get_redis_conn():
"""获取Redis连接"""
@ -62,8 +68,9 @@ def fetch_and_store_stock_data(page_size=90):
'type': stock_type
}
# 初次请求以获取总页数
response = requests.get(base_url, headers=headers, params=params)
# 初次请求以获取总页数,使用代理
response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
# response = requests.get(base_url, headers=headers, params=params)
if response.status_code != 200:
print(f"请求 {stock_type} 数据失败,状态码:{response.status_code}")
continue
@ -74,10 +81,12 @@ def fetch_and_store_stock_data(page_size=90):
for page in range(1, total_pages + 1):
params['page'] = page
response = requests.get(base_url, headers=headers, params=params)
# response = requests.get(base_url, headers=headers, params=params)
response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
if response.status_code == 200:
data = response.json()
all_data.extend(data['data']['list'])
print(f"成功采集第 {page}/{total_pages} 页数据")
else:
print(f"请求 {stock_type} 数据第 {page} 页失败,状态码:{response.status_code}")
# 转换为 DataFrame
@ -99,8 +108,12 @@ def fetch_and_store_stock_data(page_size=90):
pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
pipe.execute()
print(f"成功将数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
# 返回DataFrame供其他脚本使用
return df
else:
print("未获取到任何数据。")
return pd.DataFrame()
def format_stock_code(stock_code):

View File

@ -0,0 +1,317 @@
# coding:utf-8
# 判断企业生命周期
import pandas as pd
import pymongo
import logging
from typing import Dict, List, Optional
import sys
import os
# 添加项目根目录到路径
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
# 导入配置
try:
from valuation_analysis.config import MONGO_CONFIG2
except ImportError:
import importlib.util
config_path = os.path.join(project_root, 'valuation_analysis', 'config.py')
spec = importlib.util.spec_from_file_location("config", config_path)
config_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(config_module)
MONGO_CONFIG2 = config_module.MONGO_CONFIG2
# 导入股票代码格式化工具
try:
from tools.stock_code_formatter import StockCodeFormatter
except ImportError:
import importlib.util
formatter_path = os.path.join(os.path.dirname(project_root), 'tools', 'stock_code_formatter.py')
spec = importlib.util.spec_from_file_location("stock_code_formatter", formatter_path)
formatter_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(formatter_module)
StockCodeFormatter = formatter_module.StockCodeFormatter
# 配置日志
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CompanyLifecycleFactor:
    """Company lifecycle-stage factor calculator.

    Reads a company's annual-report cash-flow statement from MongoDB and
    classifies the firm into one of five lifecycle stages from the signs of
    operating / investing / financing net cash flow.
    """

    def __init__(self):
        """Connect to MongoDB and build the stage lookup tables."""
        self.mongo_client = None
        self.db = None
        self.collection = None
        self.connect_mongodb()
        # Normalizes stock codes (300661 / SZ300661 / 300661.SZ) to the
        # dotted format used as the MongoDB key.
        self.stock_formatter = StockCodeFormatter()
        # Stage id -> human-readable stage name.
        self.lifecycle_stages = {
            1: "引入期",
            2: "成长期",
            3: "成熟期",
            4: "震荡期",
            5: "衰退期"
        }
        # Cash-flow sign pattern (operating, investing, financing) -> stage id.
        # NOTE(review): every key below is the identical empty-string triple,
        # so this literal collapses to a single dict entry whose value is the
        # last one written (2); every other pattern falls through to 0
        # ("unknown") in determine_lifecycle_stage. The sign characters
        # (presumably '正'/'负') appear to have been lost in this copy of the
        # file — restore them before trusting the classification.
        self.cashflow_pattern_mapping = {
            ('', '', ''): 1,  # introduction stage
            ('', '', ''): 2,  # growth stage
            ('', '', ''): 3,  # maturity stage
            ('', '', ''): 4,  # shake-out stage
            ('', '', ''): 5,  # decline stage
            ('', '', ''): 4,  # shake-out stage (variant)
            ('', '', ''): 4,  # shake-out stage (distressed)
            ('', '', ''): 2,  # growth stage (variant, cash-rich)
        }

    def connect_mongodb(self):
        """Open the MongoDB connection and select the financial-data collection.

        Raises:
            Exception: re-raised when the connection or ping fails.
        """
        try:
            self.mongo_client = pymongo.MongoClient(
                host=MONGO_CONFIG2['host'],
                port=MONGO_CONFIG2['port'],
                username=MONGO_CONFIG2['username'],
                password=MONGO_CONFIG2['password']
            )
            self.db = self.mongo_client[MONGO_CONFIG2['db']]
            self.collection = self.db['eastmoney_financial_data_v2']
            # Fail fast if the server is unreachable.
            self.mongo_client.admin.command('ping')
            logger.info("MongoDB连接成功")
        except Exception as e:
            logger.error(f"MongoDB连接失败: {str(e)}")
            raise

    def get_annual_financial_data(self, stock_code: str, year: int) -> Optional[Dict]:
        """Fetch one stock's annual-report document for a given year.

        Args:
            stock_code: Stock code in any supported format
                (300661.SZ, 300661, SZ300661).
            year: Report year, e.g. 2024.

        Returns:
            The annual financial-data document, or None when absent/on error.
        """
        try:
            # Normalize to the dotted format stored in the collection.
            normalized_code = self.stock_formatter.to_dot_format(stock_code)
            # Annual reports are keyed by the 12-31 report date.
            report_date = f"{year}-12-31"
            query = {
                "stock_code": normalized_code,
                "report_date": report_date
            }
            annual_data = self.collection.find_one(query)
            if annual_data:
                logger.info(f"找到年报数据: {stock_code} (标准化后: {normalized_code}) - {report_date}")
                return annual_data
            else:
                logger.warning(f"未找到年报数据: {stock_code} (标准化后: {normalized_code}) - {report_date}")
                return None
        except Exception as e:
            logger.error(f"获取年报数据失败: {stock_code} - {year} - {str(e)}")
            return None

    def extract_cashflow_values(self, financial_data: Dict) -> tuple:
        """Extract the three headline cash-flow totals from a document.

        Returns:
            Tuple of (operating, investing, financing) net cash flow as
            floats; any value that is missing or unparseable comes back as
            None. On unexpected errors returns (None, None, None).
        """
        try:
            cash_flow_statement = financial_data.get('cash_flow_statement', {})
            # Raw values as stored by the eastmoney collector.
            operating_cashflow = cash_flow_statement.get('NETCASH_OPERATE')
            investing_cashflow = cash_flow_statement.get('NETCASH_INVEST')
            financing_cashflow = cash_flow_statement.get('NETCASH_FINANCE')

            # Coerce to float, mapping missing/blank/bad values to None.
            def safe_float_convert(value):
                if value is None or value == '':
                    return None
                try:
                    return float(value)
                except (ValueError, TypeError):
                    return None
            operating_cashflow = safe_float_convert(operating_cashflow)
            investing_cashflow = safe_float_convert(investing_cashflow)
            financing_cashflow = safe_float_convert(financing_cashflow)
            return operating_cashflow, investing_cashflow, financing_cashflow
        except Exception as e:
            logger.error(f"提取现金流数据失败: {str(e)}")
            return None, None, None

    def classify_cashflow_pattern(self, operating_cf: float, investing_cf: float, financing_cf: float) -> tuple:
        """Map each cash-flow value to its sign label; None -> "未知".

        NOTE(review): both branches of classify_value return an empty string
        in this copy — the positive/negative sign characters were evidently
        lost in transit, so every non-None value classifies identically.
        Restore the intended literals before use.
        """
        def classify_value(value):
            if value is None:
                return "未知"
            return "" if value >= 0 else ""
        operating_pattern = classify_value(operating_cf)
        investing_pattern = classify_value(investing_cf)
        financing_pattern = classify_value(financing_cf)
        return operating_pattern, investing_pattern, financing_pattern

    def determine_lifecycle_stage(self, cashflow_pattern: tuple) -> int:
        """Look up the stage id for a sign pattern.

        Returns:
            int: stage id 1-5, or 0 when the pattern is not in the mapping.
        """
        stage_id = self.cashflow_pattern_mapping.get(cashflow_pattern, 0)
        return stage_id

    def calculate_lifecycle_factor(self, stock_code: str, year: int) -> Dict:
        """Compute the lifecycle-stage factor for one stock and year.

        Args:
            stock_code: Stock code in any supported format
                (300661.SZ, 300661, SZ300661).
            year: Report year.

        Returns:
            Dict with keys stock_code, year, stage_id (0 on any failure),
            and stage_name (a diagnostic label when stage_id is 0).
        """
        try:
            # Annual-report lookup; stage 0 when the document is missing.
            financial_data = self.get_annual_financial_data(stock_code, year)
            if not financial_data:
                return {
                    'stock_code': stock_code,
                    'year': year,
                    'stage_id': 0,
                    'stage_name': '数据缺失'
                }
            # Stage 0 when any of the three cash-flow totals is unusable.
            operating_cf, investing_cf, financing_cf = self.extract_cashflow_values(financial_data)
            if None in [operating_cf, investing_cf, financing_cf]:
                return {
                    'stock_code': stock_code,
                    'year': year,
                    'stage_id': 0,
                    'stage_name': '数据不完整'
                }
            # Sign pattern -> stage id -> stage name.
            cashflow_pattern = self.classify_cashflow_pattern(operating_cf, investing_cf, financing_cf)
            stage_id = self.determine_lifecycle_stage(cashflow_pattern)
            stage_name = self.lifecycle_stages.get(stage_id, '未知阶段')
            return {
                'stock_code': stock_code,
                'year': year,
                'stage_id': stage_id,
                'stage_name': stage_name
            }
        except Exception as e:
            logger.error(f"计算生命周期因子失败: {stock_code} - {year} - {str(e)}")
            return {
                'stock_code': stock_code,
                'year': year,
                'stage_id': 0,
                'stage_name': '计算失败'
            }

    def batch_calculate_lifecycle_factors(self, stock_codes: List[str], year: int) -> pd.DataFrame:
        """Compute the lifecycle factor for many stocks in one year.

        Args:
            stock_codes: Stock code list.
            year: Report year.

        Returns:
            pd.DataFrame: one row per stock with the calculate_lifecycle_factor
            result; a stage-distribution summary is logged at the end.
        """
        results = []
        total_stocks = len(stock_codes)
        logger.info(f"开始批量计算 {total_stocks} 只股票 {year} 年的企业生命周期因子")
        for i, stock_code in enumerate(stock_codes, 1):
            # Progress log every 100 stocks and at the end.
            if i % 100 == 0 or i == total_stocks:
                progress = (i / total_stocks) * 100
                logger.info(f"进度: [{i}/{total_stocks}] ({progress:.1f}%)")
            result = self.calculate_lifecycle_factor(stock_code, year)
            results.append(result)
        df = pd.DataFrame(results)
        # Log how many stocks landed in each stage.
        stage_distribution = df['stage_name'].value_counts()
        logger.info(f"{year}年企业生命周期阶段分布:")
        for stage, count in stage_distribution.items():
            percentage = (count / len(df)) * 100
            logger.info(f"  {stage}: {count} 只 ({percentage:.1f}%)")
        return df

    def __del__(self):
        """Best-effort MongoDB teardown; guarded because __init__ may fail
        before mongo_client is assigned."""
        if hasattr(self, 'mongo_client') and self.mongo_client:
            self.mongo_client.close()
def main():
    """Example driver: single-stock and batch lifecycle-stage analysis."""
    try:
        calculator = CompanyLifecycleFactor()

        # Single-stock example for the 2024 annual report.
        print("=== 单只股票分析示例 ===")
        single = calculator.calculate_lifecycle_factor('600519.SH', 2024)
        print(f"股票: {single['stock_code']}")
        print(f"年份: {single['year']}")
        print(f"生命周期阶段: {single['stage_name']}")

        # Batch example over a small sample of codes.
        print("\n=== 批量分析示例 ===")
        sample_codes = ['300879.SZ', '301123.SZ', '300884.SZ', '300918.SZ', '600908.SH']
        batch_df = calculator.batch_calculate_lifecycle_factors(sample_codes, 2024)
        print("\n2024年生命周期阶段结果:")
        print(batch_df[['stock_code', 'stage_name']].to_string(index=False))
    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -127,7 +127,7 @@ class FinancialDataCollectorV2:
List[str]: 股票代码列表
"""
try:
query = "SELECT DISTINCT gp_code_two FROM gp_code_all WHERE gp_code_two IS NOT NULL AND gp_code_two != ''"
query = "SELECT DISTINCT gp_code_two FROM gp_code_all_copy1 WHERE gp_code_two IS NOT NULL AND gp_code_two != ''"
with self.mysql_engine.connect() as conn:
df = pd.read_sql(text(query), conn)
@ -274,46 +274,164 @@ class FinancialDataCollectorV2:
return []
def fetch_balance_sheet(self, stock_code: str, periods: int = 21) -> List[Dict]:
"""获取资产负债表数据"""
"""获取资产负债表数据 - 支持API自动切换 (G→B→S→I)"""
date_filter = self.build_date_filter(stock_code, periods)
url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GBALANCE&sty=F10_FINANCE_GBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=012481899342117453'
# 先尝试G系列API
g_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GBALANCE&sty=F10_FINANCE_GBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=012481899342117453'
headers = {"Content-Type": "application/json"}
try:
response = requests.get(url, headers=headers, timeout=30)
response = requests.get(g_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
if 'result' in data and 'data' in data['result']:
logger.info(f"成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
return data['result']['data']
# 检查G系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"G系列API返回空数据尝试B系列API - {stock_code}")
# 切换到B系列API
b_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_BBALANCE&sty=F10_FINANCE_BBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
response = requests.get(b_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
# 检查B系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"B系列API也返回空数据尝试S系列API - {stock_code}")
# 切换到S系列API (证券公司)
s_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_SBALANCE&sty=F10_FINANCE_SBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
response = requests.get(s_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
# 检查S系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"S系列API也返回空数据尝试I系列API - {stock_code}")
# 切换到I系列API
i_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_IBALANCE&sty=F10_FINANCE_IBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
response = requests.get(i_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"I系列API成功获取资产负债表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"I系列API也无法获取资产负债表数据 - {stock_code}")
return []
else:
# S系列API成功
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"S系列API成功获取资产负债表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"S系列API资产负债表数据格式异常 - {stock_code}")
return []
else:
# B系列API成功
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"B系列API成功获取资产负债表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"B系列API资产负债表数据格式异常 - {stock_code}")
return []
else:
logger.warning("资产负债表数据格式异常")
return []
# G系列API成功
if 'result' in data and 'data' in data['result']:
logger.info(f"G系列API成功获取资产负债表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning("G系列API资产负债表数据格式异常")
return []
except Exception as e:
logger.error(f"获取资产负债表失败: {str(e)}")
return []
def fetch_cash_flow_statement(self, stock_code: str, periods: int = 21) -> List[Dict]:
"""获取现金流量表数据"""
"""获取现金流量表数据 - 支持API自动切换 (G→B→S→I)"""
date_filter = self.build_date_filter(stock_code, periods)
url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GCASHFLOW&sty=APP_F10_GCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=04664977872701077'
# 先尝试G系列API
g_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GCASHFLOW&sty=APP_F10_GCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=04664977872701077'
headers = {"Content-Type": "application/json"}
try:
response = requests.get(url, headers=headers, timeout=30)
response = requests.get(g_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
if 'result' in data and 'data' in data['result']:
logger.info(f"成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
return data['result']['data']
# 检查G系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"G系列API返回空数据尝试B系列API - {stock_code}")
# 切换到B系列API
b_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_BCASHFLOW&sty=APP_F10_BCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
response = requests.get(b_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
# 检查B系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"B系列API也返回空数据尝试S系列API - {stock_code}")
# 切换到S系列API (证券公司)
s_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_SCASHFLOW&sty=APP_F10_SCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
response = requests.get(s_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
# 检查S系列API是否返回有效数据
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
logger.info(f"S系列API也返回空数据尝试I系列API - {stock_code}")
# 切换到I系列API
i_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_ICASHFLOW&sty=APP_F10_ICASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
response = requests.get(i_url, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"I系列API成功获取现金流量表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"I系列API也无法获取现金流量表数据 - {stock_code}")
return []
else:
# S系列API成功
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"S系列API成功获取现金流量表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"S系列API现金流量表数据格式异常 - {stock_code}")
return []
else:
# B系列API成功
if 'result' in data and data['result'] and 'data' in data['result']:
logger.info(f"B系列API成功获取现金流量表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning(f"B系列API现金流量表数据格式异常 - {stock_code}")
return []
else:
logger.warning("现金流量表数据格式异常")
return []
# G系列API成功
if 'result' in data and 'data' in data['result']:
logger.info(f"G系列API成功获取现金流量表数据{len(data['result']['data'])} 个报告期")
return data['result']['data']
else:
logger.warning("G系列API现金流量表数据格式异常")
return []
except Exception as e:
logger.error(f"获取现金流量表失败: {str(e)}")
@ -421,9 +539,124 @@ class FinancialDataCollectorV2:
logger.error(f"保存数据到MongoDB失败: {str(e)}")
return False
def check_missing_data(self, stock_code: str) -> List[str]:
"""
检查MongoDB中哪些报告期的资产负债表或现金流量表数据为空
Args:
stock_code: 股票代码
Returns:
List[str]: 需要更新的报告期列表
"""
try:
# 查询该股票的所有记录
records = list(self.collection.find({'stock_code': stock_code}))
missing_periods = []
for record in records:
balance_empty = not record.get('balance_sheet') or record.get('balance_sheet') == {}
cash_empty = not record.get('cash_flow_statement') or record.get('cash_flow_statement') == {}
# 如果资产负债表或现金流量表为空,则需要更新
if balance_empty or cash_empty:
missing_periods.append(record.get('report_date'))
logger.debug(f"发现需要更新的数据: {stock_code} - {record.get('report_date')} (资产负债表空: {balance_empty}, 现金流量表空: {cash_empty})")
if missing_periods:
logger.info(f"股票 {stock_code}{len(missing_periods)} 个报告期需要更新数据")
else:
logger.info(f"股票 {stock_code} 的数据完整,无需更新")
return missing_periods
except Exception as e:
logger.error(f"检查缺失数据失败: {str(e)}")
return []
def update_missing_financial_data(self, stock_code: str, missing_periods: List[str]) -> bool:
"""
更新缺失的财务数据只更新资产负债表和现金流量表
Args:
stock_code: 股票代码
missing_periods: 需要更新的报告期列表
Returns:
bool: 是否更新成功
"""
try:
if not missing_periods:
return True
logger.info(f"开始更新股票 {stock_code} 缺失的财务数据")
# 获取资产负债表和现金流量表数据
balance_data = self.fetch_balance_sheet(stock_code, periods=21)
time.sleep(1)
cash_data = self.fetch_cash_flow_statement(stock_code, periods=21)
time.sleep(1)
# 创建按报告日期索引的字典
balance_dict = {item['REPORT_DATE'][:10]: item for item in balance_data if item.get('REPORT_DATE')}
cash_dict = {item['REPORT_DATE'][:10]: item for item in cash_data if item.get('REPORT_DATE')}
updated_count = 0
for report_date in missing_periods:
try:
# 查找当前记录
current_record = self.collection.find_one({
'stock_code': stock_code,
'report_date': report_date
})
if not current_record:
logger.warning(f"未找到记录: {stock_code} - {report_date}")
continue
# 准备更新的字段
update_fields = {}
# 检查是否需要更新资产负债表
balance_empty = not current_record.get('balance_sheet') or current_record.get('balance_sheet') == {}
if balance_empty and report_date in balance_dict:
update_fields['balance_sheet'] = balance_dict[report_date]
logger.debug(f"更新资产负债表: {stock_code} - {report_date}")
# 检查是否需要更新现金流量表
cash_empty = not current_record.get('cash_flow_statement') or current_record.get('cash_flow_statement') == {}
if cash_empty and report_date in cash_dict:
update_fields['cash_flow_statement'] = cash_dict[report_date]
logger.debug(f"更新现金流量表: {stock_code} - {report_date}")
# 如果有字段需要更新
if update_fields:
update_fields['collect_time'] = datetime.datetime.now() # 更新采集时间
self.collection.update_one(
{'stock_code': stock_code, 'report_date': report_date},
{'$set': update_fields}
)
updated_count += 1
logger.info(f"成功更新: {stock_code} - {report_date}")
except Exception as e:
logger.error(f"更新记录失败: {stock_code} - {report_date} - {str(e)}")
continue
logger.info(f"股票 {stock_code} 更新完成,共更新 {updated_count} 个报告期")
return True
except Exception as e:
logger.error(f"更新缺失财务数据失败: {str(e)}")
return False
def collect_financial_data(self, stock_code: str, periods: int = 21) -> bool:
"""
采集单只股票的财务数据
采集单只股票的财务数据 - 增量更新模式
Args:
stock_code: 股票代码'300750.SZ'
@ -433,37 +666,20 @@ class FinancialDataCollectorV2:
bool: 是否采集成功
"""
try:
logger.info(f"开始采集股票 {stock_code} 的财务数据{periods}个报告期)")
logger.info(f"开始检查股票 {stock_code} 的财务数据")
# 获取三张财务报表数据
profit_data = self.fetch_profit_statement(stock_code, periods)
time.sleep(1) # 避免请求过于频繁
# 检查哪些报告期的数据缺失
missing_periods = self.check_missing_data(stock_code)
balance_data = self.fetch_balance_sheet(stock_code, periods)
time.sleep(1)
if not missing_periods:
logger.info(f"股票 {stock_code} 数据完整,跳过")
return True
cash_data = self.fetch_cash_flow_statement(stock_code, periods)
time.sleep(1)
# 检查至少有一张表有数据
if not any([profit_data, balance_data, cash_data]):
logger.error(f"股票 {stock_code} 没有获取到任何财务数据")
return False
# 处理财务数据
financial_data_list = self.process_financial_data(
stock_code, profit_data, balance_data, cash_data
)
if not financial_data_list:
logger.error(f"股票 {stock_code} 的财务数据处理失败")
return False
# 保存到MongoDB
success = self.save_to_mongodb(financial_data_list)
# 更新缺失的数据
success = self.update_missing_financial_data(stock_code, missing_periods)
if success:
logger.info(f"股票 {stock_code} 的财务数据采集完成")
logger.info(f"股票 {stock_code} 的财务数据更新完成")
return success
@ -473,19 +689,19 @@ class FinancialDataCollectorV2:
def batch_collect_financial_data(self, stock_codes: List[str], periods: int = 21) -> Dict:
"""
批量采集多只股票的财务数据
批量更新多只股票的缺失财务数据
Args:
stock_codes: 股票代码列表
periods: 获取多少个报告期默认21个季度
Returns:
Dict: 采集结果统计
Dict: 更新结果统计
"""
results = {'success': 0, 'failed': 0, 'failed_stocks': []}
results = {'success': 0, 'failed': 0, 'failed_stocks': [], 'skipped': 0}
total_stocks = len(stock_codes)
logger.info(f"开始批量采集 {total_stocks} 只股票的财务数据")
logger.info(f"开始批量检查和更新 {total_stocks} 只股票的财务数据")
for index, stock_code in enumerate(stock_codes, 1):
try:
@ -496,11 +712,11 @@ class FinancialDataCollectorV2:
success = self.collect_financial_data(stock_code, periods)
if success:
results['success'] += 1
logger.info(f"SUCCESS [{index}/{total_stocks}] {stock_code} 采集成功")
logger.info(f"SUCCESS [{index}/{total_stocks}] {stock_code} 处理成功")
else:
results['failed'] += 1
results['failed_stocks'].append(stock_code)
logger.warning(f"FAILED [{index}/{total_stocks}] {stock_code} 采集失败")
logger.warning(f"FAILED [{index}/{total_stocks}] {stock_code} 处理失败")
# 每只股票之间暂停一下,避免请求过于频繁
time.sleep(2)
@ -519,7 +735,7 @@ class FinancialDataCollectorV2:
continue
success_rate = (results['success'] / total_stocks) * 100
logger.info(f"批量采集完成: 成功{results['success']}只,失败{results['failed']}只,成功率: {success_rate:.2f}%")
logger.info(f"批量更新完成: 成功{results['success']}只,失败{results['failed']}只,成功率: {success_rate:.2f}%")
if results['failed_stocks']:
logger.info(f"失败的股票数量: {len(results['failed_stocks'])}")
@ -541,7 +757,7 @@ class FinancialDataCollectorV2:
def main():
"""主函数 - 批量采集所有股票的财务数据"""
"""主函数 - 批量更新所有股票的缺失财务数据"""
collector = FinancialDataCollectorV2()
try:
@ -555,30 +771,32 @@ def main():
logger.info(f"从数据库获取到 {len(stock_codes)} 只股票")
# 可以选择采集所有股票或者部分股票进行测试
# 可以选择处理所有股票或者部分股票进行测试
# 如果要测试,可以取前几只股票
# 测试模式:只采集前10只股票
TEST_MODE = False # 设置为False将采集所有股票
# 测试模式:只处理前10只股票
TEST_MODE = False # 设置为False将处理所有股票
if TEST_MODE:
test_count = min(10, len(stock_codes)) # 最多取10只股票测试
stock_codes = stock_codes[:test_count]
logger.info(f"TEST MODE: 仅采集{test_count} 只股票")
logger.info(f"TEST MODE: 仅处理{test_count} 只股票")
else:
logger.info(f"PRODUCTION MODE: 将采集全部 {len(stock_codes)} 只股票")
logger.info(f"PRODUCTION MODE: 将处理全部 {len(stock_codes)} 只股票")
logger.info(f"开始批量采集 {len(stock_codes)} 只股票的财务数据")
logger.info(f"开始批量检查和更新 {len(stock_codes)} 只股票的财务数据")
logger.info("注意: 本次运行为增量更新模式,只会更新缺失的资产负债表和现金流量表数据")
# 批量采集
# 批量更新
results = collector.batch_collect_financial_data(stock_codes, periods=21)
# 输出最终结果
print(f"\n{'='*50}")
print(f"批量采集完成统计")
print(f"批量更新完成统计")
print(f"{'='*50}")
print(f"SUCCESS 成功采集: {results['success']} 只股票")
print(f"FAILED 采集失败: {results['failed']} 只股票")
print(f"SUCCESS 成功处理: {results['success']} 只股票")
print(f"FAILED 处理失败: {results['failed']} 只股票")
print(f"SUCCESS RATE 成功率: {(results['success'] / len(stock_codes) * 100):.2f}%")
print(f"\n说明: 成功处理包括数据完整(无需更新)和成功更新缺失数据的股票")
if results['failed_stocks']:
print(f"\n失败的股票列表:")
@ -593,7 +811,7 @@ def main():
logger.info("用户中断程序执行")
print("\n警告: 程序被用户中断")
except Exception as e:
logger.error(f"采集过程中出现错误: {str(e)}")
logger.error(f"更新过程中出现错误: {str(e)}")
print(f"\n错误: 程序执行出错: {str(e)}")
finally:
collector.close_connection()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,197 @@
import requests
import pandas as pd
from datetime import datetime
import sys
import os
import redis
import json
# 添加项目根目录到路径便于导入scripts.config
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
# 导入代理管理器
from src.scripts.ProxyIP import EnhancedProxyManager
# 读取雪球headers和Redis配置
try:
from src.scripts.config import XUEQIU_HEADERS
from src.valuation_analysis.config import REDIS_CONFIG
except ImportError:
XUEQIU_HEADERS = {
'User-Agent': 'Mozilla/5.0',
'Cookie': '', # 需要填写雪球cookie
}
REDIS_CONFIG = {
'host': 'localhost',
'port': 6379,
'db': 0,
'password': None
}
REDIS_KEY = 'xq_hk_stock_changes_latest' # 存放港股行情的主键
# 创建全局代理管理器实例
proxy_manager = EnhancedProxyManager()
def get_redis_conn():
    """Return a Redis client backed by a fresh connection pool.

    Connection parameters come from the module-level REDIS_CONFIG;
    ``db`` and ``password`` are optional and default to 0 / None.
    """
    pool_kwargs = {
        'host': REDIS_CONFIG['host'],
        'port': REDIS_CONFIG['port'],
        'db': REDIS_CONFIG.get('db', 0),
        'password': REDIS_CONFIG.get('password', None),
        # Decode bytes to str so callers get plain strings back.
        'decode_responses': True,
    }
    return redis.Redis(connection_pool=redis.ConnectionPool(**pool_kwargs))
def fetch_and_store_hk_stock_data(page_size=90):
    """
    Fetch the latest quotes for all Hong Kong stocks from Xueqiu, page by page,
    and store them in a Redis hash (field = symbol, value = JSON row).

    :param page_size: number of records requested per page
    """
    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    headers = XUEQIU_HEADERS
    all_data = []
    # Query parameters for the HK-market screener endpoint.
    params = {
        'page': 1,
        'size': page_size,
        'order': 'desc',
        'order_by': 'dividend_yield',
        'market': 'HK',  # Hong Kong market
        'type': 'hk'     # HK quote type
    }
    # First request: only used to learn the total record count.
    # All requests are routed through the proxy manager.
    try:
        response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
        if response.status_code != 200:
            print(f"请求港股数据失败,状态码:{response.status_code}")
            return
    except Exception as e:
        print(f"请求港股数据时发生异常:{e}")
        return
    data = response.json()
    total_count = data['data']['count']
    # Ceiling division. The previous `count // size + 1` requested one extra,
    # empty page whenever total_count was an exact multiple of page_size.
    total_pages = (total_count + page_size - 1) // page_size
    print(f"开始采集港股数据,共 {total_pages} 页,{total_count} 条记录")
    # Fetch every page; a failed page is logged and skipped.
    for page in range(1, total_pages + 1):
        params['page'] = page
        try:
            response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                all_data.extend(data['data']['list'])
                print(f"成功采集港股第 {page}/{total_pages} 页数据")
            else:
                print(f"请求港股数据第 {page} 页失败,状态码:{response.status_code}")
        except Exception as e:
            print(f"请求港股数据第 {page} 页时发生异常:{e}")
            continue
    # Convert to a DataFrame and write the snapshot to Redis.
    df = pd.DataFrame(all_data)
    if not df.empty:
        df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # Store as a hash: field = symbol, value = JSON-encoded row.
        r = get_redis_conn()
        pipe = r.pipeline()
        # Queue the DEL on the pipeline so clearing the old snapshot and
        # writing the new one are sent to Redis in a single batch, instead
        # of deleting eagerly and leaving the key empty while we fill it.
        pipe.delete(REDIS_KEY)
        for _, row in df.iterrows():
            symbol = row.get('symbol')
            if not symbol:
                continue
            # Store the whole row; trim to selected fields here if needed.
            value = row.to_dict()
            pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
        pipe.execute()
        print(f"成功将港股数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
    else:
        print("未获取到任何港股数据。")
def format_hk_stock_code(stock_code):
    """
    Normalize an HK stock code (accepts forms such as ``0700.HK`` or ``HK0700``).

    Returns a tuple ``(xueqiu_format, redis_format)``; both elements are the
    same canonical ``NNNN.HK`` string.
    """
    normalized = stock_code.upper()
    if '.HK' in normalized:
        canonical = normalized
    elif normalized.startswith('HK'):
        # Strip the leading "HK" prefix and append the suffix instead.
        canonical = f'{normalized[2:]}.HK'
    else:
        # Assume a bare numeric code and append the .HK suffix.
        canonical = f'{normalized}.HK'
    return canonical, canonical
def get_hk_stock_realtime_info_from_redis(stock_code):
    """
    Look up the latest HK quote for a stock in Redis and repackage it into the
    fixed result structure expected by callers.

    :param stock_code: accepts formats such as 0700.HK or HK0700
    :return: dict with the standard quote fields, or None when the symbol is
             absent from Redis or its payload cannot be decoded
    """
    _, redis_code = format_hk_stock_code(stock_code)
    raw = get_redis_conn().hget(REDIS_KEY, redis_code)
    if not raw:
        return None
    try:
        data = json.loads(raw)
    except Exception:
        return None
    # Intraday "high"/"low" are preferred; only when those keys are absent do
    # we fall back to the 52-week figures. Same pattern for the previous close.
    high = data.get("high") if "high" in data else data.get("high52w")
    low = data.get("low") if "low" in data else data.get("low52w")
    prev_close = data.get("last_close") if "last_close" in data else data.get("pre_close")
    # Map the Xueqiu payload onto the caller-facing structure. Fields missing
    # from the payload simply come through as None.
    return {
        "code": data.get("symbol"),
        "crawlDate": data.get("fetch_time"),
        "marketValue": data.get("market_capital"),
        "maxPrice": high,
        "minPrice": low,
        "nowPrice": data.get("current"),
        "pbRate": data.get("pb"),
        "rangeRiseAndFall": data.get("percent"),
        "shortName": data.get("name"),
        "todayStartPrice": data.get("open"),
        "ttm": data.get("pe_ttm"),
        "turnoverRate": data.get("turnover_rate"),
        "yesterdayEndPrice": prev_close,
    }
if __name__ == '__main__':
    # Script entry point: refresh the HK quote snapshot stored in Redis.
    fetch_and_store_hk_stock_data()

View File

@ -0,0 +1,306 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
供应商客户占比数据更新器
用于将前五供应商占比和前五客户占比数据更新到MongoDB的eastmoney_financial_data_v2集合中
这个脚本要在财务脚本之后执行
"""
import os
import sys
import pandas as pd
import pymongo
import datetime
import logging
from typing import Dict, List, Optional
from pathlib import Path
# 添加项目根路径到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.append(str(project_root))
# 导入配置
from src.valuation_analysis.config import MONGO_CONFIG2
# 设置日志
logging.basicConfig(
level=logging.ERROR,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class SupplierCustomerRatioUpdater:
    """Updater that writes top-5 supplier / top-5 customer concentration ratios
    (read from two CSV files) into the MongoDB ``eastmoney_financial_data_v2``
    collection. Intended to run after the financial-data collection script, so
    the records it updates already exist."""

    def __init__(self):
        """Initialize MongoDB handles, CSV file paths and the quarter mapping."""
        self.mongo_client = None
        self.db = None
        self.collection_name = 'eastmoney_financial_data_v2'
        self.collection = None
        # CSV file paths - resolved relative to this file (absolute paths) so
        # the script works regardless of the current working directory.
        current_dir = Path(__file__).parent
        self.supplier_file = current_dir / 'data' / '前五供应商占比.csv'
        self.customer_file = current_dir / 'data' / '前五客户占比.csv'
        # Maps quarter column names as they appear in the CSV files to the
        # report_date string format stored in MongoDB.
        self.quarter_mapping = {
            '20200331': '2020-03-31',
            '20200630': '2020-06-30',
            '20200930': '2020-09-30',
            '20201231': '2020-12-31',
            '20210331': '2021-03-31',
            '20210630': '2021-06-30',
            '20210930': '2021-09-30',
            '20211231': '2021-12-31',
            '20220331': '2022-03-31',
            '20220630': '2022-06-30',
            '20220930': '2022-09-30',
            '20221231': '2022-12-31',
            '20230331': '2023-03-31',
            '20230630': '2023-06-30',
            '20230930': '2023-09-30',
            '20231231': '2023-12-31',
            '20240331': '2024-03-31',
            '20240630': '2024-06-30',
            '20240930': '2024-09-30',
            '20241231': '2024-12-31',
            '20250331': '2025-03-31'
        }
        self.connect_mongodb()

    def connect_mongodb(self):
        """Connect to MongoDB, select the target collection and verify the
        connection with a ping. Raises on failure (the updater is unusable
        without a database connection)."""
        try:
            self.mongo_client = pymongo.MongoClient(
                host=MONGO_CONFIG2['host'],
                port=MONGO_CONFIG2['port'],
                username=MONGO_CONFIG2['username'],
                password=MONGO_CONFIG2['password']
            )
            self.db = self.mongo_client[MONGO_CONFIG2['db']]
            self.collection = self.db[self.collection_name]
            # Round-trip ping to confirm the connection is actually usable.
            self.mongo_client.admin.command('ping')
            logger.info(f"MongoDB连接成功使用集合: {self.collection_name}")
        except Exception as e:
            logger.error(f"MongoDB连接失败: {str(e)}")
            raise

    def load_csv_data(self, file_path: str) -> pd.DataFrame:
        """Load one CSV file into a DataFrame.

        Returns an empty DataFrame (rather than raising) when the file is
        missing or unreadable, so a single missing file does not abort the run.
        Note: callers pass pathlib.Path objects; os.path.exists and
        pd.read_csv accept both str and Path.
        """
        try:
            if not os.path.exists(file_path):
                logger.error(f"文件不存在: {file_path}")
                return pd.DataFrame()
            df = pd.read_csv(file_path, encoding='utf-8')
            logger.info(f"成功加载文件: {file_path}, 数据行数: {len(df)}")
            return df
        except Exception as e:
            logger.error(f"加载CSV文件失败 {file_path}: {str(e)}")
            return pd.DataFrame()

    def parse_ratio_data(self, df: pd.DataFrame, data_type: str) -> Dict[str, Dict[str, float]]:
        """
        Parse ratio values out of a loaded CSV DataFrame.

        Args:
            df: CSV data as a DataFrame (one row per stock, one column per quarter)
            data_type: label used only for logging ('supplier' or 'customer')

        Returns:
            Dict: {stock_code: {report_date: ratio_value}} where ratio_value is
            a fraction (percent / 100). Zero and NaN cells are skipped entirely.
        """
        ratio_data = {}
        try:
            for _, row in df.iterrows():
                stock_code = row['stock_code']
                if pd.isna(stock_code):
                    continue
                ratio_data[stock_code] = {}
                # Walk every known quarter column present in this row.
                for quarter_col, report_date in self.quarter_mapping.items():
                    if quarter_col in row:
                        ratio_value = row[quarter_col]
                        # Skip 0 / NaN cells; otherwise convert the percentage
                        # to a fraction (e.g. 56.22 -> 0.5622).
                        if pd.notna(ratio_value) and ratio_value > 0:
                            ratio_data[stock_code][report_date] = float(ratio_value) / 100.0
                        else:
                            # Zero or empty value: leave this quarter unset.
                            continue
            logger.info(f"解析{data_type}数据完成,共处理 {len(ratio_data)} 只股票")
            return ratio_data
        except Exception as e:
            logger.error(f"解析{data_type}数据失败: {str(e)}")
            return {}

    def update_mongodb_record(self, stock_code: str, report_date: str,
                              supplier_ratio: Optional[float], customer_ratio: Optional[float]) -> bool:
        """
        Update a single (stock_code, report_date) record in MongoDB.

        Only existing records are touched; missing records are skipped so this
        stays an incremental enrichment pass, never an insert.

        Args:
            stock_code: stock code
            report_date: report date string (YYYY-MM-DD)
            supplier_ratio: top-5 supplier ratio (fraction), or None to skip
            customer_ratio: top-5 customer ratio (fraction), or None to skip

        Returns:
            bool: True only when MongoDB reports a modified document.
        """
        try:
            # Locate the target record by its compound business key.
            filter_condition = {
                'stock_code': stock_code,
                'report_date': report_date
            }
            existing_record = self.collection.find_one(filter_condition)
            if not existing_record:
                # Record does not exist - skip (no upsert by design).
                logger.debug(f"记录不存在,跳过: {stock_code} - {report_date}")
                return False
            # Build the $set payload from whichever ratios were supplied.
            update_fields = {}
            if supplier_ratio is not None:
                update_fields['top_five_suppliers_ratio'] = supplier_ratio
            if customer_ratio is not None:
                update_fields['top_five_customers_ratio'] = customer_ratio
            if not update_fields:
                return False
            # Stamp the update time so enriched records are identifiable.
            update_fields['ratio_update_time'] = datetime.datetime.now()
            result = self.collection.update_one(
                filter_condition,
                {'$set': update_fields}
            )
            if result.modified_count > 0:
                logger.debug(f"更新成功: {stock_code} - {report_date}")
                return True
            else:
                # Matched but values already identical - nothing changed.
                logger.debug(f"无需更新: {stock_code} - {report_date}")
                return False
        except Exception as e:
            logger.error(f"更新记录失败 {stock_code} - {report_date}: {str(e)}")
            return False

    def batch_update_ratios(self):
        """Load both CSV files, parse them, and update every (stock, quarter)
        pair found in either file. Returns True when the pass completes,
        False when there is no usable input or an unexpected error occurs."""
        try:
            # Load the two source CSVs; either may be missing.
            logger.info("开始加载CSV文件...")
            supplier_df = self.load_csv_data(self.supplier_file)
            customer_df = self.load_csv_data(self.customer_file)
            if supplier_df.empty and customer_df.empty:
                logger.error("没有可用的CSV数据")
                return False
            # Parse each file into {stock_code: {report_date: ratio}}.
            logger.info("解析数据中...")
            supplier_data = self.parse_ratio_data(supplier_df, 'supplier') if not supplier_df.empty else {}
            customer_data = self.parse_ratio_data(customer_df, 'customer') if not customer_df.empty else {}
            # Union of stock codes appearing in either dataset.
            all_stock_codes = set()
            all_stock_codes.update(supplier_data.keys())
            all_stock_codes.update(customer_data.keys())
            logger.info(f"开始更新数据,共涉及 {len(all_stock_codes)} 只股票")
            # Progress counters.
            total_count = 0
            updated_count = 0
            # Per-stock, per-quarter update loop.
            for stock_code in all_stock_codes:
                supplier_ratios = supplier_data.get(stock_code, {})
                customer_ratios = customer_data.get(stock_code, {})
                # Union of report dates present in either ratio map.
                all_dates = set()
                all_dates.update(supplier_ratios.keys())
                all_dates.update(customer_ratios.keys())
                for report_date in all_dates:
                    supplier_ratio = supplier_ratios.get(report_date)
                    customer_ratio = customer_ratios.get(report_date)
                    total_count += 1
                    if self.update_mongodb_record(stock_code, report_date, supplier_ratio, customer_ratio):
                        updated_count += 1
                    # Emit progress every 100 processed records.
                    if total_count % 100 == 0:
                        logger.info(f"已处理 {total_count} 条记录,更新 {updated_count} 条")
            logger.info(f"数据更新完成!总计处理 {total_count} 条记录,成功更新 {updated_count} 条")
            return True
        except Exception as e:
            logger.error(f"批量更新失败: {str(e)}")
            return False

    def close_connection(self):
        """Close the MongoDB client; safe to call even if never connected."""
        try:
            if self.mongo_client:
                self.mongo_client.close()
                logger.info("MongoDB连接已关闭")
        except Exception as e:
            logger.error(f"关闭MongoDB连接失败: {str(e)}")
def main():
    """Entry point: run the supplier/customer ratio batch update and make sure
    the MongoDB connection is released afterwards."""
    logger.info("=== 供应商客户占比数据更新器启动 ===")
    updater = None
    try:
        updater = SupplierCustomerRatioUpdater()
        # batch_update_ratios returns True on a completed pass.
        if updater.batch_update_ratios():
            logger.info("=== 数据更新成功完成 ===")
        else:
            logger.error("=== 数据更新失败 ===")
    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
    finally:
        # Always release the connection, even when construction failed midway.
        if updater:
            updater.close_connection()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,729 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
科技主题基本面因子选股策略
整合企业生命周期财务指标和平均距离因子分析
"""
import sys
import pymongo
import pandas as pd
import numpy as np
import logging
from typing import Dict, List, Optional, Tuple
from pathlib import Path
from sqlalchemy import create_engine, text
from datetime import datetime
import math
# 添加项目根路径到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.append(str(project_root))
# 导入依赖的工具类
from src.quantitative_analysis.company_lifecycle_factor import CompanyLifecycleFactor
from src.quantitative_analysis.financial_indicator_analyzer import FinancialIndicatorAnalyzer
from src.quantitative_analysis.average_distance_factor import AverageDistanceFactor
from src.valuation_analysis.config import MONGO_CONFIG2, DB_URL
# 设置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class TechFundamentalFactorStrategy:
    """Tech-theme fundamental factor stock-selection strategy.

    Pipeline: pull tech-concept stocks from MySQL, split them by company
    lifecycle stage (growth vs mature), compute an average-distance factor,
    common fundamental factors and stage-specific factors, then score and
    rank each stage's stocks."""

    def __init__(self):
        """Initialize the calculators, MySQL engine and the tech concept list."""
        self.lifecycle_calculator = CompanyLifecycleFactor()
        self.financial_analyzer = FinancialIndicatorAnalyzer()
        self.distance_calculator = AverageDistanceFactor(DB_URL)
        # MySQL connection pool for the concept-board lookup.
        self.mysql_engine = create_engine(
            DB_URL,
            pool_size=5,
            max_overflow=10,
            pool_recycle=3600
        )
        # Tech concept board names used to select the stock universe.
        self.tech_concepts = [
            "5G概念", "物联网", "云计算", "边缘计算", "信息安全", "国产软件",
            "大数据", "数据中心", "芯片", "MCU芯片", "汽车芯片", "存储芯片",
            "人工智能", "AIGC概念", "ChatGPT概念", "CPO概念", "华为鸿蒙",
            "华为海思", "华为算力", "量子科技", "区块链", "数字货币", "工业互联",
            "操作系统", "光刻机", "第三代半导体", "元宇宙概念", "云游戏", "信创",
            "东数西算", "PCB概念", "先进封装", "EDA概念", "Web3概念", "数据确权",
            "数据要素", "数字水印", "工业软件", "6G概念", "时空大数据", "算力租赁",
            "光通信", "英伟达概念", "星闪概念", "液冷服务器", "多模态AI", "Sora概念",
            "AI手机PC", "铜缆高速连接", "车联网", "财税数字化", "智谱AI", "AI智能体",
            "DeepSeek概念", "AI医疗概念"
        ]
        # Debug override: uncomment to test against a single concept board.
        # self.tech_concepts = [
        #     "5G概念"
        # ]
        logger.info("科技主题基本面因子选股策略初始化完成")

    def get_tech_stocks(self) -> pd.DataFrame:
        """
        Fetch the list of stocks belonging to the configured tech concept boards.

        Returns:
            pd.DataFrame: columns stock_code, stock_name, concept_name
            (empty DataFrame on failure).
        """
        try:
            # Build the IN-list from the concept names.
            # NOTE(review): values are interpolated into the SQL string; safe
            # only because tech_concepts is a hard-coded internal list — do not
            # feed user input through this path.
            concepts_str = "', '".join(self.tech_concepts)
            query = text(f"""
                SELECT DISTINCT gp_code as stock_code, gp_name as stock_name, bk_name as concept_name
                FROM gp_gnbk
                WHERE bk_name IN ('{concepts_str}')
                ORDER BY gp_code
            """)
            with self.mysql_engine.connect() as conn:
                df = pd.read_sql(query, conn)
            logger.info(f"获取到 {len(df)} 只科技概念股票")
            return df
        except Exception as e:
            logger.error(f"获取科技概念股票失败: {str(e)}")
            return pd.DataFrame()

    def filter_by_lifecycle(self, stock_codes: List[str], year: int = 2024) -> Dict[str, List[str]]:
        """
        Split stocks by company lifecycle stage.

        Args:
            stock_codes: stock codes to classify
            year: analysis year

        Returns:
            Dict: {'growth': [...], 'mature': [...]} — introduction (stage 1)
            and growth (stage 2) are merged into 'growth'; stage 3 is 'mature'.
        """
        try:
            logger.info(f"开始分析 {len(stock_codes)} 只股票的企业生命周期")
            # Batch lifecycle computation for the whole universe.
            lifecycle_df = self.lifecycle_calculator.batch_calculate_lifecycle_factors(stock_codes, year)
            # Stage ids: 1 = introduction, 2 = growth (merged), 3 = mature.
            growth_stage_stocks = lifecycle_df[
                lifecycle_df['stage_id'].isin([1, 2])
            ]['stock_code'].tolist()
            mature_stage_stocks = lifecycle_df[
                lifecycle_df['stage_id'] == 3
            ]['stock_code'].tolist()
            result = {
                'growth': growth_stage_stocks,
                'mature': mature_stage_stocks
            }
            logger.info(f"成长期股票: {len(growth_stage_stocks)} 只")
            logger.info(f"成熟期股票: {len(mature_stage_stocks)} 只")
            return result
        except Exception as e:
            logger.error(f"生命周期筛选失败: {str(e)}")
            return {'growth': [], 'mature': []}

    def calculate_distance_factors(self, growth_stocks: List[str], mature_stocks: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Compute the average distance factor separately for the growth-stage
        and mature-stage universes.

        Args:
            growth_stocks: growth-stage stock codes
            mature_stocks: mature-stage stock codes

        Returns:
            Tuple: (growth distance-factor DataFrame, mature distance-factor DataFrame);
            either may be empty when its input list is empty or data is missing.
        """
        try:
            growth_distance_df = pd.DataFrame()
            mature_distance_df = pd.DataFrame()
            # Growth-stage distance factor.
            if growth_stocks:
                logger.info(f"计算 {len(growth_stocks)} 只成长期股票的距离因子")
                growth_data = self.distance_calculator.get_stock_data(growth_stocks)
                if not growth_data.empty:
                    growth_indicators = self.distance_calculator.calculate_technical_indicators(growth_data)
                    growth_distance_df = self.distance_calculator.calculate_distance_factor(growth_indicators)
            # Mature-stage distance factor.
            if mature_stocks:
                logger.info(f"计算 {len(mature_stocks)} 只成熟期股票的距离因子")
                mature_data = self.distance_calculator.get_stock_data(mature_stocks)
                if not mature_data.empty:
                    mature_indicators = self.distance_calculator.calculate_technical_indicators(mature_data)
                    mature_distance_df = self.distance_calculator.calculate_distance_factor(mature_indicators)
            return growth_distance_df, mature_distance_df
        except Exception as e:
            logger.error(f"计算距离因子失败: {str(e)}")
            return pd.DataFrame(), pd.DataFrame()

    def calculate_common_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """
        Compute the factors shared by both stages (gross margin, growth score,
        supplier/customer concentration).

        Args:
            stock_codes: stock codes to process

        Returns:
            pd.DataFrame: one row per stock with the common factor columns.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只股票的通用因子")
            results = []
            latest_date = "2025-03-31"  # latest quarterly data
            annual_date = "2024-12-31"  # annual-report data
            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}
                    # 1. Gross profit margin (latest quarter).
                    gross_margin = self.financial_analyzer.analyze_gross_profit_margin(stock_code, latest_date)
                    factor_data['gross_profit_margin'] = gross_margin
                    # 2. Growth capability, squashed to (0, 1) via sigmoid
                    #    (higher is better); missing values get a neutral 0.5.
                    growth_capability = self.financial_analyzer.analyze_growth_capability(stock_code)
                    if growth_capability is not None:
                        growth_score = 1 / (1 + math.exp(-growth_capability))
                    else:
                        growth_score = 0.5  # neutral default
                    factor_data['growth_score'] = growth_score
                    # 3. Top-5 supplier concentration (annual report).
                    supplier_conc = self.financial_analyzer.analyze_supplier_concentration(stock_code, annual_date)
                    factor_data['supplier_concentration'] = supplier_conc
                    # 4. Top-5 customer concentration (annual report).
                    customer_conc = self.financial_analyzer.analyze_customer_concentration(stock_code, annual_date)
                    factor_data['customer_concentration'] = customer_conc
                    results.append(factor_data)
                except Exception as e:
                    # One failing stock must not abort the batch.
                    logger.warning(f"计算股票 {stock_code} 通用因子失败: {str(e)}")
                    continue
            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只股票的通用因子")
            return df
        except Exception as e:
            logger.error(f"计算通用因子失败: {str(e)}")
            return pd.DataFrame()

    def calculate_growth_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """
        Compute growth-stage specific factors (admin expense ratio, R&D
        amortization ratio, asset-liability ratio).

        Args:
            stock_codes: growth-stage stock codes

        Returns:
            pd.DataFrame: one row per stock with the growth-specific columns.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只成长期股票的特色因子")
            results = []
            latest_date = "2025-03-31"  # latest quarterly data
            annual_date = "2024-12-31"  # annual-report data
            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}
                    # 1. Admin expense ratio (latest quarter).
                    admin_ratio = self.financial_analyzer.analyze_admin_expense_ratio(stock_code, latest_date)
                    factor_data['admin_expense_ratio'] = admin_ratio
                    # 2. Intangible-asset amortization as a share of R&D expense
                    #    (annual data). None (not 0) when R&D expense is zero,
                    #    so such stocks do not receive the top score by default.
                    # financial_data = self.financial_analyzer.get_financial_data(stock_code, latest_date)
                    financial_data = self.financial_analyzer.get_financial_data(stock_code, annual_date)
                    if financial_data:
                        intangible_amortize = financial_data.get('cash_flow_statement', {}).get('IA_AMORTIZE', 0)
                        rd_expense = financial_data.get('profit_statement', {}).get('RESEARCH_EXPENSE', 0)
                        if rd_expense and rd_expense != 0:
                            rd_amortize_ratio = intangible_amortize / rd_expense if intangible_amortize else 0
                        else:
                            rd_amortize_ratio = None  # None, not 0: keeps these stocks out of the top rank
                        factor_data['rd_amortize_ratio'] = rd_amortize_ratio
                    else:
                        factor_data['rd_amortize_ratio'] = None
                    # 3. Asset-liability ratio (latest quarter).
                    asset_liability_ratio = self.financial_analyzer.analyze_asset_liability_ratio(stock_code, latest_date)
                    factor_data['asset_liability_ratio'] = asset_liability_ratio
                    results.append(factor_data)
                except Exception as e:
                    logger.warning(f"计算股票 {stock_code} 成长期特色因子失败: {str(e)}")
                    continue
            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只成长期股票的特色因子")
            return df
        except Exception as e:
            logger.error(f"计算成长期特色因子失败: {str(e)}")
            return pd.DataFrame()

    def calculate_mature_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """
        Compute mature-stage specific factors (receivables turnover, R&D
        intensity, PB-ROE rank factor).

        Args:
            stock_codes: mature-stage stock codes

        Returns:
            pd.DataFrame: one row per stock with the mature-specific columns.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只成熟期股票的特色因子")
            latest_date = "2025-03-31"  # latest quarterly data
            # Fetch market-wide PB and ROE once, outside the loop, to avoid
            # re-querying for every stock.
            logger.info("获取全A股PB数据...")
            all_pb_data = self.financial_analyzer.get_all_stocks_pb_data()
            logger.info("获取全A股ROE数据...")
            all_roe_data = self.financial_analyzer.get_all_stocks_roe_data(latest_date)
            results = []
            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}
                    # 1. Accounts receivable turnover = revenue / receivables
                    #    (latest quarter). None when receivables are zero.
                    formatted_stock_code = self.financial_analyzer.code_formatter.to_dot_format(stock_code)
                    financial_data = self.financial_analyzer.get_financial_data(formatted_stock_code, latest_date)
                    if financial_data:
                        revenue = financial_data.get('profit_statement', {}).get('OPERATE_INCOME', 0)
                        accounts_rece = financial_data.get('balance_sheet', {}).get('ACCOUNTS_RECE', 0)
                        if accounts_rece and accounts_rece != 0:
                            turnover_ratio = revenue / accounts_rece if revenue else 0
                        else:
                            turnover_ratio = None  # None, not 0
                        factor_data['accounts_receivable_turnover'] = turnover_ratio
                    else:
                        factor_data['accounts_receivable_turnover'] = None
                    # 2. R&D intensity (latest quarter).
                    rd_intensity = self.financial_analyzer.analyze_rd_expense_ratio(stock_code, latest_date)
                    factor_data['rd_intensity'] = rd_intensity
                    # 3. PB-ROE rank factor, using the pre-fetched market data.
                    if all_pb_data and all_roe_data:
                        pb_roe_rank_factor = self.financial_analyzer.calculate_pb_roe_rank_factor(
                            stock_code, all_pb_data, all_roe_data
                        )
                        factor_data['pb_roe_rank_factor'] = pb_roe_rank_factor
                    else:
                        factor_data['pb_roe_rank_factor'] = None
                    results.append(factor_data)
                except Exception as e:
                    logger.warning(f"计算股票 {stock_code} 成熟期特色因子失败: {str(e)}")
                    continue
            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只成熟期股票的特色因子")
            return df
        except Exception as e:
            logger.error(f"计算成熟期特色因子失败: {str(e)}")
            return pd.DataFrame()

    def run_strategy(self, year: int = 2024) -> Dict[str, pd.DataFrame]:
        """
        Run the full stock-selection pipeline.

        Args:
            year: analysis year (passed to the lifecycle classifier)

        Returns:
            Dict: up to two entries ('growth', 'mature'), each a scored and
            ranked DataFrame; empty dict on failure or when nothing qualifies.
        """
        try:
            logger.info("开始运行科技主题基本面因子选股策略")
            # 1. Tech-concept stock universe.
            tech_stocks_df = self.get_tech_stocks()
            if tech_stocks_df.empty:
                logger.error("未获取到科技概念股票")
                return {}
            stock_codes = tech_stocks_df['stock_code'].unique().tolist()
            logger.info(f"共获取到 {len(stock_codes)} 只科技概念股票")
            # 2. Split by lifecycle stage.
            lifecycle_result = self.filter_by_lifecycle(stock_codes, year)
            growth_stocks = lifecycle_result['growth']
            mature_stocks = lifecycle_result['mature']
            if not growth_stocks and not mature_stocks:
                logger.warning("未找到符合条件的成长期或成熟期股票")
                return {}
            # 3. Average distance factor per stage.
            growth_distance_df, mature_distance_df = self.calculate_distance_factors(growth_stocks, mature_stocks)
            # 4. Common factors across both stages.
            all_qualified_stocks = growth_stocks + mature_stocks
            common_factors_df = self.calculate_common_factors(all_qualified_stocks)
            # 5. Stage-specific factors.
            growth_specific_df = self.calculate_growth_specific_factors(growth_stocks) if growth_stocks else pd.DataFrame()
            mature_specific_df = self.calculate_mature_specific_factors(mature_stocks) if mature_stocks else pd.DataFrame()
            # 6. Merge factor tables, then score and rank each stage.
            result = {}
            # Growth stage.
            if not growth_specific_df.empty:
                growth_result = growth_specific_df.copy()
                # Merge the distance factor (left join on stock code).
                if not growth_distance_df.empty:
                    growth_result = growth_result.merge(
                        growth_distance_df[['symbol', 'avg_distance_factor']],
                        left_on='stock_code', right_on='symbol', how='left'
                    ).drop('symbol', axis=1)
                # Merge the common factors.
                if not common_factors_df.empty:
                    growth_result = growth_result.merge(
                        common_factors_df, on='stock_code', how='left'
                    )
                # Per-factor percentile scores, then the composite score/rank.
                growth_result = self.calculate_factor_scores(growth_result, 'growth')
                growth_result = self.calculate_total_score(growth_result, 'growth')
                result['growth'] = growth_result
                logger.info(f"成长期结果: {len(growth_result)} 只股票")
            # Mature stage (same steps as above).
            if not mature_specific_df.empty:
                mature_result = mature_specific_df.copy()
                if not mature_distance_df.empty:
                    mature_result = mature_result.merge(
                        mature_distance_df[['symbol', 'avg_distance_factor']],
                        left_on='stock_code', right_on='symbol', how='left'
                    ).drop('symbol', axis=1)
                if not common_factors_df.empty:
                    mature_result = mature_result.merge(
                        common_factors_df, on='stock_code', how='left'
                    )
                mature_result = self.calculate_factor_scores(mature_result, 'mature')
                mature_result = self.calculate_total_score(mature_result, 'mature')
                result['mature'] = mature_result
                logger.info(f"成熟期结果: {len(mature_result)} 只股票")
            logger.info("科技主题基本面因子选股策略运行完成")
            return result
        except Exception as e:
            logger.error(f"策略运行失败: {str(e)}")
            return {}

    def calculate_factor_scores(self, df: pd.DataFrame, stage: str) -> pd.DataFrame:
        """
        Score every factor column as a 0-100 percentile rank, adding a
        '<factor>_score' column per factor.

        Args:
            df: DataFrame containing raw factor columns
            stage: stage label, 'growth' or 'mature' (used for logging)

        Returns:
            pd.DataFrame: input plus the per-factor score columns.
        """
        try:
            if df.empty:
                return df
            df_scored = df.copy()
            # Factor direction map: True = higher raw value scores higher,
            # False = lower raw value scores higher. Unknown columns default
            # to positive (see .get below).
            factor_directions = {
                # Common factors
                'gross_profit_margin': True,       # gross margin - positive
                'growth_score': True,              # growth capability - positive
                'supplier_concentration': False,   # top-5 supplier share - negative
                'customer_concentration': False,   # top-5 customer share - negative
                'avg_distance_factor': False,      # average distance factor - negative
                # Growth-stage factors
                'admin_expense_ratio': False,      # admin expense / revenue - negative
                'rd_amortize_ratio': False,        # R&D amortization share - negative
                'asset_liability_ratio': True,     # asset-liability ratio - positive (per strategy spec)
                # Mature-stage factors
                'accounts_receivable_turnover': True,  # receivables turnover - positive
                'rd_intensity': True,                  # R&D intensity - positive
                'pb_roe_rank_factor': False            # PB-ROE rank - negative (smaller is better)
            }
            # Percentile-rank every factor column.
            for column in df.columns:
                if column == 'stock_code':
                    continue
                # Rank only the non-null values.
                values = df_scored[column].dropna()
                if len(values) <= 1:
                    # Degenerate case: a single value gets 50, none gets 0.
                    if len(values) == 1:
                        df_scored[f'{column}_score'] = df_scored[column].apply(lambda x: 50 if pd.notna(x) else 0)
                    else:
                        df_scored[f'{column}_score'] = 0
                    continue
                # Rank direction depends on the factor's sign convention.
                is_positive = factor_directions.get(column, True)
                if is_positive:
                    # Positive factor: larger value -> higher score.
                    ranked_values = values.rank(pct=True) * 100
                else:
                    # Negative factor: smaller value -> higher score.
                    ranked_values = (1 - values.rank(pct=True)) * 100
                # Initialize the score column, then fill the ranked rows
                # (rows whose raw value was NaN keep a score of 0).
                df_scored[f'{column}_score'] = 0.0
                for idx in ranked_values.index:
                    df_scored.loc[idx, f'{column}_score'] = ranked_values[idx]
            logger.info(f"完成 {stage} 阶段 {len(df_scored)} 只股票的因子打分")
            return df_scored
        except Exception as e:
            logger.error(f"计算因子分数失败: {str(e)}")
            import traceback
            traceback.print_exc()
            return df

    def calculate_total_score(self, df: pd.DataFrame, stage: str) -> pd.DataFrame:
        """
        Compute the composite score and rank per stock.

        Intended formula: total = 1/8 * Mean(Si) + Mean(Si)/Std(Si).
        NOTE(review): the code uses the *weighted* mean for the first term but
        the *unweighted* mean/std for the adjustment term — confirm whether
        that asymmetry is intended.

        Args:
            df: DataFrame containing the '<factor>_score' columns
            stage: 'growth' or 'mature' (selects the factor-weight set)

        Returns:
            pd.DataFrame: input plus 'total_score' and 'rank', sorted by
            total_score descending.
        """
        try:
            if df.empty:
                return df
            df_result = df.copy()
            # Factor weights keyed by the *score* column names.
            if stage == 'growth':
                factor_weights = {
                    # Common factors
                    'gross_profit_margin_score': 1/8,
                    'growth_score_score': 1/8,  # note the doubled suffix: score of 'growth_score'
                    'supplier_concentration_score': 1/8,
                    'customer_concentration_score': 1/8,
                    'avg_distance_factor_score': 1/8,
                    # Growth-stage factors
                    'admin_expense_ratio_score': 1/8,
                    'rd_amortize_ratio_score': 1/8,
                    'asset_liability_ratio_score': 1/8
                }
            else:  # mature
                factor_weights = {
                    # Common factors
                    'gross_profit_margin_score': 1/8,
                    'growth_score_score': 1/8,  # note the doubled suffix: score of 'growth_score'
                    'supplier_concentration_score': 1/8,
                    'customer_concentration_score': 1/8,
                    'avg_distance_factor_score': 1/8,
                    # Mature-stage factors
                    'accounts_receivable_turnover_score': 1/8,
                    'rd_intensity_score': 1/8,
                    'pb_roe_rank_factor_score': 1/8
                }
            # Per-stock composite score.
            total_scores = []
            for index, row in df_result.iterrows():
                # Collect this stock's valid factor scores. Scores of exactly 0
                # are treated as missing (they mark unranked/NaN factors).
                factor_scores = []
                valid_weights = []
                for factor, weight in factor_weights.items():
                    if factor in row and pd.notna(row[factor]) and row[factor] > 0:
                        factor_scores.append(row[factor])
                        valid_weights.append(weight)
                if len(factor_scores) == 0:
                    total_scores.append(0)
                    continue
                factor_scores = np.array(factor_scores)
                valid_weights = np.array(valid_weights)
                # Renormalize the weights over the factors actually present.
                valid_weights = valid_weights / valid_weights.sum()
                # Weighted mean of the available scores.
                mean_score = np.average(factor_scores, weights=valid_weights)
                # Consistency adjustment: Mean(Si)/Std(Si) (unweighted).
                if len(factor_scores) > 1 and np.std(factor_scores) > 0:
                    adjustment = np.mean(factor_scores) / np.std(factor_scores)
                else:
                    adjustment = 0
                # Total: 1/8 * Mean(Si) + Mean(Si)/Std(Si)
                total_score = (1/8) * mean_score + adjustment
                total_scores.append(total_score)
            df_result['total_score'] = total_scores
            # Sort descending by total score and assign 1-based ranks.
            df_result = df_result.sort_values('total_score', ascending=False).reset_index(drop=True)
            df_result['rank'] = range(1, len(df_result) + 1)
            logger.info(f"完成 {stage} 阶段 {len(df_result)} 只股票的总分计算")
            return df_result
        except Exception as e:
            logger.error(f"计算总分失败: {str(e)}")
            import traceback
            traceback.print_exc()
            return df

    def close_connections(self):
        """Release every database resource held by the strategy."""
        try:
            if hasattr(self, 'lifecycle_calculator'):
                del self.lifecycle_calculator
            if hasattr(self, 'financial_analyzer'):
                self.financial_analyzer.close_connection()
            if hasattr(self, 'distance_calculator'):
                del self.distance_calculator
            if hasattr(self, 'mysql_engine'):
                self.mysql_engine.dispose()
            logger.info("数据库连接已关闭")
        except Exception as e:
            logger.error(f"关闭连接失败: {str(e)}")
def main():
    """Entry point: run the tech-theme fundamental factor strategy, print a
    summary per lifecycle stage, and export each stage's full results to CSV."""
    strategy = None
    try:
        print("=== 科技主题基本面因子选股策略 ===")
        print("数据说明:")
        print("- 毛利率、净利润增长率等:使用最新数据 (2025-03-31)")
        print("- 供应商客户集中度、折旧摊销、研发费用:使用年报数据 (2024-12-31)")
        print()
        strategy = TechFundamentalFactorStrategy()
        logger.info("策略实例创建成功")
        results = strategy.run_strategy(year=2024)
        if not results:
            print("未获得分析结果")
            return
        for stage, stage_df in results.items():
            print(f"\n=== {stage.upper()} 阶段股票分析结果 ===")
            print(f"股票数量: {len(stage_df)}")
            if stage_df.empty:
                continue
            # Debug aid: list the columns present in this stage's result.
            print(f"数据列: {list(stage_df.columns)}")
            # Preview the top five stocks on the key columns that exist.
            print("\n前5只股票:")
            preview_columns = [
                'stock_code', 'gross_profit_margin', 'growth_score',
                'supplier_concentration', 'customer_concentration',
                'total_score', 'rank'
            ]
            shown_columns = [col for col in preview_columns if col in stage_df.columns]
            print(stage_df[shown_columns].head(5).to_string(index=False))
            # Persist the complete result set for this stage.
            output_file = f"tech_fundamental_factor_{stage}_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
            stage_df.to_csv(output_file, index=False, encoding='utf-8-sig')
            print(f"\n完整结果已保存到: {output_file}")
            # Score distribution summary.
            print(f"\n统计信息:")
            print(f"  平均总分: {stage_df['total_score'].mean():.2f}")
            print(f"  最高总分: {stage_df['total_score'].max():.2f}")
            print(f"  最低总分: {stage_df['total_score'].min():.2f}")
        print(f"\n=== 策略运行完成 ===")
    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        if strategy:
            strategy.close_connections()


if __name__ == "__main__":
    main()

342
src/scripts/ProxyIP.py Normal file
View File

@ -0,0 +1,342 @@
import requests
import redis
import random
import time
import threading
import json
from typing import Dict, List, Optional, Union
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
class EnhancedProxyManager:
def __init__(
self,
proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json',
valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c',
redis_host: str = '192.168.18.123',
redis_port: int = 6379,
redis_db: int = 7,
redis_password: str = 'wlkj2018',
redis_key: str = 'proxy_pool',
update_interval: int = 3600,
max_retries: int = 3,
proxy_timeout: int = 10,
auto_refresh: bool = False,
max_pool_size: int = 50,
enable_api_validation: bool = True # 新增是否启用API验证开关
):
"""
增强版代理管理器 - 支持多IP池和手动代理管理
:param proxy_api_url: 获取代理的API地址
:param redis_host: Redis主机地址
:param redis_port: Redis端口
:param redis_db: Redis数据库
:param redis_password: Redis密码
:param redis_key: Redis中存储代理的键前缀
:param update_interval: 代理更新间隔()
:param max_retries: 最大重试次数
:param proxy_timeout: 代理连接超时时间()
:param auto_refresh: 是否自动从API获取代理
:param max_pool_size: 代理池最大容量
"""
self.proxy_api_url = proxy_api_url
self.redis_key_prefix = redis_key
self.update_interval = update_interval
self.max_retries = max_retries
self.proxy_timeout = proxy_timeout
self.auto_refresh = auto_refresh
self.max_pool_size = max_pool_size
self.valid_check_url = valid_check_url
self.enable_api_validation = enable_api_validation
# Redis连接
self.redis_conn = redis.StrictRedis(
host=redis_host,
port=redis_port,
db=redis_db,
password=redis_password,
decode_responses=True
)
# 线程安全控制
self.lock = threading.Lock()
self.condition = threading.Condition()
# 启动维护线程
if self.auto_refresh:
self._start_maintenance_thread()
def _start_maintenance_thread(self):
"""启动后台维护线程"""
def maintenance_loop():
while True:
with self.condition:
self._refresh_api_proxies()
self.condition.notify_all()
time.sleep(self.update_interval)
thread = threading.Thread(target=maintenance_loop, daemon=True)
thread.start()
def _get_redis_key(self, proxy_type: str) -> str:
"""获取Redis键名"""
return f"{self.redis_key_prefix}:{proxy_type}"
def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]:
"""通过API检查代理是否有效"""
if not self.enable_api_validation or not proxy_list:
return {}
try:
# 拼接代理参数proxy=ip1:port1,ip2:port2
proxy_param = '&proxy=' + ','.join(proxy_list)
response = requests.get(self.valid_check_url + proxy_param, timeout=10)
if response.status_code == 200:
data = response.json()
if data.get('code') == 0:
return data.get('data', {})
except Exception as e:
print(f"API验证代理有效性失败: {e}")
return {}
    def _refresh_api_proxies(self) -> bool:
        """Fetch a fresh batch of proxies from the API and store them in Redis.

        Skips the network call entirely when the pool already holds at least
        4 proxies that were all checked within the last hour.

        :return: True when new proxies were fetched and stored, False otherwise.
        """
        api_key = self._get_redis_key('api')
        # Pre-check: skip refreshing when enough unexpired proxies already exist.
        existing_proxies = self.redis_conn.hgetall(api_key)
        # Require at least 4 pooled proxies before considering a skip.
        if len(existing_proxies) >= 4:
            # NOTE(review): the expiry window used here is 1 hour, although the
            # original comment claimed a 24-hour proxy lifetime — confirm which
            # is intended.
            current_time = datetime.now()
            need_refresh = False
            for proxy_json in existing_proxies.values():
                proxy = json.loads(proxy_json)
                last_checked = datetime.strptime(proxy['last_checked'], "%Y-%m-%d %H:%M:%S")
                if (current_time - last_checked) > timedelta(hours=1):
                    need_refresh = True
                    break
            if not need_refresh:
                print("当前有足够数量且未过期的代理,无需刷新")
                return False
        try:
            response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0 and data.get('data'):
                    # Recompute the key (kept as-is; stale entries are NOT
                    # purged — the delete below is intentionally disabled).
                    api_key = self._get_redis_key('api')
                    # self.redis_conn.delete(api_key)
                    # Store each newly fetched proxy under its "ip:port" field.
                    for proxy_data in data['data']['proxy_list']:
                        proxy = {
                            'http': f"http://{proxy_data}",
                            'https': f"http://{proxy_data}",
                            'server': proxy_data,
                            'source': 'api',
                            'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                            'status': 'active'
                        }
                        self.redis_conn.hset(
                            api_key,
                            proxy_data,
                            json.dumps(proxy)
                        )
                    return True
        except Exception as e:
            print(f"更新API代理失败: {e}")
        return False
def add_manual_proxies(self, proxies: Union[List[str], str]) -> int:
"""
手动添加代理到池中
:param proxies: 代理列表(格式: ["ip:port", ...] "ip:port")
:return: 成功添加的代理数量
"""
if isinstance(proxies, str):
proxies = [proxies]
added = 0
manual_key = self._get_redis_key('manual')
# 验证并添加代理
def _check_and_add(proxy):
nonlocal added
proxy_dict = {
'http': f"http://{proxy}",
'https': f"http://{proxy}",
'server': proxy,
'source': 'manual',
'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
'status': 'active'
}
if self._is_proxy_working(proxy_dict):
with self.lock:
# 检查是否已存在
if not self.redis_conn.hexists(manual_key, proxy):
self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict))
added += 1
# 使用线程池并行验证
with ThreadPoolExecutor(max_workers=10) as executor:
executor.map(_check_and_add, proxies)
print(f"成功添加 {added} 个手动代理")
return added
def _is_proxy_working(self, proxy: Dict) -> bool:
"""检查代理是否可用"""
test_urls = [
"http://httpbin.org/ip",
"http://www.google.com/gen_204"
]
proxies = {
'http': proxy['http'],
'https': proxy['https']
}
for url in test_urls:
try:
response = requests.get(
url,
proxies=proxies,
timeout=self.proxy_timeout,
allow_redirects=False
)
if 200 <= response.status_code < 500:
return True
except:
continue
return False
def get_random_proxy(self) -> Optional[Dict]:
"""
随机获取一个可用代理
:return: 代理字典或None(如果无可用代理)
"""
# 优先从API代理获取
# api_key = self._get_redis_key('api')
manual_key = self._get_redis_key('manual')
# 获取所有活跃代理
proxies = []
# 先检查API代理
# api_proxies = self.redis_conn.hgetall(api_key)
# for proxy_json in api_proxies.values():
# proxy = json.loads(proxy_json)
# if proxy.get('status') == 'active':
# proxies.append(proxy)
# 如果API代理不可用或auto_refresh关闭检查手动代理
if not proxies or not self.auto_refresh:
manual_proxies = self.redis_conn.hgetall(manual_key)
for proxy_json in manual_proxies.values():
proxy = json.loads(proxy_json)
if proxy.get('status') == 'active':
proxies.append(proxy)
if not proxies:
if self.auto_refresh:
print("代理池为空尝试从API获取...")
self._refresh_api_proxies()
return self.get_random_proxy()
else:
print("代理池为空且自动刷新已关闭")
return None
# 随机选择一个代理
selected = random.choice(proxies)
selected['_redis_key'] = self._get_redis_key(selected['source'])
return selected
def mark_proxy_failed(self, proxy: Dict):
"""标记代理为失败并从池中移除"""
if '_redis_key' not in proxy:
return
# 如果是API代理且启用验证先检查是否真的失效
if proxy.get('source') == 'api' and self.enable_api_validation:
valid_status = self._check_proxy_valid([proxy['server']])
is_valid = valid_status.get(proxy['server'], False)
if is_valid:
print(f"代理 {proxy['server']} API验证仍有效暂不移除")
return
with self.lock:
self.redis_conn.hdel(proxy['_redis_key'], proxy['server'])
print(f"代理 {proxy['server']} 已被移除")
def request_with_proxy(
self,
method: str,
url: str,
retry_count: int = 0,
**kwargs
) -> requests.Response:
"""
使用代理发送请求
:param method: HTTP方法
:param url: 请求URL
:param retry_count: 内部重试计数
:param kwargs: 其他requests参数
:return: Response对象
"""
if retry_count >= self.max_retries:
raise requests.exceptions.RequestException(f"达到最大重试次数 {self.max_retries}")
proxy = self.get_random_proxy()
if not proxy:
raise requests.exceptions.RequestException("无可用代理")
try:
response = requests.request(
method,
url,
proxies={
'http': proxy['http'],
'https': proxy['https']
},
timeout=self.proxy_timeout,
**kwargs
)
if response.status_code >= 400:
raise requests.exceptions.HTTPError(
f"HTTP错误: {response.status_code}",
response=response
)
return response
except requests.exceptions.RequestException as e:
print(f"代理 {proxy['server']} 请求失败: {e}")
self.mark_proxy_failed(proxy)
return self.request_with_proxy(method, url, retry_count + 1, **kwargs)
def get_pool_status(self) -> Dict:
"""获取代理池状态"""
api_key = self._get_redis_key('api')
manual_key = self._get_redis_key('manual')
return {
'api_proxies': self.redis_conn.hlen(api_key),
'manual_proxies': self.redis_conn.hlen(manual_key),
'auto_refresh': self.auto_refresh,
'last_update': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}

View File

@ -11,7 +11,7 @@ XUEQIU_HEADERS = {
'Accept-Encoding': 'gzip, deflate, br, zstd',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Client-Version': 'v2.44.75',
'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; Hm_lvt_1db88642e346389874251b5a1eded6e3=1749028611; acw_tc=0a27aa3317504105803118918e00823643107980bbedc1c9307d37d1cf7fb7; xq_a_token=5b11f7f5a3986a802def7dea48868a3b2849e719; xqat=5b11f7f5a3986a802def7dea48868a3b2849e719; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzUzMDAzNjExLCJjdG0iOjE3NTA0MTE2MTEyODIsImNpZCI6ImQ5ZDBuNEFadXAifQ.FB12KEYSdWo5g3UqQbnfqR-Gopar8JkuDf54eSf86FzmuGG9XugW7osl3idav9oTgLzgWBut4X6a5-gbqn61wPPV7OV3dMO8oNyBZUxMjisaMBW_-IcUuQ1z-gtXBcHleNamANA-2H3Xf5mZNdVXAW_E0rQZE_y0TEqzeiLxfU5B_RJOTR1Zq_-BQaaOn_Tk0or_hu-nOZR-26lBtcBl1VoTR2Ov1tm_CRN375ohMcZniA265X8umpL_tysQ4m7oazNyezopJE6W7jt-djNGJXZAbLoVXF1U2ULKV325dPWHvPcSZOevxGprItb665QNZvXEzhBB-4fuzhAnYBsqGw; xq_r_token=2ba0614b400ec779704c3adaa7f17c2c2c88143b; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1750411602; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=Jg9N/8vN3mjfEOHOPlAxHQ+1x+X4nN7jc9vkKRkIGulMwceWqptDd3OUgWPM6XqKNq/15EvM032gWoeeYMHgRg%3D%3D; ssxmod_itna=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDyliGQQmhGtKq0aCDD/KlYeDZDGFdDqx0Ei6FiDHICezjQgDKgACjktpeBflQR5RYGlcNpp=0IDpnOAGdeGLDY=DCTKK420iDYYfDBYD74G+DDeDih3Dj4GmDGY=aeDFIQutVCRKdxDwDB=DmqG23ObDm4DfDDLorBD4Il2YDDtDAkaGNPDADA3doDDlYD84Kdb4DYpogQ0FdgahphusIeDMixGXzAlzx9CnoiWtV/vfrf2aHPGuDG=OcC0Hh2bmRT3f8hGxYDo5Qe8hx+Bx3rKq0DW7HRYqYYeYAh+2DR0DQhxRDxgGYgEw/rdPrd5kh6WdYYrcqsMkbZMshie5QhNiNQDoOBtQgdeAde6D/r5l05Dr=grAWG4HmmNBiQm44D; ssxmod_itna2=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDyliGQQmhGtKq0aeDWhYebouIdHFW5NsDoenRT6eeD',
'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; Hm_lvt_1db88642e346389874251b5a1eded6e3=1749028611; xq_a_token=5b11f7f5a3986a802def7dea48868a3b2849e719; xqat=5b11f7f5a3986a802def7dea48868a3b2849e719; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzUzMDAzNjExLCJjdG0iOjE3NTA0MTE2MTEyODIsImNpZCI6ImQ5ZDBuNEFadXAifQ.FB12KEYSdWo5g3UqQbnfqR-Gopar8JkuDf54eSf86FzmuGG9XugW7osl3idav9oTgLzgWBut4X6a5-gbqn61wPPV7OV3dMO8oNyBZUxMjisaMBW_-IcUuQ1z-gtXBcHleNamANA-2H3Xf5mZNdVXAW_E0rQZE_y0TEqzeiLxfU5B_RJOTR1Zq_-BQaaOn_Tk0or_hu-nOZR-26lBtcBl1VoTR2Ov1tm_CRN375ohMcZniA265X8umpL_tysQ4m7oazNyezopJE6W7jt-djNGJXZAbLoVXF1U2ULKV325dPWHvPcSZOevxGprItb665QNZvXEzhBB-4fuzhAnYBsqGw; xq_r_token=2ba0614b400ec779704c3adaa7f17c2c2c88143b; _c_WBKFRo=dsWgHR8i8KGPbIyhFlN51PHOzVuuNytvUAFppfkD; _nb_ioWEgULi=; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=WLnKAYCmLCxL13sG3b3dCuaIIWxGZItK2dxyj3SQELeHQlC27oBs/jcXLvR4rVqsrBg/lnbnfvkgBRtAnBSoIQ%3D%3D; acw_tc=0a27a99417515124338191367e005b5efa24266b40be35881bb2c1c385360c; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1751513519; ssxmod_itna=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDylQ0dV0GrtqN42D5D/SP4GzDiLPGhDBWAFdYGdTt4lBEWYpxBBIToh63dwWKwxYs=B1pxKWTGRgQW01e0aDmKDUcFYP4iiTx0rD0eDPxDYDGRWD7PDoxDrHzYDjDEp8Rom3F4DKx0kDY5Dwa4mDYPDWxDFi+0xeDowrDDCDi5fZb3DixiaTbDDBriueYmweDi3iIoGfF4LV3TIRxoD9h4DsZGBS9MBpT0vOR6Cbwm+A+3DvxDkXzEGj4umhUghmIriDb7D44rF7GYzGbmr1ADUmrKtY4WBDWAQY+rmx4Rhk0wxmxtiBYu42YOFR443mlf5/Gm/BHYerdzqxiR=uEhl7KSbKQitBu7nhHK0N7GQ897zDPA0eZYiYoqAGDD; 
ssxmod_itna2=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDylQ0dV0GrtqN42YeDA4rYnRItORCDU1ZKlwhQ4eIKPSbxD',
'Referer': 'https://weibo.com/u/7735765253',
'Sec-Ch-Ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
'Sec-Ch-Ua-Mobile': '?0',

View File

@ -6,6 +6,7 @@ from sqlalchemy import create_engine, text
from datetime import datetime, timedelta
from tqdm import tqdm
from src.scripts.config import XUEQIU_HEADERS
from src.scripts.ProxyIP import EnhancedProxyManager
import gc
class StockDailyDataCollector:
@ -19,6 +20,8 @@ class StockDailyDataCollector:
pool_recycle=3600
)
self.headers = XUEQIU_HEADERS
# 初始化代理管理器
self.proxy_manager = EnhancedProxyManager()
def fetch_all_stock_codes(self):
# 从gp_code_all获取股票代码
@ -37,15 +40,18 @@ class StockDailyDataCollector:
codes_hk = df_hk['gp_code'].tolist()
# 合并去重
all_codes = list(set(codes_all + codes_zs + codes_hk))
print(f"获取到股票代码: {len(codes_all)}个来自gp_code_all, {len(codes_zs)}个来自gp_code_zs, {len(codes_hk)}个来自gp_code_hk, 去重后共{len(all_codes)}")
all_codes = list(set(codes_hk))
# all_codes = list(set(codes_all + codes_zs + codes_hk))
print(f"获取到股票代码: {len(codes_all)} 个来自gp_code_all, {len(codes_zs)}个来自gp_code_zs, {len(codes_hk)}个来自gp_code_hk, 去重后共{len(all_codes)}")
return all_codes
def fetch_daily_stock_data(self, symbol, begin, count=-1):
"""获取日线数据count=-1表示最新一天-2表示最近两天-1800表示最近1800天"""
url = f"https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin={begin}&period=day&type=before&count={count}&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance"
try:
response = requests.get(url, headers=self.headers, timeout=20)
# 使用代理管理器发送请求
# response = requests.get(url, headers=self.headers, timeout=20)
response = self.proxy_manager.request_with_proxy('get', url, headers=self.headers)
return response.json()
except Exception as e:
print(f"Request error for {symbol}: {e}")
@ -85,9 +91,9 @@ class StockDailyDataCollector:
start_date = datetime.strptime(date, '%Y-%m-%d')
date_str = date
delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
with self.engine.begin() as conn:
conn.execute(delete_query, {"date_str": f"{date_str}%"})
# delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
# with self.engine.begin() as conn:
# conn.execute(delete_query, {"date_str": f"{date_str}%"})
stock_codes = self.fetch_all_stock_codes()
begin = int(start_date.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000)
@ -253,6 +259,30 @@ class StockDailyDataCollector:
except Exception as e:
print(f"!!! Error saving ex-rights log: {e}")
def fetch_single_stock_history(self, symbol, days=1800):
"""
获取单只股票的历史数据并保存到数据库
:param symbol: 股票代码
:param days: 获取的天数默认1800天
:return: 是否成功
"""
print(f"开始获取 {symbol} 最近 {days} 天的历史数据...")
begin = int(datetime.now().timestamp() * 1000)
data = self.fetch_daily_stock_data(symbol, begin, count=-days)
if data.get('error_code') == 0:
df = self.transform_data(data, symbol)
if df is not None and not df.empty:
df.to_sql('gp_day_data', self.engine, if_exists='append', index=False)
print(f"成功保存 {symbol} 的历史数据,共 {len(df)} 条记录")
return True
else:
print(f"未能转换 {symbol} 的数据")
return False
else:
print(f"获取 {symbol} 数据失败: {data.get('error_description')}")
return False
def collect_stock_daily_data(db_url, date=None):
collector = StockDailyDataCollector(db_url)
collector.fetch_data_for_date(date)

View File

@ -0,0 +1,328 @@
# coding:utf-8
import requests
import pandas as pd
from sqlalchemy import create_engine, text
from datetime import datetime, timedelta
from tqdm import tqdm
import sys
import os
import gc
# 添加项目根目录到路径
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
from src.scripts.config import XUEQIU_HEADERS
from src.scripts.ProxyIP import EnhancedProxyManager
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data
from src.valuation_analysis.stock_price_collector import StockPriceCollector
from src.scripts.stock_daily_data_collector import StockDailyDataCollector
class StockDailyDataCollectorV2:
    """Daily stock-data collector, v2: merges Xueqiu quotes with Eastmoney fields.

    Pipeline (see run_daily_collection):
      1. pull realtime quotes from Xueqiu,
      2. pull realtime OHLC/pre_close data from Eastmoney,
      3. merge the two sources on symbol,
      4. map the merged frame to the gp_day_data schema,
      5. detect ex-rights events before saving, then save today's rows,
      6. rebuild full history for every ex-rights stock.
    """

    def __init__(self, db_url):
        # Small connection pool; connections recycled hourly to survive
        # MySQL's wait_timeout.
        self.engine = create_engine(
            db_url,
            pool_size=5,
            max_overflow=10,
            pool_recycle=3600
        )
        self.headers = XUEQIU_HEADERS
        self.proxy_manager = EnhancedProxyManager()
        # Eastmoney realtime collector (supplies OHLC / pre_close / list_date).
        self.eastmoney_collector = StockPriceCollector(db_url)
        # V1 collector, reused for per-stock history backfill on ex-rights.
        self.original_collector = StockDailyDataCollector(db_url)

    def convert_symbol_format(self, symbol):
        """
        Convert a Xueqiu-style code to Eastmoney style.

        Xueqiu: SZ300177 -> Eastmoney: 300177.SZ. Unrecognized prefixes are
        returned unchanged.
        """
        if symbol.startswith('SZ'):
            return f"{symbol[2:]}.SZ"
        elif symbol.startswith('SH'):
            return f"{symbol[2:]}.SH"
        else:
            return symbol

    def convert_eastmoney_to_xueqiu_format(self, stock_code):
        """
        Convert an Eastmoney-style code to Xueqiu style.

        Eastmoney: 300177.SZ -> Xueqiu: SZ300177. Unrecognized suffixes are
        returned unchanged.
        """
        if '.SZ' in stock_code:
            return f"SZ{stock_code.replace('.SZ', '')}"
        elif '.SH' in stock_code:
            return f"SH{stock_code.replace('.SH', '')}"
        else:
            return stock_code

    def fetch_eastmoney_data(self):
        """Fetch Eastmoney realtime data; adds a Xueqiu-format 'symbol' column."""
        print("正在获取东方财富数据...")
        df = self.eastmoney_collector.fetch_all_data()
        if not df.empty:
            # Normalize codes to Xueqiu format so the later merge keys align.
            df['symbol'] = df['stock_code'].apply(self.convert_eastmoney_to_xueqiu_format)
            print(f"成功获取东方财富数据,共 {len(df)} 条记录")
        return df

    def merge_data(self, xueqiu_df, eastmoney_df):
        """Left-join Eastmoney OHLC columns onto the Xueqiu frame by symbol.

        A left join keeps every Xueqiu row; Eastmoney columns become NaN for
        symbols Eastmoney did not return.
        """
        print("正在合并雪球和东方财富数据...")
        merged_df = pd.merge(
            xueqiu_df,
            eastmoney_df[['symbol', 'high_price', 'low_price', 'open_price', 'pre_close', 'list_date']],
            on='symbol',
            how='left'
        )
        print(f"数据合并完成,共 {len(merged_df)} 条记录")
        return merged_df

    def transform_to_gp_day_data(self, merged_df):
        """Project the merged frame onto the gp_day_data table schema.

        'pre_close' is carried along only for the ex-rights check; it is not a
        gp_day_data column per se.
        """
        print("正在转换数据格式...")
        gp_day_df = pd.DataFrame()
        # Column-by-column mapping from merged source names to table names.
        gp_day_df['symbol'] = merged_df['symbol']
        gp_day_df['timestamp'] = pd.to_datetime(merged_df['fetch_time'])
        gp_day_df['volume'] = merged_df['volume']
        gp_day_df['open'] = merged_df['open_price']
        gp_day_df['high'] = merged_df['high_price']
        gp_day_df['low'] = merged_df['low_price']
        gp_day_df['close'] = merged_df['current']
        gp_day_df['chg'] = merged_df['chg']
        gp_day_df['percent'] = merged_df['percent']
        gp_day_df['turnoverrate'] = merged_df['turnover_rate']
        gp_day_df['amount'] = merged_df['amount']
        gp_day_df['pb'] = merged_df['pb']
        gp_day_df['pe'] = merged_df['pe_ttm']
        gp_day_df['ps'] = merged_df['ps']
        # pre_close is used by check_ex_rights_before_save().
        gp_day_df['pre_close'] = merged_df['pre_close']
        print(f"数据转换完成,共 {len(gp_day_df)} 条记录")
        return gp_day_df

    def save_to_database(self, df):
        """Replace today's gp_day_data rows with the given frame, in batches."""
        if df.empty:
            print("没有数据需要保存")
            return
        print(f"正在保存数据到数据库,共 {len(df)} 条记录...")
        # Delete today's rows first so re-runs do not duplicate data.
        today_str = datetime.now().strftime('%Y-%m-%d')
        delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
        try:
            with self.engine.begin() as conn:
                conn.execute(delete_query, {"date_str": f"{today_str}%"})
                print(f"已删除今日 {today_str} 的旧数据")
        except Exception as e:
            print(f"删除今日数据失败: {e}")
        # Insert in batches of 1000 so one bad batch does not abort the rest.
        batch_size = 1000
        for i in range(0, len(df), batch_size):
            batch = df.iloc[i:i+batch_size]
            try:
                batch.to_sql('gp_day_data', self.engine, if_exists='append', index=False)
                print(f"已保存第 {i//batch_size + 1} 批数据")
            except Exception as e:
                print(f"保存第 {i//batch_size + 1} 批数据失败: {e}")
        print("数据保存完成")

    def check_ex_rights_before_save(self, df):
        """Detect ex-rights stocks by comparing API pre_close with stored closes.

        Runs BEFORE today's rows are saved. For each row, the latest stored
        close (or the prior day's close when today's row already exists) is
        compared against the API's pre_close; a mismatch beyond 0.001 flags the
        stock as ex-rights.

        :return: (list of ex-rights symbols, list of log dicts for gp_ex_rights_log)
        """
        print("步骤5.1: 检查除权情况(保存前)...")
        ex_rights_stocks = []
        ex_rights_log_data = []
        today_str = datetime.now().strftime('%Y-%m-%d')
        for _, row in tqdm(df.iterrows(), total=len(df), desc="检查除权"):
            symbol = row['symbol']
            current_pre_close = row['pre_close']
            # No pre_close from the API -> nothing to compare against.
            if pd.isna(current_pre_close):
                continue
            # Latest two stored closes for this symbol, newest first.
            query = text("""
                SELECT `close`, `timestamp` FROM gp_day_data
                WHERE symbol = :symbol
                ORDER BY `timestamp` DESC
                LIMIT 2
            """)
            try:
                with self.engine.connect() as conn:
                    results = conn.execute(query, {"symbol": symbol}).fetchall()
                if results:
                    # Decide which stored close represents "yesterday".
                    latest_record = results[0]
                    latest_timestamp = latest_record[1]
                    latest_date_str = latest_timestamp.strftime('%Y-%m-%d')
                    if latest_date_str == today_str and len(results) > 1:
                        # Today's row already stored: compare against the
                        # second-newest (i.e. yesterday's) close instead.
                        db_last_close = float(results[1][0])
                    else:
                        # Otherwise the newest stored close is yesterday's.
                        db_last_close = float(results[0][0])
                    # Mismatch beyond float tolerance indicates an ex-rights event.
                    if abs(db_last_close - current_pre_close) > 0.001:
                        print(f"发现除权股票: {symbol}, 数据库收盘价: {db_last_close}, 当前昨收价: {current_pre_close}")
                        ex_rights_stocks.append(symbol)
                        # Collect a log record for gp_ex_rights_log.
                        ex_rights_log_data.append({
                            'symbol': symbol,
                            'date': today_str,
                            'db_price': db_last_close,
                            'api_price': current_pre_close,
                            'log_time': datetime.now()
                        })
            except Exception as e:
                print(f"查询 {symbol} 历史数据失败: {e}")
                continue
        if ex_rights_stocks:
            print(f"检测到 {len(ex_rights_stocks)} 只除权股票: {ex_rights_stocks}")
        else:
            print("未发现除权股票")
        return ex_rights_stocks, ex_rights_log_data

    def save_ex_rights_log(self, log_data: list):
        """Persist collected ex-rights log records into gp_ex_rights_log."""
        if not log_data:
            return
        print(f"正在保存 {len(log_data)} 条除权日志到gp_ex_rights_log表...")
        try:
            df = pd.DataFrame(log_data)
            # Rename to match the gp_ex_rights_log column names.
            df = df.rename(columns={
                'symbol': 'stock_code',
                'date': 'change_date',
                'db_price': 'before_price',
                'api_price': 'after_price',
                'log_time': 'update_time'
            })
            df.to_sql('gp_ex_rights_log', self.engine, if_exists='append', index=False)
            print("除权日志保存成功")
        except Exception as e:
            print(f"保存除权日志失败: {e}")

    def handle_ex_rights_stocks(self, ex_rights_stocks, ex_rights_log_data):
        """For each ex-rights stock: log it, wipe its history, refetch 1800 days."""
        if not ex_rights_stocks:
            return
        print("步骤6: 处理除权股票...")
        # 6.1 Persist the ex-rights log first.
        if ex_rights_log_data:
            self.save_ex_rights_log(ex_rights_log_data)
        # 6.2 Rebuild full history for every affected symbol.
        print(f"开始处理 {len(ex_rights_stocks)} 只除权股票,重新获取历史数据...")
        for symbol in tqdm(ex_rights_stocks, desc="处理除权股票"):
            try:
                # All stored rows are now pre-adjustment prices -> delete them.
                delete_query = text("DELETE FROM gp_day_data WHERE symbol = :symbol")
                with self.engine.begin() as conn:
                    conn.execute(delete_query, {"symbol": symbol})
                print(f"已删除 {symbol} 的历史数据")
                # Refetch 1800 days of adjusted history via the v1 collector.
                success = self.original_collector.fetch_single_stock_history(symbol, 1800)
                if success:
                    print(f"成功重新获取 {symbol} 的历史数据")
                else:
                    print(f"重新获取 {symbol} 的历史数据失败")
            except Exception as e:
                print(f"处理除权股票 {symbol} 失败: {e}")

    def run_daily_collection(self):
        """Execute the full daily collection pipeline (steps 1-6, see class doc)."""
        print("=" * 60)
        print("股票日线数据采集器V2 - 开始运行")
        print("=" * 60)
        try:
            # 1. Xueqiu realtime quotes.
            print("步骤1: 获取雪球数据...")
            xueqiu_df = fetch_and_store_stock_data()
            if xueqiu_df.empty:
                print("雪球数据获取失败,终止运行")
                return
            # 2. Eastmoney realtime data.
            print("步骤2: 获取东方财富数据...")
            eastmoney_df = self.fetch_eastmoney_data()
            if eastmoney_df.empty:
                print("东方财富数据获取失败,终止运行")
                return
            # 3. Merge the two sources.
            print("步骤3: 合并数据...")
            merged_df = self.merge_data(xueqiu_df, eastmoney_df)
            # 4. Map to the gp_day_data schema.
            print("步骤4: 转换数据格式...")
            gp_day_df = self.transform_to_gp_day_data(merged_df)
            # 5. Detect ex-rights BEFORE saving (comparison needs old rows intact).
            ex_rights_stocks, ex_rights_log_data = self.check_ex_rights_before_save(gp_day_df)
            # 5.2 Save today's rows.
            print("步骤5.2: 保存到数据库...")
            self.save_to_database(gp_day_df)
            # 6. Rebuild history for ex-rights stocks (after saving).
            self.handle_ex_rights_stocks(ex_rights_stocks, ex_rights_log_data)
            print("=" * 60)
            print("股票日线数据采集完成")
            print("=" * 60)
        except Exception as e:
            print(f"采集过程中发生错误: {e}")
        finally:
            # Release pooled DB connections and force a GC pass.
            self.engine.dispose()
            gc.collect()
def collect_stock_daily_data_v2(db_url):
    """V2 entry point: build a collector for the given DB URL and run the daily pipeline."""
    StockDailyDataCollectorV2(db_url).run_daily_collection()
if __name__ == "__main__":
    # NOTE(review): database credentials are hardcoded in the source — consider
    # loading them from environment variables or a config file instead.
    db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
    collect_stock_daily_data_v2(db_url)

View File

@ -0,0 +1,244 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
股票代码格式转换工具类
提供多种股票代码格式之间的转换功能
"""
import re
from typing import Optional
class StockCodeFormatter:
    """Converter between common Chinese A-share stock-code formats.

    Supported representations:
      * dot format:    688008.SH
      * prefix format: SH688008
      * number only:   688008 (market inferred from the numeric prefix)
    """

    # Exchanges this formatter understands.
    _VALID_MARKETS = {'SH', 'SZ', 'BJ'}

    def __init__(self):
        """No per-instance state; the class exists as a namespaced API."""
        pass

    def _parse_stock_code(self, stock_code: str) -> tuple:
        """
        Split a stock code into its numeric part and market identifier.

        Fix over the original: inputs with a non-digit code or an unknown
        market suffix (e.g. 'FOO.BAR') are now rejected with (None, None)
        instead of being accepted verbatim.

        :param stock_code: code in any supported format
        :return: (code, market), e.g. ('688008', 'SH'); (None, None) when unparseable
        """
        if not stock_code:
            return None, None
        stock_code = stock_code.strip().upper()
        # "688008.SH" style
        if '.' in stock_code:
            parts = stock_code.split('.')
            if len(parts) == 2 and parts[0].isdigit() and parts[1] in self._VALID_MARKETS:
                return parts[0], parts[1]
            return None, None
        # "SH688008" style
        if stock_code.startswith(('SZ', 'SH', 'BJ')):
            market = stock_code[:2]
            code = stock_code[2:]
            if code.isdigit():
                return code, market
            return None, None
        # bare "688008" style: infer the market from the numeric prefix
        if stock_code.isdigit():
            if stock_code.startswith(('60', '68')):
                return stock_code, 'SH'
            elif stock_code.startswith(('00', '30', '20')):
                return stock_code, 'SZ'
            elif stock_code.startswith(('8', '43', '87')):
                return stock_code, 'BJ'
            else:
                # Digits with an unrecognized prefix: code known, market unknown.
                return stock_code, None
        return None, None

    def to_dot_format(self, stock_code: str) -> Optional[str]:
        """
        Convert to dot format (e.g. 688008.SH).

        :param stock_code: code in any supported format
        :return: dot-format code, or None when it cannot be converted

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_dot_format('SH688008')
            '688008.SH'
            >>> formatter.to_dot_format('688008')
            '688008.SH'
        """
        code, market = self._parse_stock_code(stock_code)
        if code and market:
            return f"{code}.{market}"
        return None

    def to_prefix_format(self, stock_code: str) -> Optional[str]:
        """
        Convert to prefix format (e.g. SH688008).

        :param stock_code: code in any supported format
        :return: prefix-format code, or None when it cannot be converted

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_prefix_format('688008.SH')
            'SH688008'
        """
        code, market = self._parse_stock_code(stock_code)
        if code and market:
            return f"{market}{code}"
        return None

    def to_number_only(self, stock_code: str) -> Optional[str]:
        """
        Convert to the bare numeric format (e.g. 688008).

        :param stock_code: code in any supported format
        :return: digits-only code, or None when the input cannot be parsed

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_number_only('688008.SH')
            '688008'
        """
        code, market = self._parse_stock_code(stock_code)
        if code:
            return code
        return None

    def get_market(self, stock_code: str) -> Optional[str]:
        """
        Return the market identifier ('SH', 'SZ', 'BJ') for a stock code.

        :param stock_code: code in any supported format
        :return: market identifier, or None when it cannot be determined
        """
        code, market = self._parse_stock_code(stock_code)
        return market

    def is_valid_stock_code(self, stock_code: str) -> bool:
        """
        Check whether a stock code is valid (parseable code AND known market).

        :param stock_code: input code
        :return: True only when both the numeric part and market are recognized
        """
        code, market = self._parse_stock_code(stock_code)
        return code is not None and market is not None

    def batch_convert(self, stock_codes: list, target_format: str = 'dot') -> dict:
        """
        Convert a list of stock codes to one target format.

        :param stock_codes: stock code list
        :param target_format: one of 'dot', 'prefix', 'number'
        :return: {original code: converted code (or None)} mapping
        """
        result = {}
        for stock_code in stock_codes:
            if target_format == 'dot':
                converted = self.to_dot_format(stock_code)
            elif target_format == 'prefix':
                converted = self.to_prefix_format(stock_code)
            elif target_format == 'number':
                converted = self.to_number_only(stock_code)
            else:
                # Unknown target format: map to None rather than raising.
                converted = None
            result[stock_code] = converted
        return result
def main():
    """Demo driver: exercise StockCodeFormatter against representative codes.

    Prints a conversion table for several input formats, then demonstrates
    batch_convert() for the 'dot' and 'prefix' target formats.
    """
    print("=== 股票代码格式转换工具示例 ===")
    # Create the formatter under test.
    formatter = StockCodeFormatter()
    # Representative inputs covering every supported format and market.
    test_codes = [
        '688008.SH',  # SSE STAR Market, dot format
        'SH688008',   # prefix format
        '688008',     # bare digits
        '300661.SZ',  # SZSE ChiNext, dot format
        'SZ300661',   # prefix format
        '300661',     # bare digits
        '000858.SZ',  # SZSE main board
        '600519.SH',  # SSE main board
        '430123.BJ',  # Beijing Stock Exchange
        'BJ430123',   # BSE prefix format
    ]
    print("\n原始代码 -> 点分格式 -> 前缀格式 -> 纯数字格式 -> 市场")
    print("-" * 70)
    for code in test_codes:
        dot_format = formatter.to_dot_format(code)
        prefix_format = formatter.to_prefix_format(code)
        number_format = formatter.to_number_only(code)
        market = formatter.get_market(code)
        is_valid = formatter.is_valid_stock_code(code)
        # NOTE(review): both branches are empty strings in the source — the
        # original check/cross marks appear to have been lost in an encoding
        # mishap; confirm against the repository file.
        status = "" if is_valid else ""
        print(f"{code:12} -> {dot_format or 'None':12} -> {prefix_format or 'None':12} -> {number_format or 'None':8} -> {market or 'None':4} {status}")
    # Batch conversion demonstration.
    print(f"\n=== 批量转换示例 ===")
    batch_codes = ['SH688008', '300661', 'BJ430123']
    dot_results = formatter.batch_convert(batch_codes, 'dot')
    print("转换为点分格式:")
    for original, converted in dot_results.items():
        print(f"  {original} -> {converted}")
    prefix_results = formatter.batch_convert(batch_codes, 'prefix')
    print("转换为前缀格式:")
    for original, converted in prefix_results.items():
        print(f"  {original} -> {converted}")

View File

@ -0,0 +1,35 @@
import requests
import json
def trigger_batch_stock_price_collection():
    """
    Trigger the scheduler endpoint that kicks off batch A-share price
    collection, then print the server's response (pretty-printed when JSON).
    """
    # Scheduler endpoint that launches the batch collection job.
    endpoint = "http://192.168.16.214:5089/scheduler/batch_stock_price/collection"
    print(f"正在向以下地址发送GET请求:\n{endpoint}\n")
    try:
        # The collection job may take a while to acknowledge; allow 30 seconds.
        resp = requests.get(endpoint, timeout=30)
        print(f"请求完成HTTP状态码: {resp.status_code}\n")
        try:
            payload = resp.json()
            print("服务器响应内容 (JSON格式):")
            # json.dumps with indent gives readable, non-ASCII-escaped output.
            print(json.dumps(payload, indent=2, ensure_ascii=False))
        except json.JSONDecodeError:
            print("服务器响应内容 (非JSON格式):")
            print(resp.text)
    except requests.exceptions.RequestException as exc:
        # Network-level failures: DNS, connect refused, timeout, etc.
        print(f"请求失败,发生异常: {exc}")


if __name__ == '__main__':
    trigger_batch_stock_price_collection()