commit;
This commit is contained in:
parent
34f1cfd6e6
commit
aa4be0e64c
|
@ -0,0 +1,517 @@
|
|||
/*
 Navicat Premium Dump SQL

 Source Server         : 192.168.18.199(gpfx)
 Source Server Type    : MySQL
 Source Server Version : 90200 (9.2.0)
 Source Host           : 192.168.18.199:3306
 Source Schema         : db_gp_cj

 Target Server Type    : MySQL
 Target Server Version : 90200 (9.2.0)
 File Encoding         : 65001

 Date: 26/06/2025 11:41:15
*/
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
-- ----------------------------
-- East Money daily margin-trading (RZRQ) statistics, one row per trade date.
-- ----------------------------
DROP TABLE IF EXISTS `eastmoney_rzrq_data`;
CREATE TABLE `eastmoney_rzrq_data` (
  `trade_date` date NOT NULL,
  `index_value` decimal(10, 4) DEFAULT NULL COMMENT '指数',
  `change_percent` decimal(10, 4) DEFAULT NULL COMMENT '涨跌幅',
  `float_market_value` decimal(20, 2) DEFAULT NULL COMMENT '流通市值',
  `change_percent_3d` decimal(10, 4) DEFAULT NULL COMMENT '3日涨跌幅',
  `change_percent_5d` decimal(10, 4) DEFAULT NULL COMMENT '5日涨跌幅',
  `change_percent_10d` decimal(10, 4) DEFAULT NULL COMMENT '10日涨跌幅',
  `financing_balance` decimal(20, 2) DEFAULT NULL COMMENT '融资余额',
  `financing_balance_ratio` decimal(10, 4) DEFAULT NULL COMMENT '融资余额占比',
  `financing_buy_amount` decimal(20, 2) DEFAULT NULL COMMENT '融资买入额',
  `financing_buy_amount_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融资买入额',
  `financing_buy_amount_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融资买入额',
  `financing_buy_amount_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融资买入额',
  `financing_repay_amount` decimal(20, 2) DEFAULT NULL COMMENT '融资偿还额',
  `financing_repay_amount_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融资偿还额',
  `financing_repay_amount_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融资偿还额',
  `financing_repay_amount_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融资偿还额',
  `financing_net_amount` decimal(20, 2) DEFAULT NULL COMMENT '融资净额',
  `financing_net_amount_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融资净额',
  `financing_net_amount_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融资净额',
  `financing_net_amount_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融资净额',
  `securities_balance` decimal(20, 2) DEFAULT NULL COMMENT '融券余额',
  `securities_volume` decimal(20, 2) DEFAULT NULL COMMENT '融券余量',
  `securities_repay_volume` decimal(20, 2) DEFAULT NULL COMMENT '融券偿还量',
  `securities_repay_volume_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融券偿还量',
  `securities_repay_volume_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融券偿还量',
  `securities_repay_volume_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融券偿还量',
  `securities_sell_volume` decimal(20, 2) DEFAULT NULL COMMENT '融券卖出量',
  `securities_sell_volume_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融券卖出量',
  `securities_sell_volume_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融券卖出量',
  `securities_sell_volume_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融券卖出量',
  `securities_net_volume` decimal(20, 2) DEFAULT NULL COMMENT '融券净量',
  `securities_net_volume_3d` decimal(20, 2) DEFAULT NULL COMMENT '3日融券净量',
  `securities_net_volume_5d` decimal(20, 2) DEFAULT NULL COMMENT '5日融券净量',
  `securities_net_volume_10d` decimal(20, 2) DEFAULT NULL COMMENT '10日融券净量',
  `total_rzrq_balance` decimal(20, 2) DEFAULT NULL COMMENT '融资融券余额',
  `total_rzrq_balance_cz` decimal(20, 2) DEFAULT NULL COMMENT '融资融券余额差值',
  -- Audit columns: set on insert / touched on every update.
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`trade_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '东方财富融资融券数据表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Market fear & greed index, unique per trading date.
-- ----------------------------
DROP TABLE IF EXISTS `fear_greed_index`;
CREATE TABLE `fear_greed_index` (
  `id` int NOT NULL AUTO_INCREMENT,
  `index_value` decimal(5, 2) NOT NULL COMMENT '恐贪指数值(0-100)',
  `trading_date` date NOT NULL COMMENT '交易日期',
  `update_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`) USING BTREE,
  -- One row per trading day; re-runs should upsert against this key.
  UNIQUE INDEX `uk_trading_date`(`trading_date` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1003 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '市场恐贪指数数据' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Watchlist of followed stocks, unique per stock code.
-- ----------------------------
DROP TABLE IF EXISTS `follow_stock`;
CREATE TABLE `follow_stock` (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
  `stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票名称',
  `add_time` datetime DEFAULT NULL COMMENT '添加时间',
  `status` tinyint DEFAULT 1 COMMENT '状态',
  PRIMARY KEY (`id`) USING BTREE,
  UNIQUE INDEX `uk_stock_code`(`stock_code` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 22 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '关注的股票' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Fund holdings: how much money each fund holds in each company, per report period.
-- ----------------------------
DROP TABLE IF EXISTS `fund_cang`;
CREATE TABLE `fund_cang` (
  `id` int NOT NULL AUTO_INCREMENT,
  `fund_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `fund_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `company_money` decimal(20, 2) DEFAULT NULL,
  `company_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `company_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `fund_data` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1874595 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- AI fundamental-analysis results per stock and analysis dimension.
-- ----------------------------
DROP TABLE IF EXISTS `fundamental_analysis`;
CREATE TABLE `fundamental_analysis` (
  `id` int NOT NULL AUTO_INCREMENT,
  `stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
  `stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票名称',
  `dimension` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '分析维度',
  `ai_response` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'AI分析结果',
  `reasoning_process` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '推理过程',
  -- NOTE: `references` is a MySQL reserved word; it must stay backtick-quoted everywhere.
  `references` json NULL COMMENT '参考资料',
  `update_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `extra_info` json NULL COMMENT '扩展信息',
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_stock_dimension`(`stock_code` ASC, `dimension` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 12104 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '基本面分析结果表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Mapping from product/category name to the industry it belongs to.
-- ----------------------------
DROP TABLE IF EXISTS `gp_category_industry`;
CREATE TABLE `gp_category_industry` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'id',
  `category_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '分类名称',
  `belong_industry` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '所属行业',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 52 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- All A-share stock codes (three code variants per symbol), used by the crawler.
-- ----------------------------
DROP TABLE IF EXISTS `gp_code_all`;
CREATE TABLE `gp_code_all` (
  `id` int NOT NULL AUTO_INCREMENT,
  `gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `market_cap` decimal(20, 2) DEFAULT NULL,
  `mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 6686 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有个股代码-爬取指数用' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Hong Kong stock codes, same layout as gp_code_all / gp_code_zs.
-- NOTE(review): the table COMMENT below says "指数" (index codes) — it looks
-- copy-pasted from gp_code_zs; confirm and correct in a follow-up migration.
-- ----------------------------
DROP TABLE IF EXISTS `gp_code_hk`;
CREATE TABLE `gp_code_hk` (
  `id` int NOT NULL AUTO_INCREMENT,
  `gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `market_cap` decimal(20, 2) DEFAULT NULL,
  `mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2956 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有指数代码-爬取数据使用' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Index (指数) codes, same layout as gp_code_all, used by the crawler.
-- ----------------------------
DROP TABLE IF EXISTS `gp_code_zs`;
CREATE TABLE `gp_code_zs` (
  `id` int NOT NULL AUTO_INCREMENT,
  `gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_two` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `gp_code_three` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `market_cap` decimal(20, 2) DEFAULT NULL,
  `mark1` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `mark2` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 6686 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '所有指数代码-爬取数据使用' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Backtest bookkeeping: similarity-threshold parameters plus win/loss counters
-- for 1-day / 2-day / 5-day predictions.
-- ----------------------------
DROP TABLE IF EXISTS `gp_data`;
CREATE TABLE `gp_data` (
  `id` int NOT NULL AUTO_INCREMENT,
  `pg_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `xiangsidu` decimal(10, 2) DEFAULT NULL COMMENT '控制相似度为多少的时候进行回测 和下面的数字是关联的',
  `ii` int DEFAULT NULL COMMENT '相似度的条数',
  `raye_ga_hc` decimal(10, 2) DEFAULT NULL COMMENT '回测涨跌幅大于多少的数据',
  `huice_function_num` int DEFAULT NULL COMMENT '回测判断胜率方法 0是代表相差1%或者同涨同跌都算胜 1是代表相差1%算胜 2是代表同涨同跌算胜',
  `yes_yuce` int DEFAULT NULL,
  `no_yuce` int DEFAULT NULL,
  `yes_yuce_twoday` int DEFAULT NULL,
  `no_yuce_twoday` int DEFAULT NULL,
  `yes_yuce_fiveday` int DEFAULT NULL,
  `no_yuce_fiveday` int DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 42690 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Daily OHLC bars per symbol.
-- NOTE(review): OHLC prices are stored as varchar, not decimal, and
-- `create_time` carries ON UPDATE CURRENT_TIMESTAMP (it behaves like an
-- updated-at column) — both look accidental; confirm before relying on them.
-- ----------------------------
DROP TABLE IF EXISTS `gp_day_data`;
CREATE TABLE `gp_day_data` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
  `symbol` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '个股代码',
  `timestamp` timestamp NULL DEFAULT NULL COMMENT '时间戳',
  `volume` bigint DEFAULT NULL COMMENT '数量',
  `open` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '开始价',
  `high` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '最高价',
  `low` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '最低价',
  `close` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '结束价',
  `chg` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '变化数值',
  `percent` decimal(10, 2) DEFAULT NULL COMMENT '变化百分比',
  `turnoverrate` decimal(10, 2) DEFAULT NULL COMMENT '换手率',
  `amount` bigint DEFAULT NULL COMMENT '成交金额',
  `pb` decimal(10, 2) DEFAULT NULL COMMENT '当前PB',
  `pe` decimal(10, 2) DEFAULT NULL COMMENT '当前PE',
  `ps` decimal(10, 2) DEFAULT NULL COMMENT '当前PS',
  `create_time` datetime NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_symbol`(`symbol` ASC) USING BTREE,
  INDEX `idx_timestamp`(`timestamp` ASC) USING BTREE,
  INDEX `idx_symbol_time`(`symbol` ASC, `timestamp` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 28356293 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Ex-rights adjustment log: before/after close prices when a symbol's history
-- is rebased, plus a flag marking completed band optimization.
-- ----------------------------
DROP TABLE IF EXISTS `gp_ex_rights_log`;
CREATE TABLE `gp_ex_rights_log` (
  `id` int NOT NULL AUTO_INCREMENT,
  `stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '股票代码',
  `change_date` date DEFAULT NULL COMMENT '除权变动日期',
  `before_price` decimal(10, 3) DEFAULT NULL COMMENT '变动前收盘价(数据库中的价格)',
  `after_price` decimal(10, 3) DEFAULT NULL COMMENT '变动后收盘价(API获取的价格)',
  `update_time` datetime DEFAULT NULL COMMENT '脚本执行的更新时间',
  `optimization_flag` int DEFAULT NULL COMMENT '波段优化标志,1为已经完成优化',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 285 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin COMMENT = '股票除权日志表' ROW_FORMAT = DYNAMIC;
|
||||
|
||||
-- ----------------------------
-- Concept-board membership (board code/name -> stock code/name).
-- NOTE(review): no primary key — duplicates are possible; presumably rebuilt
-- wholesale by the crawler.
-- ----------------------------
DROP TABLE IF EXISTS `gp_gnbk`;
CREATE TABLE `gp_gnbk` (
  `id` bigint DEFAULT NULL,
  `bk_code` bigint DEFAULT NULL,
  `bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Full concept-board membership snapshot; same schema as gp_gnbk (no PK).
-- ----------------------------
DROP TABLE IF EXISTS `gp_gnbk_all`;
CREATE TABLE `gp_gnbk_all` (
  `id` bigint DEFAULT NULL,
  `bk_code` bigint DEFAULT NULL,
  `bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Concept-only board membership; same schema as gp_gnbk (no PK).
-- ----------------------------
DROP TABLE IF EXISTS `gp_gnbk_gn`;
CREATE TABLE `gp_gnbk_gn` (
  `id` bigint DEFAULT NULL,
  `bk_code` bigint DEFAULT NULL,
  `bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Industry-board membership; same core schema as gp_gnbk (no PK).
-- NOTE(review): `Unnamed: 5` / `Unnamed: 6` are artifacts of a pandas
-- DataFrame.to_sql export with stray CSV columns; kept for compatibility,
-- but they should be dropped (or named) in a follow-up migration.
-- ----------------------------
DROP TABLE IF EXISTS `gp_hybk`;
CREATE TABLE `gp_hybk` (
  `id` bigint DEFAULT NULL,
  `bk_code` bigint DEFAULT NULL,
  `bk_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_code` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `gp_name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `Unnamed: 5` double DEFAULT NULL,
  `Unnamed: 6` double DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Main-business composition per stock and report period: top-5 products
-- (product_rank 1-5) with revenue, cost, gross profit and margin.
-- ----------------------------
DROP TABLE IF EXISTS `gp_main_business`;
CREATE TABLE `gp_main_business` (
  `stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
  `stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票简称',
  `report_date` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '报告期(YYYYMMDD)',
  `product_rank` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '项目排名(1-5)',
  `product_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '项目名称',
  `revenue` decimal(20, 2) DEFAULT NULL COMMENT '项目收入',
  `cost` decimal(20, 2) DEFAULT NULL COMMENT '项目成本',
  `profit` decimal(20, 2) DEFAULT NULL COMMENT '项目毛利',
  `profit_margin` decimal(10, 4) DEFAULT NULL COMMENT '项目毛利率',
  PRIMARY KEY (`stock_code`, `report_date`, `product_rank`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Intraday (minute) bars per symbol; all quote fields stored as varchar
-- exactly as delivered by the upstream API.
-- ----------------------------
DROP TABLE IF EXISTS `gp_min_data`;
CREATE TABLE `gp_min_data` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
  `symbol` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '个股代码',
  `timestamp` timestamp NULL DEFAULT NULL COMMENT '时间戳',
  `volume` bigint DEFAULT NULL COMMENT '数量',
  `open` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '开始价',
  `high` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '最高价',
  `low` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '最低价',
  `close` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '结束价',
  `chg` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '变化数值',
  `percent` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '变化百分比',
  `turnoverrate` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  `amount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL COMMENT '成交金额',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 61116588 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Product-to-category mapping per stock, indexed for lookups by stock code
-- and by product name.
-- ----------------------------
DROP TABLE IF EXISTS `gp_product_category`;
CREATE TABLE `gp_product_category` (
  `id` int NOT NULL AUTO_INCREMENT,
  `category_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  `product_name` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  `stock_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  `stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_stock_code`(`stock_code` ASC) USING BTREE,
  INDEX `idx_product_name`(`product_name` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 35869 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Main-business composition by industry segment per stock and report period.
-- ----------------------------
DROP TABLE IF EXISTS `gp_zygc`;
CREATE TABLE `gp_zygc` (
  `id` int NOT NULL AUTO_INCREMENT,
  `gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票名称',
  `gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票代码',
  `zygc_xmmc` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '主营构成(按行业)-项目名称',
  `zygc_xmsr` decimal(20, 0) DEFAULT NULL COMMENT '主营构成(按行业)-项目收入',
  `zygc_xmmlr` decimal(10, 2) DEFAULT NULL COMMENT '主营构成(按行业)-项目毛利率',
  `belong_time` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '所属财报期',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 12774 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '个股的主营构成' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Main-business revenue share per stock and report period.
-- ----------------------------
DROP TABLE IF EXISTS `gp_zyyw`;
CREATE TABLE `gp_zyyw` (
  `id` int NOT NULL AUTO_INCREMENT,
  `gp_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票名称',
  `gp_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票代码',
  `zyyw_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '主营业务名称',
  `zyyw_zb` decimal(10, 2) DEFAULT NULL COMMENT '主营业务占比',
  `belong_time` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '所属财报期',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 25280 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '上市公司主营业务占比' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Hong Kong hot-stock ranking snapshots.
-- FIX: the dump declared this table with CHARACTER SET sjis / sjis_japanese_ci,
-- while the `name` column stores Chinese stock names and every other table in
-- this schema uses utf8mb4. sjis cannot represent most Chinese characters, so
-- inserts would be garbled or rejected; the table is recreated as utf8mb4 for
-- consistency with the rest of the schema.
-- ----------------------------
DROP TABLE IF EXISTS `hk_hot_stocks`;
CREATE TABLE `hk_hot_stocks` (
  `id` int NOT NULL AUTO_INCREMENT,
  `symbol` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `value` int DEFAULT NULL,
  `increment` int DEFAULT NULL,
  `rank_change` int DEFAULT NULL,
  `percent` float DEFAULT NULL,
  `current` float DEFAULT NULL,
  `chg` float DEFAULT NULL,
  `exchange` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `stock_type` int DEFAULT NULL,
  `add_time` datetime DEFAULT NULL,
  `status` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 53981 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Snapshot of limit-up candidate stocks with quote/fundamental fields,
-- apparently dumped straight from an API payload (pandas-style export:
-- no primary key, `id` is a plain bigint column).
-- ----------------------------
DROP TABLE IF EXISTS `limitup_analysis_stock_changes`;
CREATE TABLE `limitup_analysis_stock_changes` (
  `symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `net_profit_cagr` double DEFAULT NULL,
  `north_net_inflow` double DEFAULT NULL,
  `ps` double DEFAULT NULL,
  `type` bigint DEFAULT NULL,
  `percent` double DEFAULT NULL,
  `has_follow` tinyint(1) DEFAULT NULL,
  `tick_size` double DEFAULT NULL,
  `pb_ttm` double DEFAULT NULL,
  `float_shares` bigint DEFAULT NULL,
  `current` double DEFAULT NULL,
  `amplitude` double DEFAULT NULL,
  `pcf` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `current_year_percent` double DEFAULT NULL,
  `float_market_capital` double DEFAULT NULL,
  `north_net_inflow_time` double DEFAULT NULL,
  `market_capital` double DEFAULT NULL,
  `dividend_yield` double DEFAULT NULL,
  `lot_size` bigint DEFAULT NULL,
  `roe_ttm` double DEFAULT NULL,
  `total_percent` double DEFAULT NULL,
  `percent5m` double DEFAULT NULL,
  `income_cagr` double DEFAULT NULL,
  `amount` double DEFAULT NULL,
  `chg` double DEFAULT NULL,
  `issue_date_ts` bigint DEFAULT NULL,
  `eps` double DEFAULT NULL,
  `main_net_inflows` double DEFAULT NULL,
  `volume` bigint DEFAULT NULL,
  `volume_ratio` double DEFAULT NULL,
  `pb` double DEFAULT NULL,
  `followers` bigint DEFAULT NULL,
  `turnover_rate` double DEFAULT NULL,
  `mapping_quote_current` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `first_percent` double DEFAULT NULL,
  `name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `pe_ttm` double DEFAULT NULL,
  `dual_counter_mapping_symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `total_shares` bigint DEFAULT NULL,
  `limitup_days` bigint DEFAULT NULL,
  `id` bigint DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Exchange-level margin-trading balances (Shanghai / Shenzhen / Beijing plus
-- totals), one row per trade date. Values are in 亿元 (hundred-million yuan).
-- ----------------------------
DROP TABLE IF EXISTS `rzrq_data`;
CREATE TABLE `rzrq_data` (
  `trade_date` date NOT NULL COMMENT '交易日期',
  `sh_financing_balance` decimal(12, 2) DEFAULT NULL COMMENT '上海融资余额(亿元)',
  `sz_financing_balance` decimal(12, 2) DEFAULT NULL COMMENT '深圳融资余额(亿元)',
  `bj_financing_balance` decimal(12, 2) DEFAULT NULL COMMENT '北京融资余额(亿元)',
  `total_financing_balance` decimal(12, 2) DEFAULT NULL COMMENT '融资余额合计(亿元)',
  `sh_financing_buy` decimal(12, 2) DEFAULT NULL COMMENT '上海融资买入额(亿元)',
  `sz_financing_buy` decimal(12, 2) DEFAULT NULL COMMENT '深圳融资买入额(亿元)',
  `bj_financing_buy` decimal(12, 2) DEFAULT NULL COMMENT '北京融资买入额(亿元)',
  `total_financing_buy` decimal(12, 2) DEFAULT NULL COMMENT '融资买入额合计(亿元)',
  `sh_securities_balance` decimal(12, 2) DEFAULT NULL COMMENT '上海融券余量余额(亿元)',
  `sz_securities_balance` decimal(12, 2) DEFAULT NULL COMMENT '深圳融券余量余额(亿元)',
  `bj_securities_balance` decimal(12, 2) DEFAULT NULL COMMENT '北京融券余量余额(亿元)',
  `total_securities_balance` decimal(12, 2) DEFAULT NULL COMMENT '融券余量余额合计(亿元)',
  `sh_rzrq_balance` decimal(12, 2) DEFAULT NULL COMMENT '上海融资融券余额(亿元)',
  `sz_rzrq_balance` decimal(12, 2) DEFAULT NULL COMMENT '深圳融资融券余额(亿元)',
  `bj_rzrq_balance` decimal(12, 2) DEFAULT NULL COMMENT '北京融资融券余额(亿元)',
  `total_rzrq_balance` decimal(12, 2) DEFAULT NULL COMMENT '融资融券余额合计(亿元)',
  -- Audit columns: set on insert / touched on every update.
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`trade_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '融资融券数据表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Raw intraday price-change events (no primary key; append-only scrape log).
-- ----------------------------
DROP TABLE IF EXISTS `stock_price_changes`;
CREATE TABLE `stock_price_changes` (
  `symbol` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `name` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `current` double DEFAULT NULL,
  `percent` double DEFAULT NULL,
  `time_mark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,
  `add_time` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Latest real-time quote per stock (one row per code, overwritten in place;
-- update_time is refreshed automatically on every write).
-- ----------------------------
DROP TABLE IF EXISTS `stock_price_data`;
CREATE TABLE `stock_price_data` (
  `stock_code` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '股票代码',
  `stock_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '股票名称',
  `latest_price` decimal(10, 2) DEFAULT NULL COMMENT '最新价',
  `change_percent` decimal(10, 2) DEFAULT NULL COMMENT '涨跌幅',
  `change_amount` decimal(10, 2) DEFAULT NULL COMMENT '涨跌额',
  `volume` bigint DEFAULT NULL COMMENT '成交量(手)',
  `amount` decimal(20, 2) DEFAULT NULL COMMENT '成交额',
  `amplitude` decimal(10, 2) DEFAULT NULL COMMENT '振幅',
  `turnover_rate` decimal(10, 2) DEFAULT NULL COMMENT '换手率',
  `pe_ratio` decimal(10, 2) DEFAULT NULL COMMENT '市盈率',
  `high_price` decimal(10, 2) DEFAULT NULL COMMENT '最高价',
  `low_price` decimal(10, 2) DEFAULT NULL COMMENT '最低价',
  `open_price` decimal(10, 2) DEFAULT NULL COMMENT '开盘价',
  `pre_close` decimal(10, 2) DEFAULT NULL COMMENT '昨收价',
  `total_market_value` decimal(20, 2) DEFAULT NULL COMMENT '总市值',
  `float_market_value` decimal(20, 2) DEFAULT NULL COMMENT '流通市值',
  `pb_ratio` decimal(10, 2) DEFAULT NULL COMMENT '市净率',
  `list_date` date DEFAULT NULL COMMENT '上市日期',
  `update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  PRIMARY KEY (`stock_code`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '实时股价数据表' ROW_FORMAT = Dynamic;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
|
@ -17,4 +17,5 @@ google-genai
|
|||
redis==5.2.1
|
||||
pandas==2.2.3
|
||||
apscheduler==3.11.0
|
||||
pymongo==4.13.0
scikit-learn==1.6.1
|
||||
|
|
32
src/app.py
32
src/app.py
|
@ -42,10 +42,12 @@ from src.valuation_analysis.index_analyzer import IndexAnalyzer
|
|||
|
||||
# 导入股票日线数据采集器
|
||||
from src.scripts.stock_daily_data_collector import collect_stock_daily_data
|
||||
from src.scripts.stock_daily_data_collector_v2 import collect_stock_daily_data_v2
|
||||
|
||||
from valuation_analysis.financial_analysis import FinancialAnalyzer
|
||||
from src.valuation_analysis.stock_price_collector import StockPriceCollector
|
||||
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data, get_stock_realtime_info_from_redis
|
||||
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data
|
||||
from src.quantitative_analysis.hk_stock_price_collector import fetch_and_store_hk_stock_data
|
||||
from src.quantitative_analysis.momentum_analysis import MomentumAnalyzer
|
||||
|
||||
# 设置日志
|
||||
|
@ -207,6 +209,24 @@ def run_stock_daily_collection1():
|
|||
# 获取当天日期
|
||||
today = datetime.now().strftime('%Y-%m-%d')
|
||||
|
||||
# 定义数据库连接地址
|
||||
db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
|
||||
# collect_stock_daily_data(db_url, today)
|
||||
collect_stock_daily_data_v2(db_url)
|
||||
except Exception as e:
|
||||
logger.error(f"启动股票日线数据采集任务失败: {str(e)}")
|
||||
return jsonify({
|
||||
"status": "success"
|
||||
}), 200
|
||||
|
||||
@app.route('/scheduler/stockDailyHK/collection', methods=['GET'])
|
||||
def run_stock_daily_collection2():
|
||||
"""执行股票日线数据采集任务 下午4点开始"""
|
||||
try:
|
||||
logger.info("开始执行股票日线数据采集")
|
||||
# 获取当天日期
|
||||
today = datetime.now().strftime('%Y-%m-%d')
|
||||
|
||||
# 定义数据库连接地址
|
||||
db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
|
||||
collect_stock_daily_data(db_url, today)
|
||||
|
@ -3002,6 +3022,16 @@ def run_batch_stock_price_collection():
|
|||
logger.error(f"批量采集A股行情失败: {str(e)}")
|
||||
return jsonify({"status": "error", "message": str(e)})
|
||||
|
||||
@app.route('/scheduler/batch_hk_stock_price/collection', methods=['GET'])
|
||||
def run_batch_hk_stock_price_collection():
|
||||
"""批量采集A股行情并保存到数据库"""
|
||||
try:
|
||||
fetch_and_store_hk_stock_data()
|
||||
return jsonify({"status": "success", "message": "批量采集A股行情并保存到数据库成功"})
|
||||
except Exception as e:
|
||||
logger.error(f"批量采集A股行情失败: {str(e)}")
|
||||
return jsonify({"status": "error", "message": str(e)})
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# 启动Web服务器
|
||||
|
|
|
@ -0,0 +1,310 @@
|
|||
# coding:utf-8
|
||||
#计算股价平均距离因子-行业个股列表来计算
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sqlalchemy import create_engine, text
|
||||
from datetime import datetime, timedelta
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics.pairwise import euclidean_distances
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
class AverageDistanceFactor:
|
||||
"""平均距离因子计算器"""
|
||||
|
||||
def __init__(self, db_url):
|
||||
self.engine = create_engine(
|
||||
db_url,
|
||||
pool_size=5,
|
||||
max_overflow=10,
|
||||
pool_recycle=3600
|
||||
)
|
||||
|
||||
def get_industry_stocks(self, industry_name=None, concept_name=None):
|
||||
"""获取指定行业或概念的股票列表"""
|
||||
if industry_name:
|
||||
query = text("""
|
||||
SELECT DISTINCT gp_code as symbol
|
||||
FROM gp_hybk
|
||||
WHERE bk_name = :name
|
||||
""")
|
||||
params = {"name": industry_name}
|
||||
elif concept_name:
|
||||
query = text("""
|
||||
SELECT DISTINCT gp_code as symbol
|
||||
FROM gp_gnbk
|
||||
WHERE bk_name = :name
|
||||
""")
|
||||
params = {"name": concept_name}
|
||||
else:
|
||||
raise ValueError("必须提供 industry_name 或 concept_name 之一")
|
||||
|
||||
try:
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(query, params).fetchall()
|
||||
|
||||
if result:
|
||||
symbols = [row[0] for row in result]
|
||||
print(f"获取到 {len(symbols)} 只股票")
|
||||
return symbols
|
||||
else:
|
||||
print(f"未找到{'行业' if industry_name else '概念'} {industry_name or concept_name} 的股票")
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"获取股票列表失败: {e}")
|
||||
return []
|
||||
|
||||
def get_stock_data(self, symbols, days=20):
|
||||
"""获取股票的历史数据"""
|
||||
if not symbols:
|
||||
return pd.DataFrame()
|
||||
|
||||
# 计算开始日期
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=days * 2) # 多取一些数据以防节假日
|
||||
|
||||
# 构建SQL查询
|
||||
symbols_str = "', '".join(symbols)
|
||||
query = f"""
|
||||
SELECT symbol, timestamp, volume, open, high, low, close,
|
||||
chg, percent, turnoverrate, amount, pb, pe, ps
|
||||
FROM gp_day_data
|
||||
WHERE symbol IN ('{symbols_str}')
|
||||
AND timestamp >= '{start_date.strftime('%Y-%m-%d')}'
|
||||
ORDER BY symbol, timestamp DESC
|
||||
"""
|
||||
|
||||
try:
|
||||
df = pd.read_sql(query, self.engine)
|
||||
print(f"获取到 {len(df)} 条历史数据")
|
||||
return df
|
||||
except Exception as e:
|
||||
print(f"获取历史数据失败: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
def calculate_technical_indicators(self, df, days=20):
|
||||
"""计算技术指标"""
|
||||
result_data = []
|
||||
|
||||
for symbol in df['symbol'].unique():
|
||||
stock_data = df[df['symbol'] == symbol].copy()
|
||||
stock_data = stock_data.sort_values('timestamp')
|
||||
|
||||
# 只取最近N天的数据
|
||||
stock_data = stock_data.tail(days)
|
||||
|
||||
if len(stock_data) < days:
|
||||
continue # 数据不足,跳过
|
||||
|
||||
# 转换数据类型
|
||||
for col in ['open', 'high', 'low', 'close', 'chg']:
|
||||
stock_data[col] = pd.to_numeric(stock_data[col], errors='coerce')
|
||||
|
||||
# 计算各种技术指标
|
||||
indicators = self._compute_indicators(stock_data)
|
||||
indicators['symbol'] = symbol
|
||||
result_data.append(indicators)
|
||||
|
||||
return pd.DataFrame(result_data)
|
||||
|
||||
def _compute_indicators(self, data):
|
||||
"""计算具体的技术指标"""
|
||||
indicators = {}
|
||||
|
||||
# 1. 收益率指标
|
||||
data['returns'] = data['close'].pct_change()
|
||||
indicators['return_5d'] = data['returns'].tail(5).sum() # 5日累计收益率
|
||||
indicators['return_10d'] = data['returns'].tail(10).sum() # 10日累计收益率
|
||||
indicators['return_20d'] = data['returns'].tail(20).sum() # 20日累计收益率
|
||||
|
||||
# 2. 波动率指标
|
||||
indicators['volatility_5d'] = data['returns'].tail(5).std() # 5日波动率
|
||||
indicators['volatility_10d'] = data['returns'].tail(10).std() # 10日波动率
|
||||
indicators['volatility_20d'] = data['returns'].tail(20).std() # 20日波动率
|
||||
|
||||
# 3. 价格相对位置
|
||||
indicators['price_position_5d'] = (data['close'].iloc[-1] - data['low'].tail(5).min()) / (data['high'].tail(5).max() - data['low'].tail(5).min())
|
||||
indicators['price_position_10d'] = (data['close'].iloc[-1] - data['low'].tail(10).min()) / (data['high'].tail(10).max() - data['low'].tail(10).min())
|
||||
indicators['price_position_20d'] = (data['close'].iloc[-1] - data['low'].tail(20).min()) / (data['high'].tail(20).max() - data['low'].tail(20).min())
|
||||
|
||||
# 4. 移动平均偏离度
|
||||
ma_5 = data['close'].tail(5).mean()
|
||||
ma_10 = data['close'].tail(10).mean()
|
||||
ma_20 = data['close'].tail(20).mean()
|
||||
current_price = data['close'].iloc[-1]
|
||||
|
||||
indicators['ma_deviation_5d'] = (current_price - ma_5) / ma_5
|
||||
indicators['ma_deviation_10d'] = (current_price - ma_10) / ma_10
|
||||
indicators['ma_deviation_20d'] = (current_price - ma_20) / ma_20
|
||||
|
||||
# 5. 成交量相关指标
|
||||
indicators['volume_ratio_5d'] = data['volume'].tail(5).mean() / data['volume'].mean()
|
||||
indicators['volume_ratio_10d'] = data['volume'].tail(10).mean() / data['volume'].mean()
|
||||
indicators['turnover_avg_5d'] = data['turnoverrate'].tail(5).mean()
|
||||
indicators['turnover_avg_10d'] = data['turnoverrate'].tail(10).mean()
|
||||
|
||||
# 6. 价格振幅指标
|
||||
data['amplitude'] = (data['high'] - data['low']) / data['close']
|
||||
indicators['amplitude_avg_5d'] = data['amplitude'].tail(5).mean()
|
||||
indicators['amplitude_avg_10d'] = data['amplitude'].tail(10).mean()
|
||||
|
||||
# 7. 趋势强度(连续涨跌)
|
||||
indicators['consecutive_up'] = self._count_consecutive(data['percent'] > 0)
|
||||
indicators['consecutive_down'] = self._count_consecutive(data['percent'] < 0)
|
||||
|
||||
# 8. 估值动量(如果有PE、PB、PS数据)
|
||||
if 'pe' in data.columns and not data['pe'].isna().all():
|
||||
pe_change = data['pe'].pct_change().tail(5).mean()
|
||||
indicators['pe_momentum'] = pe_change if not np.isnan(pe_change) else 0
|
||||
else:
|
||||
indicators['pe_momentum'] = 0
|
||||
|
||||
if 'pb' in data.columns and not data['pb'].isna().all():
|
||||
pb_change = data['pb'].pct_change().tail(5).mean()
|
||||
indicators['pb_momentum'] = pb_change if not np.isnan(pb_change) else 0
|
||||
else:
|
||||
indicators['pb_momentum'] = 0
|
||||
|
||||
# 处理NaN值
|
||||
for key, value in indicators.items():
|
||||
if np.isnan(value) or np.isinf(value):
|
||||
indicators[key] = 0
|
||||
|
||||
return indicators
|
||||
|
||||
def _count_consecutive(self, condition_series):
|
||||
"""计算连续满足条件的天数"""
|
||||
if len(condition_series) == 0:
|
||||
return 0
|
||||
|
||||
count = 0
|
||||
for value in reversed(condition_series.tolist()):
|
||||
if value:
|
||||
count += 1
|
||||
else:
|
||||
break
|
||||
return count
|
||||
|
||||
def calculate_distance_factor(self, indicators_df):
|
||||
"""计算平均距离因子"""
|
||||
if len(indicators_df) < 2:
|
||||
print("股票数量不足,无法计算距离因子")
|
||||
return pd.DataFrame()
|
||||
|
||||
# 准备特征矩阵
|
||||
feature_columns = [col for col in indicators_df.columns if col != 'symbol']
|
||||
X = indicators_df[feature_columns].values
|
||||
|
||||
# 标准化
|
||||
scaler = StandardScaler()
|
||||
X_scaled = scaler.fit_transform(X)
|
||||
|
||||
# 计算距离矩阵
|
||||
distances = euclidean_distances(X_scaled)
|
||||
|
||||
# 计算每只股票的平均距离
|
||||
n_stocks = len(indicators_df)
|
||||
avg_distances = []
|
||||
|
||||
for i in range(n_stocks):
|
||||
# 排除自己与自己的距离(对角线元素)
|
||||
other_distances = np.concatenate([distances[i, :i], distances[i, i+1:]])
|
||||
avg_distance = np.mean(other_distances)
|
||||
avg_distances.append(avg_distance)
|
||||
|
||||
# 创建结果DataFrame
|
||||
result_df = pd.DataFrame({
|
||||
'symbol': indicators_df['symbol'],
|
||||
'avg_distance_factor': avg_distances
|
||||
})
|
||||
|
||||
# 按距离因子降序排列
|
||||
result_df = result_df.sort_values('avg_distance_factor', ascending=False)
|
||||
result_df['rank'] = range(1, len(result_df) + 1)
|
||||
|
||||
return result_df
|
||||
|
||||
def analyze_industry(self, industry_name=None, concept_name=None, days=20):
|
||||
"""分析指定行业或概念的平均距离因子"""
|
||||
print(f"开始分析{'行业' if industry_name else '概念'}: {industry_name or concept_name}")
|
||||
|
||||
# 1. 获取股票列表
|
||||
symbols = self.get_industry_stocks(industry_name, concept_name)
|
||||
if not symbols:
|
||||
return pd.DataFrame()
|
||||
|
||||
# 2. 获取历史数据
|
||||
stock_data = self.get_stock_data(symbols, days)
|
||||
if stock_data.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# 3. 计算技术指标
|
||||
print("计算技术指标...")
|
||||
indicators_df = self.calculate_technical_indicators(stock_data, days)
|
||||
if indicators_df.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
print(f"成功计算了 {len(indicators_df)} 只股票的技术指标")
|
||||
|
||||
# 4. 计算平均距离因子
|
||||
print("计算平均距离因子...")
|
||||
distance_df = self.calculate_distance_factor(indicators_df)
|
||||
|
||||
if not distance_df.empty:
|
||||
print(f"计算完成,共 {len(distance_df)} 只股票")
|
||||
print(distance_df.to_string(index=False))
|
||||
|
||||
return distance_df
|
||||
|
||||
def get_available_industries(self):
|
||||
"""获取可用的行业列表"""
|
||||
query = "SELECT DISTINCT bk_name FROM gp_hybk ORDER BY bk_name"
|
||||
try:
|
||||
df = pd.read_sql(query, self.engine)
|
||||
return df['bk_name'].tolist()
|
||||
except Exception as e:
|
||||
print(f"获取行业列表失败: {e}")
|
||||
return []
|
||||
|
||||
def get_available_concepts(self):
|
||||
"""获取可用的概念列表"""
|
||||
query = "SELECT DISTINCT bk_name FROM gp_gnbk ORDER BY bk_name"
|
||||
try:
|
||||
df = pd.read_sql(query, self.engine)
|
||||
return df['bk_name'].tolist()
|
||||
except Exception as e:
|
||||
print(f"获取概念列表失败: {e}")
|
||||
return []
|
||||
|
||||
def __del__(self):
|
||||
if hasattr(self, 'engine'):
|
||||
self.engine.dispose()
|
||||
|
||||
def main():
|
||||
"""主函数示例"""
|
||||
db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
|
||||
|
||||
# 创建分析器
|
||||
analyzer = AverageDistanceFactor(db_url)
|
||||
|
||||
# 示例1: 分析特定行业
|
||||
result = analyzer.analyze_industry(industry_name="证券")
|
||||
|
||||
# 示例2: 分析特定概念
|
||||
# result = analyzer.analyze_industry(concept_name="3D打印")
|
||||
|
||||
# 示例3: 查看可用的行业列表
|
||||
# industries = analyzer.get_available_industries()
|
||||
# print("可用行业列表(前10个):")
|
||||
# for industry in industries[:10]:
|
||||
# print(f" - {industry}")
|
||||
|
||||
# 示例4: 查看可用的概念列表
|
||||
# concepts = analyzer.get_available_concepts()
|
||||
# print("\n可用概念列表(前10个):")
|
||||
# for concept in concepts[:10]:
|
||||
# print(f" - {concept}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -10,6 +10,9 @@ import json
|
|||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.append(project_root)
|
||||
|
||||
# 导入代理管理器
|
||||
from src.scripts.ProxyIP import EnhancedProxyManager
|
||||
|
||||
# 读取雪球headers和Redis配置
|
||||
try:
|
||||
from src.scripts.config import XUEQIU_HEADERS
|
||||
|
@ -28,6 +31,9 @@ except ImportError:
|
|||
|
||||
REDIS_KEY = 'xq_stock_changes_latest' # 存放行情的主键
|
||||
|
||||
# 创建全局代理管理器实例
|
||||
proxy_manager = EnhancedProxyManager()
|
||||
|
||||
|
||||
def get_redis_conn():
|
||||
"""获取Redis连接"""
|
||||
|
@ -62,8 +68,9 @@ def fetch_and_store_stock_data(page_size=90):
|
|||
'type': stock_type
|
||||
}
|
||||
|
||||
# 初次请求以获取总页数
|
||||
response = requests.get(base_url, headers=headers, params=params)
|
||||
# 初次请求以获取总页数,使用代理
|
||||
response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
|
||||
# response = requests.get(base_url, headers=headers, params=params)
|
||||
if response.status_code != 200:
|
||||
print(f"请求 {stock_type} 数据失败,状态码:{response.status_code}")
|
||||
continue
|
||||
|
@ -74,10 +81,12 @@ def fetch_and_store_stock_data(page_size=90):
|
|||
|
||||
for page in range(1, total_pages + 1):
|
||||
params['page'] = page
|
||||
response = requests.get(base_url, headers=headers, params=params)
|
||||
# response = requests.get(base_url, headers=headers, params=params)
|
||||
response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
all_data.extend(data['data']['list'])
|
||||
print(f"成功采集第 {page}/{total_pages} 页数据")
|
||||
else:
|
||||
print(f"请求 {stock_type} 数据第 {page} 页失败,状态码:{response.status_code}")
|
||||
# 转换为 DataFrame
|
||||
|
@ -99,8 +108,12 @@ def fetch_and_store_stock_data(page_size=90):
|
|||
pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
|
||||
pipe.execute()
|
||||
print(f"成功将数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
|
||||
|
||||
# 返回DataFrame供其他脚本使用
|
||||
return df
|
||||
else:
|
||||
print("未获取到任何数据。")
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
def format_stock_code(stock_code):
|
||||
|
|
|
@ -0,0 +1,317 @@
|
|||
# coding:utf-8
|
||||
# 判断企业生命周期
|
||||
import pandas as pd
|
||||
import pymongo
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目根目录到路径
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.append(project_root)
|
||||
|
||||
# 导入配置
|
||||
try:
|
||||
from valuation_analysis.config import MONGO_CONFIG2
|
||||
except ImportError:
|
||||
import importlib.util
|
||||
config_path = os.path.join(project_root, 'valuation_analysis', 'config.py')
|
||||
spec = importlib.util.spec_from_file_location("config", config_path)
|
||||
config_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(config_module)
|
||||
MONGO_CONFIG2 = config_module.MONGO_CONFIG2
|
||||
|
||||
# 导入股票代码格式化工具
|
||||
try:
|
||||
from tools.stock_code_formatter import StockCodeFormatter
|
||||
except ImportError:
|
||||
import importlib.util
|
||||
formatter_path = os.path.join(os.path.dirname(project_root), 'tools', 'stock_code_formatter.py')
|
||||
spec = importlib.util.spec_from_file_location("stock_code_formatter", formatter_path)
|
||||
formatter_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(formatter_module)
|
||||
StockCodeFormatter = formatter_module.StockCodeFormatter
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CompanyLifecycleFactor:
|
||||
"""企业生命周期阶段因子计算器"""
|
||||
|
||||
def __init__(self):
|
||||
"""初始化"""
|
||||
self.mongo_client = None
|
||||
self.db = None
|
||||
self.collection = None
|
||||
self.connect_mongodb()
|
||||
|
||||
# 初始化股票代码格式化工具
|
||||
self.stock_formatter = StockCodeFormatter()
|
||||
|
||||
# 定义企业生命周期阶段
|
||||
self.lifecycle_stages = {
|
||||
1: "引入期",
|
||||
2: "成长期",
|
||||
3: "成熟期",
|
||||
4: "震荡期",
|
||||
5: "衰退期"
|
||||
}
|
||||
|
||||
# 现金流组合模式映射到生命周期阶段
|
||||
self.cashflow_pattern_mapping = {
|
||||
('负', '负', '正'): 1, # 引入期
|
||||
('正', '负', '正'): 2, # 成长期
|
||||
('正', '负', '负'): 3, # 成熟期
|
||||
('负', '正', '正'): 4, # 震荡期
|
||||
('正', '正', '负'): 5, # 衰退期
|
||||
('负', '正', '负'): 4, # 震荡期(变种)
|
||||
('负', '负', '负'): 4, # 震荡期(困难期)
|
||||
('正', '正', '正'): 2, # 成长期(变种,现金充足)
|
||||
}
|
||||
|
||||
def connect_mongodb(self):
|
||||
"""连接MongoDB数据库"""
|
||||
try:
|
||||
self.mongo_client = pymongo.MongoClient(
|
||||
host=MONGO_CONFIG2['host'],
|
||||
port=MONGO_CONFIG2['port'],
|
||||
username=MONGO_CONFIG2['username'],
|
||||
password=MONGO_CONFIG2['password']
|
||||
)
|
||||
self.db = self.mongo_client[MONGO_CONFIG2['db']]
|
||||
self.collection = self.db['eastmoney_financial_data_v2']
|
||||
|
||||
# 测试连接
|
||||
self.mongo_client.admin.command('ping')
|
||||
logger.info("MongoDB连接成功")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"MongoDB连接失败: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
|
||||
|
||||
def get_annual_financial_data(self, stock_code: str, year: int) -> Optional[Dict]:
|
||||
"""
|
||||
获取指定股票指定年份的年报数据
|
||||
|
||||
Args:
|
||||
stock_code: 股票代码,支持多种格式 (300661.SZ, 300661, SZ300661)
|
||||
year: 年份,如2024
|
||||
|
||||
Returns:
|
||||
Dict: 年报财务数据,如果没有找到则返回None
|
||||
"""
|
||||
try:
|
||||
# 标准化股票代码格式
|
||||
normalized_code = self.stock_formatter.to_dot_format(stock_code)
|
||||
# 构建年报日期(12-31结尾)
|
||||
report_date = f"{year}-12-31"
|
||||
|
||||
# 查询指定股票指定年份的年报数据
|
||||
query = {
|
||||
"stock_code": normalized_code,
|
||||
"report_date": report_date
|
||||
}
|
||||
|
||||
annual_data = self.collection.find_one(query)
|
||||
|
||||
if annual_data:
|
||||
logger.info(f"找到年报数据: {stock_code} (标准化后: {normalized_code}) - {report_date}")
|
||||
return annual_data
|
||||
else:
|
||||
logger.warning(f"未找到年报数据: {stock_code} (标准化后: {normalized_code}) - {report_date}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取年报数据失败: {stock_code} - {year} - {str(e)}")
|
||||
return None
|
||||
|
||||
def extract_cashflow_values(self, financial_data: Dict) -> tuple:
|
||||
"""
|
||||
从财务数据中提取现金流量表的三个关键指标
|
||||
|
||||
Returns:
|
||||
Tuple: (经营现金流净额, 投资现金流净额, 筹资现金流净额)
|
||||
"""
|
||||
try:
|
||||
cash_flow_statement = financial_data.get('cash_flow_statement', {})
|
||||
|
||||
# 提取三个现金流指标
|
||||
operating_cashflow = cash_flow_statement.get('NETCASH_OPERATE')
|
||||
investing_cashflow = cash_flow_statement.get('NETCASH_INVEST')
|
||||
financing_cashflow = cash_flow_statement.get('NETCASH_FINANCE')
|
||||
|
||||
# 转换为浮点数
|
||||
def safe_float_convert(value):
|
||||
if value is None or value == '':
|
||||
return None
|
||||
try:
|
||||
return float(value)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
operating_cashflow = safe_float_convert(operating_cashflow)
|
||||
investing_cashflow = safe_float_convert(investing_cashflow)
|
||||
financing_cashflow = safe_float_convert(financing_cashflow)
|
||||
|
||||
return operating_cashflow, investing_cashflow, financing_cashflow
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"提取现金流数据失败: {str(e)}")
|
||||
return None, None, None
|
||||
|
||||
def classify_cashflow_pattern(self, operating_cf: float, investing_cf: float, financing_cf: float) -> tuple:
|
||||
"""将现金流数值分类为正负"""
|
||||
def classify_value(value):
|
||||
if value is None:
|
||||
return "未知"
|
||||
return "正" if value >= 0 else "负"
|
||||
|
||||
operating_pattern = classify_value(operating_cf)
|
||||
investing_pattern = classify_value(investing_cf)
|
||||
financing_pattern = classify_value(financing_cf)
|
||||
|
||||
return operating_pattern, investing_pattern, financing_pattern
|
||||
|
||||
def determine_lifecycle_stage(self, cashflow_pattern: tuple) -> int:
|
||||
"""
|
||||
根据现金流模式确定企业生命周期阶段
|
||||
|
||||
Returns:
|
||||
int: 阶段ID (1-5),0表示未知
|
||||
"""
|
||||
stage_id = self.cashflow_pattern_mapping.get(cashflow_pattern, 0)
|
||||
return stage_id
|
||||
|
||||
def calculate_lifecycle_factor(self, stock_code: str, year: int) -> Dict:
|
||||
"""
|
||||
计算指定股票指定年份的企业生命周期因子
|
||||
|
||||
Args:
|
||||
stock_code: 股票代码,支持多种格式 (300661.SZ, 300661, SZ300661)
|
||||
year: 年份
|
||||
|
||||
Returns:
|
||||
Dict: 生命周期因子结果
|
||||
"""
|
||||
try:
|
||||
# 获取年报数据
|
||||
financial_data = self.get_annual_financial_data(stock_code, year)
|
||||
if not financial_data:
|
||||
return {
|
||||
'stock_code': stock_code,
|
||||
'year': year,
|
||||
'stage_id': 0,
|
||||
'stage_name': '数据缺失'
|
||||
}
|
||||
|
||||
# 提取现金流数据
|
||||
operating_cf, investing_cf, financing_cf = self.extract_cashflow_values(financial_data)
|
||||
|
||||
if None in [operating_cf, investing_cf, financing_cf]:
|
||||
return {
|
||||
'stock_code': stock_code,
|
||||
'year': year,
|
||||
'stage_id': 0,
|
||||
'stage_name': '数据不完整'
|
||||
}
|
||||
|
||||
# 分类现金流模式
|
||||
cashflow_pattern = self.classify_cashflow_pattern(operating_cf, investing_cf, financing_cf)
|
||||
|
||||
# 确定生命周期阶段
|
||||
stage_id = self.determine_lifecycle_stage(cashflow_pattern)
|
||||
stage_name = self.lifecycle_stages.get(stage_id, '未知阶段')
|
||||
|
||||
return {
|
||||
'stock_code': stock_code,
|
||||
'year': year,
|
||||
'stage_id': stage_id,
|
||||
'stage_name': stage_name
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"计算生命周期因子失败: {stock_code} - {year} - {str(e)}")
|
||||
return {
|
||||
'stock_code': stock_code,
|
||||
'year': year,
|
||||
'stage_id': 0,
|
||||
'stage_name': '计算失败'
|
||||
}
|
||||
|
||||
def batch_calculate_lifecycle_factors(self, stock_codes: List[str], year: int) -> pd.DataFrame:
|
||||
"""
|
||||
批量计算多只股票指定年份的企业生命周期因子
|
||||
|
||||
Args:
|
||||
stock_codes: 股票代码列表
|
||||
year: 年份
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: 包含所有股票生命周期因子的DataFrame
|
||||
"""
|
||||
results = []
|
||||
total_stocks = len(stock_codes)
|
||||
|
||||
logger.info(f"开始批量计算 {total_stocks} 只股票 {year} 年的企业生命周期因子")
|
||||
|
||||
for i, stock_code in enumerate(stock_codes, 1):
|
||||
# 显示进度
|
||||
if i % 100 == 0 or i == total_stocks:
|
||||
progress = (i / total_stocks) * 100
|
||||
logger.info(f"进度: [{i}/{total_stocks}] ({progress:.1f}%)")
|
||||
|
||||
result = self.calculate_lifecycle_factor(stock_code, year)
|
||||
results.append(result)
|
||||
|
||||
# 转换为DataFrame
|
||||
df = pd.DataFrame(results)
|
||||
|
||||
# 统计各阶段分布
|
||||
stage_distribution = df['stage_name'].value_counts()
|
||||
logger.info(f"{year}年企业生命周期阶段分布:")
|
||||
for stage, count in stage_distribution.items():
|
||||
percentage = (count / len(df)) * 100
|
||||
logger.info(f" {stage}: {count} 只 ({percentage:.1f}%)")
|
||||
|
||||
return df
|
||||
|
||||
def __del__(self):
|
||||
"""关闭数据库连接"""
|
||||
if hasattr(self, 'mongo_client') and self.mongo_client:
|
||||
self.mongo_client.close()
|
||||
|
||||
def main():
|
||||
"""主函数示例"""
|
||||
try:
|
||||
# 创建生命周期因子计算器
|
||||
lifecycle_calculator = CompanyLifecycleFactor()
|
||||
|
||||
# 示例1: 计算单只股票2024年的生命周期阶段
|
||||
print("=== 单只股票分析示例 ===")
|
||||
result = lifecycle_calculator.calculate_lifecycle_factor('600519.SH', 2024)
|
||||
print(f"股票: {result['stock_code']}")
|
||||
print(f"年份: {result['year']}")
|
||||
print(f"生命周期阶段: {result['stage_name']}")
|
||||
|
||||
# 示例2: 批量分析
|
||||
print("\n=== 批量分析示例 ===")
|
||||
test_stocks = ['300879.SZ', '301123.SZ', '300884.SZ', '300918.SZ', '600908.SH']
|
||||
df_results = lifecycle_calculator.batch_calculate_lifecycle_factors(test_stocks, 2024)
|
||||
|
||||
print("\n2024年生命周期阶段结果:")
|
||||
print(df_results[['stock_code', 'stage_name']].to_string(index=False))
|
||||
|
||||
# 保存结果
|
||||
# df_results.to_csv(f"company_lifecycle_{2024}.csv", index=False, encoding='utf-8-sig')
|
||||
# print(f"\n结果已保存到: company_lifecycle_{2024}.csv")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"程序执行失败: {str(e)}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -127,7 +127,7 @@ class FinancialDataCollectorV2:
|
|||
List[str]: 股票代码列表
|
||||
"""
|
||||
try:
|
||||
query = "SELECT DISTINCT gp_code_two FROM gp_code_all WHERE gp_code_two IS NOT NULL AND gp_code_two != ''"
|
||||
query = "SELECT DISTINCT gp_code_two FROM gp_code_all_copy1 WHERE gp_code_two IS NOT NULL AND gp_code_two != ''"
|
||||
|
||||
with self.mysql_engine.connect() as conn:
|
||||
df = pd.read_sql(text(query), conn)
|
||||
|
@ -274,46 +274,164 @@ class FinancialDataCollectorV2:
|
|||
return []
|
||||
|
||||
def fetch_balance_sheet(self, stock_code: str, periods: int = 21) -> List[Dict]:
|
||||
"""获取资产负债表数据"""
|
||||
"""获取资产负债表数据 - 支持API自动切换 (G→B→S→I)"""
|
||||
date_filter = self.build_date_filter(stock_code, periods)
|
||||
url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GBALANCE&sty=F10_FINANCE_GBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=012481899342117453'
|
||||
|
||||
# 先尝试G系列API
|
||||
g_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GBALANCE&sty=F10_FINANCE_GBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=012481899342117453'
|
||||
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=30)
|
||||
response = requests.get(g_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if 'result' in data and 'data' in data['result']:
|
||||
logger.info(f"成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
# 检查G系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"G系列API返回空数据,尝试B系列API - {stock_code}")
|
||||
|
||||
# 切换到B系列API
|
||||
b_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_BBALANCE&sty=F10_FINANCE_BBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
|
||||
|
||||
response = requests.get(b_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# 检查B系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"B系列API也返回空数据,尝试S系列API - {stock_code}")
|
||||
|
||||
# 切换到S系列API (证券公司)
|
||||
s_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_SBALANCE&sty=F10_FINANCE_SBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
|
||||
|
||||
response = requests.get(s_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# 检查S系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"S系列API也返回空数据,尝试I系列API - {stock_code}")
|
||||
|
||||
# 切换到I系列API
|
||||
i_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_IBALANCE&sty=F10_FINANCE_IBALANCE&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=08691713756184818'
|
||||
|
||||
response = requests.get(i_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"I系列API成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"I系列API也无法获取资产负债表数据 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
# S系列API成功
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"S系列API成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"S系列API资产负债表数据格式异常 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
# B系列API成功
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"B系列API成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"B系列API资产负债表数据格式异常 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
logger.warning("资产负债表数据格式异常")
|
||||
return []
|
||||
# G系列API成功
|
||||
if 'result' in data and 'data' in data['result']:
|
||||
logger.info(f"G系列API成功获取资产负债表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning("G系列API资产负债表数据格式异常")
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取资产负债表失败: {str(e)}")
|
||||
return []
|
||||
|
||||
def fetch_cash_flow_statement(self, stock_code: str, periods: int = 21) -> List[Dict]:
|
||||
"""获取现金流量表数据"""
|
||||
"""获取现金流量表数据 - 支持API自动切换 (G→B→S→I)"""
|
||||
date_filter = self.build_date_filter(stock_code, periods)
|
||||
url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GCASHFLOW&sty=APP_F10_GCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=04664977872701077'
|
||||
|
||||
# 先尝试G系列API
|
||||
g_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_GCASHFLOW&sty=APP_F10_GCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=04664977872701077'
|
||||
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=30)
|
||||
response = requests.get(g_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if 'result' in data and 'data' in data['result']:
|
||||
logger.info(f"成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
# 检查G系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"G系列API返回空数据,尝试B系列API - {stock_code}")
|
||||
|
||||
# 切换到B系列API
|
||||
b_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_BCASHFLOW&sty=APP_F10_BCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
|
||||
|
||||
response = requests.get(b_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# 检查B系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"B系列API也返回空数据,尝试S系列API - {stock_code}")
|
||||
|
||||
# 切换到S系列API (证券公司)
|
||||
s_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_SCASHFLOW&sty=APP_F10_SCASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
|
||||
|
||||
response = requests.get(s_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# 检查S系列API是否返回有效数据
|
||||
if data.get('code') == 9201 or data.get('success') == False or not data.get('result') or not data.get('result', {}).get('data'):
|
||||
logger.info(f"S系列API也返回空数据,尝试I系列API - {stock_code}")
|
||||
|
||||
# 切换到I系列API
|
||||
i_url = f'https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_F10_FINANCE_ICASHFLOW&sty=APP_F10_ICASHFLOW&filter={date_filter}&p=1&ps={periods}&sr=-1&st=REPORT_DATE&source=HSF10&client=PC&v=03391901368547232'
|
||||
|
||||
response = requests.get(i_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"I系列API成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"I系列API也无法获取现金流量表数据 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
# S系列API成功
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"S系列API成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"S系列API现金流量表数据格式异常 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
# B系列API成功
|
||||
if 'result' in data and data['result'] and 'data' in data['result']:
|
||||
logger.info(f"B系列API成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning(f"B系列API现金流量表数据格式异常 - {stock_code}")
|
||||
return []
|
||||
else:
|
||||
logger.warning("现金流量表数据格式异常")
|
||||
return []
|
||||
# G系列API成功
|
||||
if 'result' in data and 'data' in data['result']:
|
||||
logger.info(f"G系列API成功获取现金流量表数据,共 {len(data['result']['data'])} 个报告期")
|
||||
return data['result']['data']
|
||||
else:
|
||||
logger.warning("G系列API现金流量表数据格式异常")
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取现金流量表失败: {str(e)}")
|
||||
|
@ -421,9 +539,124 @@ class FinancialDataCollectorV2:
|
|||
logger.error(f"保存数据到MongoDB失败: {str(e)}")
|
||||
return False
|
||||
|
||||
def check_missing_data(self, stock_code: str) -> List[str]:
|
||||
"""
|
||||
检查MongoDB中哪些报告期的资产负债表或现金流量表数据为空
|
||||
|
||||
Args:
|
||||
stock_code: 股票代码
|
||||
|
||||
Returns:
|
||||
List[str]: 需要更新的报告期列表
|
||||
"""
|
||||
try:
|
||||
# 查询该股票的所有记录
|
||||
records = list(self.collection.find({'stock_code': stock_code}))
|
||||
|
||||
missing_periods = []
|
||||
|
||||
for record in records:
|
||||
balance_empty = not record.get('balance_sheet') or record.get('balance_sheet') == {}
|
||||
cash_empty = not record.get('cash_flow_statement') or record.get('cash_flow_statement') == {}
|
||||
|
||||
# 如果资产负债表或现金流量表为空,则需要更新
|
||||
if balance_empty or cash_empty:
|
||||
missing_periods.append(record.get('report_date'))
|
||||
logger.debug(f"发现需要更新的数据: {stock_code} - {record.get('report_date')} (资产负债表空: {balance_empty}, 现金流量表空: {cash_empty})")
|
||||
|
||||
if missing_periods:
|
||||
logger.info(f"股票 {stock_code} 有 {len(missing_periods)} 个报告期需要更新数据")
|
||||
else:
|
||||
logger.info(f"股票 {stock_code} 的数据完整,无需更新")
|
||||
|
||||
return missing_periods
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"检查缺失数据失败: {str(e)}")
|
||||
return []
|
||||
|
||||
def update_missing_financial_data(self, stock_code: str, missing_periods: List[str]) -> bool:
|
||||
"""
|
||||
更新缺失的财务数据(只更新资产负债表和现金流量表)
|
||||
|
||||
Args:
|
||||
stock_code: 股票代码
|
||||
missing_periods: 需要更新的报告期列表
|
||||
|
||||
Returns:
|
||||
bool: 是否更新成功
|
||||
"""
|
||||
try:
|
||||
if not missing_periods:
|
||||
return True
|
||||
|
||||
logger.info(f"开始更新股票 {stock_code} 缺失的财务数据")
|
||||
|
||||
# 获取资产负债表和现金流量表数据
|
||||
balance_data = self.fetch_balance_sheet(stock_code, periods=21)
|
||||
time.sleep(1)
|
||||
|
||||
cash_data = self.fetch_cash_flow_statement(stock_code, periods=21)
|
||||
time.sleep(1)
|
||||
|
||||
# 创建按报告日期索引的字典
|
||||
balance_dict = {item['REPORT_DATE'][:10]: item for item in balance_data if item.get('REPORT_DATE')}
|
||||
cash_dict = {item['REPORT_DATE'][:10]: item for item in cash_data if item.get('REPORT_DATE')}
|
||||
|
||||
updated_count = 0
|
||||
|
||||
for report_date in missing_periods:
|
||||
try:
|
||||
# 查找当前记录
|
||||
current_record = self.collection.find_one({
|
||||
'stock_code': stock_code,
|
||||
'report_date': report_date
|
||||
})
|
||||
|
||||
if not current_record:
|
||||
logger.warning(f"未找到记录: {stock_code} - {report_date}")
|
||||
continue
|
||||
|
||||
# 准备更新的字段
|
||||
update_fields = {}
|
||||
|
||||
# 检查是否需要更新资产负债表
|
||||
balance_empty = not current_record.get('balance_sheet') or current_record.get('balance_sheet') == {}
|
||||
if balance_empty and report_date in balance_dict:
|
||||
update_fields['balance_sheet'] = balance_dict[report_date]
|
||||
logger.debug(f"更新资产负债表: {stock_code} - {report_date}")
|
||||
|
||||
# 检查是否需要更新现金流量表
|
||||
cash_empty = not current_record.get('cash_flow_statement') or current_record.get('cash_flow_statement') == {}
|
||||
if cash_empty and report_date in cash_dict:
|
||||
update_fields['cash_flow_statement'] = cash_dict[report_date]
|
||||
logger.debug(f"更新现金流量表: {stock_code} - {report_date}")
|
||||
|
||||
# 如果有字段需要更新
|
||||
if update_fields:
|
||||
update_fields['collect_time'] = datetime.datetime.now() # 更新采集时间
|
||||
|
||||
self.collection.update_one(
|
||||
{'stock_code': stock_code, 'report_date': report_date},
|
||||
{'$set': update_fields}
|
||||
)
|
||||
updated_count += 1
|
||||
logger.info(f"成功更新: {stock_code} - {report_date}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"更新记录失败: {stock_code} - {report_date} - {str(e)}")
|
||||
continue
|
||||
|
||||
logger.info(f"股票 {stock_code} 更新完成,共更新 {updated_count} 个报告期")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"更新缺失财务数据失败: {str(e)}")
|
||||
return False
|
||||
|
||||
def collect_financial_data(self, stock_code: str, periods: int = 21) -> bool:
|
||||
"""
|
||||
采集单只股票的财务数据
|
||||
采集单只股票的财务数据 - 增量更新模式
|
||||
|
||||
Args:
|
||||
stock_code: 股票代码,如'300750.SZ'
|
||||
|
@ -433,37 +666,20 @@ class FinancialDataCollectorV2:
|
|||
bool: 是否采集成功
|
||||
"""
|
||||
try:
|
||||
logger.info(f"开始采集股票 {stock_code} 的财务数据({periods}个报告期)")
|
||||
logger.info(f"开始检查股票 {stock_code} 的财务数据")
|
||||
|
||||
# 获取三张财务报表数据
|
||||
profit_data = self.fetch_profit_statement(stock_code, periods)
|
||||
time.sleep(1) # 避免请求过于频繁
|
||||
# 检查哪些报告期的数据缺失
|
||||
missing_periods = self.check_missing_data(stock_code)
|
||||
|
||||
balance_data = self.fetch_balance_sheet(stock_code, periods)
|
||||
time.sleep(1)
|
||||
if not missing_periods:
|
||||
logger.info(f"股票 {stock_code} 数据完整,跳过")
|
||||
return True
|
||||
|
||||
cash_data = self.fetch_cash_flow_statement(stock_code, periods)
|
||||
time.sleep(1)
|
||||
|
||||
# 检查至少有一张表有数据
|
||||
if not any([profit_data, balance_data, cash_data]):
|
||||
logger.error(f"股票 {stock_code} 没有获取到任何财务数据")
|
||||
return False
|
||||
|
||||
# 处理财务数据
|
||||
financial_data_list = self.process_financial_data(
|
||||
stock_code, profit_data, balance_data, cash_data
|
||||
)
|
||||
|
||||
if not financial_data_list:
|
||||
logger.error(f"股票 {stock_code} 的财务数据处理失败")
|
||||
return False
|
||||
|
||||
# 保存到MongoDB
|
||||
success = self.save_to_mongodb(financial_data_list)
|
||||
# 更新缺失的数据
|
||||
success = self.update_missing_financial_data(stock_code, missing_periods)
|
||||
|
||||
if success:
|
||||
logger.info(f"股票 {stock_code} 的财务数据采集完成")
|
||||
logger.info(f"股票 {stock_code} 的财务数据更新完成")
|
||||
|
||||
return success
|
||||
|
||||
|
@ -473,19 +689,19 @@ class FinancialDataCollectorV2:
|
|||
|
||||
def batch_collect_financial_data(self, stock_codes: List[str], periods: int = 21) -> Dict:
|
||||
"""
|
||||
批量采集多只股票的财务数据
|
||||
批量更新多只股票的缺失财务数据
|
||||
|
||||
Args:
|
||||
stock_codes: 股票代码列表
|
||||
periods: 获取多少个报告期,默认21个季度
|
||||
|
||||
Returns:
|
||||
Dict: 采集结果统计
|
||||
Dict: 更新结果统计
|
||||
"""
|
||||
results = {'success': 0, 'failed': 0, 'failed_stocks': []}
|
||||
results = {'success': 0, 'failed': 0, 'failed_stocks': [], 'skipped': 0}
|
||||
total_stocks = len(stock_codes)
|
||||
|
||||
logger.info(f"开始批量采集 {total_stocks} 只股票的财务数据")
|
||||
logger.info(f"开始批量检查和更新 {total_stocks} 只股票的财务数据")
|
||||
|
||||
for index, stock_code in enumerate(stock_codes, 1):
|
||||
try:
|
||||
|
@ -496,11 +712,11 @@ class FinancialDataCollectorV2:
|
|||
success = self.collect_financial_data(stock_code, periods)
|
||||
if success:
|
||||
results['success'] += 1
|
||||
logger.info(f"SUCCESS [{index}/{total_stocks}] {stock_code} 采集成功")
|
||||
logger.info(f"SUCCESS [{index}/{total_stocks}] {stock_code} 处理成功")
|
||||
else:
|
||||
results['failed'] += 1
|
||||
results['failed_stocks'].append(stock_code)
|
||||
logger.warning(f"FAILED [{index}/{total_stocks}] {stock_code} 采集失败")
|
||||
logger.warning(f"FAILED [{index}/{total_stocks}] {stock_code} 处理失败")
|
||||
|
||||
# 每只股票之间暂停一下,避免请求过于频繁
|
||||
time.sleep(2)
|
||||
|
@ -519,7 +735,7 @@ class FinancialDataCollectorV2:
|
|||
continue
|
||||
|
||||
success_rate = (results['success'] / total_stocks) * 100
|
||||
logger.info(f"批量采集完成: 成功{results['success']}只,失败{results['failed']}只,成功率: {success_rate:.2f}%")
|
||||
logger.info(f"批量更新完成: 成功{results['success']}只,失败{results['failed']}只,成功率: {success_rate:.2f}%")
|
||||
|
||||
if results['failed_stocks']:
|
||||
logger.info(f"失败的股票数量: {len(results['failed_stocks'])}")
|
||||
|
@ -541,7 +757,7 @@ class FinancialDataCollectorV2:
|
|||
|
||||
|
||||
def main():
|
||||
"""主函数 - 批量采集所有股票的财务数据"""
|
||||
"""主函数 - 批量更新所有股票的缺失财务数据"""
|
||||
collector = FinancialDataCollectorV2()
|
||||
|
||||
try:
|
||||
|
@ -555,30 +771,32 @@ def main():
|
|||
|
||||
logger.info(f"从数据库获取到 {len(stock_codes)} 只股票")
|
||||
|
||||
# 可以选择采集所有股票或者部分股票进行测试
|
||||
# 可以选择处理所有股票或者部分股票进行测试
|
||||
# 如果要测试,可以取前几只股票
|
||||
# 测试模式:只采集前10只股票
|
||||
TEST_MODE = False # 设置为False将采集所有股票
|
||||
# 测试模式:只处理前10只股票
|
||||
TEST_MODE = False # 设置为False将处理所有股票
|
||||
|
||||
if TEST_MODE:
|
||||
test_count = min(10, len(stock_codes)) # 最多取10只股票测试
|
||||
stock_codes = stock_codes[:test_count]
|
||||
logger.info(f"TEST MODE: 仅采集前 {test_count} 只股票")
|
||||
logger.info(f"TEST MODE: 仅处理前 {test_count} 只股票")
|
||||
else:
|
||||
logger.info(f"PRODUCTION MODE: 将采集全部 {len(stock_codes)} 只股票")
|
||||
logger.info(f"PRODUCTION MODE: 将处理全部 {len(stock_codes)} 只股票")
|
||||
|
||||
logger.info(f"开始批量采集 {len(stock_codes)} 只股票的财务数据")
|
||||
logger.info(f"开始批量检查和更新 {len(stock_codes)} 只股票的财务数据")
|
||||
logger.info("注意: 本次运行为增量更新模式,只会更新缺失的资产负债表和现金流量表数据")
|
||||
|
||||
# 批量采集
|
||||
# 批量更新
|
||||
results = collector.batch_collect_financial_data(stock_codes, periods=21)
|
||||
|
||||
# 输出最终结果
|
||||
print(f"\n{'='*50}")
|
||||
print(f"批量采集完成统计")
|
||||
print(f"批量更新完成统计")
|
||||
print(f"{'='*50}")
|
||||
print(f"SUCCESS 成功采集: {results['success']} 只股票")
|
||||
print(f"FAILED 采集失败: {results['failed']} 只股票")
|
||||
print(f"SUCCESS 成功处理: {results['success']} 只股票")
|
||||
print(f"FAILED 处理失败: {results['failed']} 只股票")
|
||||
print(f"SUCCESS RATE 成功率: {(results['success'] / len(stock_codes) * 100):.2f}%")
|
||||
print(f"\n说明: 成功处理包括数据完整(无需更新)和成功更新缺失数据的股票")
|
||||
|
||||
if results['failed_stocks']:
|
||||
print(f"\n失败的股票列表:")
|
||||
|
@ -593,7 +811,7 @@ def main():
|
|||
logger.info("用户中断程序执行")
|
||||
print("\n警告: 程序被用户中断")
|
||||
except Exception as e:
|
||||
logger.error(f"采集过程中出现错误: {str(e)}")
|
||||
logger.error(f"更新过程中出现错误: {str(e)}")
|
||||
print(f"\n错误: 程序执行出错: {str(e)}")
|
||||
finally:
|
||||
collector.close_connection()
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,197 @@
|
|||
import requests
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import os
|
||||
import redis
|
||||
import json
|
||||
|
||||
# 添加项目根目录到路径,便于导入scripts.config
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.append(project_root)
|
||||
|
||||
# 导入代理管理器
|
||||
from src.scripts.ProxyIP import EnhancedProxyManager
|
||||
|
||||
# 读取雪球headers和Redis配置
|
||||
try:
|
||||
from src.scripts.config import XUEQIU_HEADERS
|
||||
from src.valuation_analysis.config import REDIS_CONFIG
|
||||
except ImportError:
|
||||
XUEQIU_HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0',
|
||||
'Cookie': '', # 需要填写雪球cookie
|
||||
}
|
||||
REDIS_CONFIG = {
|
||||
'host': 'localhost',
|
||||
'port': 6379,
|
||||
'db': 0,
|
||||
'password': None
|
||||
}
|
||||
|
||||
REDIS_KEY = 'xq_hk_stock_changes_latest' # 存放港股行情的主键
|
||||
|
||||
# 创建全局代理管理器实例
|
||||
proxy_manager = EnhancedProxyManager()
|
||||
|
||||
|
||||
def get_redis_conn():
    """获取Redis连接(连接池复用;decode_responses=True 使返回值为 str)"""
    pool_kwargs = {
        'host': REDIS_CONFIG['host'],
        'port': REDIS_CONFIG['port'],
        'db': REDIS_CONFIG.get('db', 0),
        'password': REDIS_CONFIG.get('password', None),
        'decode_responses': True,
    }
    connection_pool = redis.ConnectionPool(**pool_kwargs)
    return redis.Redis(connection_pool=connection_pool)
|
||||
|
||||
|
||||
def fetch_and_store_hk_stock_data(page_size=90):
    """
    批量采集雪球港股所有股票的最新行情数据,并保存到Redis。

    修复点:
    1. 总页数改为向上取整——原来的 `count // page_size + 1`
       在总数恰好整除时会多请求一页空数据;
    2. 首次请求返回的第一页数据直接复用,不再重复抓取第1页;
    3. 清空旧数据的 delete 放进 pipeline,与写入一次性提交,
       缩小 key 为空的中间窗口。

    :param page_size: 每页采集数量
    """
    base_url = 'https://stock.xueqiu.com/v5/stock/screener/quote/list.json'
    headers = XUEQIU_HEADERS

    # 使用港股API参数
    params = {
        'page': 1,
        'size': page_size,
        'order': 'desc',
        'order_by': 'dividend_yield',
        'market': 'HK',  # 港股市场
        'type': 'hk'  # 港股类型
    }

    # 初次请求以获取总数(同时拿到第一页数据),使用代理
    try:
        response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
        if response.status_code != 200:
            print(f"请求港股数据失败,状态码:{response.status_code}")
            return
    except Exception as e:
        print(f"请求港股数据时发生异常:{e}")
        return

    data = response.json()
    total_count = data['data']['count']
    # 向上取整的整数写法,等价于 math.ceil(total_count / page_size)
    total_pages = (total_count + page_size - 1) // page_size

    # 第一页数据来自初次请求,避免重复抓取
    all_data = list(data['data']['list'])

    print(f"开始采集港股数据,共 {total_pages} 页,{total_count} 条记录")
    if total_pages >= 1:
        print(f"成功采集港股第 1/{total_pages} 页数据")

    # 从第二页继续采集剩余页面
    for page in range(2, total_pages + 1):
        params['page'] = page
        try:
            response = proxy_manager.request_with_proxy('get', base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                all_data.extend(data['data']['list'])
                print(f"成功采集港股第 {page}/{total_pages} 页数据")
            else:
                print(f"请求港股数据第 {page} 页失败,状态码:{response.status_code}")
        except Exception as e:
            print(f"请求港股数据第 {page} 页时发生异常:{e}")
            continue

    # 转换为 DataFrame
    df = pd.DataFrame(all_data)

    if not df.empty:
        df['fetch_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # 存入Redis,使用hash结构,key为symbol,value为json字符串
        r = get_redis_conn()
        pipe = r.pipeline()
        # 清空旧数据与写入在同一个pipeline中提交
        pipe.delete(REDIS_KEY)
        for _, row in df.iterrows():
            symbol = row.get('symbol')
            if not symbol:
                continue
            # 只保留必要字段,也可直接存row.to_dict()
            value = row.to_dict()
            pipe.hset(REDIS_KEY, symbol, json.dumps(value, ensure_ascii=False))
        pipe.execute()
        print(f"成功将港股数据写入Redis哈希 {REDIS_KEY},共{len(df)}条记录。")
    else:
        print("未获取到任何港股数据。")
|
||||
|
||||
|
||||
def format_hk_stock_code(stock_code):
    """
    统一港股代码格式,支持0700.HK、HK0700等写法。
    返回 (雪球格式, Redis存储格式) 二元组,两者当前相同(如 '0700.HK')。
    """
    normalized = stock_code.upper()

    if '.HK' in normalized:
        # 已经是带后缀的格式,直接使用
        unified = normalized
    elif normalized.startswith('HK'):
        # HK0700 -> 0700.HK
        unified = normalized[2:] + '.HK'
    else:
        # 假设是纯数字,补上.HK后缀
        unified = normalized + '.HK'

    return unified, unified
|
||||
|
||||
|
||||
def get_hk_stock_realtime_info_from_redis(stock_code):
    """
    根据港股代码从Redis查询实时行情,并封装为指定结构。

    修复点:原实现先把所有字段初始化为None再逐个赋值,且末尾的
    "兼容部分字段缺失"分支是死代码(maxPrice/minPrice在前面已经
    按同样条件赋过值,该分支不可能改变结果),这里直接构建结果字典。

    :param stock_code: 支持0700.HK、HK0700等格式
    :return: dict(指定结构)或 None(未找到/解析失败)
    """
    _, redis_code = format_hk_stock_code(stock_code)
    r = get_redis_conn()
    value = r.hget(REDIS_KEY, redis_code)
    if not value:
        return None
    try:
        data = json.loads(value)
    except Exception:
        return None

    # 直接按目标结构映射;high/low 缺失时回退到52周高低价
    return {
        "code": data.get("symbol"),
        "crawlDate": data.get("fetch_time"),
        "marketValue": data.get("market_capital"),
        "maxPrice": data.get("high") if "high" in data else data.get("high52w"),
        "minPrice": data.get("low") if "low" in data else data.get("low52w"),
        "nowPrice": data.get("current"),
        "pbRate": data.get("pb"),
        "rangeRiseAndFall": data.get("percent"),
        "shortName": data.get("name"),
        "todayStartPrice": data.get("open"),
        "ttm": data.get("pe_ttm"),
        "turnoverRate": data.get("turnover_rate"),
        "yesterdayEndPrice": data.get("last_close") if "last_close" in data else data.get("pre_close"),
    }
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
fetch_and_store_hk_stock_data()
|
|
@ -0,0 +1,306 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
供应商客户占比数据更新器
|
||||
用于将前五供应商占比和前五客户占比数据更新到MongoDB的eastmoney_financial_data_v2集合中
|
||||
这个脚本要在财务脚本之后执行
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import pandas as pd
|
||||
import pymongo
|
||||
import datetime
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目根路径到Python路径
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
sys.path.append(str(project_root))
|
||||
|
||||
# 导入配置
|
||||
from src.valuation_analysis.config import MONGO_CONFIG2
|
||||
|
||||
# 设置日志
|
||||
logging.basicConfig(
|
||||
level=logging.ERROR,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SupplierCustomerRatioUpdater:
    """供应商客户占比数据更新器

    从CSV文件读取前五供应商占比和前五客户占比,按"股票代码+报告期"
    写回MongoDB的 eastmoney_financial_data_v2 集合(只更新已存在的记录)。
    """

    def __init__(self):
        """初始化MongoDB连接、CSV文件路径和季度列名映射"""
        self.mongo_client = None
        self.db = None
        self.collection_name = 'eastmoney_financial_data_v2'
        self.collection = None

        # CSV文件路径 - 使用绝对路径
        current_dir = Path(__file__).parent
        self.supplier_file = current_dir / 'data' / '前五供应商占比.csv'
        self.customer_file = current_dir / 'data' / '前五客户占比.csv'

        # 季度日期映射:CSV列名(YYYYMMDD) -> 报告期(YYYY-MM-DD)
        # 程序化生成 2020Q1 ~ 2025Q1,替代原来手写的21项字典(易漏/易错)
        self.quarter_mapping = {
            f"{year}{md}": f"{year}-{md[:2]}-{md[2:]}"
            for year in range(2020, 2026)
            for md in ('0331', '0630', '0930', '1231')
            if year < 2025 or md == '0331'
        }

        self.connect_mongodb()

    def connect_mongodb(self):
        """连接MongoDB数据库;连接失败时抛出异常终止程序"""
        try:
            self.mongo_client = pymongo.MongoClient(
                host=MONGO_CONFIG2['host'],
                port=MONGO_CONFIG2['port'],
                username=MONGO_CONFIG2['username'],
                password=MONGO_CONFIG2['password']
            )
            self.db = self.mongo_client[MONGO_CONFIG2['db']]
            self.collection = self.db[self.collection_name]

            # ping 验证连接真正可用(MongoClient构造本身是惰性的)
            self.mongo_client.admin.command('ping')
            logger.info(f"MongoDB连接成功,使用集合: {self.collection_name}")

        except Exception as e:
            logger.error(f"MongoDB连接失败: {str(e)}")
            raise

    def load_csv_data(self, file_path) -> pd.DataFrame:
        """加载CSV文件数据;文件不存在或读取失败时返回空DataFrame。

        Args:
            file_path: CSV路径,str 或 Path 均可
                       (原注解写的是 str,实际调用传入的是 Path)
        """
        try:
            if not os.path.exists(file_path):
                logger.error(f"文件不存在: {file_path}")
                return pd.DataFrame()

            df = pd.read_csv(file_path, encoding='utf-8')
            logger.info(f"成功加载文件: {file_path}, 数据行数: {len(df)}")
            return df

        except Exception as e:
            logger.error(f"加载CSV文件失败 {file_path}: {str(e)}")
            return pd.DataFrame()

    def parse_ratio_data(self, df: pd.DataFrame, data_type: str) -> Dict[str, Dict[str, float]]:
        """
        解析占比数据。

        Args:
            df: CSV数据DataFrame(行含stock_code列及各季度列)
            data_type: 数据类型,'supplier' 或 'customer'(仅用于日志)

        Returns:
            Dict: {stock_code: {report_date: ratio_value}},ratio为小数
        """
        ratio_data = {}

        try:
            for _, row in df.iterrows():
                stock_code = row['stock_code']
                if pd.isna(stock_code):
                    continue

                ratio_data[stock_code] = {}

                # 遍历所有季度列;0或NaN视为无数据,跳过
                for quarter_col, report_date in self.quarter_mapping.items():
                    if quarter_col not in row:
                        continue
                    ratio_value = row[quarter_col]
                    if pd.notna(ratio_value) and ratio_value > 0:
                        # 百分比数值转小数(如56.22 -> 0.5622)
                        ratio_data[stock_code][report_date] = float(ratio_value) / 100.0

            logger.info(f"解析{data_type}数据完成,共处理 {len(ratio_data)} 只股票")
            return ratio_data

        except Exception as e:
            logger.error(f"解析{data_type}数据失败: {str(e)}")
            return {}

    def update_mongodb_record(self, stock_code: str, report_date: str,
                              supplier_ratio: Optional[float], customer_ratio: Optional[float]) -> bool:
        """
        更新MongoDB中的单条记录(仅更新已存在的记录,不做upsert)。

        Args:
            stock_code: 股票代码
            report_date: 报告日期
            supplier_ratio: 前五供应商占比,None表示该期无数据
            customer_ratio: 前五客户占比,None表示该期无数据

        Returns:
            bool: 是否实际修改了记录
        """
        try:
            filter_condition = {
                'stock_code': stock_code,
                'report_date': report_date
            }

            # 记录不存在则跳过
            if not self.collection.find_one(filter_condition):
                logger.debug(f"记录不存在,跳过: {stock_code} - {report_date}")
                return False

            # 准备更新字段,None值不写入
            update_fields = {}
            if supplier_ratio is not None:
                update_fields['top_five_suppliers_ratio'] = supplier_ratio
            if customer_ratio is not None:
                update_fields['top_five_customers_ratio'] = customer_ratio

            if not update_fields:
                return False

            # 添加更新时间
            update_fields['ratio_update_time'] = datetime.datetime.now()

            result = self.collection.update_one(
                filter_condition,
                {'$set': update_fields}
            )

            if result.modified_count > 0:
                logger.debug(f"更新成功: {stock_code} - {report_date}")
                return True
            logger.debug(f"无需更新: {stock_code} - {report_date}")
            return False

        except Exception as e:
            logger.error(f"更新记录失败 {stock_code} - {report_date}: {str(e)}")
            return False

    def batch_update_ratios(self):
        """批量更新所有占比数据;返回整体是否成功(bool)"""
        try:
            # 加载CSV数据
            logger.info("开始加载CSV文件...")
            supplier_df = self.load_csv_data(self.supplier_file)
            customer_df = self.load_csv_data(self.customer_file)

            if supplier_df.empty and customer_df.empty:
                logger.error("没有可用的CSV数据")
                return False

            # 解析数据
            logger.info("解析数据中...")
            supplier_data = self.parse_ratio_data(supplier_df, 'supplier') if not supplier_df.empty else {}
            customer_data = self.parse_ratio_data(customer_df, 'customer') if not customer_df.empty else {}

            # 两份数据涉及的全部股票代码
            all_stock_codes = set(supplier_data) | set(customer_data)
            logger.info(f"开始更新数据,共涉及 {len(all_stock_codes)} 只股票")

            total_count = 0
            updated_count = 0

            for stock_code in all_stock_codes:
                supplier_ratios = supplier_data.get(stock_code, {})
                customer_ratios = customer_data.get(stock_code, {})

                # 逐个报告期更新(取两份数据日期的并集)
                for report_date in set(supplier_ratios) | set(customer_ratios):
                    total_count += 1
                    if self.update_mongodb_record(
                            stock_code, report_date,
                            supplier_ratios.get(report_date),
                            customer_ratios.get(report_date)):
                        updated_count += 1

                    # 每处理100条记录输出一次进度
                    if total_count % 100 == 0:
                        logger.info(f"已处理 {total_count} 条记录,更新 {updated_count} 条")

            logger.info(f"数据更新完成!总计处理 {total_count} 条记录,成功更新 {updated_count} 条")
            return True

        except Exception as e:
            logger.error(f"批量更新失败: {str(e)}")
            return False

    def close_connection(self):
        """关闭数据库连接(幂等,未连接时无操作)"""
        try:
            if self.mongo_client:
                self.mongo_client.close()
                logger.info("MongoDB连接已关闭")
        except Exception as e:
            logger.error(f"关闭MongoDB连接失败: {str(e)}")
|
||||
|
||||
|
||||
def main():
    """主函数:执行供应商/客户占比批量更新,确保连接最终关闭"""
    logger.info("=== 供应商客户占比数据更新器启动 ===")

    updater = None
    try:
        updater = SupplierCustomerRatioUpdater()

        # 执行批量更新并按结果记录日志
        if updater.batch_update_ratios():
            logger.info("=== 数据更新成功完成 ===")
        else:
            logger.error("=== 数据更新失败 ===")

    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
    finally:
        if updater:
            updater.close_connection()
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,729 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
科技主题基本面因子选股策略
|
||||
整合企业生命周期、财务指标和平均距离因子分析
|
||||
"""
|
||||
|
||||
import sys
|
||||
import pymongo
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from sqlalchemy import create_engine, text
|
||||
from datetime import datetime
|
||||
import math
|
||||
|
||||
# 添加项目根路径到Python路径
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
sys.path.append(str(project_root))
|
||||
|
||||
# 导入依赖的工具类
|
||||
from src.quantitative_analysis.company_lifecycle_factor import CompanyLifecycleFactor
|
||||
from src.quantitative_analysis.financial_indicator_analyzer import FinancialIndicatorAnalyzer
|
||||
from src.quantitative_analysis.average_distance_factor import AverageDistanceFactor
|
||||
from src.valuation_analysis.config import MONGO_CONFIG2, DB_URL
|
||||
|
||||
# 设置日志
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TechFundamentalFactorStrategy:
    """Tech-themed fundamental factor stock-selection strategy.

    Pipeline:
      1. Pull the stock universe from tech concept boards (MySQL table ``gp_gnbk``).
      2. Split stocks by company lifecycle stage: growth (stage 1/2) vs. mature (stage 3).
      3. Compute an average-distance technical factor per group.
      4. Compute common fundamental factors for all stocks and stage-specific
         factors per group.
      5. Score every factor as a 0-100 percentile (direction-aware) and rank by
         a weighted total score.

    External collaborators (``CompanyLifecycleFactor``, ``FinancialIndicatorAnalyzer``,
    ``AverageDistanceFactor``, ``DB_URL``) are defined elsewhere in this project.
    """

    def __init__(self):
        """Initialize factor calculators, the MySQL engine and the concept universe."""
        # Project-local factor calculators.
        self.lifecycle_calculator = CompanyLifecycleFactor()
        self.financial_analyzer = FinancialIndicatorAnalyzer()
        self.distance_calculator = AverageDistanceFactor(DB_URL)

        # MySQL connection pool; connections are recycled hourly to avoid
        # "MySQL server has gone away" on long-lived idle connections.
        self.mysql_engine = create_engine(
            DB_URL,
            pool_size=5,
            max_overflow=10,
            pool_recycle=3600
        )

        # Tech concept board names that define the candidate stock universe.
        self.tech_concepts = [
            "5G概念", "物联网", "云计算", "边缘计算", "信息安全", "国产软件",
            "大数据", "数据中心", "芯片", "MCU芯片", "汽车芯片", "存储芯片",
            "人工智能", "AIGC概念", "ChatGPT概念", "CPO概念", "华为鸿蒙",
            "华为海思", "华为算力", "量子科技", "区块链", "数字货币", "工业互联",
            "操作系统", "光刻机", "第三代半导体", "元宇宙概念", "云游戏", "信创",
            "东数西算", "PCB概念", "先进封装", "EDA概念", "Web3概念", "数据确权",
            "数据要素", "数字水印", "工业软件", "6G概念", "时空大数据", "算力租赁",
            "光通信", "英伟达概念", "星闪概念", "液冷服务器", "多模态AI", "Sora概念",
            "AI手机PC", "铜缆高速连接", "车联网", "财税数字化", "智谱AI", "AI智能体",
            "DeepSeek概念", "AI医疗概念"
        ]

        logger.info("科技主题基本面因子选股策略初始化完成")

    def get_tech_stocks(self) -> pd.DataFrame:
        """Fetch the stocks belonging to the configured tech concept boards.

        Returns:
            pd.DataFrame: columns ``stock_code``, ``stock_name``, ``concept_name``;
            empty DataFrame on any failure.
        """
        try:
            # Build the IN (...) clause by joining the concept names.
            # NOTE(review): values are interpolated directly into the SQL string;
            # this is only safe because self.tech_concepts is a hard-coded
            # internal list — do not feed user input through this path.
            concepts_str = "', '".join(self.tech_concepts)
            query = text(f"""
                SELECT DISTINCT gp_code as stock_code, gp_name as stock_name, bk_name as concept_name
                FROM gp_gnbk
                WHERE bk_name IN ('{concepts_str}')
                ORDER BY gp_code
            """)

            with self.mysql_engine.connect() as conn:
                df = pd.read_sql(query, conn)

            logger.info(f"获取到 {len(df)} 只科技概念股票")
            return df

        except Exception as e:
            logger.error(f"获取科技概念股票失败: {str(e)}")
            return pd.DataFrame()

    def filter_by_lifecycle(self, stock_codes: List[str], year: int = 2024) -> Dict[str, List[str]]:
        """Split stocks into growth-stage and mature-stage buckets.

        Args:
            stock_codes: stock codes to classify.
            year: fiscal year used for the lifecycle calculation.

        Returns:
            Dict with keys ``'growth'`` and ``'mature'``, each a list of stock
            codes (both empty on failure).
        """
        try:
            logger.info(f"开始分析 {len(stock_codes)} 只股票的企业生命周期")

            # Batch-compute lifecycle stage ids for the whole universe.
            lifecycle_df = self.lifecycle_calculator.batch_calculate_lifecycle_factors(stock_codes, year)

            # Stage mapping: introduction (1) and growth (2) are merged into
            # the "growth" bucket; maturity (3) forms the "mature" bucket.
            growth_stage_stocks = lifecycle_df[
                lifecycle_df['stage_id'].isin([1, 2])
            ]['stock_code'].tolist()

            mature_stage_stocks = lifecycle_df[
                lifecycle_df['stage_id'] == 3
            ]['stock_code'].tolist()

            result = {
                'growth': growth_stage_stocks,
                'mature': mature_stage_stocks
            }

            logger.info(f"成长期股票: {len(growth_stage_stocks)} 只")
            logger.info(f"成熟期股票: {len(mature_stage_stocks)} 只")

            return result

        except Exception as e:
            logger.error(f"生命周期筛选失败: {str(e)}")
            return {'growth': [], 'mature': []}

    def calculate_distance_factors(self, growth_stocks: List[str], mature_stocks: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Compute the average-distance technical factor per lifecycle group.

        Args:
            growth_stocks: growth-stage stock codes.
            mature_stocks: mature-stage stock codes.

        Returns:
            Tuple of (growth distance-factor DataFrame, mature distance-factor
            DataFrame); either may be empty when its input list is empty or
            price data is unavailable.
        """
        try:
            growth_distance_df = pd.DataFrame()
            mature_distance_df = pd.DataFrame()

            # Growth group: price data -> technical indicators -> distance factor.
            if growth_stocks:
                logger.info(f"计算 {len(growth_stocks)} 只成长期股票的距离因子")
                growth_data = self.distance_calculator.get_stock_data(growth_stocks)
                if not growth_data.empty:
                    growth_indicators = self.distance_calculator.calculate_technical_indicators(growth_data)
                    growth_distance_df = self.distance_calculator.calculate_distance_factor(growth_indicators)

            # Mature group: same pipeline, computed independently.
            if mature_stocks:
                logger.info(f"计算 {len(mature_stocks)} 只成熟期股票的距离因子")
                mature_data = self.distance_calculator.get_stock_data(mature_stocks)
                if not mature_data.empty:
                    mature_indicators = self.distance_calculator.calculate_technical_indicators(mature_data)
                    mature_distance_df = self.distance_calculator.calculate_distance_factor(mature_indicators)

            return growth_distance_df, mature_distance_df

        except Exception as e:
            logger.error(f"计算距离因子失败: {str(e)}")
            return pd.DataFrame(), pd.DataFrame()

    def calculate_common_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """Compute factors shared by both lifecycle groups.

        Factors: gross profit margin (latest quarter), growth capability
        (sigmoid-mapped to 0-1), top-5 supplier concentration and top-5
        customer concentration (annual report).

        Args:
            stock_codes: stock codes to process.

        Returns:
            pd.DataFrame: one row per stock; stocks whose calculation raised
            are skipped. Empty DataFrame on total failure.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只股票的通用因子")

            results = []
            latest_date = "2025-03-31"  # latest quarterly report date
            annual_date = "2024-12-31"  # annual report date

            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}

                    # 1. Gross profit margin (latest quarter).
                    gross_margin = self.financial_analyzer.analyze_gross_profit_margin(stock_code, latest_date)
                    factor_data['gross_profit_margin'] = gross_margin

                    # 2. Growth capability, squashed through a sigmoid so the
                    #    score lands in (0, 1); missing value -> neutral 0.5.
                    growth_capability = self.financial_analyzer.analyze_growth_capability(stock_code)
                    if growth_capability is not None:
                        growth_score = 1 / (1 + math.exp(-growth_capability))
                    else:
                        growth_score = 0.5  # neutral default when data is missing
                    factor_data['growth_score'] = growth_score

                    # 3. Top-5 supplier concentration (annual report).
                    supplier_conc = self.financial_analyzer.analyze_supplier_concentration(stock_code, annual_date)
                    factor_data['supplier_concentration'] = supplier_conc

                    # 4. Top-5 customer concentration (annual report).
                    customer_conc = self.financial_analyzer.analyze_customer_concentration(stock_code, annual_date)
                    factor_data['customer_concentration'] = customer_conc

                    results.append(factor_data)

                except Exception as e:
                    # Best-effort per stock: log and move on.
                    logger.warning(f"计算股票 {stock_code} 通用因子失败: {str(e)}")
                    continue

            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只股票的通用因子")
            return df

        except Exception as e:
            logger.error(f"计算通用因子失败: {str(e)}")
            return pd.DataFrame()

    def calculate_growth_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """Compute factors specific to growth-stage stocks.

        Factors: admin expense ratio (latest quarter), R&D amortization ratio
        (annual report), asset-liability ratio (latest quarter).

        Args:
            stock_codes: growth-stage stock codes.

        Returns:
            pd.DataFrame: one row per stock; empty on total failure.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只成长期股票的特色因子")

            results = []
            latest_date = "2025-03-31"  # latest quarterly report date
            annual_date = "2024-12-31"  # annual report date

            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}

                    # 1. Admin expense ratio (latest quarter).
                    admin_ratio = self.financial_analyzer.analyze_admin_expense_ratio(stock_code, latest_date)
                    factor_data['admin_expense_ratio'] = admin_ratio

                    # 2. Intangible-asset amortization as a share of R&D expense
                    #    (annual data: IA_AMORTIZE from the cash-flow statement
                    #    over RESEARCH_EXPENSE from the profit statement).
                    financial_data = self.financial_analyzer.get_financial_data(stock_code, annual_date)
                    if financial_data:
                        intangible_amortize = financial_data.get('cash_flow_statement', {}).get('IA_AMORTIZE', 0)
                        rd_expense = financial_data.get('profit_statement', {}).get('RESEARCH_EXPENSE', 0)

                        if rd_expense and rd_expense != 0:
                            rd_amortize_ratio = intangible_amortize / rd_expense if intangible_amortize else 0
                        else:
                            # None (not 0) so stocks with no R&D expense don't get
                            # the best possible score on this negative factor.
                            rd_amortize_ratio = None

                        factor_data['rd_amortize_ratio'] = rd_amortize_ratio
                    else:
                        factor_data['rd_amortize_ratio'] = None

                    # 3. Asset-liability ratio (latest quarter).
                    asset_liability_ratio = self.financial_analyzer.analyze_asset_liability_ratio(stock_code, latest_date)
                    factor_data['asset_liability_ratio'] = asset_liability_ratio

                    results.append(factor_data)

                except Exception as e:
                    logger.warning(f"计算股票 {stock_code} 成长期特色因子失败: {str(e)}")
                    continue

            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只成长期股票的特色因子")
            return df

        except Exception as e:
            logger.error(f"计算成长期特色因子失败: {str(e)}")
            return pd.DataFrame()

    def calculate_mature_specific_factors(self, stock_codes: List[str]) -> pd.DataFrame:
        """Compute factors specific to mature-stage stocks.

        Factors: accounts-receivable turnover, R&D intensity, and a PB-ROE
        cross-sectional rank factor (all based on the latest quarter).

        Args:
            stock_codes: mature-stage stock codes.

        Returns:
            pd.DataFrame: one row per stock; empty on total failure.
        """
        try:
            logger.info(f"计算 {len(stock_codes)} 只成熟期股票的特色因子")

            latest_date = "2025-03-31"  # latest quarterly report date

            # Fetch market-wide PB and ROE once, outside the loop, so the
            # per-stock rank computation doesn't re-query the whole market.
            logger.info("获取全A股PB数据...")
            all_pb_data = self.financial_analyzer.get_all_stocks_pb_data()

            logger.info("获取全A股ROE数据...")
            all_roe_data = self.financial_analyzer.get_all_stocks_roe_data(latest_date)

            results = []

            for stock_code in stock_codes:
                try:
                    factor_data = {'stock_code': stock_code}

                    # 1. Accounts-receivable turnover = operating income /
                    #    accounts receivable (latest quarter). The code is
                    #    converted to dotted format before the lookup.
                    formatted_stock_code = self.financial_analyzer.code_formatter.to_dot_format(stock_code)
                    financial_data = self.financial_analyzer.get_financial_data(formatted_stock_code, latest_date)
                    if financial_data:
                        revenue = financial_data.get('profit_statement', {}).get('OPERATE_INCOME', 0)
                        accounts_rece = financial_data.get('balance_sheet', {}).get('ACCOUNTS_RECE', 0)

                        if accounts_rece and accounts_rece != 0:
                            turnover_ratio = revenue / accounts_rece if revenue else 0
                        else:
                            # None (not 0): no receivables means the ratio is
                            # undefined, not "worst in class".
                            turnover_ratio = None

                        factor_data['accounts_receivable_turnover'] = turnover_ratio
                    else:
                        factor_data['accounts_receivable_turnover'] = None

                    # 2. R&D intensity (latest quarter).
                    rd_intensity = self.financial_analyzer.analyze_rd_expense_ratio(stock_code, latest_date)
                    factor_data['rd_intensity'] = rd_intensity

                    # 3. PB-ROE rank factor using the pre-fetched market data.
                    if all_pb_data and all_roe_data:
                        pb_roe_rank_factor = self.financial_analyzer.calculate_pb_roe_rank_factor(
                            stock_code, all_pb_data, all_roe_data
                        )
                        factor_data['pb_roe_rank_factor'] = pb_roe_rank_factor
                    else:
                        factor_data['pb_roe_rank_factor'] = None

                    results.append(factor_data)

                except Exception as e:
                    logger.warning(f"计算股票 {stock_code} 成熟期特色因子失败: {str(e)}")
                    continue

            df = pd.DataFrame(results)
            logger.info(f"成功计算 {len(df)} 只成熟期股票的特色因子")
            return df

        except Exception as e:
            logger.error(f"计算成熟期特色因子失败: {str(e)}")
            return pd.DataFrame()

    def run_strategy(self, year: int = 2024) -> Dict[str, pd.DataFrame]:
        """Run the full selection pipeline.

        Args:
            year: fiscal year used for the lifecycle classification.

        Returns:
            Dict with up to two keys, ``'growth'`` and ``'mature'``, each a
            ranked DataFrame (factor values, per-factor scores, total score,
            rank). Empty dict when no usable stocks are found or on failure.
        """
        try:
            logger.info("开始运行科技主题基本面因子选股策略")

            # 1. Stock universe from tech concept boards.
            tech_stocks_df = self.get_tech_stocks()
            if tech_stocks_df.empty:
                logger.error("未获取到科技概念股票")
                return {}

            stock_codes = tech_stocks_df['stock_code'].unique().tolist()
            logger.info(f"共获取到 {len(stock_codes)} 只科技概念股票")

            # 2. Lifecycle split.
            lifecycle_result = self.filter_by_lifecycle(stock_codes, year)
            growth_stocks = lifecycle_result['growth']
            mature_stocks = lifecycle_result['mature']

            if not growth_stocks and not mature_stocks:
                logger.warning("未找到符合条件的成长期或成熟期股票")
                return {}

            # 3. Distance factors per group.
            growth_distance_df, mature_distance_df = self.calculate_distance_factors(growth_stocks, mature_stocks)

            # 4. Common factors over both groups combined.
            all_qualified_stocks = growth_stocks + mature_stocks
            common_factors_df = self.calculate_common_factors(all_qualified_stocks)

            # 5. Stage-specific factors.
            growth_specific_df = self.calculate_growth_specific_factors(growth_stocks) if growth_stocks else pd.DataFrame()
            mature_specific_df = self.calculate_mature_specific_factors(mature_stocks) if mature_stocks else pd.DataFrame()

            # 6. Merge, score and rank per group.
            result = {}

            # Growth group: a group is only emitted when its specific-factor
            # frame is non-empty.
            if not growth_specific_df.empty:
                growth_result = growth_specific_df.copy()

                # Attach the distance factor (left join on stock code; the
                # distance frame keys on 'symbol').
                if not growth_distance_df.empty:
                    growth_result = growth_result.merge(
                        growth_distance_df[['symbol', 'avg_distance_factor']],
                        left_on='stock_code', right_on='symbol', how='left'
                    ).drop('symbol', axis=1)

                # Attach common factors.
                if not common_factors_df.empty:
                    growth_result = growth_result.merge(
                        common_factors_df, on='stock_code', how='left'
                    )

                # Per-factor percentile scores, then total score + rank.
                growth_result = self.calculate_factor_scores(growth_result, 'growth')
                growth_result = self.calculate_total_score(growth_result, 'growth')

                result['growth'] = growth_result
                logger.info(f"成长期结果: {len(growth_result)} 只股票")

            # Mature group: same merge/score/rank sequence.
            if not mature_specific_df.empty:
                mature_result = mature_specific_df.copy()

                if not mature_distance_df.empty:
                    mature_result = mature_result.merge(
                        mature_distance_df[['symbol', 'avg_distance_factor']],
                        left_on='stock_code', right_on='symbol', how='left'
                    ).drop('symbol', axis=1)

                if not common_factors_df.empty:
                    mature_result = mature_result.merge(
                        common_factors_df, on='stock_code', how='left'
                    )

                mature_result = self.calculate_factor_scores(mature_result, 'mature')
                mature_result = self.calculate_total_score(mature_result, 'mature')

                result['mature'] = mature_result
                logger.info(f"成熟期结果: {len(mature_result)} 只股票")

            logger.info("科技主题基本面因子选股策略运行完成")
            return result

        except Exception as e:
            logger.error(f"策略运行失败: {str(e)}")
            return {}

    def calculate_factor_scores(self, df: pd.DataFrame, stage: str) -> pd.DataFrame:
        """Score every factor column as a 0-100 percentile, direction-aware.

        For each non-key column a ``<column>_score`` column is added: positive
        factors score higher for larger values, negative factors for smaller
        values. NaN inputs score 0.

        Args:
            df: factor-value DataFrame (must contain ``stock_code``).
            stage: ``'growth'`` or ``'mature'`` (used for logging only here).

        Returns:
            pd.DataFrame: copy of *df* with the ``*_score`` columns appended;
            the unmodified input on failure.
        """
        try:
            if df.empty:
                return df

            df_scored = df.copy()

            # Factor direction map: True = higher is better, False = lower is
            # better. Columns not listed default to positive.
            factor_directions = {
                # Common factors
                'gross_profit_margin': True,         # gross margin (QoQ delta) - positive
                'growth_score': True,                # growth capability - positive
                'supplier_concentration': False,     # top-5 supplier share - negative
                'customer_concentration': False,     # top-5 customer share - negative
                'avg_distance_factor': False,        # average distance factor - negative

                # Growth-stage specific factors
                'admin_expense_ratio': False,        # admin expense / revenue - negative
                'rd_amortize_ratio': False,          # R&D amortization share - negative
                'asset_liability_ratio': True,       # asset-liability ratio - positive

                # Mature-stage specific factors
                'accounts_receivable_turnover': True,  # AR turnover - positive
                'rd_intensity': True,                  # R&D intensity - positive
                'pb_roe_rank_factor': False            # PB-ROE rank - negative (smaller is better)
            }

            # Percentile-score each factor column.
            for column in df.columns:
                if column == 'stock_code':
                    continue

                # Rank only the non-null values.
                values = df_scored[column].dropna()
                if len(values) <= 1:
                    # Degenerate column: a single value gets a neutral 50,
                    # an all-NaN column gets 0 everywhere.
                    if len(values) == 1:
                        df_scored[f'{column}_score'] = df_scored[column].apply(lambda x: 50 if pd.notna(x) else 0)
                    else:
                        df_scored[f'{column}_score'] = 0
                    continue

                # Direction decides whether the percentile is taken as-is or
                # inverted.
                is_positive = factor_directions.get(column, True)

                if is_positive:
                    # Positive factor: larger value -> higher score.
                    ranked_values = values.rank(pct=True) * 100
                else:
                    # Negative factor: smaller value -> higher score.
                    ranked_values = (1 - values.rank(pct=True)) * 100

                # Initialize the score column (NaN rows keep 0).
                df_scored[f'{column}_score'] = 0.0

                # Write scores back by original index so they line up with
                # the right rows despite the dropna above.
                for idx in ranked_values.index:
                    df_scored.loc[idx, f'{column}_score'] = ranked_values[idx]

            logger.info(f"完成 {stage} 阶段 {len(df_scored)} 只股票的因子打分")
            return df_scored

        except Exception as e:
            logger.error(f"计算因子分数失败: {str(e)}")
            import traceback
            traceback.print_exc()
            return df

    def calculate_total_score(self, df: pd.DataFrame, stage: str) -> pd.DataFrame:
        """Aggregate per-factor scores into a total score and rank.

        Formula per stock: total = 1/8 * Mean(Si) + Mean(Si)/Std(Si), where
        Mean(Si) is the weight-normalized average of the stock's valid factor
        scores and the second term rewards consistency across factors.

        Args:
            df: DataFrame that already contains the ``*_score`` columns.
            stage: ``'growth'`` or ``'mature'`` — selects the weight table.

        Returns:
            pd.DataFrame: sorted descending by ``total_score`` with a 1-based
            ``rank`` column; the unmodified input on failure.
        """
        try:
            if df.empty:
                return df

            df_result = df.copy()

            # Weight tables key on the *_score column names (hence the doubled
            # suffix in 'growth_score_score'). All weights are equal at 1/8.
            if stage == 'growth':
                factor_weights = {
                    # Common factors
                    'gross_profit_margin_score': 1/8,
                    'growth_score_score': 1/8,  # score column of the 'growth_score' factor
                    'supplier_concentration_score': 1/8,
                    'customer_concentration_score': 1/8,
                    'avg_distance_factor_score': 1/8,

                    # Growth-stage specific factors
                    'admin_expense_ratio_score': 1/8,
                    'rd_amortize_ratio_score': 1/8,
                    'asset_liability_ratio_score': 1/8
                }
            else:  # mature
                factor_weights = {
                    # Common factors
                    'gross_profit_margin_score': 1/8,
                    'growth_score_score': 1/8,  # score column of the 'growth_score' factor
                    'supplier_concentration_score': 1/8,
                    'customer_concentration_score': 1/8,
                    'avg_distance_factor_score': 1/8,

                    # Mature-stage specific factors
                    'accounts_receivable_turnover_score': 1/8,
                    'rd_intensity_score': 1/8,
                    'pb_roe_rank_factor_score': 1/8
                }

            # Per-stock total score.
            total_scores = []

            for index, row in df_result.iterrows():
                # Collect this stock's valid factor scores.
                # NOTE(review): 'row[factor] > 0' also drops factors that
                # legitimately scored exactly 0 (worst percentile) — confirm
                # this exclusion is intended.
                factor_scores = []
                valid_weights = []

                for factor, weight in factor_weights.items():
                    if factor in row and pd.notna(row[factor]) and row[factor] > 0:
                        factor_scores.append(row[factor])
                        valid_weights.append(weight)

                if len(factor_scores) == 0:
                    total_scores.append(0)
                    continue

                factor_scores = np.array(factor_scores)
                valid_weights = np.array(valid_weights)

                # Re-normalize weights over the factors actually present.
                valid_weights = valid_weights / valid_weights.sum()

                # Weighted mean score.
                mean_score = np.average(factor_scores, weights=valid_weights)

                # Consistency adjustment Mean(Si)/Std(Si); 0 when undefined.
                if len(factor_scores) > 1 and np.std(factor_scores) > 0:
                    adjustment = np.mean(factor_scores) / np.std(factor_scores)
                else:
                    adjustment = 0

                # Total: 1/8 * Mean(Si) + Mean(Si)/Std(Si).
                total_score = (1/8) * mean_score + adjustment
                total_scores.append(total_score)

            df_result['total_score'] = total_scores

            # Rank descending by total score.
            df_result = df_result.sort_values('total_score', ascending=False).reset_index(drop=True)
            df_result['rank'] = range(1, len(df_result) + 1)

            logger.info(f"完成 {stage} 阶段 {len(df_result)} 只股票的总分计算")
            return df_result

        except Exception as e:
            logger.error(f"计算总分失败: {str(e)}")
            import traceback
            traceback.print_exc()
            return df

    def close_connections(self):
        """Release every database resource held by the strategy.

        Best-effort: calculators are dropped, the analyzer's connection is
        closed explicitly, and the SQLAlchemy pool is disposed.
        """
        try:
            if hasattr(self, 'lifecycle_calculator'):
                del self.lifecycle_calculator
            if hasattr(self, 'financial_analyzer'):
                self.financial_analyzer.close_connection()
            if hasattr(self, 'distance_calculator'):
                del self.distance_calculator
            if hasattr(self, 'mysql_engine'):
                self.mysql_engine.dispose()
            logger.info("数据库连接已关闭")
        except Exception as e:
            logger.error(f"关闭连接失败: {str(e)}")
|
||||
|
||||
|
||||
def main():
    """Entry point: run the strategy, print summaries and dump one CSV per stage.

    Results are written as ``tech_fundamental_factor_<stage>_<timestamp>.csv``
    (UTF-8 with BOM so Excel opens the Chinese headers correctly). Database
    connections are always released in the ``finally`` block.
    """
    strategy = None
    try:
        print("=== 科技主题基本面因子选股策略 ===")
        print("数据说明:")
        print("- 毛利率、净利润增长率等:使用最新数据 (2025-03-31)")
        print("- 供应商客户集中度、折旧摊销、研发费用:使用年报数据 (2024-12-31)")
        print()

        # Build the strategy (opens DB connections in its __init__).
        strategy = TechFundamentalFactorStrategy()
        logger.info("策略实例创建成功")

        # Run the full pipeline.
        results = strategy.run_strategy(year=2024)

        # Report per-stage results.
        if not results:
            print("未获得分析结果")
            return

        for stage, df in results.items():
            print(f"\n=== {stage.upper()} 阶段股票分析结果 ===")
            print(f"股票数量: {len(df)}")

            if not df.empty:
                # Debug aid: show which columns actually made it through.
                print(f"数据列: {list(df.columns)}")
                # Top 5 stocks on key indicators (only columns that exist).
                print("\n前5只股票:")
                display_columns = [
                    'stock_code', 'gross_profit_margin', 'growth_score',
                    'supplier_concentration', 'customer_concentration',
                    'total_score', 'rank'
                ]
                available_columns = [col for col in display_columns if col in df.columns]
                print(df[available_columns].head(5).to_string(index=False))

                # Persist the full result set.
                output_file = f"tech_fundamental_factor_{stage}_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
                df.to_csv(output_file, index=False, encoding='utf-8-sig')
                print(f"\n完整结果已保存到: {output_file}")

                # Summary statistics.
                print(f"\n统计信息:")
                print(f"  平均总分: {df['total_score'].mean():.2f}")
                print(f"  最高总分: {df['total_score'].max():.2f}")
                print(f"  最低总分: {df['total_score'].min():.2f}")

        print(f"\n=== 策略运行完成 ===")

    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        # Always release DB connections, even after a failure.
        if strategy:
            strategy.close_connections()


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,342 @@
|
|||
import requests
|
||||
import redis
|
||||
import random
|
||||
import time
|
||||
import threading
|
||||
import json
|
||||
from typing import Dict, List, Optional, Union
|
||||
from datetime import datetime, timedelta
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
class EnhancedProxyManager:
    """Redis-backed HTTP proxy pool with API-sourced and manually-added proxies.

    Proxies live in two Redis hashes (``<prefix>:api`` and ``<prefix>:manual``),
    each entry a JSON blob keyed by ``ip:port``. Supports optional background
    refresh from a vendor API, per-proxy liveness checks, and retried requests
    through a randomly chosen proxy.
    """

    def __init__(
        self,
        proxy_api_url: str = 'https://dps.kdlapi.com/api/getdps?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c&num=3&format=json',
        valid_check_url: str = 'https://dps.kdlapi.com/api/checkdpsvalid?secret_id=o4itop21b4byqg47eevx&signature=3d3fuvm6raah1xyjecl6bby1mj6gtx0c',
        redis_host: str = '192.168.18.123',
        redis_port: int = 6379,
        redis_db: int = 7,
        redis_password: str = 'wlkj2018',
        redis_key: str = 'proxy_pool',
        update_interval: int = 3600,
        max_retries: int = 3,
        proxy_timeout: int = 10,
        auto_refresh: bool = False,
        max_pool_size: int = 50,
        enable_api_validation: bool = True  # whether to double-check "dead" proxies against the vendor API
    ):
        """Enhanced proxy manager supporting multiple IP pools and manual proxies.

        NOTE(review): the default URLs embed vendor API credentials and the
        defaults embed a Redis password — consider moving these to config.

        :param proxy_api_url: vendor API endpoint that returns fresh proxies
        :param valid_check_url: vendor API endpoint that validates proxies
        :param redis_host: Redis host
        :param redis_port: Redis port
        :param redis_db: Redis database index
        :param redis_password: Redis password
        :param redis_key: key prefix for the proxy hashes in Redis
        :param update_interval: refresh interval in seconds (background thread)
        :param max_retries: maximum retries for request_with_proxy
        :param proxy_timeout: per-request proxy timeout in seconds
        :param auto_refresh: whether to fetch proxies from the API automatically
        :param max_pool_size: maximum pool capacity (stored; not enforced here)
        :param enable_api_validation: toggle for vendor-API liveness validation
        """
        self.proxy_api_url = proxy_api_url
        self.redis_key_prefix = redis_key
        self.update_interval = update_interval
        self.max_retries = max_retries
        self.proxy_timeout = proxy_timeout
        self.auto_refresh = auto_refresh
        self.max_pool_size = max_pool_size
        self.valid_check_url = valid_check_url
        self.enable_api_validation = enable_api_validation

        # Redis connection; decode_responses so hash values come back as str.
        self.redis_conn = redis.StrictRedis(
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            decode_responses=True
        )

        # Thread-safety primitives: lock guards pool mutation, condition
        # signals refresh completion to any waiters.
        self.lock = threading.Lock()
        self.condition = threading.Condition()

        # Background refresh only when explicitly enabled.
        if self.auto_refresh:
            self._start_maintenance_thread()

    def _start_maintenance_thread(self):
        """Start the daemon thread that periodically refreshes API proxies."""

        def maintenance_loop():
            # Refresh, wake any waiters, then sleep for the interval — forever
            # (daemon thread dies with the process).
            while True:
                with self.condition:
                    self._refresh_api_proxies()
                    self.condition.notify_all()
                time.sleep(self.update_interval)

        thread = threading.Thread(target=maintenance_loop, daemon=True)
        thread.start()

    def _get_redis_key(self, proxy_type: str) -> str:
        """Return the Redis hash name for a pool type ('api' or 'manual')."""
        return f"{self.redis_key_prefix}:{proxy_type}"

    def _check_proxy_valid(self, proxy_list: List[str]) -> Dict[str, bool]:
        """Ask the vendor API whether the given proxies are still valid.

        :param proxy_list: proxies as "ip:port" strings
        :return: mapping proxy -> bool; empty dict when validation is disabled,
                 the list is empty, or the API call fails.
        """
        if not self.enable_api_validation or not proxy_list:
            return {}

        try:
            # Vendor API takes proxies as a comma-joined query param:
            # proxy=ip1:port1,ip2:port2
            proxy_param = '&proxy=' + ','.join(proxy_list)
            response = requests.get(self.valid_check_url + proxy_param, timeout=10)

            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0:
                    return data.get('data', {})
        except Exception as e:
            print(f"API验证代理有效性失败: {e}")
        return {}

    def _refresh_api_proxies(self) -> bool:
        """Fetch fresh proxies from the vendor API and store them in Redis.

        Skips the fetch when at least 4 stored proxies exist and none has a
        ``last_checked`` older than one hour.

        :return: True when new proxies were stored, False otherwise.
        """
        api_key = self._get_redis_key('api')

        # Pre-check: skip the API call if the pool is already populated and fresh.
        existing_proxies = self.redis_conn.hgetall(api_key)

        # "Enough" means at least 4 proxies, all checked within the last hour.
        if len(existing_proxies) >= 4:
            current_time = datetime.now()
            need_refresh = False

            for proxy_json in existing_proxies.values():
                proxy = json.loads(proxy_json)
                last_checked = datetime.strptime(proxy['last_checked'], "%Y-%m-%d %H:%M:%S")
                # One proxy older than an hour is enough to trigger a refresh.
                if (current_time - last_checked) > timedelta(hours=1):
                    need_refresh = True
                    break

            if not need_refresh:
                print("当前有足够数量且未过期的代理,无需刷新")
                return False

        try:
            response = requests.get(self.proxy_api_url, timeout=self.proxy_timeout)
            if response.status_code == 200:
                data = response.json()
                if data.get('code') == 0 and data.get('data'):
                    api_key = self._get_redis_key('api')
                    # NOTE(review): deleting the old pool is deliberately
                    # disabled, so stale entries accumulate until individually
                    # removed by mark_proxy_failed — confirm this is intended.
                    # self.redis_conn.delete(api_key)

                    # Store each new proxy as a JSON blob keyed by "ip:port".
                    for proxy_data in data['data']['proxy_list']:
                        proxy = {
                            'http': f"http://{proxy_data}",
                            'https': f"http://{proxy_data}",
                            'server': proxy_data,
                            'source': 'api',
                            'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                            'status': 'active'
                        }
                        self.redis_conn.hset(
                            api_key,
                            proxy_data,
                            json.dumps(proxy)
                        )
                    return True
        except Exception as e:
            print(f"更新API代理失败: {e}")
        return False

    def add_manual_proxies(self, proxies: Union[List[str], str]) -> int:
        """Validate and add proxies to the manual pool.

        Each candidate is liveness-checked (in parallel) before insertion;
        duplicates already present in Redis are skipped.

        :param proxies: proxy list (format: ["ip:port", ...]) or a single "ip:port"
        :return: number of proxies actually added
        """
        if isinstance(proxies, str):
            proxies = [proxies]

        added = 0
        manual_key = self._get_redis_key('manual')

        # Validate then insert; the lock serializes the check-and-set on Redis
        # and protects the shared 'added' counter.
        def _check_and_add(proxy):
            nonlocal added
            proxy_dict = {
                'http': f"http://{proxy}",
                'https': f"http://{proxy}",
                'server': proxy,
                'source': 'manual',
                'last_checked': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'status': 'active'
            }

            if self._is_proxy_working(proxy_dict):
                with self.lock:
                    # Skip proxies already present in the pool.
                    if not self.redis_conn.hexists(manual_key, proxy):
                        self.redis_conn.hset(manual_key, proxy, json.dumps(proxy_dict))
                        added += 1

        # Validate candidates concurrently (network-bound).
        with ThreadPoolExecutor(max_workers=10) as executor:
            executor.map(_check_and_add, proxies)

        print(f"成功添加 {added} 个手动代理")
        return added

    def _is_proxy_working(self, proxy: Dict) -> bool:
        """Probe a proxy against known endpoints; True on the first response.

        Any status < 500 counts as "working" (the proxy relayed the request,
        even if the endpoint rejected it).
        """
        test_urls = [
            "http://httpbin.org/ip",
            "http://www.google.com/gen_204"
        ]

        proxies = {
            'http': proxy['http'],
            'https': proxy['https']
        }

        for url in test_urls:
            try:
                response = requests.get(
                    url,
                    proxies=proxies,
                    timeout=self.proxy_timeout,
                    allow_redirects=False
                )
                if 200 <= response.status_code < 500:
                    return True
            except:
                # NOTE(review): bare except also swallows KeyboardInterrupt;
                # 'except requests.RequestException' would be safer.
                continue
        return False

    def get_random_proxy(self) -> Optional[Dict]:
        """Pick a random active proxy from the pool.

        The selected dict is annotated with '_redis_key' so callers can hand
        it back to mark_proxy_failed.

        :return: proxy dict, or None when the pool is empty and auto-refresh
                 is disabled.
        """
        # NOTE(review): the API-pool branch is deliberately commented out —
        # only the manual pool is consulted below; confirm before re-enabling.
        # api_key = self._get_redis_key('api')
        manual_key = self._get_redis_key('manual')

        # Collect all active proxies.
        proxies = []

        # API pool (disabled):
        # api_proxies = self.redis_conn.hgetall(api_key)
        # for proxy_json in api_proxies.values():
        #     proxy = json.loads(proxy_json)
        #     if proxy.get('status') == 'active':
        #         proxies.append(proxy)

        # Fall back to the manual pool when no API proxies were gathered or
        # auto-refresh is off.
        if not proxies or not self.auto_refresh:
            manual_proxies = self.redis_conn.hgetall(manual_key)
            for proxy_json in manual_proxies.values():
                proxy = json.loads(proxy_json)
                if proxy.get('status') == 'active':
                    proxies.append(proxy)

        if not proxies:
            if self.auto_refresh:
                print("代理池为空,尝试从API获取...")
                self._refresh_api_proxies()
                # NOTE(review): unbounded recursion — if the refresh keeps
                # failing this recurses until RecursionError; consider a retry cap.
                return self.get_random_proxy()
            else:
                print("代理池为空且自动刷新已关闭")
                return None

        # Random choice; remember which Redis hash the proxy came from.
        selected = random.choice(proxies)
        selected['_redis_key'] = self._get_redis_key(selected['source'])
        return selected

    def mark_proxy_failed(self, proxy: Dict):
        """Remove a failed proxy from its pool (after optional API re-check).

        API-sourced proxies are re-validated against the vendor API first so a
        transient request failure doesn't evict a still-working proxy.
        """
        if '_redis_key' not in proxy:
            # Not a proxy handed out by get_random_proxy — nothing to remove.
            return

        # For API proxies, trust the vendor's validity check over one failure.
        if proxy.get('source') == 'api' and self.enable_api_validation:
            valid_status = self._check_proxy_valid([proxy['server']])
            is_valid = valid_status.get(proxy['server'], False)

            if is_valid:
                print(f"代理 {proxy['server']} API验证仍有效,暂不移除")
                return

        with self.lock:
            self.redis_conn.hdel(proxy['_redis_key'], proxy['server'])
            print(f"代理 {proxy['server']} 已被移除")

    def request_with_proxy(
        self,
        method: str,
        url: str,
        retry_count: int = 0,
        **kwargs
    ) -> requests.Response:
        """Send an HTTP request through a pooled proxy, retrying on failure.

        On failure the proxy is marked failed and the call recurses with a new
        proxy, up to ``max_retries`` attempts.

        :param method: HTTP method
        :param url: request URL
        :param retry_count: internal recursion counter (leave at 0)
        :param kwargs: extra arguments forwarded to requests.request
        :return: Response object
        :raises requests.exceptions.RequestException: when retries are
                exhausted or no proxy is available
        """
        if retry_count >= self.max_retries:
            raise requests.exceptions.RequestException(f"达到最大重试次数 {self.max_retries}")

        proxy = self.get_random_proxy()
        if not proxy:
            raise requests.exceptions.RequestException("无可用代理")

        try:
            response = requests.request(
                method,
                url,
                proxies={
                    'http': proxy['http'],
                    'https': proxy['https']
                },
                timeout=self.proxy_timeout,
                **kwargs
            )

            # Treat 4xx/5xx as a proxy failure too, so the except path below
            # rotates to a different proxy.
            if response.status_code >= 400:
                raise requests.exceptions.HTTPError(
                    f"HTTP错误: {response.status_code}",
                    response=response
                )

            return response

        except requests.exceptions.RequestException as e:
            print(f"代理 {proxy['server']} 请求失败: {e}")
            self.mark_proxy_failed(proxy)
            # Recursive retry with a freshly selected proxy.
            return self.request_with_proxy(method, url, retry_count + 1, **kwargs)

    def get_pool_status(self) -> Dict:
        """Return pool sizes and settings for monitoring.

        NOTE(review): 'last_update' is the time of this call, not the time of
        the last actual refresh.
        """
        api_key = self._get_redis_key('api')
        manual_key = self._get_redis_key('manual')

        return {
            'api_proxies': self.redis_conn.hlen(api_key),
            'manual_proxies': self.redis_conn.hlen(manual_key),
            'auto_refresh': self.auto_refresh,
            'last_update': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
|
|
@ -11,7 +11,7 @@ XUEQIU_HEADERS = {
|
|||
'Accept-Encoding': 'gzip, deflate, br, zstd',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Client-Version': 'v2.44.75',
|
||||
'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; Hm_lvt_1db88642e346389874251b5a1eded6e3=1749028611; acw_tc=0a27aa3317504105803118918e00823643107980bbedc1c9307d37d1cf7fb7; xq_a_token=5b11f7f5a3986a802def7dea48868a3b2849e719; xqat=5b11f7f5a3986a802def7dea48868a3b2849e719; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzUzMDAzNjExLCJjdG0iOjE3NTA0MTE2MTEyODIsImNpZCI6ImQ5ZDBuNEFadXAifQ.FB12KEYSdWo5g3UqQbnfqR-Gopar8JkuDf54eSf86FzmuGG9XugW7osl3idav9oTgLzgWBut4X6a5-gbqn61wPPV7OV3dMO8oNyBZUxMjisaMBW_-IcUuQ1z-gtXBcHleNamANA-2H3Xf5mZNdVXAW_E0rQZE_y0TEqzeiLxfU5B_RJOTR1Zq_-BQaaOn_Tk0or_hu-nOZR-26lBtcBl1VoTR2Ov1tm_CRN375ohMcZniA265X8umpL_tysQ4m7oazNyezopJE6W7jt-djNGJXZAbLoVXF1U2ULKV325dPWHvPcSZOevxGprItb665QNZvXEzhBB-4fuzhAnYBsqGw; xq_r_token=2ba0614b400ec779704c3adaa7f17c2c2c88143b; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1750411602; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=Jg9N/8vN3mjfEOHOPlAxHQ+1x+X4nN7jc9vkKRkIGulMwceWqptDd3OUgWPM6XqKNq/15EvM032gWoeeYMHgRg%3D%3D; ssxmod_itna=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDyliGQQmhGtKq0aCDD/KlYeDZDGFdDqx0Ei6FiDHICezjQgDKgACjktpeBflQR5RYGlcNpp=0IDpnOAGdeGLDY=DCTKK420iDYYfDBYD74G+DDeDih3Dj4GmDGY=aeDFIQutVCRKdxDwDB=DmqG23ObDm4DfDDLorBD4Il2YDDtDAkaGNPDADA3doDDlYD84Kdb4DYpogQ0FdgahphusIeDMixGXzAlzx9CnoiWtV/vfrf2aHPGuDG=OcC0Hh2bmRT3f8hGxYDo5Qe8hx+Bx3rKq0DW7HRYqYYeYAh+2DR0DQhxRDxgGYgEw/rdPrd5kh6WdYYrcqsMkbZMshie5QhNiNQDoOBtQgdeAde6D/r5l05Dr=grAWG4HmmNBiQm44D; ssxmod_itna2=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDyliGQQmhGtKq0aeDWhYebouIdHFW5NsDoenRT6eeD',
|
||||
'Cookie': 'cookiesu=811743062689927; device_id=33fa3c7fca4a65f8f4354e10ed6b7470; smidV2=20250327160437f244626e8b47ca2a7992f30f389e4e790074ae48656a22f10; HMACCOUNT=8B64A2E3C307C8C0; s=c611ttmqlj; xq_is_login=1; u=8493411634; bid=4065a77ca57a69c83405d6e591ab5449_m8r2nhs8; __utma=1.434320573.1747189698.1747189698.1747189698.1; __utmc=1; __utmz=1.1747189698.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); snbim_minify=true; Hm_lvt_1db88642e346389874251b5a1eded6e3=1749028611; xq_a_token=5b11f7f5a3986a802def7dea48868a3b2849e719; xqat=5b11f7f5a3986a802def7dea48868a3b2849e719; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjg0OTM0MTE2MzQsImlzcyI6InVjIiwiZXhwIjoxNzUzMDAzNjExLCJjdG0iOjE3NTA0MTE2MTEyODIsImNpZCI6ImQ5ZDBuNEFadXAifQ.FB12KEYSdWo5g3UqQbnfqR-Gopar8JkuDf54eSf86FzmuGG9XugW7osl3idav9oTgLzgWBut4X6a5-gbqn61wPPV7OV3dMO8oNyBZUxMjisaMBW_-IcUuQ1z-gtXBcHleNamANA-2H3Xf5mZNdVXAW_E0rQZE_y0TEqzeiLxfU5B_RJOTR1Zq_-BQaaOn_Tk0or_hu-nOZR-26lBtcBl1VoTR2Ov1tm_CRN375ohMcZniA265X8umpL_tysQ4m7oazNyezopJE6W7jt-djNGJXZAbLoVXF1U2ULKV325dPWHvPcSZOevxGprItb665QNZvXEzhBB-4fuzhAnYBsqGw; xq_r_token=2ba0614b400ec779704c3adaa7f17c2c2c88143b; _c_WBKFRo=dsWgHR8i8KGPbIyhFlN51PHOzVuuNytvUAFppfkD; _nb_ioWEgULi=; .thumbcache_f24b8bbe5a5934237bbc0eda20c1b6e7=WLnKAYCmLCxL13sG3b3dCuaIIWxGZItK2dxyj3SQELeHQlC27oBs/jcXLvR4rVqsrBg/lnbnfvkgBRtAnBSoIQ%3D%3D; acw_tc=0a27a99417515124338191367e005b5efa24266b40be35881bb2c1c385360c; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1751513519; ssxmod_itna=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDylQ0dV0GrtqN42D5D/SP4GzDiLPGhDBWAFdYGdTt4lBEWYpxBBIToh63dwWKwxYs=B1pxKWTGRgQW01e0aDmKDUcFYP4iiTx0rD0eDPxDYDGRWD7PDoxDrHzYDjDEp8Rom3F4DKx0kDY5Dwa4mDYPDWxDFi+0xeDowrDDCDi5fZb3DixiaTbDDBriueYmweDi3iIoGfF4LV3TIRxoD9h4DsZGBS9MBpT0vOR6Cbwm+A+3DvxDkXzEGj4umhUghmIriDb7D44rF7GYzGbmr1ADUmrKtY4WBDWAQY+rmx4Rhk0wxmxtiBYu42YOFR443mlf5/Gm/BHYerdzqxiR=uEhl7KSbKQitBu7nhHK0N7GQ897zDPA0eZYiYoqAGDD; 
ssxmod_itna2=eqGxBDnGKYuxcD4kDRgxYq7ueYKS8DBP01Dp2xQyP08D60DB40Q0OHhqDylQ0dV0GrtqN42YeDA4rYnRItORCDU1ZKlwhQ4eIKPSbxD',
|
||||
'Referer': 'https://weibo.com/u/7735765253',
|
||||
'Sec-Ch-Ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
|
||||
'Sec-Ch-Ua-Mobile': '?0',
|
||||
|
|
|
@ -6,6 +6,7 @@ from sqlalchemy import create_engine, text
|
|||
from datetime import datetime, timedelta
|
||||
from tqdm import tqdm
|
||||
from src.scripts.config import XUEQIU_HEADERS
|
||||
from src.scripts.ProxyIP import EnhancedProxyManager
|
||||
import gc
|
||||
|
||||
class StockDailyDataCollector:
|
||||
|
@ -19,6 +20,8 @@ class StockDailyDataCollector:
|
|||
pool_recycle=3600
|
||||
)
|
||||
self.headers = XUEQIU_HEADERS
|
||||
# 初始化代理管理器
|
||||
self.proxy_manager = EnhancedProxyManager()
|
||||
|
||||
def fetch_all_stock_codes(self):
|
||||
# 从gp_code_all获取股票代码
|
||||
|
@ -37,15 +40,18 @@ class StockDailyDataCollector:
|
|||
codes_hk = df_hk['gp_code'].tolist()
|
||||
|
||||
# 合并去重
|
||||
all_codes = list(set(codes_all + codes_zs + codes_hk))
|
||||
print(f"获取到股票代码: {len(codes_all)}个来自gp_code_all, {len(codes_zs)}个来自gp_code_zs, {len(codes_hk)}个来自gp_code_hk, 去重后共{len(all_codes)}个")
|
||||
all_codes = list(set(codes_hk))
|
||||
# all_codes = list(set(codes_all + codes_zs + codes_hk))
|
||||
print(f"获取到股票代码: {len(codes_all)} 个来自gp_code_all, {len(codes_zs)}个来自gp_code_zs, {len(codes_hk)}个来自gp_code_hk, 去重后共{len(all_codes)}个")
|
||||
return all_codes
|
||||
|
||||
def fetch_daily_stock_data(self, symbol, begin, count=-1):
|
||||
"""获取日线数据,count=-1表示最新一天,-2表示最近两天,-1800表示最近1800天"""
|
||||
url = f"https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin={begin}&period=day&type=before&count={count}&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance"
|
||||
try:
|
||||
response = requests.get(url, headers=self.headers, timeout=20)
|
||||
# 使用代理管理器发送请求
|
||||
# response = requests.get(url, headers=self.headers, timeout=20)
|
||||
response = self.proxy_manager.request_with_proxy('get', url, headers=self.headers)
|
||||
return response.json()
|
||||
except Exception as e:
|
||||
print(f"Request error for {symbol}: {e}")
|
||||
|
@ -85,9 +91,9 @@ class StockDailyDataCollector:
|
|||
start_date = datetime.strptime(date, '%Y-%m-%d')
|
||||
date_str = date
|
||||
|
||||
delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
|
||||
with self.engine.begin() as conn:
|
||||
conn.execute(delete_query, {"date_str": f"{date_str}%"})
|
||||
# delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
|
||||
# with self.engine.begin() as conn:
|
||||
# conn.execute(delete_query, {"date_str": f"{date_str}%"})
|
||||
|
||||
stock_codes = self.fetch_all_stock_codes()
|
||||
begin = int(start_date.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000)
|
||||
|
@ -253,6 +259,30 @@ class StockDailyDataCollector:
|
|||
except Exception as e:
|
||||
print(f"!!! Error saving ex-rights log: {e}")
|
||||
|
||||
def fetch_single_stock_history(self, symbol, days=1800):
    """
    Fetch and persist the historical daily k-line data for one stock.

    :param symbol: stock code in Xueqiu format (e.g. "SZ300177")
    :param days: number of trailing days to fetch, default 1800
    :return: True if the data was fetched and saved, False otherwise
    """
    print(f"开始获取 {symbol} 最近 {days} 天的历史数据...")
    begin = int(datetime.now().timestamp() * 1000)
    data = self.fetch_daily_stock_data(symbol, begin, count=-days)

    # fetch_daily_stock_data returns None when the HTTP request itself
    # fails; guard before calling .get() to avoid an AttributeError.
    if not data:
        print(f"获取 {symbol} 数据失败: 无响应数据")
        return False

    if data.get('error_code') == 0:
        df = self.transform_data(data, symbol)
        if df is not None and not df.empty:
            df.to_sql('gp_day_data', self.engine, if_exists='append', index=False)
            print(f"成功保存 {symbol} 的历史数据,共 {len(df)} 条记录")
            return True
        print(f"未能转换 {symbol} 的数据")
        return False

    print(f"获取 {symbol} 数据失败: {data.get('error_description')}")
    return False
|
||||
|
||||
def collect_stock_daily_data(db_url, date=None):
    """Entry point: collect daily stock data for the given date.

    :param db_url: SQLAlchemy database URL
    :param date: trade date string, or None for the collector's default
    """
    StockDailyDataCollector(db_url).fetch_data_for_date(date)
|
||||
|
|
|
@ -0,0 +1,328 @@
|
|||
# coding:utf-8
|
||||
|
||||
import requests
|
||||
import pandas as pd
|
||||
from sqlalchemy import create_engine, text
|
||||
from datetime import datetime, timedelta
|
||||
from tqdm import tqdm
|
||||
import sys
|
||||
import os
|
||||
import gc
|
||||
|
||||
# 添加项目根目录到路径
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.append(project_root)
|
||||
|
||||
from src.scripts.config import XUEQIU_HEADERS
|
||||
from src.scripts.ProxyIP import EnhancedProxyManager
|
||||
from src.quantitative_analysis.batch_stock_price_collector import fetch_and_store_stock_data
|
||||
from src.valuation_analysis.stock_price_collector import StockPriceCollector
|
||||
from src.scripts.stock_daily_data_collector import StockDailyDataCollector
|
||||
|
||||
|
||||
class StockDailyDataCollectorV2:
    """Stock daily-data collector, V2 — merges Xueqiu and Eastmoney data.

    One run of :meth:`run_daily_collection` fetches real-time quotes from
    both sources, joins them on the stock symbol, reshapes the result to
    the ``gp_day_data`` schema, detects ex-rights events by comparing the
    reported previous close with the stored close, persists everything,
    and re-downloads the full history for any ex-rights stock.
    """

    def __init__(self, db_url):
        """
        :param db_url: SQLAlchemy database URL for the target MySQL instance
        """
        self.engine = create_engine(
            db_url,
            pool_size=5,
            max_overflow=10,
            pool_recycle=3600
        )
        self.headers = XUEQIU_HEADERS
        self.proxy_manager = EnhancedProxyManager()
        # Eastmoney real-time quote collector.
        self.eastmoney_collector = StockPriceCollector(db_url)
        # Original collector, reused for single-stock history backfill.
        self.original_collector = StockDailyDataCollector(db_url)

    def convert_symbol_format(self, symbol):
        """Convert a Xueqiu-style code to Eastmoney style.

        E.g. ``SZ300177`` -> ``300177.SZ``. Codes with any other prefix
        are returned unchanged.
        """
        if symbol.startswith('SZ'):
            return f"{symbol[2:]}.SZ"
        elif symbol.startswith('SH'):
            return f"{symbol[2:]}.SH"
        else:
            return symbol

    def convert_eastmoney_to_xueqiu_format(self, stock_code):
        """Convert an Eastmoney-style code to Xueqiu style.

        E.g. ``300177.SZ`` -> ``SZ300177``. Codes with any other suffix
        are returned unchanged.
        """
        if '.SZ' in stock_code:
            return f"SZ{stock_code.replace('.SZ', '')}"
        elif '.SH' in stock_code:
            return f"SH{stock_code.replace('.SH', '')}"
        else:
            return stock_code

    def fetch_eastmoney_data(self):
        """Fetch today's Eastmoney snapshot, tagged with Xueqiu-style symbols.

        Always returns a DataFrame (possibly empty) so callers can safely
        check ``.empty``. (Previously an empty fetch fell through without a
        return, yielding ``None`` and crashing the caller's ``.empty`` check.)
        """
        print("正在获取东方财富数据...")
        df = self.eastmoney_collector.fetch_all_data()
        if not df.empty:
            # Re-key to Xueqiu format so the later merge on 'symbol' works.
            df['symbol'] = df['stock_code'].apply(self.convert_eastmoney_to_xueqiu_format)
            print(f"成功获取东方财富数据,共 {len(df)} 条记录")
        return df

    def merge_data(self, xueqiu_df, eastmoney_df):
        """Left-join the Eastmoney OHLC columns onto the Xueqiu frame by symbol."""
        print("正在合并雪球和东方财富数据...")

        merged_df = pd.merge(
            xueqiu_df,
            eastmoney_df[['symbol', 'high_price', 'low_price', 'open_price', 'pre_close', 'list_date']],
            on='symbol',
            how='left'
        )

        print(f"数据合并完成,共 {len(merged_df)} 条记录")
        return merged_df

    def transform_to_gp_day_data(self, merged_df):
        """Reshape the merged frame into the ``gp_day_data`` table layout.

        ``pre_close`` is carried along solely for the ex-rights check and
        is not part of the stored schema.
        """
        print("正在转换数据格式...")

        gp_day_df = pd.DataFrame({
            'symbol': merged_df['symbol'],
            'timestamp': pd.to_datetime(merged_df['fetch_time']),
            'volume': merged_df['volume'],
            'open': merged_df['open_price'],
            'high': merged_df['high_price'],
            'low': merged_df['low_price'],
            'close': merged_df['current'],
            'chg': merged_df['chg'],
            'percent': merged_df['percent'],
            'turnoverrate': merged_df['turnover_rate'],
            'amount': merged_df['amount'],
            'pb': merged_df['pb'],
            'pe': merged_df['pe_ttm'],
            'ps': merged_df['ps'],
            # Kept for the ex-rights comparison in check_ex_rights_before_save.
            'pre_close': merged_df['pre_close'],
        })

        print(f"数据转换完成,共 {len(gp_day_df)} 条记录")
        return gp_day_df

    def save_to_database(self, df):
        """Replace today's rows in ``gp_day_data`` with *df*, in 1000-row batches."""
        if df.empty:
            print("没有数据需要保存")
            return

        print(f"正在保存数据到数据库,共 {len(df)} 条记录...")

        # Drop today's existing rows first so a re-run does not duplicate them.
        today_str = datetime.now().strftime('%Y-%m-%d')
        delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
        try:
            with self.engine.begin() as conn:
                conn.execute(delete_query, {"date_str": f"{today_str}%"})
            print(f"已删除今日 {today_str} 的旧数据")
        except Exception as e:
            print(f"删除今日数据失败: {e}")

        # Insert in batches; a failed batch is reported but does not abort the rest.
        batch_size = 1000
        for i in range(0, len(df), batch_size):
            batch = df.iloc[i:i + batch_size]
            try:
                batch.to_sql('gp_day_data', self.engine, if_exists='append', index=False)
                print(f"已保存第 {i//batch_size + 1} 批数据")
            except Exception as e:
                print(f"保存第 {i//batch_size + 1} 批数据失败: {e}")

        print("数据保存完成")

    def check_ex_rights_before_save(self, df):
        """Detect ex-rights stocks before saving.

        For each row, compare the API-reported previous close with the
        stored close for the prior trading day; a mismatch beyond 0.001
        marks the stock as ex-rights. Returns ``(symbols, log_records)``.
        """
        print("步骤5.1: 检查除权情况(保存前)...")

        ex_rights_stocks = []
        ex_rights_log_data = []
        today_str = datetime.now().strftime('%Y-%m-%d')

        # The statement is identical for every symbol — build it once.
        query = text("""
            SELECT `close`, `timestamp` FROM gp_day_data
            WHERE symbol = :symbol
            ORDER BY `timestamp` DESC
            LIMIT 2
        """)

        for _, row in tqdm(df.iterrows(), total=len(df), desc="检查除权"):
            symbol = row['symbol']
            current_pre_close = row['pre_close']

            # Nothing to compare against without a pre_close value.
            if pd.isna(current_pre_close):
                continue

            try:
                with self.engine.connect() as conn:
                    results = conn.execute(query, {"symbol": symbol}).fetchall()

                if results:
                    latest_record = results[0]
                    latest_timestamp = latest_record[1]
                    latest_date_str = latest_timestamp.strftime('%Y-%m-%d')

                    if latest_date_str == today_str and len(results) > 1:
                        # Today's row already exists — compare against the
                        # previous trading day's close instead.
                        db_last_close = float(results[1][0])
                    else:
                        db_last_close = float(results[0][0])

                    if abs(db_last_close - current_pre_close) > 0.001:
                        print(f"发现除权股票: {symbol}, 数据库收盘价: {db_last_close}, 当前昨收价: {current_pre_close}")
                        ex_rights_stocks.append(symbol)
                        ex_rights_log_data.append({
                            'symbol': symbol,
                            'date': today_str,
                            'db_price': db_last_close,
                            'api_price': current_pre_close,
                            'log_time': datetime.now()
                        })

            except Exception as e:
                print(f"查询 {symbol} 历史数据失败: {e}")
                continue

        if ex_rights_stocks:
            print(f"检测到 {len(ex_rights_stocks)} 只除权股票: {ex_rights_stocks}")
        else:
            print("未发现除权股票")

        return ex_rights_stocks, ex_rights_log_data

    def save_ex_rights_log(self, log_data: list):
        """Persist ex-rights detections into ``gp_ex_rights_log``."""
        if not log_data:
            return

        print(f"正在保存 {len(log_data)} 条除权日志到gp_ex_rights_log表...")
        try:
            df = pd.DataFrame(log_data)
            # Align column names with the table's schema.
            df = df.rename(columns={
                'symbol': 'stock_code',
                'date': 'change_date',
                'db_price': 'before_price',
                'api_price': 'after_price',
                'log_time': 'update_time'
            })
            df.to_sql('gp_ex_rights_log', self.engine, if_exists='append', index=False)
            print("除权日志保存成功")
        except Exception as e:
            print(f"保存除权日志失败: {e}")

    def handle_ex_rights_stocks(self, ex_rights_stocks, ex_rights_log_data):
        """Log ex-rights events, then wipe and re-download 1800 days per stock."""
        if not ex_rights_stocks:
            return

        print("步骤6: 处理除权股票...")

        if ex_rights_log_data:
            self.save_ex_rights_log(ex_rights_log_data)

        print(f"开始处理 {len(ex_rights_stocks)} 只除权股票,重新获取历史数据...")

        delete_query = text("DELETE FROM gp_day_data WHERE symbol = :symbol")
        for symbol in tqdm(ex_rights_stocks, desc="处理除权股票"):
            try:
                # Ex-rights invalidates all stored prices — drop them first.
                with self.engine.begin() as conn:
                    conn.execute(delete_query, {"symbol": symbol})
                print(f"已删除 {symbol} 的历史数据")

                # Backfill 1800 days via the original (Xueqiu) collector.
                success = self.original_collector.fetch_single_stock_history(symbol, 1800)
                if success:
                    print(f"成功重新获取 {symbol} 的历史数据")
                else:
                    print(f"重新获取 {symbol} 的历史数据失败")

            except Exception as e:
                print(f"处理除权股票 {symbol} 失败: {e}")

    def run_daily_collection(self):
        """Run the full daily pipeline end to end."""
        print("=" * 60)
        print("股票日线数据采集器V2 - 开始运行")
        print("=" * 60)

        try:
            # 1. Xueqiu snapshot.
            print("步骤1: 获取雪球数据...")
            xueqiu_df = fetch_and_store_stock_data()
            if xueqiu_df.empty:
                print("雪球数据获取失败,终止运行")
                return

            # 2. Eastmoney snapshot.
            print("步骤2: 获取东方财富数据...")
            eastmoney_df = self.fetch_eastmoney_data()
            if eastmoney_df.empty:
                print("东方财富数据获取失败,终止运行")
                return

            # 3. Join the two sources on symbol.
            print("步骤3: 合并数据...")
            merged_df = self.merge_data(xueqiu_df, eastmoney_df)

            # 4. Reshape to the gp_day_data schema.
            print("步骤4: 转换数据格式...")
            gp_day_df = self.transform_to_gp_day_data(merged_df)

            # 5. Ex-rights detection must run BEFORE today's rows are written,
            #    since it compares against the last stored close.
            ex_rights_stocks, ex_rights_log_data = self.check_ex_rights_before_save(gp_day_df)

            # 5.2. Persist today's data.
            print("步骤5.2: 保存到数据库...")
            self.save_to_database(gp_day_df)

            # 6. Rebuild history for any ex-rights stock.
            self.handle_ex_rights_stocks(ex_rights_stocks, ex_rights_log_data)

            print("=" * 60)
            print("股票日线数据采集完成")
            print("=" * 60)

        except Exception as e:
            print(f"采集过程中发生错误: {e}")
        finally:
            # Release pooled DB connections and drop large frames.
            self.engine.dispose()
            gc.collect()
|
||||
|
||||
|
||||
def collect_stock_daily_data_v2(db_url):
    """Entry point for the V2 daily collection pipeline.

    :param db_url: SQLAlchemy database URL
    """
    StockDailyDataCollectorV2(db_url).run_daily_collection()


if __name__ == "__main__":
    db_url = 'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj'
    collect_stock_daily_data_v2(db_url)
|
|
@ -0,0 +1,244 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
股票代码格式转换工具类
|
||||
|
||||
提供多种股票代码格式之间的转换功能
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class StockCodeFormatter:
    """Converter between common Chinese stock-code notations.

    Supported notations:

    * dot format:    ``688008.SH``
    * prefix format: ``SH688008``
    * bare digits:   ``688008`` (market inferred from leading digits)
    """

    def __init__(self):
        """The converter is stateless; nothing to initialise."""
        pass

    def _parse_stock_code(self, stock_code: str) -> tuple:
        """Split a raw code into its ``(digits, market)`` components.

        Args:
            stock_code: code in any supported notation.

        Returns:
            tuple: e.g. ``('688008', 'SH')``; either element may be
            ``None`` when it cannot be determined.
        """
        if not stock_code:
            return None, None

        normalized = stock_code.strip().upper()

        # "688008.SH" — exactly one dot separates code and market.
        if '.' in normalized:
            pieces = normalized.split('.')
            if len(pieces) == 2:
                return pieces[0], pieces[1]
            return None, None

        # "SH688008" — two-letter market prefix.
        if normalized.startswith(('SZ', 'SH', 'BJ')):
            return normalized[2:], normalized[:2]

        # Bare digits — infer the market from the leading digits.
        if normalized.isdigit():
            if normalized.startswith(('60', '68')):
                return normalized, 'SH'
            if normalized.startswith(('00', '30', '20')):
                return normalized, 'SZ'
            if normalized.startswith(('8', '43', '87')):
                return normalized, 'BJ'
            return normalized, None

        return None, None

    def to_dot_format(self, stock_code: str) -> Optional[str]:
        """Render the code in dot notation (e.g. ``688008.SH``).

        Returns ``None`` when either the code digits or the market
        cannot be determined.

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_dot_format('SH688008')
            '688008.SH'
            >>> formatter.to_dot_format('688008')
            '688008.SH'
            >>> formatter.to_dot_format('688008.SH')
            '688008.SH'
        """
        code, market = self._parse_stock_code(stock_code)
        return f"{code}.{market}" if code and market else None

    def to_prefix_format(self, stock_code: str) -> Optional[str]:
        """Render the code in prefix notation (e.g. ``SH688008``).

        Returns ``None`` when either the code digits or the market
        cannot be determined.

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_prefix_format('688008.SH')
            'SH688008'
            >>> formatter.to_prefix_format('688008')
            'SH688008'
            >>> formatter.to_prefix_format('SH688008')
            'SH688008'
        """
        code, market = self._parse_stock_code(stock_code)
        return f"{market}{code}" if code and market else None

    def to_number_only(self, stock_code: str) -> Optional[str]:
        """Return just the numeric part of the code (e.g. ``688008``).

        The market need not be identifiable; only the digits matter here.

        Examples:
            >>> formatter = StockCodeFormatter()
            >>> formatter.to_number_only('688008.SH')
            '688008'
            >>> formatter.to_number_only('SH688008')
            '688008'
            >>> formatter.to_number_only('688008')
            '688008'
        """
        code, _ = self._parse_stock_code(stock_code)
        return code if code else None

    def get_market(self, stock_code: str) -> Optional[str]:
        """Return the market tag ('SH', 'SZ', 'BJ'), or ``None`` if unknown."""
        _, market = self._parse_stock_code(stock_code)
        return market

    def is_valid_stock_code(self, stock_code: str) -> bool:
        """True when both the digits and the market can be determined."""
        code, market = self._parse_stock_code(stock_code)
        return code is not None and market is not None

    def batch_convert(self, stock_codes: list, target_format: str = 'dot') -> dict:
        """Convert many codes at once.

        Args:
            stock_codes: list of codes in any supported notation.
            target_format: one of ``'dot'``, ``'prefix'``, ``'number'``.

        Returns:
            dict: ``{original: converted}``; the value is ``None`` for
            unconvertible codes or an unknown *target_format*.
        """
        converters = {
            'dot': self.to_dot_format,
            'prefix': self.to_prefix_format,
            'number': self.to_number_only,
        }
        convert = converters.get(target_format)
        return {code: (convert(code) if convert else None) for code in stock_codes}
|
||||
|
||||
|
||||
def main():
    """Demonstrate StockCodeFormatter on a set of representative codes."""
    print("=== 股票代码格式转换工具示例 ===")

    fmt = StockCodeFormatter()

    # Representative samples: SH/SZ/BJ markets in every supported notation.
    samples = [
        '688008.SH',
        'SH688008',
        '688008',
        '300661.SZ',
        'SZ300661',
        '300661',
        '000858.SZ',
        '600519.SH',
        '430123.BJ',
        'BJ430123',
    ]

    print("\n原始代码 -> 点分格式 -> 前缀格式 -> 纯数字格式 -> 市场")
    print("-" * 70)

    for code in samples:
        dot_format = fmt.to_dot_format(code)
        prefix_format = fmt.to_prefix_format(code)
        number_format = fmt.to_number_only(code)
        market = fmt.get_market(code)
        status = "✓" if fmt.is_valid_stock_code(code) else "✗"
        print(f"{code:12} -> {dot_format or 'None':12} -> {prefix_format or 'None':12} -> {number_format or 'None':8} -> {market or 'None':4} {status}")

    # Batch conversion, once per target notation.
    print(f"\n=== 批量转换示例 ===")
    batch_codes = ['SH688008', '300661', 'BJ430123']

    for label, target in (("转换为点分格式:", 'dot'), ("转换为前缀格式:", 'prefix')):
        print(label)
        for original, converted in fmt.batch_convert(batch_codes, target).items():
            print(f"  {original} -> {converted}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,35 @@
|
|||
import requests
|
||||
import json
|
||||
|
||||
def trigger_batch_stock_price_collection():
    """
    Hit the scheduler endpoint that starts the batch A-share price
    collection job, then pretty-print whatever the server answers.
    """
    url = "http://192.168.16.214:5089/scheduler/batch_stock_price/collection"

    print(f"正在向以下地址发送GET请求:\n{url}\n")

    try:
        # 30-second timeout so a hung scheduler doesn't block this tool.
        response = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as e:
        # Network problems, timeouts, connection refused, etc.
        print(f"请求失败,发生异常: {e}")
        return

    print(f"请求完成,HTTP状态码: {response.status_code}\n")

    try:
        payload = response.json()
    except json.JSONDecodeError:
        print("服务器响应内容 (非JSON格式):")
        print(response.text)
    else:
        print("服务器响应内容 (JSON格式):")
        print(json.dumps(payload, indent=2, ensure_ascii=False))


if __name__ == '__main__':
    trigger_batch_stock_price_collection()
|
Loading…
Reference in New Issue