# coding:utf-8
import os
from datetime import datetime

import pandas as pd
import requests
from sqlalchemy import create_engine, text
from tqdm import tqdm

from config import XUEQIU_HEADERS


class StockDailyDataCollector:
    """Collector that downloads daily K-line data from Xueqiu and stores it in a database."""

    def __init__(self, db_url):
        """
        Initialise the collector.

        Parameters:
        -----------
        db_url : str
            Database connection URL passed to ``sqlalchemy.create_engine``.
        """
        self.engine = create_engine(db_url)
        self.headers = XUEQIU_HEADERS

    def fetch_all_stock_codes(self):
        """Return every ``gp_code`` value from the ``gp_code_all_copy`` table as a list."""
        query = "SELECT gp_code FROM gp_code_all_copy"
        df = pd.read_sql(query, self.engine)
        return df['gp_code'].tolist()

    def fetch_daily_stock_data(self, symbol, begin, timeout=10):
        """
        Request daily K-line data for one stock and return the decoded JSON body.

        Parameters:
        -----------
        symbol : str
            Stock code.
        begin : int
            Start timestamp in milliseconds.
        timeout : float, optional
            Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the whole batch indefinitely.

        Raises:
        -------
        requests.RequestException
            On network failure or timeout.
        ValueError
            If the response body is not valid JSON.
        """
        # NOTE(review): count=-1500 presumably asks for up to 1500 bars ending
        # at `begin`; confirm against the API, since the caller only deletes a
        # single day's rows before inserting.
        url = f"https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin={begin}&period=day&type=before&count=-1500&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance"
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.json()

    def save_daily_data_to_database(self, data, symbol):
        """
        Append the daily rows contained in an API response to ``gp_day_data``.

        Parameters:
        -----------
        data : dict
            Data returned by the API; rows are read from ``data['data']['item']``
            and column names from ``data['data']['column']``.
        symbol : str
            Stock code, stored in the ``symbol`` column of every row.
        """
        try:
            items = data['data']['item']
            columns = data['data']['column']
        except KeyError as e:
            print(f"KeyError for {symbol}: {e}")
            return

        # Nothing to write; skip the database round-trip.
        if not items:
            return

        df = pd.DataFrame(items, columns=columns)
        df['symbol'] = symbol

        # Columns that exist in the database table.
        required_columns = ['timestamp', 'volume', 'open', 'high', 'low', 'close',
                            'chg', 'percent', 'turnoverrate', 'amount', 'symbol']

        # Keep only the columns actually present in the response. Copy so the
        # assignment below modifies an independent frame, not a slice view.
        existing_columns = [col for col in required_columns if col in df.columns]
        df = df[existing_columns].copy()

        # Convert millisecond epoch values to Asia/Shanghai datetimes.
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True).dt.tz_convert('Asia/Shanghai')

        df.to_sql('gp_day_data', self.engine, if_exists='append', index=False)

    def fetch_data_for_date(self, date=None):
        """
        Fetch daily data for the given date (or today) and save it to the database.

        Existing ``gp_day_data`` rows whose timestamp starts with the date are
        deleted first, then every stock code is requested and saved in turn.
        A failure for one stock is printed and does not stop the run.

        Parameters:
        -----------
        date : str, optional
            Date string in 'YYYY-MM-DD' format; if None, today's date is used.

        Raises:
        -------
        ValueError
            If ``date`` does not match the '%Y-%m-%d' format.
        """
        if date is None:
            # No date given: use the current local date.
            start_date = datetime.now()
        else:
            start_date = datetime.strptime(date, '%Y-%m-%d')
        # Always derive the string from the parsed date so inputs such as
        # '2024-9-1' become the zero-padded form that the LIKE pattern needs.
        date_str = start_date.strftime('%Y-%m-%d')

        # Remove rows for this date before inserting. engine.begin() commits on
        # success; a plain connect() would leave the DELETE uncommitted under
        # SQLAlchemy 2.x.
        delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
        with self.engine.begin() as conn:
            conn.execute(delete_query, {"date_str": f"{date_str}%"})

        # Fetch all stock codes.
        stock_codes = self.fetch_all_stock_codes()

        # Midnight of the target date in epoch milliseconds; identical for
        # every symbol, so compute it once outside the loop.
        begin = int(start_date.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000)

        # Request and save each stock's data, showing a progress bar.
        for symbol in tqdm(stock_codes, desc=f"Fetching and saving daily stock data for {date_str}"):
            try:
                data = self.fetch_daily_stock_data(symbol, begin)
            except (requests.RequestException, ValueError) as e:
                # Network failure or non-JSON body: report and move on.
                print(f"Request failed for {symbol} on {date_str}: {e}")
                continue

            if data.get('error_code') == 0:
                self.save_daily_data_to_database(data, symbol)
            else:
                print(f"Error fetching data for {symbol} on {date_str}: {data.get('error_description')}")

        print(f"Daily data fetching and saving completed for {date_str}.")


def collect_stock_daily_data(db_url, date=None):
    """
    Convenience wrapper: collect daily stock data.

    Parameters:
    -----------
    db_url : str
        Database connection URL.
    date : str, optional
        Date string in 'YYYY-MM-DD' format; if None, today's data is fetched.
    """
    collector = StockDailyDataCollector(db_url)
    collector.fetch_data_for_date(date)


if __name__ == "__main__":
    # Example invocation.
    # NOTE(security): the fallback URL embeds database credentials in source
    # control. Set STOCK_DB_URL in the environment and remove the literal
    # (and rotate the password) as soon as practical.
    db_url = os.environ.get(
        'STOCK_DB_URL',
        'mysql+pymysql://root:Chlry#$.8@192.168.18.199:3306/db_gp_cj',
    )

    # Option 1: use the convenience function to fetch today's data.
    collect_stock_daily_data(db_url)

    # Option 2: use the convenience function to fetch a specific date.
    # collect_stock_daily_data(db_url, '2020-01-01')

    # Option 3: use the class directly.
    # collector = StockDailyDataCollector(db_url)
    # collector.fetch_data_for_date()  # fetch today's data
    # collector.fetch_data_for_date('2024-09-11')  # fetch a specific date