# stock_fundamentals/src/scripts/stock_daily_data_collector.py (145 lines, 4.9 KiB, Python; revision of 2025-04-02 13:52:34 +08:00)
# coding:utf-8
import requests
import pandas as pd
from sqlalchemy import create_engine, text
from datetime import datetime
from tqdm import tqdm
from config import XUEQIU_HEADERS
class StockDailyDataCollector:
    """Collector for daily stock K-line data.

    Stock codes are read from the ``gp_code_all_copy`` table, daily bars are
    requested from the Xueqiu kline endpoint, and the resulting rows are
    appended to the ``gp_day_data`` table.
    """

    # Columns of the API response that are written to ``gp_day_data``.
    # Any other column returned by the API is dropped before saving.
    _DB_COLUMNS = (
        'timestamp', 'volume', 'open', 'high', 'low', 'close',
        'chg', 'percent', 'turnoverrate', 'amount', 'symbol',
    )

    def __init__(self, db_url):
        """
        Initialise the collector.

        Parameters:
        -----------
        db_url : str
            Database connection URL passed to ``sqlalchemy.create_engine``.
        """
        self.engine = create_engine(db_url)
        self.headers = XUEQIU_HEADERS

    def fetch_all_stock_codes(self):
        """Return every ``gp_code`` stored in ``gp_code_all_copy`` as a list."""
        query = "SELECT gp_code FROM gp_code_all_copy"
        df = pd.read_sql(query, self.engine)
        return df['gp_code'].tolist()

    def fetch_daily_stock_data(self, symbol, begin, timeout=30):
        """
        Request daily K-line data for one stock.

        Parameters:
        -----------
        symbol : str
            Stock code.
        begin : int
            Start timestamp in milliseconds.
        timeout : float, optional
            Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the whole run.

        Returns:
        --------
        The decoded JSON body of the response.

        Raises:
        -------
        requests.RequestException
            On connection problems or when the timeout expires.
        ValueError
            When the response body is not valid JSON.
        """
        # NOTE(review): count=-1500 presumably asks for up to 1500 bars ending
        # at `begin` -- confirm against the API before relying on it.
        url = f"https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin={begin}&period=day&type=before&count=-1500&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance"
        # The HTTP status is deliberately not checked here: the caller
        # inspects `error_code` / `error_description` in the JSON payload.
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.json()

    def save_daily_data_to_database(self, data, symbol):
        """
        Append the K-line rows contained in an API response to ``gp_day_data``.

        Payloads without ``data.item`` / ``data.column`` are reported and
        skipped; payloads with no rows are skipped silently.

        Parameters:
        -----------
        data : dict
            Data returned by the API.
        symbol : str
            Stock code, stored in the ``symbol`` column of every row.
        """
        try:
            items = data['data']['item']
            columns = data['data']['column']
        except (KeyError, TypeError) as e:
            # TypeError covers a payload whose 'data' entry is None.
            print(f"{type(e).__name__} for {symbol}: {e}")
            return

        if not items:
            # Nothing to insert; avoid a pointless round trip to the database.
            return

        df = pd.DataFrame(items, columns=columns)
        df['symbol'] = symbol

        # Keep only the columns that exist both in the table and the response.
        existing_columns = [col for col in self._DB_COLUMNS if col in df.columns]
        # .copy() so the assignment below does not write into a slice.
        df = df[existing_columns].copy()

        if 'timestamp' in df.columns:
            # Epoch milliseconds -> timezone-aware Asia/Shanghai datetimes.
            utc_times = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
            df['timestamp'] = utc_times.dt.tz_convert('Asia/Shanghai')

        df.to_sql('gp_day_data', self.engine, if_exists='append', index=False)

    def fetch_data_for_date(self, date=None):
        """
        Fetch daily data for the given date (or today) and save it.

        Rows of ``gp_day_data`` whose timestamp starts with the date are
        deleted first, then every stock code is fetched and saved. A failure
        for one symbol is printed and the run continues with the next one.

        Parameters:
        -----------
        date : str, optional
            Date string in 'YYYY-MM-DD' format. If None, today's date is used.

        Raises:
        -------
        ValueError
            If `date` does not match the 'YYYY-MM-DD' format.
        """
        if date is None:
            # No date given: use the current local date.
            start_date = datetime.now()
            date_str = start_date.strftime('%Y-%m-%d')
        else:
            start_date = datetime.strptime(date, '%Y-%m-%d')
            date_str = date

        # Remove existing rows for the date before inserting. engine.begin()
        # commits on success; a plain engine.connect() would roll the DELETE
        # back on close under SQLAlchemy 2.x, which does not autocommit.
        delete_query = text("DELETE FROM gp_day_data WHERE `timestamp` LIKE :date_str")
        with self.engine.begin() as conn:
            conn.execute(delete_query, {"date_str": f"{date_str}%"})

        stock_codes = self.fetch_all_stock_codes()

        # Midnight of the target day in epoch milliseconds. The value is the
        # same for every symbol, so it is computed once.
        # NOTE(review): start_date is naive, so midnight is taken in the
        # machine's local timezone -- confirm the host runs in Asia/Shanghai.
        midnight = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
        begin = int(midnight.timestamp() * 1000)

        # NOTE(review): every row returned by the API is appended, while only
        # rows of `date_str` were deleted above. If the API returns history,
        # repeated runs may insert duplicate rows -- confirm.
        for symbol in tqdm(stock_codes, desc=f"Fetching and saving daily stock data for {date_str}"):
            try:
                data = self.fetch_daily_stock_data(symbol, begin)
            except (requests.RequestException, ValueError) as e:
                # A network failure or invalid JSON for one symbol must not
                # abort the run for all remaining symbols.
                print(f"Error fetching data for {symbol} on {date_str}: {e}")
                continue

            is_mapping = isinstance(data, dict)
            if is_mapping and data.get('error_code') == 0:
                self.save_daily_data_to_database(data, symbol)
            else:
                description = data.get('error_description') if is_mapping else data
                print(f"Error fetching data for {symbol} on {date_str}: {description}")

        print(f"Daily data fetching and saving completed for {date_str}.")
def collect_stock_daily_data(db_url, date=None):
    """
    Convenience wrapper: build a collector and run one collection pass.

    Parameters:
    -----------
    db_url : str
        Database connection URL.
    date : str, optional
        Date string in 'YYYY-MM-DD' format. If None, today's data is fetched.
    """
    StockDailyDataCollector(db_url).fetch_data_for_date(date)
if __name__ == "__main__":
    import os

    # Example invocation.
    # SECURITY NOTE(review): the fallback URL below embeds database
    # credentials in source control. Prefer setting STOCK_DB_URL in the
    # environment; the literal is kept only so existing setups keep working
    # and should be removed (and the password rotated) once they migrate.
    db_url = os.environ.get(
        'STOCK_DB_URL',
        'mysql+pymysql://root:Chlry$%.8@192.168.1.82:3306/db_gp_cj',
    )

    # Option 1: use the shortcut function to fetch today's data.
    collect_stock_daily_data(db_url)

    # Option 2: use the shortcut function to fetch data for a given date.
    # collect_stock_daily_data(db_url, '2020-01-01')

    # Option 3: use the class directly.
    # collector = StockDailyDataCollector(db_url)
    # collector.fetch_data_for_date()  # fetch today's data
    # collector.fetch_data_for_date('2024-09-11')  # fetch a given date