forked from FinHackCN/xlib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
executable file
·82 lines (69 loc) · 4.18 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import akshare as ak
import pandas as pd
def get_index_data(index="000300", start_date="20150101", end_date="20220330", renew=False):
    """Return the daily history of an index, using a CSV file as cache.

    The cache file lives in a ``cache`` directory next to this module and is
    named ``i_<index>_<start_date>_<end_date>.csv``.  When the file is missing
    (or ``renew`` is true) the data is fetched with ``ak.index_zh_a_hist``,
    its columns are renamed to English, duplicate dates are dropped and the
    result is written to the cache file.

    Args:
        index: Index symbol forwarded to ``ak.index_zh_a_hist``.
        start_date: Start date string forwarded to the fetch call and used in
            the cache file name (the default looks like ``YYYYMMDD``).
        end_date: End date string, used the same way as ``start_date``.
        renew: When true, ignore an existing cache file and fetch again.

    Returns:
        A DataFrame indexed by ``date`` (parsed with the ``%Y-%m-%d`` format).
    """
    cache_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cache')
    idx_cache_file = os.path.join(
        cache_dir, "i_" + index + "_" + start_date + "_" + end_date + ".csv"
    )

    if renew or not os.path.exists(idx_cache_file):
        idx_data = ak.index_zh_a_hist(
            symbol=index, period="daily", start_date=start_date, end_date=end_date
        )
        idx_data.rename(
            columns={
                '日期': 'date', '开盘': 'open', '收盘': 'close',
                '最高': 'high', '最低': 'low', '成交量': 'volume',
                '成交额': 'amount', '振幅': 'swing', '涨跌幅': 'chg_pct',
                '涨跌额': 'chg_amount', '换手率': 'turnover',
            },
            inplace=True,
        )
        idx_data.drop_duplicates(subset=['date'], keep='first', inplace=True)
        # The cache directory is not guaranteed to exist on a fresh checkout;
        # without this the write below fails on the very first run.
        os.makedirs(cache_dir, exist_ok=True)
        idx_data.to_csv(idx_cache_file)
    else:
        # Column 0 of the cached CSV is the integer index written by to_csv.
        idx_data = pd.read_csv(idx_cache_file, index_col=0)

    # Parse dates after the branch so fetched and cached data share one shape.
    idx_data['date'] = pd.to_datetime(idx_data['date'], format='%Y-%m-%d')
    return idx_data.set_index('date')
def get_all_index_data(index="000300", start_date="20150101", end_date="20220330", renew=False):
    """Return the stacked daily history of every constituent of an index.

    Three layers of CSV cache are kept in a ``cache`` directory next to this
    module: the index itself (``i_<index>_<start>_<end>.csv``), one file per
    constituent (``<symbol>_<start>_<end>.csv``) and the stacked result
    (``i_all_<index>_<start>_<end>.csv``).  Each layer is fetched through
    ``ak`` only when its file is missing or ``renew`` is true.

    Every constituent frame is outer-merged with the index's ``date`` column,
    missing values are filled with 0, a constant ``factor`` column of 1 and a
    ``symbol`` column (``'x' + symbol``) are added.

    Args:
        index: Index symbol forwarded to ``ak.index_zh_a_hist`` and
            ``ak.index_stock_cons``.
        start_date: Start date string forwarded to the fetch calls and used in
            the cache file names.
        end_date: End date string, used the same way as ``start_date``.
        renew: When true, ignore all existing cache files and fetch again.

    Returns:
        A DataFrame with the rows of all constituents stacked under a fresh
        0..n-1 index.
    """
    suffix = "_" + start_date + "_" + end_date + ".csv"

    idx_cache_file = _xlib_cache_path("i_" + index + suffix)
    if renew or not os.path.exists(idx_cache_file):
        idx_data = ak.index_zh_a_hist(
            symbol=index, period="daily", start_date=start_date, end_date=end_date
        )
        idx_data.rename(columns=_XLIB_HIST_COLUMNS, inplace=True)
        idx_data.drop_duplicates(subset=['date'], keep='first', inplace=True)
        _xlib_write_cache(idx_data, idx_cache_file)
    else:
        idx_data = pd.read_csv(idx_cache_file, index_col=0)

    idx_all_cache_file = _xlib_cache_path("i_all_" + index + suffix)
    if not renew and os.path.exists(idx_all_cache_file):
        # Fully cached: return without touching the network at all.
        return pd.read_csv(idx_all_cache_file, index_col=0)

    # NOTE(review): a cached index yields string dates while a fresh fetch may
    # yield date objects (depends on the akshare version - confirm).  Both
    # merge sides are normalised so the keys always compare equal.
    df_align = _xlib_iso_dates(idx_data['date'])

    # Only needed when rebuilding, so the constituent list is fetched here.
    index_stock_cons_df = ak.index_stock_cons(symbol=index)

    frames = []
    for symbol in index_stock_cons_df['品种代码'].tolist():
        cache_file = _xlib_cache_path(symbol + suffix)
        if renew or not os.path.exists(cache_file):
            df = ak.stock_zh_a_hist(
                symbol=symbol, start_date=start_date, end_date=end_date, adjust="qfq"
            )
            df.rename(columns=_XLIB_HIST_COLUMNS, inplace=True)
            df['factor'] = 1
            df.drop_duplicates(subset=['date'], keep='first', inplace=True)
            df['date'] = _xlib_iso_dates(df['date'])
            # Outer merge so every index trading day gets a row for the stock.
            df = pd.merge(df_align, df, on=['date'], how='outer', validate="one_to_many")
            df['symbol'] = 'x' + str(symbol)
            df = df.fillna(0)
            _xlib_write_cache(df, cache_file)
            print(len(df))
        else:
            df = pd.read_csv(cache_file, index_col=0)
        if not df.empty:
            frames.append(df)

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);
    # an empty constituent list still produces an empty frame.
    all_index_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    _xlib_write_cache(all_index_data, idx_all_cache_file)
    return all_index_data


# Mapping from the Chinese column headers returned by the fetch calls to the
# English names used throughout the cached files.
_XLIB_HIST_COLUMNS = {
    '日期': 'date', '开盘': 'open', '收盘': 'close',
    '最高': 'high', '最低': 'low', '成交量': 'volume',
    '成交额': 'amount', '振幅': 'swing', '涨跌幅': 'chg_pct',
    '涨跌额': 'chg_amount', '换手率': 'turnover',
}


def _xlib_cache_path(file_name):
    """Return the path of *file_name* inside the module-local cache directory."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cache', file_name)


def _xlib_write_cache(frame, path):
    """Write *frame* to *path* as CSV, creating the cache directory if needed."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    frame.to_csv(path)


def _xlib_iso_dates(dates):
    """Return *dates* as ``YYYY-MM-DD`` strings, keeping the Series name."""
    return pd.to_datetime(dates).dt.strftime('%Y-%m-%d')