我想用boost.python在c++中使用多索引列 Dataframe 。※多索引列 Dataframe 类似于
我将多索引列 Dataframe 的类型更改为csv。我的csv文件在电子表格
上如下所示
我之所以要使用这些数据是为了回溯测试,这是我用python写的回溯测试代码,我想把它翻译成c++。
import pandas as pd
import numpy as np
from utils import load_data, load_list_csv, to_int
class No_Strategy():
def __init__(self, codes, unit, cash, position):
self.codes = codes
self.unit = unit
self.cash = cash
self.buy_signal = [0]*len(codes)
self.sell_signal = [0]*len(codes)
self.valid = 0
self.position = position
self.pass_st = 0 # 전략에 들어가지도 못한 경우
def set_data(self, prev_fs_row, fs_row, indi_row):
self.prev_fs = prev_fs_row
self.fs = fs_row # multi dimensional df
self.indi = indi_row
def _strat(self, prev_fs, curr_fs, curr_indi):
curr_rev = prev_rev = curr_ni = prev_ni = ni_growth = curr_asset = noncurr_asset = curr_asset_rat = 0
try:
prev_rev = int(prev_fs['매출액'].replace(",",""))
curr_rev = int(curr_fs['매출액'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
rev_growth=(curr_rev-prev_rev)/prev_rev
try:
prev_ni = int(prev_fs['당기순이익'].replace(",",""))
curr_ni = int(curr_fs['당기순이익'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
ni_growth=(curr_ni-prev_ni)/prev_ni
try:
curr_asset = int(curr_fs['유동자산'].replace(",",""))
noncurr_asset = int(curr_fs['비유동자산'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
curr_asset_rat = curr_asset / noncurr_asset
#### this is the buy strategy! You can change the below ####
if (curr_indi.golden_cross) or (curr_indi.rsi_k < 0.65) :
return 1, 0
#### ************************************************** ####
#### this is the sell strategy! You can change the below ####
if (curr_indi.dead_cross):
return 0, 1
#### ************************************************** ####
return 0, 0
def run(self):
for i, code in enumerate(self.codes):
self.valid = 0
prev_fs = self.prev_fs[code]
curr_fs = self.fs[code]
curr_indi = self.indi[code]
prev_fs_cell = None
curr_fs_cell = None
try:
prev_fs_cell = prev_fs.iloc[0].replace(",","")
try:
curr_fs_cell = curr_fs.iloc[0].replace(",","")
except:
self.pass_st += 1
pass
except:
self.pass_st += 1
pass
if (curr_fs_cell != None) & (prev_fs_cell != None):
self.valid = 1
buy, sell = self._strat(prev_fs, curr_fs, curr_indi)
if self.valid == 0:
self.pass_st += 1
continue
else: # buy or sell signal get
price = curr_indi['close']
if buy:
if self.cash >= self.unit * price:
self.buy_signal[i] = self.unit
self.position[i] += self.unit
self.cash -= price * self.unit
elif sell:
if self.position[i] > 0 :
sell_num = self.position[i] - int(self.position[i]/2)
self.sell_signal[i] = sell_num
self.position[i] = int(self.position[i]/2) # 1-> 1 sell, 4 -> 2 sell ....
self.cash += price * sell_num
#@title
class Broker():
def __init__(self, codes):
self.cash = 200000000 #2억
self.cash_df = None #pd.DataFrame(columns=['cash'])
self.position = [0]*len(codes)
self.position_df = None #pd.DataFrame(columns=codes) # for accumulated profit calculation
self.buy_signal = None #pd.DataFrame(columns=codes) # codes = KOSPI_stock_names
self.sell_signal = None #pd.DataFrame(columns=codes)
self.codes = codes # 012934, 3281, ...
self.unit = 1 # 주식 매매 단위
self.pass_st = 0
def set_strat(self, strategy):
self.strategy = strategy # class
def set_time(self, time_index): # time_index type: pd.Index / time range for indi df
self.buy_signal = pd.DataFrame(columns = self.codes, index = time_index) #set_index(time_index)
self.sell_signal = pd.DataFrame(columns = self.codes, index = time_index) #.set_index(time_index)
self.position_df = pd.DataFrame(columns = self.codes, index = time_index)
self.cash_df = pd.DataFrame(columns = ['cash'], index = time_index)#.set_index(time_index)
self.time_index = time_index
def set_data(self, fs, indi, price):
self.fs = fs # multi dimensional df / start: 0th - nth
self.indi = indi # multi dimensional df / start : 1th - nth
self.price = price # 2 dimensional (date X codes : close price)
def update_data(self, strategy, date):
self.cash = strategy.cash
self.cash_df.loc[date] = strategy.cash
self.position = strategy.position
self.position_df.loc[date] = strategy.position #list
self.buy_signal.loc[date] = strategy.buy_signal #list
self.sell_signal.loc[date] = strategy.sell_signal #list
self.pass_st += strategy.pass_st
def run(self):
for date in self.time_index: #아마 수정해야 할 확률 높음
if date.year == 2021:
break
else:
prev_fs_row = self.fs.loc[date.year-1] # ex: 2014
fs_row = self.fs.loc[date.year] # 2015
indi_row = self.indi.loc[date] # 2015
strategy = self.strategy(self.codes, self.unit, self.cash, self.position)
strategy.set_data(prev_fs_row, fs_row, indi_row)
strategy.run()
self.update_data(strategy, date)
def performance(self):
# !!!! 2020년까지의 결과만 성능 평가 ####
cash_df = self.cash_df[self.cash_df.index < '2021']
position_df = self.position_df[self.position_df.index < '2021']
price = self.price[self.price.index < '2021']
buy_signal = self.buy_signal[self.buy_signal.index < '2021']
sell_signal = self.sell_signal[self.sell_signal.index < '2021']
last_price = price.iloc[-1]
total_remain_num = self.position # last(2020) position data
total_buy = (price * buy_signal).sum(axis=1).sum()
total_sell = (price * sell_signal).sum(axis=1).sum()
total_remain = (last_price * total_remain_num).sum()
print(f'remain 개수: {total_remain_num}, total_remain: {total_remain} total_buy: {total_buy}, total_sell={total_sell}')
profit = total_sell + total_remain - total_buy
try:
return_mean = profit / total_buy
except:
print("no buy")
return
accum_df = (cash_df['cash'] + ((price.fillna(0) * position_df).sum(axis=1))).to_frame() # row sum
daily_return_df = (accum_df - accum_df.shift(1))/accum_df.shift(1)-1
SSE = ((daily_return_df - return_mean)**2).sum().item()
std = np.sqrt(SSE/(accum_df.shape[0]-1)) # route(sigma(x-x_bar)^2 / (n-1))
sharp = return_mean / std
self.return_mean = return_mean
self.sharp = sharp
print(f'return_mean: {return_mean}, sharp: {sharp}')
code_path = GDRIVE_DATA_PATH + 'codes.csv'
fs_path = GDRIVE_DATA_PATH + 'fs_total.csv'
indi_path = GDRIVE_DATA_PATH + 'indi_total.csv'
price_path = GDRIVE_DATA_PATH + 'prices.csv'
fs_total = load_data("fs_total.csv")
indi_total = load_data("indi_total.csv") # stock price and indicator(Golden cross, RSI, etc.)
prices = load_data("prices.csv") # stock close price data rows:date, cols: stock code.
time_index = indi_total.index # time index of indi_total multi-index columns
broker = Broker(codes)
broker.set_strat(No_Strategy)
broker.set_time(time_index)
broker.set_data(fs_total, indi_total, prices)
broker.run()
broker.performance()
我想在代码流程上不做太大改动的情况下翻译它,但是我找不到如何在C++中获取多索引列 Dataframe ,并将其行数据转移到No_Strategy中以决定是否投资股票。
※我以前上传过类似的问题,并得到了感谢的答案,但它对我来说太复杂了,所以我的问题与详细信息再次.
1条答案
按热度按时间kd3sttzy1#
看看https://github.com/hosseinmoein/DataFrame,它在一个更快的框架中拥有大约95%的Pandas功能