From c88a31bc18247e6b4788fd786b2c655b523b3dd8 Mon Sep 17 00:00:00 2001 From: Shub Jain Date: Sun, 15 Jul 2018 14:05:30 +0100 Subject: [PATCH] ML Params --- ml_beta/Readme.md | 3 + ml_beta/problem1_ML_template.py | 320 ++++++++++++++++++ ml_beta/problem1_execution_system.py | 195 +++++++++++ ml_beta/problem1_ml_and_trading_params.py | 376 ++++++++++++++++++++++ 4 files changed, 894 insertions(+) create mode 100644 ml_beta/Readme.md create mode 100644 ml_beta/problem1_ML_template.py create mode 100644 ml_beta/problem1_execution_system.py create mode 100644 ml_beta/problem1_ml_and_trading_params.py diff --git a/ml_beta/Readme.md b/ml_beta/Readme.md new file mode 100644 index 0000000..27348f1 --- /dev/null +++ b/ml_beta/Readme.md @@ -0,0 +1,3 @@ +To get started download the folder and run `python problem1_ML_template.py` + +The other 2 files are to support the template file, you do not have to make any changes in them. diff --git a/ml_beta/problem1_ML_template.py b/ml_beta/problem1_ML_template.py new file mode 100644 index 0000000..d3c0b27 --- /dev/null +++ b/ml_beta/problem1_ML_template.py @@ -0,0 +1,320 @@ +import pandas as pd +import numpy as np +import sys, os +from sklearn import linear_model +from sklearn import metrics as sm +from datetime import datetime, timedelta +parentPath = os.path.abspath("../..") +if parentPath not in sys.path: + sys.path.insert(0, parentPath) +from backtester.features.feature import Feature +from problem1_ml_and_trading_params import MyTradingParams, MyModelLearningParams +from backtester.model_learning_and_trading_system import MLandTradingSystem +from backtester.version import updateCheck +from backtester.constants import * +from backtester.logger import * + + +## Make your changes to the functions below. 
+## SPECIFY the symbols you are modeling for in getSymbolsToTrade() below +## You need to specify features you want to use in getInstrumentFeatureConfigDicts() and getMarketFeatureConfigDicts() +## and create your predictions using these features in getPrediction() + +## Don't change any other function +## The toolbox does the rest for you, from downloading and loading data to running backtest + + +class MyTradingFunctions(): + + def __init__(self): #Put any global variables here + self.lookback = 1200 ## max number of historical datapoints you want at any given time + self.targetVariable = 'Y' + if datetime.today() < datetime(2018, 7, 3): + self.dataSetId = 'QQ3DataSample' + else: + self.dataSetId = 'QQ3DataDownSampled' + self.params = {} + + # for example you can import and store an ML model from scikit learn in this dict + self.model = {} + + # and set a frequency at which you want to update the model + self.updateFrequency = 150 + + ########################################### + ## ONLY FILL THE FOUR FUNCTIONS BELOW ## + ########################################### + + ############################################################################### + ### TODO 1: FILL THIS FUNCTION TO specify all stockIDs you are modeling for ### + ### USE TEMPLATE BELOW AS EXAMPLE ### + ############################################################################### + + def getSymbolsToTrade(self): + return ['SIZ', 'MLQ'] + + ''' + Specify all Features you want to use by by creating config dictionaries. + Create one dictionary per feature and return them in an array. 
def getInstrumentFeatureConfigDicts(self):
    '''
    Return the per-instrument feature config dicts used by this template.

    Each dict has a 'featureKey' (name used to look the feature up later),
    a 'featureId' (type of feature) and 'params' for that feature.
    All four example features are computed on the 'F5' column.

    Returns a new list on every call: [ma_5, ma_10, mom_5, mom_10].
    '''
    ##############################################################################
    ### TODO 2a: FILL THIS FUNCTION TO CREATE DESIRED FEATURES for each symbol. ###
    ### USE TEMPLATE BELOW AS EXAMPLE                                           ###
    ##############################################################################
    mom1Dict = {'featureKey': 'mom_5',
                'featureId': 'momentum',
                'params': {'period': 5,
                           'featureName': 'F5'}}
    mom2Dict = {'featureKey': 'mom_10',
                'featureId': 'momentum',
                'params': {'period': 10,
                           'featureName': 'F5'}}
    ma1Dict = {'featureKey': 'ma_5',
               'featureId': 'moving_average',
               'params': {'period': 5,
                          'featureName': 'F5'}}
    ma2Dict = {'featureKey': 'ma_10',
               'featureId': 'moving_average',
               'params': {'period': 10,
                          'featureName': 'F5'}}
    # getPrediction() looks up 'mom_5' and 'mom_10' as well as the moving
    # averages, so the momentum configs must be returned too. The moving
    # averages stay first so existing positions in the list do not shift.
    return [ma1Dict, ma2Dict, mom1Dict, mom2Dict]
+ You can read more about it here: https://bitbucket.org/auquan/auquantoolbox/wiki/Home#markdown-header-ml-training-system + ''' + + def getFeatureSelectionConfigDicts(self): + + ############################################################################### + ### TODO 3: FILL THIS FUNCTION to choose a feature selection methods ### + ### USE TEMPLATE BELOW AS EXAMPLE ### + ############################################################################### + corr = {'featureSelectionKey': 'corr', + 'featureSelectionId' : 'pearson_correlation', + 'params' : {'startPeriod' : 0, + 'endPeriod' : 60, + 'steps' : 10, + 'threshold' : 0.1, + 'topK' : 2}} + + genericSelect = {'featureSelectionKey' : 'gus', + 'featureSelectionId' : 'generic_univariate_select', + 'params' : {'scoreFunction' : 'f_classif', + 'mode' : 'k_best', + 'modeParam' : 'all'}} + return {INSTRUMENT_TYPE_STOCK : [genericSelect]} + + def getFeatureTransformationConfigDicts(self): + + ########################################################################################## + ### TODO 4: FILL THIS FUNCTION to choose feature normalization/transformation methods ### + ### USE TEMPLATE BELOW AS EXAMPLE ### + ########################################################################################## + stdScaler = {'featureTransformKey': 'stdScaler', + 'featureTransformId' : 'standard_transform', + 'params' : {}} + + minmaxScaler = {'featureTransformKey' : 'minmaxScaler', + 'featureTransformId' : 'minmax_transform', + 'params' : {'low' : -1, + 'high' : 1}} + return {INSTRUMENT_TYPE_STOCK : [stdScaler]} + + def getModelConfigDicts(self): + + ############################################################################ + ### TODO 5: FILL THIS FUNCTION to choose the model training methods ### + ### USE TEMPLATE BELOW AS EXAMPLE ### + ############################################################################ + regression_model = {'modelKey': 'linear_regression', + 'modelId' : 'linear_regression', + 'params' : {}} + + 
def getPrediction(self, time, updateNum, instrumentManager, predictions):
    '''
    Example prediction function: one classifier per symbol, refit periodically.

    The template notes say this function is replaced by the best trained
    model when the ML training system is used.

    time: timestamp of the current update (not used here)
    updateNum: running count of updates; no prediction is made until it
               exceeds 2 * self.updateFrequency
    instrumentManager: object providing getLookbackInstrumentFeatures()
                       and getDataDf()
    predictions: per-symbol container, updated in place and returned

    Returns predictions with, per symbol, 1 if the model output is above the
    threshold, 0 if it is below (1 - threshold), and 0.5 otherwise.
    '''
    # holder for all the instrument features for all instruments
    lookbackInstrumentFeatures = instrumentManager.getLookbackInstrumentFeatures()
    # holder for all the market features (kept for template users; unused below)
    lookbackMarketFeatures = instrumentManager.getDataDf()

    # if you don't have enough data yet, don't make a prediction
    if updateNum <= 2 * self.updateFrequency:
        return predictions

    # Loading the target variable: DF with rows=timestamp and columns=stockIDs
    Y = lookbackInstrumentFeatures.getFeatureDf(self.getTargetVariableKey())

    # Loading all features (each is a DF with rows=timestamp and columns=stockIDs)
    mom1 = lookbackInstrumentFeatures.getFeatureDf('mom_5')
    mom2 = lookbackInstrumentFeatures.getFeatureDf('mom_10')
    factor1Values = mom1 / mom2
    ma1 = lookbackInstrumentFeatures.getFeatureDf('ma_5')
    ma2 = lookbackInstrumentFeatures.getFeatureDf('ma_10')
    factor2Values = ma1 / ma2

    # if you are making probabilistic predictions, this converts them to 0/1
    threshold = 0.8
    isUpdateStep = (updateNum - 1) % self.updateFrequency == 0

    for s in self.getSymbolsToTrade():
        # DF with rows=timestamp and columns=featureNames for this stock
        X = pd.DataFrame(index=Y.index)
        X['F1'] = factor1Values[s]
        X['F2'] = factor2Values[s]

        # first time we see this symbol: start from a fresh model
        if s not in self.model:
            self.model[s] = linear_model.LogisticRegression()

        if isUpdateStep:
            # Clean a *copy* for training. X itself must stay untouched so the
            # nan/inf check below still inspects the current (last) row.
            trainX = X.replace([np.inf, -np.inf], np.nan).dropna()
            # target vector restricted to the rows that survived cleaning
            y_s = Y[s].loc[Y.index.isin(trainX.index)]

            print('Training...')
            # the last cleaned row is held out of training
            # (presumably its target is not final yet - confirm)
            x_train = np.array(trainX)[:-1]                       # timestamps x numFeatures
            y_train = np.array(y_s)[:-1].astype(int).reshape(-1)  # timestamps
            self.model[s].fit(x_train, y_train)

        # only predict when none of the current features are nan or inf
        latest = X.iloc[-1].replace([np.inf, -np.inf], np.nan)
        if latest.hasnans:
            y_predict = 0.5
        else:
            modelOutput = self.model[s].predict(latest.values.reshape(1, -1))
            # predict() returns an array with one entry for the single row
            y_predict = float(np.ravel(modelOutput)[0])

        # A single if/elif/else: two consecutive assignments would let the
        # second overwrite the first, so 1 could never be emitted.
        if y_predict > threshold:
            predictions[s] = 1
        elif y_predict < (1 - threshold):
            predictions[s] = 0
        else:
            predictions[s] = 0.5

    return predictions
############################################### + + def getCustomFeatures(self): + return {'my_custom_feature_identifier': MyCustomFeatureClassName} + +#################################################### +## YOU CAN DEFINE ANY CUSTOM FEATURES HERE ## +## If YOU DO, MENTION THEM IN THE FUNCTION ABOVE ## +#################################################### +class MyCustomFeatureClassName(Feature): + '''' + Custom Feature to implement for instrument. This function would return the value of the feature you want to implement. + 1. create a new class MyCustomFeatureClassName for the feature and implement your logic in the function computeForInstrument() - + 2. modify function getCustomFeatures() to return a dictionary with Id for this class + (follow formats like {'my_custom_feature_identifier': MyCustomFeatureClassName}. + Make sure 'my_custom_feature_identifier' doesnt conflict with any of the pre defined feature Ids + def getCustomFeatures(self): + return {'my_custom_feature_identifier': MyCustomFeatureClassName} + 3. create a dict for this feature in getInstrumentFeatureConfigDicts() above. Dict format is: + customFeatureDict = {'featureKey': 'my_custom_feature_key', + 'featureId': 'my_custom_feature_identifier', + 'params': {'param1': 'value1'}} + You can now use this feature by calling it's featureKey, 'my_custom_feature_key' in getPrediction() + ''' + @classmethod + def computeForInstrument(cls, updateNum, time, featureParams, featureKey, instrumentManager): + # Custom parameter which can be used as input to computation of this feature + param1Value = featureParams['param1'] + + # A holder for the all the instrument features + lookbackInstrumentFeatures = instrumentManager.getLookbackInstrumentFeatures() + + # dataframe for a historical instrument feature (basis in this case). The index is the timestamps + # atmost upto lookback data points. The columns of this dataframe are the symbols/instrumentIds. 
+ lookbackInstrumentValue = lookbackInstrumentFeatures.getFeatureDf('symbolVWAP') + + # The last row of the previous dataframe gives the last calculated value for that feature (basis in this case) + # This returns a series with symbols/instrumentIds as the index. + currentValue = lookbackInstrumentValue.iloc[-1] + + if param1Value == 'value1': + return currentValue * 0.1 + else: + return currentValue * 0.5 + + +if __name__ == "__main__": + if False:#updateCheck(): + print('Your version of the auquan toolbox package is old. Please update by running the following command:') + print('pip install -U auquan_toolbox') + else: + print('Loading your config dicts and prediction function') + tf = MyTradingFunctions() + print('Loaded config dicts and prediction function, Loading Problem 1 Params') + tsParams = MyTradingParams(tf) + dataSplitRatio = [2, 0, 1] + mlsParams = MyModelLearningParams(tsParams, dataSplitRatio, chunkSize=None) + system = MLandTradingSystem(tsParams, mlsParams) + system.trainAndBacktest(useTargetVaribleFromFile=True, useTimeFrequency = True, chunkSize=None, onlyAnalyze=False, shouldPlot=True, makeInstrumentCsvs=True) diff --git a/ml_beta/problem1_execution_system.py b/ml_beta/problem1_execution_system.py new file mode 100644 index 0000000..5adc38e --- /dev/null +++ b/ml_beta/problem1_execution_system.py @@ -0,0 +1,195 @@ +from backtester.executionSystem.base_execution_system import BaseExecutionSystem, InstrumentExection +from backtester.logger import * +import numpy as np +import pandas as pd + + +class Problem1ExecutionSystem(BaseExecutionSystem): + def __init__(self, enter_threshold=0.7, exit_threshold=0.55, longLimit=10, + shortLimit=10, capitalUsageLimit=0, enterlotSize=1, exitlotSize = 1, limitType='L', price='close'): + self.enter_threshold = enter_threshold + self.exit_threshold = exit_threshold + self.longLimit = longLimit + self.shortLimit = shortLimit + self.capitalUsageLimit = capitalUsageLimit + self.enterlotSize = enterlotSize + 
def getEnterLotSize(self, instrumentIds, price):
    '''
    Return the lot size used when entering positions, run through convertLimit().

    self.enterlotSize may be a DataFrame (used as is), a dict (turned into a
    Series keyed by the dict keys) or a scalar (broadcast over instrumentIds).

    instrumentIds: index to broadcast a scalar lot size over
    price: price data handed to convertLimit()
    '''
    lotSize = self.enterlotSize
    if isinstance(lotSize, pd.DataFrame):
        # must read enterlotSize: there is no self.lotSize attribute
        return self.convertLimit(lotSize, price)
    if isinstance(lotSize, dict):
        return self.convertLimit(pd.Series(lotSize), price)
    return self.convertLimit(pd.Series(lotSize, index=instrumentIds), price)


def getExitLotSize(self, instrumentIds, price):
    '''
    Return the lot size used when exiting positions, run through convertLimit().

    self.exitlotSize may be a DataFrame (used as is), a dict (turned into a
    Series keyed by the dict keys) or a scalar (broadcast over instrumentIds).

    instrumentIds: index to broadcast a scalar lot size over
    price: price data handed to convertLimit()
    '''
    lotSize = self.exitlotSize
    if isinstance(lotSize, pd.DataFrame):
        # must read exitlotSize: there is no self.lotSize attribute
        return self.convertLimit(lotSize, price)
    if isinstance(lotSize, dict):
        return self.convertLimit(pd.Series(lotSize), price)
    return self.convertLimit(pd.Series(lotSize, index=instrumentIds), price)
instrumentsManager, currentPredictions, closeAllPositions=False): + + instrumentLookbackData = instrumentsManager.getLookbackInstrumentFeatures() + positionData = instrumentLookbackData.getFeatureDf('position') + position = positionData.iloc[-1] + price = self.getPriceSeries(instrumentsManager) + executions = pd.Series([0] * len(positionData.columns), index=positionData.columns) + + if closeAllPositions: + executions = -position + return executions + executions[self.exitCondition(currentPredictions, instrumentsManager)] = -np.sign(position)*np.abs(position) + executions[self.hackCondition(currentPredictions, instrumentsManager)] = -np.sign(position)*np.abs(position) + # print('exit?',self.exitCondition(currentPredictions, instrumentsManager)) + return executions + + def enterPosition(self, time, instrumentsManager, currentPredictions, capital): + instrumentLookbackData = instrumentsManager.getLookbackInstrumentFeatures() + positionData = instrumentLookbackData.getFeatureDf('position') + position = positionData.iloc[-1] + price = self.getPriceSeries(instrumentsManager) + executions = pd.Series([0] * len(positionData.columns), index=positionData.columns) + executions[self.enterCondition(currentPredictions, instrumentsManager)] = \ + self.getEnterLotSize(positionData.columns, price) * self.getBuySell(currentPredictions, instrumentsManager) + # No executions if at position limit + executions[self.atPositionLimit(capital, positionData, price)] = 0 + # print('enter?', self.enterCondition(currentPredictions, instrumentsManager)) + # print(self.getBuySell(currentPredictions, instrumentsManager)) + return executions + + def getBuySell(self, currentPredictions, instrumentsManager): + price = self.getPriceDf(instrumentsManager) + buySell = pd.Series(0, index = price.columns) + if(len(price) > 14): + buySell[(price.iloc[-1]-price.iloc[-14])>0] = 2*currentPredictions - 1 + buySell[(price.iloc[-1]-price.iloc[-14])<0] = -( 2*currentPredictions - 1 ) + return buySell + + def 
enterCondition(self, currentPredictions, instrumentsManager): + instrumentLookbackData = instrumentsManager.getLookbackInstrumentFeatures() + if(len(instrumentLookbackData.getFeatureDf('prediction')) <= 28): + return pd.Series(False, index=currentPredictions.index) + else: + price = self.getPriceDf(instrumentsManager) + pastPredictions = instrumentLookbackData.getFeatureDf('prediction').iloc[-14] + currentPriceChange = np.sign(price.iloc[-1] - price.iloc[-14]) + pastPriceChange = np.sign(price.iloc[-14] - price.iloc[-28]) + + return (currentPredictions != 0.5) #& ~((currentPredictions!=pastPredictions)&(currentPriceChange==pastPriceChange)) + + def atPositionLimit(self, capital, positionData, price): + + if capital <= self.capitalUsageLimit: + logWarn('Not Enough Capital') + return pd.Series(True, index=positionData.columns) + position = positionData.iloc[-1] + # TODO: Cant do this if position and getLongLimit indexes dont match + return (position >= self.getLongLimit(positionData.columns, price)) | (position <= -self.getShortLimit(positionData.columns, price)) + + def exitCondition(self, currentPredictions, instrumentsManager): + instrumentLookbackData = instrumentsManager.getLookbackInstrumentFeatures() + if(len(instrumentLookbackData.getFeatureDf('prediction')) <= 28): + return pd.Series(False, index=currentPredictions.index) + else: + price = self.getPriceDf(instrumentsManager) + pastPredictions = instrumentLookbackData.getFeatureDf('prediction').iloc[-14] + currentPriceChange = np.sign(price.iloc[-1] - price.iloc[-14]) + pastPriceChange = np.sign(price.iloc[-14] - price.iloc[-28]) + # printdf=pd.DataFrame(index=price.columns) + # printdf['currentPredictions'] = currentPredictions + # printdf['pastPredictions'] = pastPredictions + # printdf['currentPriceChange'] = currentPriceChange + # printdf['pastPriceChange'] = pastPriceChange + # print(printdf) + return (currentPredictions == 0.5) | 
((currentPredictions!=pastPredictions)&(currentPriceChange==pastPriceChange)) | ((currentPredictions==pastPredictions)&(currentPriceChange!=pastPriceChange)) + + + def hackCondition(self, currentPredictions, instrumentsManager): + return pd.Series(False, index=currentPredictions.index) \ No newline at end of file diff --git a/ml_beta/problem1_ml_and_trading_params.py b/ml_beta/problem1_ml_and_trading_params.py new file mode 100644 index 0000000..cf2896b --- /dev/null +++ b/ml_beta/problem1_ml_and_trading_params.py @@ -0,0 +1,376 @@ +from backtester.trading_system_parameters import TradingSystemParameters +from backtester.model_learning_system_parameters import ModelLearningSystemParamters, MLSTrainingPredictionFeature +from backtester.features.feature import Feature +from backtester.dataSource.csv_data_source import CsvDataSource +from backtester.timeRule.nse_time_rule import NSETimeRule +from problem1_execution_system import Problem1ExecutionSystem +from backtester.orderPlacer.backtesting_order_placer import BacktestingOrderPlacer +from backtester.trading_system import TradingSystem +from backtester.version import updateCheck +from backtester.constants import * +from backtester.features.feature import Feature +from backtester.logger import * +import pandas as pd +import numpy as np +import sys +from sklearn import linear_model +from sklearn import metrics as sm + +## Make your changes to the functions below. 
+## SPECIFY the symbols you are modeling for in getSymbolsToTrade() below +## You need to specify features you want to use in getInstrumentFeatureConfigDicts() and getMarketFeatureConfigDicts() +## and create your predictions using these features in getPrediction() + +## Don't change any other function +## The toolbox does the rest for you, from downloading and loading data to running backtest + + +class MyTradingParams(TradingSystemParameters): + ''' + initialize class + place any global variables here + ''' + def __init__(self, tradingFunctions): + self.__tradingFunctions = tradingFunctions + self.__dataSetId = self.__tradingFunctions.getDataSetId() + self.__instrumentIds = self.__tradingFunctions.getSymbolsToTrade() + self.__priceKey = 'F5' + self.__additionalInstrumentFeatureConfigDicts = [] + self.__additionalMarketFeatureConfigDicts = [] + self.__additionalCustomFeatures = [] + self.__fees = {'brokerage': 0.0001,'spread': 0.05} + self.__startDate = '2010/06/02' + self.__endDate = '2013/02/07' + self.__dataSourceParams = dict(cachedFolderName='historicalData/', + dataSetId=self.__dataSetId, + instrumentIds=self.__instrumentIds, + downloadUrl = 'https://raw.githubusercontent.com/Auquan/qq3Data/master', + timeKey = 'datetime', + timeStringFormat = '%Y-%m-%d %H:%M:%S', + startDateStr=self.__startDate, + endDateStr=self.__endDate, + liveUpdates=True, + pad=True) + self.__dataSourceName = 'CsvDataSource' + super(MyTradingParams, self).__init__() + + ''' + Returns the list of instrument IDs + ''' + + def getInstrumentIds(self): + return self.__instrumentIds + + def getTargetVariableKey(self): + return self.__tradingFunctions.getTargetVariableKey() + + ''' + Returns an instance of class DataParser. 
def getFrequencyOfFeatureUpdates(self):
    '''
    Return a timedelta giving the frequency of feature updates (60 minutes).

    Per the surrounding notes, updates to instruments within this interval
    do not trigger feature updates.
    '''
    # Imported locally because this module's visible import list does not
    # include timedelta.
    from datetime import timedelta

    # timedelta's first positional argument is *days*, so timedelta(60, 0)
    # meant 60 days; the intended "60 minutes, 0 seconds" needs the keyword.
    return timedelta(minutes=60)
if your custom class is MyCustomFeature, and you want to access this via featureId='my_custom_feature', + you will import that class, and return this function as {'my_custom_feature': MyCustomFeature} + ''' + + def getCustomFeatures(self): + customFeatures = {'prediction': TrainingPredictionFeature, + 'fees_and_spread': FeesCalculator, + 'benchmark_PnL': BuyHoldPnL, + 'ScoreCalculator' : ScoreCalculator} + customFeatures.update(self.__tradingFunctions.getCustomFeatures()) + for featureDict in self.__additionalCustomFeatures: + customFeatures.update(featureDict) + return customFeatures + + + def getInstrumentFeatureConfigDicts(self): + # ADD RELEVANT FEATURES HERE + + predictionDict = {'featureKey': 'prediction', + 'featureId': 'prediction', + 'params': {'function': self.__tradingFunctions}} + feesConfigDict = {'featureKey': 'fees', + 'featureId': 'fees_and_spread', + 'params': {'feeDict': self.__fees, + 'price': self.getPriceFeatureKey(), + 'position' : 'position'}} + profitlossConfigDict = {'featureKey': 'pnl', + 'featureId': 'pnl', + 'params': {'price': self.getPriceFeatureKey(), + 'fees': 'fees'}} + capitalConfigDict = {'featureKey': 'capital', + 'featureId': 'capital', + 'params': {'price': self.getPriceFeatureKey(), + 'fees': 'fees', + 'capitalReqPercent': 0.95}} + benchmarkDict = {'featureKey': 'benchmark', + 'featureId': 'benchmark_PnL', + 'params': {'pnlKey': 'pnl', + 'price': self.getPriceFeatureKey()}} + + scoreDict = {'featureKey': 'score', + 'featureId': 'ScoreCalculator', + 'params': {'predictionKey': 'prediction', + 'targetVariable' : self.__tradingFunctions.getTargetVariableKey(), + 'price': self.getPriceFeatureKey()}} + + self.__stockFeatureConfigs = self.__tradingFunctions.getInstrumentFeatureConfigDicts() + + + return {INSTRUMENT_TYPE_STOCK: self.__stockFeatureConfigs + [predictionDict, + feesConfigDict,profitlossConfigDict,capitalConfigDict,benchmarkDict, scoreDict] + + self.__additionalInstrumentFeatureConfigDicts} + + def 
getStockFeatureConfigDicts(self): + return self.__stockFeatureConfigs + + ''' + Returns an array of market feature config dictionaries + market feature config Dictionary has the following keys: + featureId: a string representing the type of feature you want to use + featureKey: a string representing the key you will use to access the value of this feature.this + params: A dictionary with which contains other optional params if needed by the feature + ''' + + def getMarketFeatureConfigDicts(self): + # ADD RELEVANT FEATURES HERE + scoreDict = {'featureKey': 'score', + 'featureId': 'score_ll', + 'params': {'featureName': self.getPriceFeatureKey(), + 'instrument_score_feature': 'score'}} + + marketFeatureConfigs = self.__tradingFunctions.getMarketFeatureConfigDicts() + return marketFeatureConfigs + [scoreDict] +self.__additionalMarketFeatureConfigDicts + + ''' + Returns the type of execution system we want to use. Its an implementation of the class ExecutionSystem + It converts prediction to intended positions for different instruments. + ''' + + def getExecutionSystem(self): + return Problem1ExecutionSystem(enter_threshold=0.99, + exit_threshold=0.55, + longLimit=10000, + shortLimit=10000, + capitalUsageLimit=0.10 * self.getStartingCapital(), + enterlotSize=1, exitlotSize = 1, + limitType='L', price=self.getPriceFeatureKey()) + + ''' + Returns the type of order placer we want to use. its an implementation of the class OrderPlacer. + It helps place an order, and also read confirmations of orders being placed. + For Backtesting, you can just use the BacktestingOrderPlacer, which places the order which you want, and automatically confirms it too. + ''' + + def getOrderPlacer(self): + return BacktestingOrderPlacer() + + ''' + Returns the amount of lookback data you want for your calculations. The historical market features and instrument features are only + stored upto this amount. + This number is the number of times we have updated our features. 
+ ''' + + def getLookbackSize(self): + return max(720, self.__tradingFunctions.getLookbackSize()) + + def getPriceFeatureKey(self): + return self.__priceKey + + def setPriceFeatureKey(self, priceKey='Adj_Close'): + self.__priceKey = priceKey + + def getDataSetId(self): + return self.__dataSetId + + def setDataSetId(self, dataSetId): + self.__dataSetId = dataSetId + + def getInstrumentsIds(self): + return self.__instrumentIds + + def getTradingFunctions(self): + return self.__tradingFunctions + + def setInstrumentsIds(self, instrumentIds): + self.__instrumentIds = instrumentIds + + def getDates(self): + return {'startDate':self.__startDate, + 'endDate':self.__endDate} + + def setDates(self, dateDict): + self.__startDate = dateDict['startDate'] + self.__endDate = dateDict['endDate'] + + def setFees(self, feeDict={'brokerage': 0.0001,'spread': 0.05}): + self.__fees = feeDict + + def setAdditionalInstrumentFeatureConfigDicts(self, dicts = []): + self.__additionalInstrumentFeatureConfigDicts = dicts + + def setAdditionalMarketFeatureConfigDicts(self, dicts = []): + self.__additionalMarketFeatureConfigDicts = dicts + + def setAdditionalCustomFeatures(self, dicts=[]): + dicts = dicts if isinstance(dicts, list) else [dicts] + self.__additionalCustomFeatures = dicts + + +class MyModelLearningParams(ModelLearningSystemParamters): + """ + """ + def __init__(self, tsParams, splitRatio, chunkSize=None, modelDir='savedModels'): + self.tsParams = tsParams + super(MyModelLearningParams, self).__init__(tsParams.getInstrumentIds(), chunkSize, modelDir) + dates = self.tsParams.getDates() + self.splitData(splitRatio, dates['startDate'], dates['endDate']) + + def getDataSourceName(self): + return self.tsParams.getDataSourceName() + + def getDataSourceBaseParams(self): + return self.tsParams.getDataSourceParams() + + def getInstrumentFeatureConfigDicts(self): + stockFeatureConfigs = self.tsParams.getStockFeatureConfigDicts() + return {INSTRUMENT_TYPE_STOCK : stockFeatureConfigs} + + def 
getCustomFeatures(self): + customFeatures = {'prediction': MLSTrainingPredictionFeature} + return customFeatures + + def getTargetVariableConfigDicts(self): + Y = {'featureKey' : self.tsParams.getTargetVariableKey(), + 'featureId' : '', + 'params' : {}} + return {INSTRUMENT_TYPE_STOCK : [Y]} + + def getFeatureSelectionConfigDicts(self): + tf = self.tsParams.getTradingFunctions() + return tf.getFeatureSelectionConfigDicts() + + def getFeatureTransformationConfigDicts(self): + tf = self.tsParams.getTradingFunctions() + return tf.getFeatureTransformationConfigDicts() + + def getModelConfigDicts(self): + tf = self.tsParams.getTradingFunctions() + return tf.getModelConfigDicts() + + +class TrainingPredictionFeature(Feature): + + @classmethod + def computeForInstrument(cls, updateNum, time, featureParams, featureKey, instrumentManager): + tf = featureParams['function'] + predictions = pd.Series(0.5, index = instrumentManager.getAllInstrumentsByInstrumentId()) + predictions = tf.getPrediction(time, updateNum, instrumentManager, predictions) + if (predictions!= 1).any() & (predictions[predictions!= 1]!= 0).any() & (predictions[(predictions!= 1)&(predictions!=0)]!= 0.5).any(): + logError('Predictions can only take values 0/1/0.5' + + '\n' + 'Code will exit!!!') + print(predictions) + print((predictions!= 1).any()) + print((predictions!= 0).any()) + print((predictions!= 0.5).any()) + sys.exit(1) + return predictions + +class FeesCalculator(Feature): + + @classmethod + def computeForInstrument(cls, updateNum, time, featureParams, featureKey, instrumentManager): + instrumentLookbackData = instrumentManager.getLookbackInstrumentFeatures() + + priceData = instrumentLookbackData.getFeatureDf(featureParams['price']) + positionData = instrumentLookbackData.getFeatureDf(featureParams['position']) + currentPosition = positionData.iloc[-1] + previousPosition = 0 if updateNum < 2 else positionData.iloc[-2] + changeInPosition = currentPosition - previousPosition + fees = 
pd.Series(np.abs(changeInPosition)*featureParams['feeDict']['brokerage'],index = instrumentManager.getAllInstrumentsByInstrumentId()) + if len(priceData)>1: + currentPrice = priceData.iloc[-1] + else: + currentPrice = 0 + + fees = fees*currentPrice + np.abs(changeInPosition)*featureParams['feeDict']['spread'] + + return fees + + +class BuyHoldPnL(Feature): + @classmethod + def computeForInstrument(cls, updateNum, time, featureParams, featureKey, instrumentManager): + instrumentLookbackData = instrumentManager.getLookbackInstrumentFeatures() + + priceData = instrumentLookbackData.getFeatureDf(featureParams['price']) + bhpnl = pd.Series(0,index = instrumentManager.getAllInstrumentsByInstrumentId()) + if len(priceData)>1: + bhpnl += priceData.iloc[-1] - priceData.iloc[-2] + + return bhpnl + +class ScoreCalculator(Feature): + @classmethod + def computeForInstrument(cls, updateNum, time, featureParams, featureKey, instrumentManager): + instrumentLookbackData = instrumentManager.getLookbackInstrumentFeatures() + + predictionData = instrumentLookbackData.getFeatureDf(featureParams['predictionKey']).iloc[-1] + trueValue = instrumentLookbackData.getFeatureDf(featureParams['targetVariable']).iloc[-1] + if updateNum <2 : + return pd.Series(0.5, index=predictionData.index) + previousValue = instrumentLookbackData.getFeatureDf(featureKey).iloc[-1] + currentScore = pd.Series(0.5, index=previousValue.index) + currentScore[predictionData!=0.5] = currentScore +(0.5 - np.abs(predictionData - trueValue)) + # printdf = pd.DataFrame(index=predictionData.index) + # printdf['predictionData'] = predictionData + # printdf['trueValue'] = trueValue + # printdf['previousValue'] = previousValue + # printdf['currentScore']=currentScore + + # print(printdf) + score = (previousValue*(updateNum-1)+currentScore)/updateNum#sm.accuracy_score(predictionData, trueValue) + return score