Skip to content

Commit

Permalink
modified decision tree and random forest code to handle separate train and test data files
Browse files Browse the repository at this point in the history
  • Loading branch information
Areeb-Aatif committed Nov 28, 2019
1 parent 7efc5b5 commit 37686e8
Showing 1 changed file with 50 additions and 8 deletions.
58 changes: 50 additions & 8 deletions project3/Code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ def decision_tree(self, kCrossValidation = 10):
print("Running iteration " + str(i+1) + " of k cross validation .....")
testData = data.loc[foldSize*i:foldSize*(i+1)-1]
trainData = data.loc[:foldSize*i-1].append(data.loc[foldSize*(i+1):])
# root = dt.decision(trainData, depth=10, minLeafRows=5)
root = dt.decision(trainData, depth=15, minLeafRows=5)
# root = dt.decision(trainData)
root = dt.decision(trainData, depth=10, minLeafRows=3)
target = testData.iloc[:,-1].values.tolist()
predicted = dt.predictData(testData.iloc[:, :-1], root)
models.append(root)
Expand All @@ -106,7 +106,28 @@ def decision_tree(self, kCrossValidation = 10):
precision.append(tmpPrecision)
recall.append(tmpRecall)
f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))
return accuracy, precision, recall, f_score

print("\nMetrics on train data with k-cross validation")
h.calculateMetrics(accuracy, precision, recall, f_score)

fileName = input("\nEnter test data file name without extension (if no test file, just press enter): ")
if fileName != '':
# filePath = "../Data/"+fileName+".txt"
filePath = "CSE-601/project3/Data/"+fileName+".txt"
testData, testLabels = h.readData(filePath)
testData = h.oneHotEncoding(testData, testLabels)
predLabels = []
for _,row in testData.iloc[:,:-1].iterrows():
predictedRow = [dt.predictRow(row, root) for root in models]
predLabels.append(max(set(predictedRow), key=predictedRow.count))
print(predLabels)
truePositives, trueNegatives, falsePositives, falseNegatives = h.findParameters(predLabels, testData.iloc[:,-1].values.tolist())
accuracy = [h.findAccuracy(truePositives, trueNegatives, falsePositives, falseNegatives)]
precision = h.findPrecision(truePositives, trueNegatives, falsePositives, falseNegatives)
recall = h.findRecall(truePositives, trueNegatives, falsePositives, falseNegatives)
f_score = [h.findFMeasure(precision, recall)]
print("\nMetrics on test data with bagging")
h.calculateMetrics(accuracy, [precision], [recall], f_score)

def random_forest(self, kCrossValidation = 10):
print("\nRunning Random Forest Classifier ....................\n")
Expand Down Expand Up @@ -141,7 +162,30 @@ def random_forest(self, kCrossValidation = 10):
precision.append(tmpPrecision)
recall.append(tmpRecall)
f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))
return accuracy, precision, recall, f_score

print("\nMetrics on train data with k-cross validation")
h.calculateMetrics(accuracy, precision, recall, f_score)

fileName = input("\nEnter test data file name without extension (if no test file, just press enter): ")
if fileName != '':
# filePath = "../Data/"+fileName+".txt"
filePath = "CSE-601/project3/Data/"+fileName+".txt"
testData, testLabels = h.readData(filePath)
testData = h.oneHotEncoding(testData, testLabels)
predLabels = []
for forest in models:
predLabels.append(rf.predictForest(testData, forest))
predLabels = pd.DataFrame(predLabels)
pred = []
for _, colData in predLabels.iteritems():
pred.append(colData.value_counts().index[0])
truePositives, trueNegatives, falsePositives, falseNegatives = h.findParameters(pred, testData.iloc[:,-1].values.tolist())
accuracy = [h.findAccuracy(truePositives, trueNegatives, falsePositives, falseNegatives)]
precision = h.findPrecision(truePositives, trueNegatives, falsePositives, falseNegatives)
recall = h.findRecall(truePositives, trueNegatives, falsePositives, falseNegatives)
f_score = [h.findFMeasure(precision, recall)]
print("\nMetrics on test data with bagging")
h.calculateMetrics(accuracy, [precision], [recall], f_score)

if __name__ == "__main__":
m = main()
Expand All @@ -159,8 +203,7 @@ def random_forest(self, kCrossValidation = 10):
accuracy, precision, recall, f_score = m.knn(predictData, trainData)
h.calculateMetrics(accuracy, precision, recall, f_score)
elif algorithm == 2:
accuracy, precision, recall, f_score = m.decision_tree()
h.calculateMetrics(accuracy, precision, recall, f_score)
m.decision_tree()
elif algorithm == 3:
print("Enter train File name")
trainData = h.get_file_bayes(h.get_fileName(), kCrossValidation = 10)
Expand All @@ -173,7 +216,6 @@ def random_forest(self, kCrossValidation = 10):
accuracy, precision, recall, f_score = m.bayes_naive(predictData, trainData)
h.calculateMetrics(accuracy, precision, recall, f_score)
elif algorithm == 4:
accuracy, precision, recall, f_score = m.random_forest()
h.calculateMetrics(accuracy, precision, recall, f_score)
m.random_forest()
else:
print("\nWrong input")

0 comments on commit 37686e8

Please sign in to comment.