Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/mkamran37/CSE-601
Browse files Browse the repository at this point in the history
  • Loading branch information
Kamran authored and Kamran committed Dec 3, 2019
2 parents f21e580 + b9add13 commit 0e945d4
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 20 deletions.
7 changes: 5 additions & 2 deletions project3/Code/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ def decision(self, trainData, maxFeatures=None, depth=float('inf'), minLeafRows=
features = trainData.columns.values.tolist()
features.pop()
root = self.createTree(trainData, features, maxFeatures, depth, minLeafRows, rf)
# print(root)
return root

def createTree(self, data, features, maxFeatures, depth, minLeafRows, rf):
n = Node()

if data.shape[0] == 0:
return None

if depth <= 0 or data.shape[0] <= minLeafRows:
n.feature = data.iloc[:,-1].value_counts().index[0]
return n
Expand All @@ -37,14 +39,15 @@ def createTree(self, data, features, maxFeatures, depth, minLeafRows, rf):
n.condition = condition

leftChildData = data.loc[data[bestFeature] < condition]
rightChildData = data.loc[data[bestFeature] >= condition]

if leftChildData.shape[0] == 0:
temp = Node()
temp.feature = data.iloc[:,-1].value_counts().index[0]
n.left = temp
else:
n.left = self.createTree(leftChildData, features, maxFeatures, depth-1, minLeafRows, rf)

rightChildData = data.loc[data[bestFeature] >= condition]
if rightChildData.shape[0] == 0:
temp = Node()
temp.feature = data.iloc[:,-1].value_counts().index[0]
Expand Down
46 changes: 33 additions & 13 deletions project3/Code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,24 +105,38 @@ def decision_tree(self, kCrossValidation = 10):
f_score = []
models = []

foldSize = int(data.shape[0] / kCrossValidation)
for i in range(kCrossValidation):
print("Running iteration " + str(i+1) + " of k cross validation .....")
testData = data.loc[foldSize*i:foldSize*(i+1)-1]
trainData = data.loc[:foldSize*i-1].append(data.loc[foldSize*(i+1):])
# root = dt.decision(trainData)
root = dt.decision(trainData, depth=10, minLeafRows=3)
target = testData.iloc[:,-1].values.tolist()
predicted = dt.predictData(testData.iloc[:, :-1], root)
models.append(root)
if kCrossValidation <= 1:
root = dt.decision(data)
print(root)
target = data.iloc[:,-1]
predicted = dt.predictData(data.iloc[:, :-1], root)
truePositives, trueNegatives, falsePositives, falseNegatives = h.findParameters(predicted, target)
accuracy.append(h.findAccuracy(truePositives, trueNegatives, falsePositives, falseNegatives))
tmpPrecision = h.findPrecision(truePositives, trueNegatives, falsePositives, falseNegatives)
tmpRecall = h.findRecall(truePositives, trueNegatives, falsePositives, falseNegatives)
precision.append(tmpPrecision)
recall.append(tmpRecall)
f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))

else:
foldSize = int(data.shape[0] / kCrossValidation)
for i in range(kCrossValidation):
print("Running iteration " + str(i+1) + " of k cross validation .....")
testData = data.loc[foldSize*i:foldSize*(i+1)-1]
trainData = data.loc[:foldSize*i-1].append(data.loc[foldSize*(i+1):])
root = dt.decision(trainData)
# root = dt.decision(trainData, depth=10, minLeafRows=3)
target = testData.iloc[:,-1].values.tolist()

predicted = dt.predictData(testData.iloc[:, :-1], root)
models.append(root)
truePositives, trueNegatives, falsePositives, falseNegatives = h.findParameters(predicted, target)
accuracy.append(h.findAccuracy(truePositives, trueNegatives, falsePositives, falseNegatives))
tmpPrecision = h.findPrecision(truePositives, trueNegatives, falsePositives, falseNegatives)
tmpRecall = h.findRecall(truePositives, trueNegatives, falsePositives, falseNegatives)
precision.append(tmpPrecision)
recall.append(tmpRecall)
f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))

print("\nMetrics on train data with k-cross validation")
h.calculateMetrics(accuracy, precision, recall, f_score)

Expand All @@ -136,7 +150,6 @@ def decision_tree(self, kCrossValidation = 10):
for _,row in testData.iloc[:,:-1].iterrows():
predictedRow = [dt.predictRow(row, root) for root in models]
predLabels.append(max(set(predictedRow), key=predictedRow.count))
print(predLabels)
truePositives, trueNegatives, falsePositives, falseNegatives = h.findParameters(predLabels, testData.iloc[:,-1].values.tolist())
accuracy = [h.findAccuracy(truePositives, trueNegatives, falsePositives, falseNegatives)]
precision = h.findPrecision(truePositives, trueNegatives, falsePositives, falseNegatives)
Expand All @@ -156,6 +169,13 @@ def random_forest(self, kCrossValidation = 10):
data = h.oneHotEncoding(data, labels)
rf = randomForest()

try:
numTrees = int(input("\nEnter number of trees: "))
numFeatures = int(input("Enter number of features to consider: "))
except:
print("\nExecution Failed - Wrong Input")
exit()

accuracy = []
precision = []
recall = []
Expand All @@ -167,7 +187,7 @@ def random_forest(self, kCrossValidation = 10):
print("Running iteration " + str(i+1) + " of k cross validation .....")
testData = data.loc[foldSize*i:foldSize*(i+1)-1]
trainData = data.loc[:foldSize*i-1].append(data.loc[foldSize*(i+1):])
forest = rf.forest(trainData)
forest = rf.forest(trainData, numTrees=numTrees, numFeatures=numFeatures)
target = testData.iloc[:,-1].values.tolist()
predicted = rf.predictForest(testData.iloc[:, :-1], forest)
models.append(forest)
Expand Down
8 changes: 3 additions & 5 deletions project3/Code/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,20 @@

class randomForest:

def forest(self, trainData, numTrees=5, numFeatures=None, numRows=None, maxDepth=10, minLeafRows=3, randomSeed=12):
def forest(self, trainData, numTrees=5, numFeatures=None, numRows=None, maxDepth=7, minLeafRows=5, randomSeed=12):
if numFeatures == None:
numFeatures = int(np.sqrt(trainData.shape[1]))

if numRows == None:
# numRows = int(trainData.shape[0] * 0.8)
numRows = trainData.shape[0]

forest = [self.createForest(trainData, numFeatures, numRows, maxDepth, minLeafRows, randomSeed) for i in range(numTrees)]
return forest


def createForest(self, trainData, numFeatures, numRows, maxDepth, minLeafRows, randomSeed):
    """Grow a single tree of the forest on a bootstrap sample.

    Draws ``numRows`` training rows WITH replacement (a bootstrap
    sample), then trains a decision tree on that sample with
    ``rf=True`` so the learner restricts itself to ``maxFeatures``
    candidate features per split.

    Returns the root node produced by the decision-tree learner.
    """
    # Bootstrap: sample rows with replacement. The previous version
    # sampled twice in a row (diff residue) -- one draw is sufficient.
    # NOTE(review): a fixed random_state makes every call draw the same
    # sample; the forest builder reuses one seed for all trees, so all
    # trees see identical data -- confirm that is intended.
    sample = trainData.sample(numRows, axis=0, random_state=randomSeed, replace=True)

    return dt().decision(sample, maxFeatures=numFeatures, depth=maxDepth, minLeafRows=minLeafRows, rf=True)

Expand Down

0 comments on commit 0e945d4

Please sign in to comment.