-
Notifications
You must be signed in to change notification settings - Fork 0
/
categorical.py
74 lines (63 loc) · 2.7 KB
/
categorical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from data_description import DataDescription
class Categorical:
    """Console menu for inspecting and one-hot encoding categorical columns.

    The instance wraps a pandas DataFrame in ``self.data``. Columns whose
    dtype is ``object`` are treated as categorical. Encoding replaces
    ``self.data`` with the frame returned by ``pandas.get_dummies``.
    """

    # Menu entries printed, one per line, by categoricalMain().
    # The leading '\n' on the first entry leaves a blank line above the list.
    tasks = [
        '\n1. Show Categorical Columns',
        '2. Performing One Hot encoding',
        '3. Show the Dataset'
    ]

    def __init__(self, data):
        """Store the DataFrame that the menu operations act on."""
        self.data = data

    def categoricalColumn(self):
        """Print every object-dtype column with its number of unique values."""
        print('\n{0: <20}'.format("Categorical Column") + '{0: <5}'.format("Unique Values"))
        # Iterating a DataFrame yields its column labels; only object-dtype
        # columns are considered categorical here.
        for column in self.data.select_dtypes(include="object"):
            print('{0: <20}'.format(column) + '{0: <5}'.format(self.data[column].nunique()))

    @staticmethod
    def _matchColumn(entry, columns):
        """Resolve a lower-cased user entry to an actual column label.

        An exact match wins; otherwise the first label whose lower-cased
        string form equals ``entry`` is returned. Returns ``None`` when
        nothing matches.
        """
        names = list(columns)
        if entry in names:
            return entry
        for name in names:
            if str(name).lower() == entry:
                return name
        return None

    def encoding(self):
        """Prompt for categorical columns and one-hot encode them one by one.

        The loop ends when the user enters ``-1`` or declines to encode
        another column; in the latter case the remaining categorical
        columns are printed before returning.
        """
        while True:
            column = input("\nWhich column would you like to one hot encode?(Press -1 to go back) ").lower()
            if column == "-1":
                break

            # Recomputed on every pass: get_dummies drops the encoded column,
            # so a snapshot taken before the loop would still accept an
            # already-encoded name and get_dummies would raise KeyError.
            categorical_columns = self.data.select_dtypes(include="object").columns

            # The entry has been lower-cased, so it is mapped back to the real
            # label; otherwise names containing capitals could never match.
            target = self._matchColumn(column, categorical_columns)
            if target is None:
                # The encoding function is only for categorical columns.
                print("Wrong Column Name. Try Again...\U0001F974")
                continue

            self.data = pd.get_dummies(data=self.data, columns=[target])
            print("Encoding is done.......\U0001F601")

            choice = input("Are there more columns to be encoded?(y/n) ")
            if choice == "y" or choice == "Y":
                continue
            self.categoricalColumn()
            break

    def categoricalMain(self):
        """Run the task menu until the user enters ``-1``.

        Returns:
            ``self.data`` as it stands after any encoding performed.
        """
        while True:
            print("\nTasks\U0001F447")
            for task in self.tasks:
                print(task)

            # Keep asking until the input parses as an integer.
            while True:
                try:
                    choice = int(input(("\n\nWhat you want to do? (Press -1 to go back) ")))
                except ValueError:
                    print("Integer Value required. Try again...\U0001F974")
                    continue
                break

            if choice == -1:
                break
            elif choice == 1:
                self.categoricalColumn()
            elif choice == 2:
                # Show the candidates first so the user can pick a name.
                self.categoricalColumn()
                self.encoding()
            elif choice == 3:
                # NOTE(review): called unbound with this Categorical instance
                # as `self`; presumably showDataset only reads `self.data` --
                # confirm against DataDescription.
                DataDescription.showDataset(self)
            else:
                print("\nWrong Integer value!! Try again..\U0001F974")

        # return the data after modifying
        return self.data