-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_encoding.py
29 lines (26 loc) · 887 Bytes
/
data_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Author: name (banner no) <@dal.ca>
# File: Data-encoding helpers; one-hot encodes a categorical column of a DataFrame.
# imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
def one_hot_encoder(df, column):
    """Replace one column of a DataFrame with its one-hot encoded columns.

    Args:
        df: Input pandas DataFrame. It is not modified.
        column: Label of the column in ``df`` to encode.

    Returns:
        A new DataFrame whose first columns are the 0/1 indicator columns
        (one per category found in ``column``, labelled ``x0_<category>``),
        followed by the remaining columns of ``df`` in their original order.
        Rows keep their original order and the index is reset to 0..n-1.
    """
    ohe = OneHotEncoder()
    # OneHotEncoder accepts only 2-D input, so select the column as a
    # one-column DataFrame. A single fit_transform is enough; a separate
    # fit() beforehand would just repeat the work.
    encoded = ohe.fit_transform(df[[column]]).toarray()

    # Build the indicator labels from the fitted categories instead of
    # calling ohe.get_feature_names(), which newer scikit-learn releases no
    # longer provide. The "x0_" prefix reproduces the labels that call
    # produced, so existing callers see the same column names.
    encoded_names = ["x0_" + str(category) for category in ohe.categories_[0]]
    enc_df = pd.DataFrame(encoded, columns=encoded_names)

    # Align the two frames by row position. Resetting the index makes the
    # concat positional, and avoids a helper join column that could clash
    # with an existing column of the same name.
    remaining = df.drop(column, axis=1).reset_index(drop=True)
    return pd.concat([enc_df, remaining], axis=1)