-
Notifications
You must be signed in to change notification settings - Fork 0
/
clusters.py
60 lines (50 loc) · 2.59 KB
/
clusters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from os.path import expanduser, join
import pandas
import argparse
from numpy import ndarray, savetxt
from sklearn.cluster import DBSCAN
from clearedData import ClearedData
from distanceMatrix import DistanceMatrix
from rowDataFromFile import RowDataFromFile
from rowDataFromURL import RowDataFromURL
class Clusters:
    """Cluster observations with DBSCAN using a precomputed distance matrix.

    The clustering is performed once, in the constructor; the resulting
    labels are then available through :meth:`get_cluster_labels`.

    Background on clustering mixed numerical and categorical features:
    https://towardsdatascience.com/clustering-on-numerical-and-categorical-features-6e0ebcf1cbad
    """

    # Original author's note: the initializer is executed before (!) the main program.
    # Private instance (or static class) attributes: their names must begin
    # with two underscores.
    __distanceMatrix: DistanceMatrix  # the object handed to the constructor
    __clusterLabels: ndarray  # `labels_` of the fitted DBSCAN estimator

    # Constructors
    def __init__(self,
                 distance_matrix: DistanceMatrix,
                 *,
                 eps: float = 0.3,
                 min_samples: int = 2) -> None:
        """Fit DBSCAN on the given distance matrix and store the labels.

        Args:
            distance_matrix: Object whose ``get_distance_matrix()`` result is
                passed to ``DBSCAN.fit`` as precomputed pairwise distances.
            eps: Forwarded to ``DBSCAN(eps=...)`` (neighbourhood radius in
                scikit-learn's terms). Defaults to the previously hard-coded 0.3.
            min_samples: Forwarded to ``DBSCAN(min_samples=...)``. Defaults to
                the previously hard-coded 2.
        """
        self.__distanceMatrix = distance_matrix
        # Configuring the parameters of the clustering algorithm.
        # metric="precomputed" tells DBSCAN that the input to fit() already
        # holds distances rather than feature vectors.
        dbscan_cluster = DBSCAN(eps=eps,
                                min_samples=min_samples,
                                metric="precomputed")
        # Fitting the clustering algorithm
        dbscan_cluster.fit(distance_matrix.get_distance_matrix())
        # Keeping the resulting labels on the instance
        self.__clusterLabels = dbscan_cluster.labels_

    # Methods
    # Accessor (= getter) methods
    def get_cluster_labels(self) -> ndarray:
        """Return the cluster labels computed by DBSCAN in the constructor.

        Per the scikit-learn documentation, samples classified as noise
        carry the label -1.
        """
        return self.__clusterLabels
if __name__ == '__main__':
    # Command-line entry point: load survey JSON (from a file or a default
    # URL), clean it, build the distance matrix, cluster it and write the
    # cluster labels to a CSV file.
    # Local imports: only this script section needs them.
    from os import makedirs
    from os.path import dirname

    # Data source used when no --path is given.
    default_url = "https://raw.githubusercontent.com/DimitriBolt/clusterAnalysis/master/data/601285.json"
    # Output location used when no --output is given (the original fixed path).
    default_output = join(expanduser("~"), "Downloads", "clusterLabels.csv")

    parser = argparse.ArgumentParser(description='cl parameters about data source')
    parser.add_argument('--path', '-p',
                        dest="path",
                        type=str,
                        help="path to JSON-result of the survey")
    parser.add_argument('--output', '-o',
                        dest="output",
                        type=str,
                        default=default_output,
                        help="path of the CSV file the cluster labels are written to")
    args = parser.parse_args()

    # Instance names start with a lower-case letter (author's convention).
    # rowData is deliberately left unannotated: it holds one of two
    # different source types, and annotating it twice with different types
    # is rejected by static type checkers.
    if args.path is None:
        rowData = RowDataFromURL(url=default_url)
    else:
        rowData = RowDataFromFile(file_name=args.path)

    clearedData: ClearedData = ClearedData(row_data_json=rowData.get_json())
    distanceMatrix: DistanceMatrix = DistanceMatrix(cleared_data=clearedData)
    clusters: Clusters = Clusters(distance_matrix=distanceMatrix)
    clusterLabels: ndarray = clusters.get_cluster_labels()

    # Make sure the target directory exists so that the run does not fail
    # at the very last step. dirname() is empty for a bare file name, in
    # which case the current directory is used and nothing must be created.
    output_dir = dirname(args.output)
    if output_dir:
        makedirs(output_dir, exist_ok=True)

    # One integer label per line.
    savetxt(fname=args.output,
            X=clusterLabels,
            fmt='%d',
            delimiter=",")