-
Notifications
You must be signed in to change notification settings - Fork 2
/
create_clones_file_ms.py
145 lines (111 loc) · 4.03 KB
/
create_clones_file_ms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import xml.etree.ElementTree as ET
import argparse
def get_start_end_line(file: str) -> list[int]:
    """Get the line boundaries of the code fragments in a clone-pair file.

    The file is scanned for the first line that is empty or made up only of
    spaces; that line is treated as the separator between two fragments.
    Later blank lines are counted but do not add further boundaries.

    Args:
        file (str): standalone clone file location

    Returns:
        list[int]: ``[0, sep, sep + 1, total]`` when a separator line is found
            at 0-based index ``sep`` (``total`` is the number of lines in the
            file), or ``[0, total]`` when the file has no separator line.
    """
    boundaries = [0]
    total_lines = 0
    separator_seen = False
    # Context manager so the handle is closed even if reading fails.
    with open(file, "r") as handle:
        for line in handle:
            # Only the first blank / spaces-only line marks the split point.
            if not separator_seen and line.replace(" ", "") in ("", "\n"):
                boundaries.extend((total_lines, total_lines + 1))
                separator_seen = True
            total_lines += 1
    boundaries.append(total_lines)
    return boundaries
def read_clone_folders(file: str) -> list[str]:
    """Read the list of clone folders, one folder path per line.

    Lines that are empty or contain only spaces are skipped. Every other line
    is returned with its newline removed (other whitespace is kept as-is).
    The file is decoded as UTF-8 and undecodable bytes are ignored.

    Args:
        file (str): location of file

    Returns:
        list[str]: list of all clone folders
    """
    with open(file, "r", encoding="utf-8", errors="ignore") as text_file:
        # The two comparisons must both hold ("and", not "or"); otherwise the
        # test is always true and blank lines leak through as "" entries.
        return [
            line.replace("\n", "")
            for line in text_file
            if line.replace(" ", "") not in ("", "\n")
        ]
def create_CC_xml(file_locs: list[str], output_file: str) -> None:
    """Create the CloneCognition input xml file.

    Every file found in every directory of ``file_locs`` becomes one
    ``<clone>`` element. The first blank (or spaces-only) line of a file
    closes the first ``<code>`` section and opens the second one; the
    start/end line attributes come from ``get_start_end_line``.

    Args:
        file_locs (list[str]): list of clone dirs
        output_file (str): output xml file name; an existing file is
            overwritten, because appending a second ``<clones>`` root would
            make the document unparseable.
    """
    source_tag = '<source file="{}" startline = "{}" endline="{}"/>\n<code>\n'
    # Collect fragments and join once instead of repeated string "+=".
    parts = ["<clones>"]
    for clone_loc in file_locs:
        for name in sorted(os.listdir(clone_loc)):  # each clone pair file
            file = clone_loc + "/" + name
            start_end_lines = get_start_end_line(file)
            parts.append(
                "\n<clone>\n"
                + source_tag.format(file, start_end_lines[0], start_end_lines[1])
            )
            find_empty = False  # separator state is tracked per file
            with open(file, "r") as file_line:
                for line in file_line:  # each line of the clone pair file
                    line1 = line.replace(" ", "")
                    if line1 in ("", "\n") and not find_empty:
                        # First blank line: switch to the second fragment.
                        parts.append(
                            "\n</code>\n"
                            + source_tag.format(
                                file, start_end_lines[2], start_end_lines[3]
                            )
                        )
                        find_empty = True
                    elif line1 != "":
                        # Escape XML special characters; "&" must go first so
                        # the entities produced below are not re-escaped.
                        line = line.replace("&", "&amp;")
                        line = line.replace("<", "&lt;")
                        line = line.replace(">", "&gt;")
                        parts.append(line)
            parts.append("</code>\n</clone>")
    parts.append("\n</clones>")
    with open(output_file, "w") as f:
        f.write("".join(parts))
def test_xml(xml_file: str) -> None:
    """Check if xml file format is valid to run CloneCognition.

    The file is parsed with ElementTree (parse errors propagate to the
    caller) and the root element must have at least one child element.

    Args:
        xml_file (str): xml file location

    Raises:
        AssertionError: if the root element has no child elements.
    """
    root = ET.parse(xml_file).getroot()
    total_clone_pairs = len(root)
    # Raised explicitly instead of using an ``assert`` statement so the check
    # is not stripped when Python runs with -O.
    if total_clone_pairs <= 0:
        raise AssertionError(
            "This input will not work for CloneCognition. Total clone pairs needs to be more than 0"
        )
def main(argv=None) -> None:
    """Command-line entry point: build the CloneCognition xml and validate it.

    Reads the folder list named by ``-f/--file_loc``, writes the xml file
    named by ``-x/--xml_loc``, then checks the written file with ``test_xml``.

    Args:
        argv: list of argument strings to parse. ``None`` (the default) makes
            argparse read the process command line.
    """
    parser = argparse.ArgumentParser(
        description="Create clone files for CloneCognition!"
    )

    # Both options are required: without them there is nothing to read or
    # write, and argparse now reports that instead of a TypeError on None.
    parser.add_argument(
        "-f",
        "--file_loc",
        type=str,
        required=True,
        metavar="file_loc",
        help="File location where all folder locations are saved",
    )
    parser.add_argument(
        "-x",
        "--xml_loc",
        type=str,
        required=True,
        metavar="xml_loc",
        help="name of xml file for CloneCognition input",
    )

    args = parser.parse_args(argv)

    file_locs = read_clone_folders(args.file_loc)  # read clone folders
    create_CC_xml(file_locs, args.xml_loc)  # create xml file
    test_xml(args.xml_loc)  # test xml file
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()