forked from idiap/importance-sampling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathheaders.py
executable file
·177 lines (140 loc) · 4.81 KB
/
headers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python
#
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/
# Written by Angelos Katharopoulos <[email protected]>
#
import argparse
from itertools import chain, ifilter
import os
from os import path
from subprocess import PIPE, Popen
class Header(object):
    """Represents the copyright header for a source file.

    `start` and `stop` are the offsets of the header inside the file
    contents (the header is ``contents[start:stop]``) and `content` is the
    header text itself. A `start` of -1 means that the file has no header.
    """
    # Fixed part of every header; the author list is appended to it.
    COPY = """#
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/
# Written by """

    def __init__(self, start=-1, stop=-1, content=None):
        self.start = start
        self.stop = stop
        self.content = content

    def update(self, filepath, dry_run=False):
        """Rewrite `filepath` so that it carries an up to date header.

        Nothing happens if the file already contains exactly the header
        generated by `get_content_for_file`. With `dry_run` the new header
        is printed to STDOUT and the file is left untouched. Otherwise the
        result is written to ``filepath + ".header"``, which then replaces
        the original file while keeping its permission bits.
        """
        def peek(f):
            """Return the next character of `f` without consuming it."""
            pos = f.tell()
            c = f.read(1)
            f.seek(pos)
            return c

        new_content = Header.get_content_for_file(filepath)
        has_header = self.start >= 0
        if has_header and new_content == self.content:
            return

        # Do the dry run
        if dry_run:
            print(new_content)
            return

        # Open both files and do the copy while updating the header
        # NOTE: self.start/self.stop are offsets into the string read by
        # from_file and are used here as file positions; this assumes both
        # agree (e.g. no newline translation or multi-byte decoding).
        tmp_path = filepath + ".header"
        with open(filepath) as f_in, open(tmp_path, "w") as f_out:
            # Copy the comments that appear on top (shebang, coding line,
            # ...) but stop at the old header, otherwise it would be copied
            # as well and end up duplicated.
            while peek(f_in) == "#":
                if has_header and f_in.tell() >= self.start:
                    break
                f_out.write(f_in.readline())

            # Consume one new line
            if peek(f_in) in ["\r", "\n"]:
                f_in.readline()

            # Add the new header
            f_out.write(new_content)

            # If the file had a header skip it while writing the rest of the
            # data. The max() calls guard against having already read past
            # the start of the old header.
            if has_header:
                f_out.write(f_in.read(max(0, self.start - f_in.tell())))
                f_in.seek(max(f_in.tell(), self.stop))
            f_out.write(f_in.read())

        # Keep the permissions (e.g. the executable bit) of the original
        stat = os.stat(filepath)
        os.chmod(tmp_path, stat.st_mode)
        os.rename(tmp_path, filepath)

    @classmethod
    def from_file(cls, filepath):
        """Locate the copyright header of `filepath`.

        Returns a Header whose `start`, `stop` and `content` describe the
        header found in the file, or an empty Header (`start` == -1) if the
        file contains no "#\\n# Copyright" marker.
        """
        # Create an empty object to be filled with contents
        header = cls()

        # Read the file contents into memory
        with open(filepath) as f:
            contents = f.read()

        # Find the copyright disclaimer
        start = contents.find("#\n# Copyright")
        if start < 0:
            return header

        terminator = contents.find("\n#\n\n", start)
        if terminator >= 0:
            stop = terminator + 4
        else:
            # The closing "#" line followed by a blank line is missing; treat
            # the run of comment lines that starts at the marker as the
            # header instead of slicing with a bogus offset.
            stop = start
            while contents.startswith("#", stop):
                newline = contents.find("\n", stop)
                stop = len(contents) if newline < 0 else newline + 1

        # Fill in the header
        header.start = start
        header.stop = stop
        header.content = contents[start:stop]

        return header

    @staticmethod
    def get_content_for_file(filepath):
        """Return the generated header for the file"""
        # Call into git to get the list of authors. HEAD is passed explicitly
        # because without a revision git shortlog reads the log from STDIN
        # whenever STDIN is not a terminal.
        p = Popen(
            ["git", "shortlog", "-se", "HEAD", "--", filepath],
            stdout=PIPE,
            universal_newlines=True
        )
        out, _ = p.communicate()

        # Every line looks like "<count>\t<name> <email>"; keep what follows
        # the tab
        authors = [
            line.split("\t")[1].strip()
            for line in out.splitlines()
            if "\t" in line
        ]

        return Header.COPY + ",\n# ".join(authors) + "\n#\n\n"
def is_python_file(path):
    """Return True if `path` ends with the ".py" extension."""
    # Compare the last three characters instead of calling endswith
    return path[-3:] == ".py"
def in_directory(directory):
    """Return a predicate that tells whether a path is inside `directory`.

    One leading and one trailing path separator are removed from
    `directory`; the predicate then checks that `directory`, surrounded by
    separators, appears somewhere in the path it is given. An empty
    `directory` (or one made only of a separator) matches nothing.
    """
    if directory.startswith(path.sep):
        directory = directory[1:]
    if directory.endswith(path.sep):
        directory = directory[:-1]

    if not directory:
        # There is no directory name left to look for
        def never(x):
            return False
        return never

    needle = path.sep + directory + path.sep

    def inner(x):
        return needle in x
    return inner
def _all(*predicates):
    """Combine `predicates` into one that holds only if all of them hold."""
    def inner(x):
        # Stop at the first predicate that rejects x
        for check in predicates:
            if not check(x):
                return False
        return True
    return inner
def _not(predicate):
    """Return a predicate that holds exactly when `predicate` does not."""
    def inner(x):
        return False if predicate(x) else True
    return inner
def walk_directories(root):
    """'find' in a generator function.

    Yields the path of every file below `root`. Entries whose name starts
    with a "." are skipped, symbolic links to directories are not followed
    and anything that is neither a file nor a directory is ignored.
    """
    for child in os.listdir(root):
        # Hidden files and directories (.git, ...) are never visited
        if child.startswith("."):
            continue

        full_path = path.join(root, child)
        if path.isfile(full_path):
            yield full_path
        elif path.isdir(full_path) and not path.islink(full_path):
            # Only recurse into real directories; os.listdir would raise on
            # anything else
            for fp in walk_directories(full_path):
                yield fp
if __name__ == "__main__":
    # Command line entry point: refresh the copyright header of every python
    # file found below the current directory.
    parser = argparse.ArgumentParser(
        description=("Generate file copyright headers and prepend them to "
                     "the files in the repository")
    )
    parser.add_argument(
        "--dry_run",
        action="store_true",
        help="Don't actually change anything just write the headers to STDOUT"
    )
    parser.add_argument(
        "--blacklist",
        # Entries that are empty or consist only of path separators (e.g.
        # from "a::b" or a trailing colon) name no directory, so drop them
        type=lambda x: [d for d in x.split(":") if d.strip(path.sep)],
        default=[],
        help="A colon separated list of directories to blacklist"
    )
    args = parser.parse_args()

    # Loop over all python files that are in none of the blacklisted
    # directories
    predicate = _all(
        is_python_file,
        _all(*map(_not, map(in_directory, args.blacklist)))
    )
    for source_file in walk_directories("."):
        if not predicate(source_file):
            continue
        print(source_file)
        header = Header.from_file(source_file)
        header.update(source_file, args.dry_run)