-
Notifications
You must be signed in to change notification settings - Fork 35
/
django_template_i18n_lint.py
executable file
·280 lines (214 loc) · 9.33 KB
/
django_template_i18n_lint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#! /usr/bin/env python
"""
Prints out all text in Django templates that has not been marked for translation.
"""
import os
import re
import sys
from optparse import OptionParser
def location(str, pos):
    """Translate a character offset into a 1-based (line, column) pair.

    Walks ``str`` up to index ``pos``, bumping the line number on every
    newline and the column on every other character.  If ``pos`` is never
    reached (it is negative or past the end), the position just after the
    last character is returned.
    """
    lineno = charpos = 1
    for index, char in enumerate(str):
        if index == pos:
            break
        if char == '\n':
            # A newline starts a fresh line, so the column resets.
            lineno, charpos = lineno + 1, 1
        else:
            charpos += 1
    return lineno, charpos
# Things that are OK:
# Matches every piece of a template that does NOT need translating (markup,
# template tags, comments, entities, ...).  The whole pattern is one capturing
# group, so GOOD_STRINGS.split(text) returns a list that alternates between
# unmatched text (even indexes, candidate untranslated strings) and matched
# "good" text (odd indexes).  It is compiled with re.VERBOSE, so whitespace and
# '#' comments inside the pattern are ignored by the regex engine.
GOOD_STRINGS = re.compile(
    r"""
    # django comment
    ( {%\ comment\ %}.*?{%\ endcomment\ %}
    # already translated text
    |{%\ ?blocktrans.*?{%\ ?endblocktrans\ ?%}
    # any django template function (catches {% trans ..) aswell
    |{%.*?%}
    # CSS
    |<style.*?</style>
    # JS
    |<script.*?</script>
    # A html title or value attribute that's been translated
    |(?:value|title|summary|alt)="{%\ ?trans.*?%}"
    # A html title or value attribute that's just a template var
    |(?:value|title|summary|alt)="{{.*?}}"
    # An <option> value tag
    |<option[^<>]+?value="[^"]*?"
    # Any html attribute that's not value or title (single quote, double quote and html5 quoteless)
    # NB at the start we want to grab any trailing quote from the previous attribute
    # FIXME This will fail for some quoteless attr values.
    |(?:['"]\W+)?[a-z:-]+?(?<!alt)(?<!value)(?<!title)(?<!summary)=(?:'(?:{{.*?}}|{%.*?%}|[^']*)'|"(?:{{.*?}}|{%.*?%}|[^"]*)+"|[a-zA-Z\.]+)
    # The actual alt/value/title tag itself cannot be translated, but the value should be
    # Treat data-title/data-original-title etc as equivalanets. Think this is some bootstrap thing & HTML5
    |(?:['"]\W+)?(?:data-|data-original-)?(?:alt|value|title|summary)=['"]?
    # Boolean attributes
    |<[^<>]+?(?:checked|selected|disabled|readonly|multiple|ismap|defer|async|declare|noresize|nowrap|noshade|compact|hidden|itemscope|autofocus|autoplay|controls|download)[^<>]*?>
    # HTML opening tag
    |<[\w:]+
    # End of a html opening tag
    # NB: catching end of quote from last attribute
    |(?:['"]\W*)?/?>
    # closing html tag
    |</.*?>
    # any django template variable
    |{{.*?}}
    # any django template tag
    |{%.*?%}
    # any angular.js template
    |\[\[.*?\]\]
    # HTML doctype
    |<!DOCTYPE.*?>
    # IE specific HTML
    |<!--\[if.*?<!\[endif\]-->
    # HTML comment
    |<!--.*?-->
    # HTML entities
    |&[a-z]{1,10};
    # Hexadecimal HTML entities (hex digits 0-9 and a-f, not only 0-9)
    |&\#x[0-9a-f]{1,10};
    # CSS style
    |<style.*?</style>
    # another common template comment
    |{\#.*?\#}
    )""",
    # MULTILINE to match across lines and DOTALL to make . include the newline
    re.MULTILINE | re.DOTALL | re.VERBOSE | re.IGNORECASE)
# Stops us matching non-letter parts, e.g. just hyphens, full stops etc.
# [^\W\d_] is "a word character that is neither a digit nor an underscore",
# i.e. a letter.
LETTERS = re.compile(r"[^\W\d_]")

# Captures a run of non-word characters at the very start or very end of a
# string.  The capturing group makes re.split() keep those runs in its result.
# Raw string: "\W" in a normal string literal is an invalid escape sequence.
LEADING_TRAILING_WHITESPACE = re.compile(r"(^\W+|\W+$)")
def split_into_good_and_bad(template):
    """Yield ``(index, piece)`` for every piece of GOOD_STRINGS.split(template).

    Pieces at even indexes are the text between matches; pieces at odd
    indexes are the text matched by GOOD_STRINGS.
    """
    pieces = GOOD_STRINGS.split(template)
    for numbered_piece in enumerate(pieces):
        yield numbered_piece
def split_trailing_space(string):
    """Split ``string`` into a ``(leading, middle, trailing)`` tuple.

    ``leading`` and ``trailing`` are the runs matched by
    LEADING_TRAILING_WHITESPACE at the start and end of the string (empty
    when there is none); ``middle`` is what remains.

    Raises NotImplementedError when the split result has an unexpected shape.
    """
    parts = LEADING_TRAILING_WHITESPACE.split(string)
    count = len(parts)

    if count == 1:
        # Nothing to strip at either end.
        return ('', string, '')

    if count == 5:
        # Runs at both ends: ['', leading, middle, trailing, ''].
        return (parts[1], parts[2], parts[3])

    if count == 3:
        head, separator, tail = parts
        if head == '' and tail != '':
            # Only a leading run: ['', leading, middle].
            return (separator, tail, '')
        if head != '' and tail == '':
            # Only a trailing run: [middle, trailing, ''].
            return ('', head, separator)

    raise NotImplementedError("Unknown case: %r %r" % (string, parts))
def _mark_translatable(message):
    """Wrap ``message`` in a ``{% trans %}`` tag, escaping embedded double quotes."""
    return '{% trans "' + message.replace('"', '\\"') + '" %}'


def replace_strings(filename, overwrite=False, force=False, accept=None):
    """Rewrite a template so that its untranslated text is wrapped in ``{% trans %}``.

    filename  -- path of the template to process.
    overwrite -- when true, write the result back to ``filename``; otherwise
                 write it next to the original as ``<name>_translated.html``.
    force     -- when true, wrap every candidate string without prompting.
    accept    -- iterable of compiled regexes; a string matched by any of them
                 is left untouched.

    Strings on a line carrying a ``{# notrans #}`` marker are left untouched
    as well.  Unless ``force`` is set, the user is asked about each remaining
    string; an empty answer or "y"/"yes" (any case) means yes.
    """
    accept = accept or []

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        ask = raw_input
    except NameError:
        ask = input

    with open(filename) as fp:
        content = fp.read()

    ignore_lines = find_ignored_lines(content)
    full_text_lines = []
    offset = 0

    for index, string in split_into_good_and_bad(content):
        # ``start`` is where this piece begins inside ``content``.
        start = offset
        offset += len(string)

        # Odd pieces are markup matched by GOOD_STRINGS; even pieces without
        # any letter cannot be translated either.  Both are copied verbatim.
        m = LETTERS.search(string) if index % 2 == 0 else None
        if m is None:
            full_text_lines.append(string)
            continue

        leading_whitespace, message, trailing_whitespace = split_trailing_space(string)

        # Line of the first letter, measured against the whole file so that
        # it is comparable with the line numbers in ``ignore_lines``.
        lineno, _charpos = location(content, start + m.start())

        if any(r.match(message) for r in accept) or lineno in ignore_lines:
            replacement = message
        elif force:
            replacement = _mark_translatable(message)
        else:
            change = ask("Make %r translatable? [Y/n] " % message)
            if change.strip().lower() in ('', 'y', 'yes'):
                replacement = _mark_translatable(message)
            else:
                replacement = message

        full_text_lines.extend((leading_whitespace, replacement, trailing_whitespace))

    full_text = "".join(full_text_lines)
    if overwrite:
        save_filename = filename
    else:
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. "./templates/a.html") do not truncate the name.
        save_filename = os.path.splitext(filename)[0] + "_translated.html"
    with open(save_filename, 'w') as out:
        out.write(full_text)
    print("Fully translated! Saved as: %s" % save_filename)
def find_ignored_lines(template):
    """Return the set of line numbers that contain a ``{# notrans #}`` marker.

    Line numbers are 1-based, as computed by ``location``.
    """
    markers = re.finditer(r'{#\s*notrans\s*#}', template)
    return set(location(template, marker.start())[0] for marker in markers)
def non_translated_text(template):
    """Yield ``(lineno, charpos, text)`` for each untranslated string in ``template``.

    The position is that of the first letter of the string.  ``text`` is the
    string stripped, with line breaks removed, cut to at most 120 characters.
    Strings whose first letter sits on a ``{# notrans #}`` line are skipped.
    """
    skipped_lines = find_ignored_lines(template)
    position = 0

    # The pieces that do NOT match GOOD_STRINGS are the even-indexed ones.
    for index, chunk in split_into_good_and_bad(template):
        chunk_start = position
        position += len(chunk)

        if index % 2 != 0:
            continue

        # Ignore chunks without any letter (pure punctuation, digits, space).
        first_letter = LETTERS.search(chunk)
        if not first_letter:
            continue

        lineno, charpos = location(template, chunk_start + first_letter.start())
        if lineno in skipped_lines:
            continue

        snippet = chunk.strip().replace("\n", "").replace("\r", "")
        yield (lineno, charpos, snippet[:120])
def print_strings(filename, accept=[]):
    """Print every untranslated string of ``filename`` as ``file:line:char:text``.

    Strings matched (from their start) by any compiled regex in ``accept``
    are not reported.
    """
    with open(filename) as template_file:
        template = template_file.read()

    for lineno, charpos, message in non_translated_text(template):
        if not any(pattern.match(message) for pattern in accept):
            print("%s:%s:%s:%s" % (filename, lineno, charpos, message))
def filenames_to_work_on(directory, exclude_filenames):
    """Collect the template files found anywhere below ``directory``.

    A file is kept when its name ends in ``.html`` or ``.txt`` and its bare
    name (without directory) is not listed in ``exclude_filenames``.
    Returns a list of paths, each joined onto the directory it was found in.
    """
    wanted_suffixes = ('.html', '.txt')
    selected = []
    for dirpath, _subdirs, names in os.walk(directory):
        for name in names:
            if name.endswith(wanted_suffixes):
                if name not in exclude_filenames:
                    selected.append(os.path.join(dirpath, name))
    return selected
def main():
    """Command-line entry point: lint, or interactively rewrite, templates.

    Positional arguments are files and/or directories; directories are
    searched recursively via ``filenames_to_work_on``.  With no arguments the
    current working directory is used.  By default the untranslated strings
    are printed; with ``--replace`` the files are rewritten instead, and
    ``--overwrite`` / ``--force`` are passed through to ``replace_strings``.
    """
    parser = OptionParser(usage="usage: %prog [options] <filenames>")
    parser.add_option("-r", "--replace", action="store_true", dest="replace",
                      help="Ask to replace the strings in the file.", default=False)
    parser.add_option("-o", "--overwrite", action="store_true", dest="overwrite",
                      help="When replacing the strings, overwrite the original file. If not specified, the file will be saved in a seperate file named X_translated.html", default=False)
    parser.add_option("-f", "--force", action="store_true", dest="force",
                      help="Force to replace string with no questions", default=False)
    parser.add_option("-e", "--exclude", action="append", dest="exclude_filename",
                      help="Exclude these filenames from being linted", default=[])
    parser.add_option("-x", "--accept", action="append", dest="accept",
                      help="Exclude these regexes from results", default=[])
    (options, args) = parser.parse_args()

    # Create a list of files to check
    if not args:
        args = [os.getcwd()]

    files = []
    for arg in args:
        if os.path.isdir(arg):
            # extend, not assign: keep the files gathered from earlier arguments.
            files.extend(filenames_to_work_on(arg, options.exclude_filename))
        elif arg not in options.exclude_filename:
            files.append(arg)

    accept_regexes = [re.compile(r) for r in options.accept]

    for filename in files:
        if options.replace:
            # Honour --overwrite as its help text describes, rather than
            # always overwriting the original file.
            replace_strings(filename, overwrite=options.overwrite,
                            force=options.force, accept=accept_regexes)
        else:
            print_strings(filename, accept=accept_regexes)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()