-
Notifications
You must be signed in to change notification settings - Fork 0
/
sourcetoc.ml
executable file
·530 lines (424 loc) · 14.4 KB
/
sourcetoc.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
#! /usr/bin/env ocaml
(* Copyright © 2015 Johan Grande *)
(* License = GNU GPL version 2 or 3 *)
let backup_suffix = ref "~"
let etc = ref " [...]"
let indent_width = ref 2
let line_length = ref 80
let verbosity = ref 1
type action = Fill | Clear | TocOnly
let action = ref Fill
type output = File of string | InPlace | Stdout
let output = ref InPlace
type heading_style = Atx | Html | Wiki
let heading_style = ref Wiki
let sourcetoc = Filename.basename Sys.argv.(0)
(* <TOC> *)
(* Utils ................................................................... 57
* Logging and failing ................................................... 91
* Comment patterns #data ................................................. 112
* Read source file ........................................................ 148
* Generate table of contents .............................................. 282
* Adjust line numbers and levels ........................................ 283
* Generate TOC lines .................................................... 296
* I/O ..................................................................... 360
* CLI ..................................................................... 445
*)
(* </TOC> *)
;;
#warnings "-3" (* deprecated: String.set *)
#load "unix.cma"
#load "str.cma"
open Printf
let push, pop = Queue.push, Queue.pop
type comment = BeginEnd of string * string | Line of string
type heading = int * int * string (* line number, level, text *)
(* = Utils = *)
let list_of_queue q =
Queue.fold (fun l x -> x :: l) [] q
let queue_last q =
Queue.fold (fun x y -> y) (Queue.peek q) q
let queue_map f q =
let res = Queue.create () in
Queue.iter (fun x -> push (f x) res) q;
res
let with_file_as_rchan file f =
let rchan = open_in file in
try
let res = f rchan in
close_in rchan;
res
with e ->
close_in_noerr rchan;
raise e
let with_file_as_wchan file f =
let wchan = open_out file in
try
let res = f wchan in
close_out wchan;
res
with e ->
close_out_noerr wchan;
raise e
(* == Logging and failing == *)
let log fmt =
ksprintf (fun s -> eprintf "%s: %s\n%!" sourcetoc s) fmt
let warn level fmt =
ksprintf (fun s -> if !verbosity >= level then log "Warning: %s" s) fmt
exception FileCrash
(* Stop processing of current file (start working on next file) *)
let fcrash fmt =
ksprintf (fun s -> log "%s" s; raise FileCrash) fmt
(* Exit program *)
let gcrash fmt =
ksprintf (fun s -> log "%s" s; exit 1) fmt
(* = Comment patterns #data = *)
let extension file =
let open Str in
if string_match (regexp "^.+\\.\\([^.]+\\)$") file 0 then
matched_group 1 file
else ""
(* Returns a list of comment patterns associated with file extension `ext`.
The first pattern is used to generate the TOC. *)
let find_comments ext =
match ext with
| "c" | "cc" | "cpp" | "cxx" | "c++"
| "h" | "hh" | "hpp" | "hxx" | "h++"
| "java" | "js" ->
[BeginEnd ("/*", "*/"); Line "//"]
| "hs" ->
[Line "--"; BeginEnd ("{--", "--}")]
| "ml" | "mli" | "mll" ->
[BeginEnd ("(*", "*)")]
| "mly" ->
[BeginEnd ("(*", "*)"); BeginEnd ("/*", "*/")]
| "php" ->
[BeginEnd ("/*", "*/"); Line "//"; Line "#"]
| "py" ->
Line "#" :: (List.map (fun x -> BeginEnd (x, x))
["'"; "\""; "'''"; "\"\"\""] )
| "scm" ->
[Line ";;"; Line ";"; BeginEnd ("#|", "|#")]
| "sh" | "mk" | "pl" | "pm" ->
[Line "#"]
| _ ->
fcrash "Unknown file extension: .%s." ext
(* = Read source file = *)
(* Regexp `s` in comment pattern `c` *)
let comment_regexp c s =
let open Str in
match c with
| BeginEnd (b, e) ->
regexp (sprintf "^%s *%s *%s *$" (quote b) s (quote e))
| Line b ->
regexp (sprintf "^%s *%s *$" (quote b) s)
let comment_regexps comments s =
List.map (fun c -> comment_regexp c s) comments
(* Does `line` match against at least one of `regexps`? *)
let line_match regexps line =
List.exists (fun re -> Str.string_match re line 0) regexps
(* Takes a list of comment patterns and a line of text.
Returns Some _ if the line contains a heading, None otherwise. *)
let heading_of_line com_pats =
let open Str in
let heading_regexps =
comment_regexps com_pats
(match !heading_style with
| Atx -> "\\(#+\\) *\\([^ #]\\|[^ ].*[^ #]\\) *#*"
| Html -> "<h\\([1-9]\\)> *\\([^ ]\\|[^ ].*[^ ]\\) *</h\\1>"
| Wiki -> "\\(=+\\) *\\([^ =]\\|[^ ].*[^ =]\\) *=*")
in
fun line ->
if line_match heading_regexps line then
let l, txt = matched_group 1 line, matched_group 2 line in
let level =
if !heading_style = Html then
int_of_string l
else
String.length l
in
Some (level, txt)
else
None
let line_breaks_error ="\
This file contains DOS/Windows line breaks (\\r\\n). "^sourcetoc^" only supports
Unix line breaks for now. Example usage for a file with DOS line breaks:
dos2unix $FILE && "^sourcetoc^" $FILE && unix2dos $FILE
"
let toc_error_prefix = "Table of contents place ill-defined"
let toc_unicity q s =
match Queue.length q with
| 0 -> fcrash "%s: no %s found." toc_error_prefix s
| 1 -> pop q
| n ->
fcrash "%s: several %s found, at lines %s." toc_error_prefix s
(String.concat ", " (List.map string_of_int (list_of_queue q)))
let scan_file comments rchan =
let open Str in
let heading_of_line = heading_of_line comments in
let toc_begins_regexps = comment_regexps comments "<TOC *>"
and toc_ends_regexps = comment_regexps comments "</TOC *>" in
let headings = Queue.create () in
let toc_begins = Queue.create () and toc_ends = Queue.create () in
let all_lines = Queue.create () in
let line_number = ref 0 in
(* Scan file *)
(try
while true do
let line = input_line rchan in
incr line_number;
(* Check for a DOS line break on the first line *)
if !line_number = 1
&& let l = String.length line in l > 0 && line.[l-1] = '\r'
then
(eprintf "%s%!" line_breaks_error;
raise FileCrash);
(match heading_of_line line with
| Some (level, txt) ->
push (!line_number, level, txt) headings
| None ->
if line_match toc_begins_regexps line then
push !line_number toc_begins
else if line_match toc_ends_regexps line then
push !line_number toc_ends
);
push line all_lines
done
with End_of_file -> () );
headings, toc_begins, toc_ends, all_lines, !line_number
(* Returns:
lines before TOC, <TOC> line #, headings, </TOC> line #, lines after TOC *)
let read_source comments rchan
: string Queue.t * int * heading Queue.t * int * string Queue.t =
let headings, toc_begins, toc_ends, all_lines, line_count =
scan_file comments rchan in
(* Validate TOC place *)
let toc_begin = toc_unicity toc_begins "<TOC>"
and toc_end = toc_unicity toc_ends "</TOC>" in
if toc_begin >= toc_end then
fcrash "%s: it ends (line %d) before it starts (line %d)." toc_error_prefix
toc_end toc_begin;
(* Divide the lines into before and after the TOC *)
let before_toc = Queue.create () and after_toc = Queue.create () in
for i = 1 to toc_begin do
push (pop all_lines) before_toc
done;
for i = toc_begin + 1 to toc_end - 1 do
ignore (pop all_lines)
done;
for i = toc_end to line_count do
push (pop all_lines) after_toc
done;
assert (Queue.is_empty all_lines);
before_toc, toc_begin, headings, toc_end, after_toc
(* = Generate table of contents = *)
(* == Adjust line numbers and levels == *)
let adjust toc_begin toc_end special headings : heading Queue.t =
let lines_offset =
Queue.length headings + (if special then 1 else 0) - (toc_end-toc_begin-1) in
let min_level = ref max_int in
Queue.iter (fun (n, l, t) -> min_level := min l !min_level) headings;
queue_map
(fun (n, l, t) ->
(if n < toc_begin then n else n + lines_offset), l - !min_level, t )
headings
(* == Generate TOC lines == *)
let line_of_heading line_begin line_end mlns (line, level, text) =
let sides_length =
String.length line_begin
+ level * !indent_width
+ 1 + mlns
+ String.length line_end
in
if sides_length >= !line_length then
fcrash "Line length (%d chars) too short to represent heading at line %d."
!line_length line;
sprintf "%s%s%s%s%s"
line_begin
(String.make (level * !indent_width) ' ')
(let l = String.length text in
if sides_length + l <= !line_length then
text ^ " " ^ (String.make (!line_length - sides_length - l - 1) '.')
else
String.sub text 0 (!line_length - sides_length - (String.length !etc))
^ !etc )
(let s = string_of_int line in
(String.make (mlns - String.length s + 1) ' ') ^ s )
line_end
(* Returns the ready-to-print lines of the TOC *)
let toc_of_headings comment toc_begin toc_end headings : string Queue.t =
if Queue.is_empty headings then
Queue.create ()
else
let cb, ce =
match comment with
| BeginEnd (b, e) -> b, e
| Line b -> b, ""
in
let special =
String.length cb = 2 && String.length ce = 2 && cb.[1] = ce.[0] in
(* In the "special" case we will generate something like
* the present comment.
*)
let line_begin, line_end =
if special then
" " ^ (String.sub cb 1 1) ^ " ", ""
else
cb ^ " ", if ce <> "" then " " ^ ce else ""
in
let adjusted_headings = adjust toc_begin toc_end special headings in
let max_line_number_size =
match queue_last adjusted_headings
with n, l, t -> String.length (string_of_int n)
in
let toc =
queue_map (line_of_heading line_begin line_end max_line_number_size)
adjusted_headings
in
if special then
((Queue.peek toc).[0] <- cb.[0];
Queue.push (" " ^ ce) toc );
toc
(* = I/O = *)
(* Read; compute; write *)
let process_channels action comments rchan =
if action = TocOnly then
let headings, _, _, _, line_count = scan_file comments rchan in
let toc =
toc_of_headings (List.hd comments) (line_count + 1) (line_count + 2)
headings
in
fun wchan ->
Queue.iter (fprintf wchan "%s\n") toc
else
let before_toc, toc_begin, headings, toc_end, after_toc =
read_source comments rchan
in
let toc =
if action = Clear then
Queue.create ()
else
toc_of_headings (List.hd comments) toc_begin toc_end headings
in
fun wchan ->
List.iter
(Queue.iter (fprintf wchan "%s\n"))
[before_toc; toc; after_toc]
(* Process a file *)
let process_file action comment_style source =
let open Unix in
(* Get comment patterns *)
let comment_style_1 =
if comment_style <> "" then comment_style else extension source
in
let comments = find_comments comment_style_1 in
if (stat source).st_kind <> S_REG then
fcrash "'%s' is not a regular file." source;
(* Read source file; compute table of contents *)
let writing_function =
with_file_as_rchan source (process_channels action comments)
in
(* Output result *)
match !output with
| File f ->
with_file_as_wchan f writing_function
| InPlace ->
if action = TocOnly then
warn 1 "replacing file contents with only a table of contents.";
if !backup_suffix <> "" then
(let backup = source ^ !backup_suffix in
if Sys.file_exists backup && (stat backup).st_kind <> S_REG then
fcrash "Backup file '%s' already exists but is not a regular file." backup;
(* Create backup file *)
let cp = "cp -fp" in
let com =
sprintf "%s %s %s" cp (Filename.quote source) (Filename.quote backup)
in
let ret = Sys.command com in
if ret <> 0 then
gcrash "Backup ('%s') failed with code %d! Aborting." cp ret);
(* Overwrite source file *)
let wchan = open_out source in
(try
writing_function wchan;
close_out wchan
with e ->
log "Exception encountered while writing modified file!";
close_out_noerr wchan;
raise e)
| Stdout ->
writing_function Pervasives.stdout
(* = CLI = *)
let heading_style_of_string s =
match String.lowercase s with
| "atx" | "markdown" -> Atx
| "html" -> Html
| "wiki" -> Wiki
| _ -> invalid_arg "heading_style_of_string"
let comment_style = ref ""
let files = Queue.create ()
let speclist =
let action_clear = Clear in (* versus Arg.Clear, for OCaml < 4.01 *)
let open Arg in
align [
("--clear",
Unit (fun () -> action := action_clear),
" Clear table of contents.");
("--toc-only",
Unit (fun () -> action := TocOnly; output := Stdout),
" Print table of contents only. Default output = stdout.\n");
("--in-place",
Unit (fun () -> output := InPlace),
" Overwrite source file(s). This is the default.");
("--output",
String (fun s -> output := File s),
"FILE Output to FILE.");
("--stdout",
Unit (fun () -> output := Stdout),
" Output to stdout.\n");
("--backup-suffix",
Set_string backup_suffix,
"STRING (in place) Suffix for backup files, default = '~'.");
("--no-backup",
Unit (fun () -> backup_suffix := ""),
" (in place) No backup.");
("--comment-style",
Set_string comment_style,
"EXT Use comment style associated to file extension EXT.");
("--etc",
Set_string etc,
"STRING End of line for long headings, default = ' [...]'.");
("--heading-style",
String (fun s -> heading_style := heading_style_of_string s),
"STYLE Heading style. wiki, atx (markdown), or html. Default = wiki.");
("--indent-width",
Set_int indent_width,
"INT Indent width in spaces, default = 2.");
("--line-length",
Set_int line_length,
"INT Line length in characters, default = 80.");
("--quiet",
Unit (fun () -> verbosity := 0),
" No warnings.\n");
]
let anon_fun f =
push f files
let usage_msg =
sprintf "Usage: %s (OPTION|FILE)*
See README.md or https://github.com/nahoj/sourcetoc.
" sourcetoc
let main () =
Arg.parse speclist anon_fun usage_msg;
if Queue.is_empty files then
(if !comment_style = "" then
gcrash "Please specify a comment style.";
try process_channels !action (find_comments !comment_style) stdin stdout
with FileCrash -> exit 1)
else
Queue.iter
(fun f ->
try process_file !action !comment_style f
with FileCrash -> ())
files
let _ =
Unix.handle_unix_error main ()