-
Notifications
You must be signed in to change notification settings - Fork 2
/
publish_all.pl
executable file
·477 lines (412 loc) · 13.8 KB
/
publish_all.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#!/usr/bin/env perl
#
# Usage:
# publish_all.pl
# --config=<location of config file>
# --type=live|test
# --force
# --forceforce
# --excludedir=<dir1 to ignore>,...,<dirN to ignore> - ie comma-separated list
# --xmlonly
# --yes
# --localxslt
# --verbose
#
# --ignore-missing
# Ignore missing links (i.e. let the page publish with a warning
# rather than error out). The contents of the link may contain
# place-holder text.
#
# Aim:
# This script provides a way to publish all the files in the current
# directory and in any subdirectories. It essentially just finds all
# the files and then runs the publish script on them. It finds all the
# files it thinks are valid - both XML and non-XML - and then asks you
# whether it should process them all.
#
# If it finds a directory that contains the file DO_NOT_PUBLISH then
# it automatically excludes this directory (and any sub-directories).
#
# Options:
# --config - used to find perl executable and passed through to publish.pl
# --type, --force and --forceforce are passed through to the publish script
# but the config variable is also used to get the perl/os value
# --localxslt is passed through to the publish script
# --yes means that the program will not ask you whether to process
# all the files, it will just go ahead and do it
# useful for background jobs
# --xmlonly means that only files matching *xml are published
# --excludedir is a way of specifying a set of directories that should
# be excluded from the search
# --verbose - display extra output for debugging
#
# Notes:
# - files that are checked out for editing are skipped;
# not 100% convinced got it right for RCS files
# - the thread index is published last so that it can pick up all the
# details of the threads; it probably also needs publishing before
# the threads too so that the threads can find out what groups
# they are in
# - the script currently can not be run in all directories; this
# restriction should be removed
# - there's a reasonably complicated set of rules for working out
# which files are to be skipped - it's a set of heuristic rules
# (ie we add another case to the list when we find a file to ignore)
# rather than anything clever.
#
use strict;
use warnings;
$|++;
use Getopt::Long;
use Cwd;
use IO::Pipe;
use FindBin;
use lib $FindBin::Bin;
use CIAODOC qw (:util :cfg);
# Do I need a 'use vars' line here for configfile?
use vars qw( $configfile );
$configfile = "$FindBin::Bin/config.dat";
# Directory trees within which this script may be run: the current
# directory has to be one of these, or lie somewhere below one of them.
#
# Entries can not end in / because the check of the current directory
# against this list (and the later removal of the prefix from it)
# assumes there is no trailing slash.
my @prefixes =
  (
   "/data/da/Docs/irisweb/iris",
   "/data/da/Docs/cscweb/csc1",
   "/data/da/Docs/cscweb/csc2",
   "/data/da/Docs/cscweb/csc2.1",
   "/data/da/Docs/chartweb/internal",
   "/data/da/Docs/obsvisweb/website",
   "/data/da/Docs/caldbweb/caldb4",
   "/data/da/Docs/ciaoweb/dev",
   "/data/da/Docs/ciaoweb/ciao43",
   "/data/da/Docs/sherpaweb/ciao43",
   "/data/da/Docs/ciaoweb/ciao44",
   "/data/da/Docs/sherpaweb/ciao44",
   "/data/da/Docs/ciaoweb/ciao45",
   "/data/da/Docs/sherpaweb/ciao45",
   "/data/da/Docs/ciaoweb/ciao46",
   "/data/da/Docs/sherpaweb/ciao46",
   "/data/da/Docs/ciaoweb/ciao47",
   "/data/da/Docs/sherpaweb/ciao47",
   "/data/da/Docs/ciaoweb/ciao48",
   "/data/da/Docs/sherpaweb/ciao48",
   "/data/da/Docs/ciaoweb/ciao49",
   "/data/da/Docs/sherpaweb/ciao49",
   "/data/da/Docs/ciaoweb/ciao410",
   "/data/da/Docs/sherpaweb/ciao410",
   "/data/da/Docs/ciaoweb/ciao411",
   "/data/da/Docs/sherpaweb/ciao411",
   "/data/da/Docs/ciaoweb/ciao412",
   "/data/da/Docs/sherpaweb/ciao412",
   "/data/da/Docs/ciaoweb/ciao413",
   "/data/da/Docs/sherpaweb/ciao413",
   "/data/da/Docs/ciaoweb/ciao414",
   "/data/da/Docs/sherpaweb/ciao414",
   "/data/da/Docs/ciaoweb/ciao415",
   "/data/da/Docs/sherpaweb/ciao415",
   "/data/da/Docs/ciaoweb/ciao416",
   "/data/da/Docs/sherpaweb/ciao416",
   "/data/da/Docs/ciaoweb/ciao417",
   "/data/da/Docs/sherpaweb/ciao417",
   "/data/da/Docs/icxcweb/sds",
   "/Users/doug/doc/ahelp", # Doug's testing (no trailing /, see above)
  );
# The values that --type is allowed to take.
my %_types;
@_types{ qw( test live trial ) } = (1) x 3;

# Shown (via die) whenever the command line can not be understood.
my $usage = "Usage: $0 --config=filename --type=live|test --force --forceforce --xmlonly --localxslt --excludedir=one,two,.. --yes --verbose --ignore-missing\n";

## Code
#
# Defaults for the command-line options; each is over-written by
# GetOptions when the matching option is given.
#
my $type              = "test";
my $excludedirs       = "";
my $force             = 0;
my $forceforce        = 0;
my $xmlonly           = 0;
my $localxslt         = 0;
my $yes               = 0;
my $verbose           = 0;
my $ignoremissinglink = 0;

# Option specification => variable that receives the value. The "!"
# options are boolean flags (and so also accept the --no<name> form).
my %optspec =
  (
   'config=s'        => \$configfile,
   'type=s'          => \$type,
   'force!'          => \$force,
   'forceforce!'     => \$forceforce,
   'excludedir=s'    => \$excludedirs,
   'xmlonly!'        => \$xmlonly,
   'yes!'            => \$yes,
   'localxslt!'      => \$localxslt,
   'ignore-missing!' => \$ignoremissinglink,
   'verbose!'        => \$verbose,
  );

GetOptions( %optspec )
  or die $usage;
# A "sentinel" file in the current directory marks it as a
# not-to-be-published directory, in which case we stop straight away.
#
my $sentinel = "DO_NOT_PUBLISH";
if ( -e $sentinel ) {
    die "The file $sentinel is found in this directory. Publishing is forbidden!\n";
}

# --forceforce implies --force
if ( $forceforce ) {
    $force = 1;
}

# Look up the perl executable given in the configuration file for this OS.
#
my $ostype  = get_ostype();
my $config  = parse_config( $configfile );
my $perlexe = get_config_main_type( $config, "perl", $ostype );
my @pexe    = split( / /, $perlexe );

# The configured value is then deliberately replaced by whichever "perl"
# is found on the path, since the configured version looked like it could
# be causing problems (the config file could be changed instead, but for
# now this approach is used).
print "\nNOTE: over-riding @pexe\n";
@pexe = ("perl");
print " with @pexe\n\n";

# --type has to be one of the known values
unless ( exists $_types{$type} ) {
    die "Error: unknown type ($type)\n";
}

# no arguments are allowed after the options
unless ( @ARGV == 0 ) {
    die $usage;
}

# The publish.pl script is expected to live alongside this script.
#
my $script = "$FindBin::Bin/publish.pl";
unless ( -e $script ) {
    die "Error: unable to find executable publish.pl - expected it to be at\n\t$script\n";
}
# Work out which of the known directory trees (@prefixes) we are in.
#
# Each prefix is compared literally (\Q..\E, so that the "." in
# "csc2.1" is not treated as a regexp wild card) and has to be followed
# by either a "/" or the end of the string, so that a prefix ending in
# "ciao43" does not match a directory called "ciao430". A trailing "/"
# on a prefix is ignored. If several prefixes match then the longest
# (ie most specific) one is used.
#
my $cwd = cwd();
my $prefix;
foreach my $p ( @prefixes ) {
    ( my $candidate = $p ) =~ s{/+\z}{};
    next unless $cwd =~ m{\A\Q$candidate\E(?:/|\z)};
    $prefix = $candidate
      if !defined $prefix or length($candidate) > length($prefix);
}
die "Error: must be run within one of the following dir trees:\n " .
  join (" ", @prefixes ) . "\n"
  unless defined $prefix;

# $tmp is the current directory relative to the prefix: it is either
# empty or begins with a "/". @dirs holds its components.
#
my $tmp = substr( $cwd, length($prefix) );
my @dirs;
@dirs = split "/", substr($tmp,1) # remove the leading /
  if $tmp ne "";
# sort out exclude dirs
#
# --excludedir is a comma-separated list of directory names; turn it
# into a hash so that the file loop can check a name with exists.
# Empty entries (eg from "a,,b" or a trailing comma) are dropped. The
# split works whether or not the value contains a comma, so no dummy
# entry needs to be added (which would also have excluded any real
# directory that happened to have the dummy's name).
#
my %excludedirs;
if ( $excludedirs ne "" ) {
    %excludedirs =
      map  { ( $_ => 1 ) }
      grep { $_ ne "" }
      split( /,/, $excludedirs );

    print "Excluding directories:\n";
    foreach my $dname ( sort keys %excludedirs ) {
        print " $dname\n";
    }
    print "\n";
}
# Find all the files in and below the current directory by reading
# the output of the find command:
#
#   find . \( -name RCS -o -name SCCS \) -prune -o -print
#
# ie directories called RCS or SCCS are pruned (neither they nor their
# contents are listed) and everything else is printed. This follows the
# "skipping SCCS directories" example from 'man find'.
#
# The remaining rules - such as excluding the threads/include/
# directory and the non thread.xml files in the threads/foo/
# directories - are applied when the output is read.
#
# It might be easier to use perl's own file-finding module, but this
# (ugly as it is) seems to work.
#
# The command is given as a list, so the parentheses are passed to
# find as separate arguments.
#
my @find_cmd =
  (
   "find", ".",
   "(", "-name", "RCS", "-o", "-name", "SCCS", ")", "-prune",
   "-o", "-print",
  );
my $pipe = IO::Pipe->new();
$pipe->reader( @find_cmd );

# What has been found, filled in while reading the find output:
#   %files               - directory => array ref of file names to publish
#   %images              - as %files, but for files in a directory called imgs
#   %do_not_publish_dirs - directories found to contain the sentinel file
#   $threadindex         - directory of the thread index page (threads/index.xml)
#
my %files;
my %images;
my %do_not_publish_dirs;
my $threadindex;

# Counters for the summary: rejected files, files rejected because of
# --excludedir, directories seen, and files accepted.
my ( $nrej, $nuserrej, $ndir, $nfil ) = ( 0, 0, 0, 0 );
# Read each name reported by find and decide whether the file is to be
# published. Accepted files are stored in %images, %files or
# $threadindex (and counted in $nfil); everything else is counted in
# $ndir, $nrej or $nuserrej and skipped.

# Number of "/"-separated components in the prefix (the leading "/"
# gives an empty first component, which is included in the count).
# This does not change from file to file so is worked out once.
my @prefix_parts = split "/", $prefix;
my $nprefix      = scalar @prefix_parts;

while ( my $line = <$pipe> ) {

    # helps checks below if we include the full path
    # (eg so that can find out if in the threads directory
    # if run in it/sub-dir of it); find was run on "." so the
    # leading character of each line is replaced by the current directory
    #
    my $name = $cwd . substr($line,1);
    chomp $name;

    # is it a directory? (find is not asked to filter these out,
    # so they are just counted here)
    $ndir++, next if -d $name;

    # do we reject?
    #   $fname - the file name
    #   $dname - the name of the directory containing it
    #   $path  - the full path to that directory
    #
    my @dirs = split "/", $name;
    my $fname = pop @dirs;
    my $dname = $dirs[-1];
    my $path = join "/", @dirs;

    # Reject if this directory contains the sentinel file.
    #
    $nrej++, next if exists $do_not_publish_dirs{$path};
    if (-e "${path}/${sentinel}") {
        $nrej++;
        $do_not_publish_dirs{$path} = 1;
        next;
    }

    # Check we are not a child of a do-not-publish directory: look at
    # each directory from the prefix directory down to this one,
    # since there is no guarantee that the parent directories have
    # already been processed. There are more elegant ways of doing this.
    #
    my $end = scalar(@dirs);
    my $fail = 0;
    for (my $i = $nprefix - 1; $i < $end; $i++) {
        my $checkpath = join "/", @dirs[0 .. $i];
        if (exists $do_not_publish_dirs{$checkpath}) {
            dbg( "dbg - skipping $name as $checkpath in excluded directory\n" );
            $fail = 1;
            last;
        }

        if ( -e $checkpath . "/" . $sentinel ) {
            $do_not_publish_dirs{$checkpath} = 1;
            $fail = 1;
            last;
        }
    }
    if ($fail == 1) {
        $nrej++;
        next;
    }

    # user reject; unfortunately this does not work to exclude
    # sub-directories of the excluded directory (only the name of the
    # directory holding the file is checked).
    #
    $nuserrej++, next if exists $excludedirs{$dname};

    # reject "backup" files (a leading #, an empty name, or a trailing ~)
    $nrej++, next if $fname =~ /^#/ or $fname =~ /^$/ or $fname =~ /~$/;

    # reject "._" files created by mac osx
    $nrej++, next if $fname =~ /^\._/;

    # check in the threads dir
    if ( $name =~ m{/threads/} ) {
        $nrej++, next if $dname eq "example" or $dname eq "include";

        # want to keep all the .gz contents of the data directory
        # (exclude everything else)
        # and want the index page
        $nrej++, next unless
          ($fname eq "index.xml" and $dname eq "threads")
          or
          ($dname eq "data" and $fname =~ /\.gz$/)
          or
          $fname eq "thread.xml"
          # need to publish redirect files during s-lang removal
          or
          $fname eq "index.sl.xml"
          or
          $fname eq "index.py.xml";
    }

    # for the moment we reject the README in the workshop talk dirs
    #
    $nrej++, next if $fname eq "README" and $dname eq "talks" and $dirs[-3] eq "workshop";

    # we reject a set of files from xxx_html_manual/ directories
    #
    # TMP dirs are empty so we don't really need to worry about them but we do
    $nrej++, next if $dname eq "TMP" and $dirs[-2] =~ /_html_manual$/;

    # presumably these are tmp files created during the conversion
    $nrej++, next if $dname =~ /^l2h\d+$/ and $dirs[-2] =~ /_html_manual$/;

    if ( $dname =~ /_html_manual$/ ) {
        $nrej++, next if $fname =~ /^IMG_PARAMS\./;
        $nrej++, next if $fname =~ /^(images|internals|labels)\.pl$/;
        $nrej++, next if $fname =~ /^images\.(aux|log|tex)$/;
    }

    # we reject the download/doc/dmodel directory/contents
    $nrej++, next if $path =~ /download\/doc\/dmodel($|\/)/;

    # now check - is it checked out for editing?
    # Not 100% convinced about the RCS check
    #
    if ( -e "$path/SCCS/p.$fname" ) {
        print "skipping $dname/$fname as checked out [SCCS]\n";
        $nrej++;
        next;
    }

    my $rcsfile = "$path/RCS/$fname,v";
    if ( -e $rcsfile ) {
        # rlog is run directly (list form of a piped open) rather than
        # via the shell, so that spaces or shell meta-characters in the
        # path can not break, or change, the command. Any output from
        # it is taken to mean that the file is checked out.
        #
        my $rlogerr = "Error: problem running 'rlog -L -R -l $rcsfile'\n";
        open( my $rlog, "-|", "rlog", "-L", "-R", "-l", $rcsfile )
          or die $rlogerr;
        my $dummy = join "", <$rlog>;

        # closing the pipe waits for rlog and sets $? to its status
        close $rlog;
        die $rlogerr unless $? == 0;

        if ( $dummy ne "" ) {
            print "skipping $dname/$fname as checked out [RCS]\n";
            $nrej++;
            next;
        }
    }

    # skip if not an XML file?
    $nrej++, next if $xmlonly && $fname !~ /\.xml$/;

    # hey, we must want this
    # - add to the files in this directory
    # - note CIAO thread index is a special case since
    #   we want to process that AFTER all the threads
    #   have been updated, so we just do it last
    #
    if ( $dname eq "imgs" ) {
        $images{$path} = [] unless exists $images{$path};
        push @{ $images{$path} }, $fname;

    } elsif ( $fname eq "index.xml" and $dname eq "threads" ) {
        die "error: multiple index.xml files in dir called threads/ - what's going on\n"
          if defined $threadindex;
        $threadindex = $path;

    } else {
        $files{$path} = [] unless exists $files{$path};
        push @{ $files{$path} }, $fname;
    }

    $nfil++;
}
# Finished with the find command.
$pipe->close;

# Uncomment to list exactly what was found:
#use Data::Dumper;
#print Dumper(\%images), "\n";
#print Dumper(\%files), "\n";

# Summary of what was found and what was rejected.
print
  "Num of files = $nfil\n",
  "Num of dirs = $ndir\n",
  "Num of rej. files = $nrej\n",
  "Num of user rej. files = $nuserrej\n";
# now loop through everything and publish it
#
# Unless --yes was given, the user has to confirm by entering "y";
# any other answer - including no answer at all because the input has
# ended - stops the script before anything is published.
#
unless ( $yes ) {
    print "\nAre you sure you want to begin this?\n";
    print "Answer \"y\" for the affirmative.\n";
    print "[THIS IS TO THE " . uc($type) . " SITE]\n";

    # $answer is undefined at end of input; the chomp means that a
    # "y" is accepted with or without the trailing newline
    my $answer = <STDIN>;
    chomp $answer if defined $answer;
    die "Publishing cancelled.\n"
      unless defined $answer and $answer eq "y";
}
# Publish everything but the thread indexes:
#
# The options passed through to publish.pl. The "off" form of each
# flag is sent explicitly, apart from --ignore-missing which is just
# left out (its variable is then the empty string).
#
my $cfg_opt = "--config=$configfile";
my $type_opt = "--type=$type";
my $force_opt = $force ? "--force" : "--noforce";
my $forceforce_opt = $forceforce ? "--forceforce" : "--noforceforce";
my $localxslt_opt = $localxslt ? "--localxslt" : "--nolocalxslt";
my $verbose_opt = $verbose ? "--verbose" : "--noverbose";
my $ignore_opt = $ignoremissinglink ? "--ignore-missing" : "";

# One entry for each directory that could not be published; these are
# reported once everything has been tried.
my @errors;

# The images are published before the other files. Within each set
# the directories are processed in sorted order so that runs are
# repeatable.
foreach my $href ( \%images, \%files ) {
    foreach my $dir ( sort keys %{$href} ) {
        my @files = @{ $$href{$dir} };
        print "Publishing " . (1+$#files) . " files in $dir\n";

        # publish.pl is run from within the directory, so there is no
        # point running it if we can not get there
        unless ( chdir $dir ) {
            push @errors, "dir=$dir with files=" . join(" ",@files) .
              " (unable to change to directory: $!)";
            next;
        }

        # drop unset (empty) options so that publish.pl is not given
        # an empty string as an argument
        my @opts = grep { $_ ne "" }
          ( $cfg_opt, $type_opt, $force_opt, $forceforce_opt,
            $localxslt_opt, $verbose_opt, $ignore_opt );

        # and do the actual publishing; the list form of system means
        # that no shell is involved
        system( @pexe, $script, @opts, @files ) == 0
          or push @errors, "dir=$dir with files=" . join(" ",@files);
    }
}
# we can now publish the thread index page
#
# This is done after everything else so that the index can pick up
# the details of the threads. A failure here is fatal but, before
# stopping, any errors already collected are reported (otherwise
# they would be lost).
#
if ( defined $threadindex ) {
    my @files = qw( index.xml );
    my $dir = $threadindex;
    print "Publishing " . (1+$#files) . " files in $dir\n";

    # report the errors collected so far and then stop with the given message
    my $abort = sub {
        my $msg = shift;
        chdir $cwd;
        if ( @errors ) {
            print "\nERRORS when publishing:\n";
            foreach my $emsg ( @errors ) { print "** $emsg\n"; }
            print "\n";
        }
        die $msg;
    };

    chdir $dir
      or $abort->( "\nerror in\n dir=$dir\n unable to change to directory: $!\n\n" );

    # drop unset (empty) options so that publish.pl is not given
    # an empty string as an argument
    my @opts = grep { $_ ne "" }
      ( $cfg_opt, $type_opt, $force_opt, $forceforce_opt,
        $localxslt_opt, $verbose_opt, $ignore_opt );

    # and do the actual publishing
    system( @pexe, $script, @opts, @files ) == 0
      or $abort->( "\nerror in\n dir=$dir\n with files=" . join(" ",@files) . "\n\n" );
}
# Go back to where we started and list the directories, if any, for
# which publishing failed.
chdir $cwd;
if ( @errors ) {
    print "\nERRORS when publishing:\n";
    foreach my $emsg ( @errors ) { print "** $emsg\n"; }
    print "\n";
}

## End
#
# The exit status is non-zero if anything failed to publish, so that
# a calling script or background job (eg when run with --yes) can tell.
exit( @errors ? 1 : 0 );