forked from OpenGreekAndLatin/First1KGreek
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cselstats.pl
executable file
·103 lines (76 loc) · 1.84 KB
/
cselstats.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/perl
print "file\tintrowords\tintrochars\tappcritwords\tappcritchars\tindexwords\tindexchars\ttextwords\ttextchars\n";
foreach $tmps (@ARGV) {
dostats($tmps);
}
print "totals\t$totintrowords\t$totintrochars\t$totappcritwords\t$totappcritchars\t$totindexwords\t$totindexchars\t$tottextwords\t$tottextchars\n";
sub dostats {
$curfile = @_[0];
$introwords = $introchars = $appcritwords = $appcritchars = $indexwords = $indexchars = $textwords = $textchars = 0;
open INF, "< $curfile";
while(<INF>) {
s#^\s+##g;
s#\s*$##g;
if(/<pb\s+n="([^"]+)/ ) {
$curpage = $1;
$inappcrit = 0;
next;
}
if(/<pb\s+xml:id="v\.[0-9]+\.p\.([^"]+)/ ) {
$curpage = $1;
$inappcrit = 0;
next;
}
if( $curfile =~ /PL/ and /<\/note>/ ) {
$inappcrit = 0;
}
if( /subtype="([^"]+)/ ) {
$cursub = $1;
}
if($cursub =~ /index/i ) {
$indexwords += s#(\s+)#$1#g;
$indexchars += length;
next;
}
if( $curpage =~ /[IXVLC]+/i ) {
$introwords += s#(\s+)#$1#g;
$introchars += length;
next;
}
if($inappcrit ) {
@w = split;
if( $#w > 0) {
$appcritwords += s#(\s+)#$1#g;
$appcritchars += length;
}
next;
}
if( /(<note type="foot.+)/ ) {
my $curnote = $1;
@w = split(/\s+/, $curnote);
if( $#w > 0) {
$appcritchars += length $curnote;
$appcritwords += $#w;
}
$inappcrit ++;
@w = split;
if( $#w > 0) {
$textwords += $#w;
$textchars += length ($_);
}
next;
}
$textwords += s#(\s+)#$1#g;
$textchars += length;
}
close INF;
$totintrowords += $introwords;
$totintrochars += $introchars;
$totappcritwords += $appcritwords;
$totappcritchars += $appcritchars;
$totindexwords += $indexwords;
$totindexchars += $indexchars;
$tottextwords += $textwords;
$tottextchars += $textchars;
print "$curfile\t$introwords\t$introchars\t$appcritwords\t$appcritchars\t$indexwords\t$indexchars\t$textwords\t$textchars\n";
}