-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbibtexformat
executable file
·3560 lines (2865 loc) · 132 KB
/
bibtexformat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/perl -w
####################################################################################################
#
# Program: bibtexformat
#
# Function: Adds the BibTeX labels to a library exported by Endnote or Papers and performs
# several format changes to make it look nicer. It also replaces exported ASCII
# garbage with the correct BibTeX symbols (most German, Danish, French and Spanish
# special characters and all uppercase and lowercase Greek letters). Altogether,
# more than 120 symbols are replaced. Citation labels can be generated following
# certain schemes and the journal titles *reliably* abbreviated. Reliably in this
# case means that the script tells the user if no abbreviation has been found for
# a journal, so the user can correct it. It is also possible to switch between
# different abbreviation styles.
#
# Author: Benjamin Bulheller
#
# Website: www.bulheller.com
#
# Mail address: webmaster.-at-.bulheller.com
#
# Version: $Revision: 4900 $, $Date: 2011-03-09 23:37:00 +0100 (Wed, 09 Mar 2011) $
#
# Acknowledgments: Projects like bibtexformat depend on suggestions and contributions from their users.
# Many thanks for their feedback go to:
# Thomas Braun
# F. Fabian Rosales-Ortega
# Tiziano Passerini
# Douglas McKee
# Sean Anderson
# Christophe Dessimoz
# Bjoern Nadrovski
#
# Licence: This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#
####################################################################################################
use strict; # always use this!!!
use FindBin qw/$Bin/; # set $Bin to the script's directory
use lib $Bin; # add the script's directory to the library path
use lib "$ENV{HOME}/bin/perllib"; # add ~/bin/perllib to the library path
use Data::Dumper; # for printing arrays and hashes
use GetParameters; # to handle command line parameters
use ParseBibTeX; # returns a BibTeX library fully parsed
use DebugPrint; # handy for printing variables during debgging
use File::Copy; # for a backup copy of the original library
use File::Spec; # for concatenating paths OS independently
$Data::Dumper::Sortkeys = 1; # sort the hash keys when using print Dumper %Hash
# All configuration variables are declared here together with their default values. The defaults
# may be replaced later on by a configuration file.
# prefix used for the error messages of this script
our $ERROR = "\nERROR (bibtexformat)";
# obsolete names, kept for backward-compatibility
my ($PapURL, $JabURL);
# lists without a default value in this script
my (@TypeConversions, @AOPFix, @IgnoreItems);
####################################################################################################
# Configuration Variables
####################################################################################################
# the path to the script, its libraries, configuration file, etc.
my $ScriptLocation = "$ENV{HOME}/bin/bibtexformat";
# all configuration variables should be changed in the file defined here
my $Configuration = "$ScriptLocation/configuration.cfg";
# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
# Nothing should need to be changed below this line, otherwise let the author know
# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
# Open points noted by the author:
# -conf => $ScriptLocation?
# -protitle into field content instead of ParseTitle
# Manual:
# - Last brace always on single line
# - section on Papers-related problems (quick'n'dirty for the lazy)
# - url field blanks => %20, use book instead
####################################################################################################
# Initialize the configuration variables
####################################################################################################
# DO NOT CHANGE THE VARIABLES HERE, USE THE CONFIGURATION FILES INSTEAD!
# Every variable needed in the program receives its default value here (the author's favorite ones
# of course). These settings may be overridden later on by the configuration file defined above in
# $Configuration or even by a file provided via the -conf option.

# number of lines between two BibTeX items
my $NewLines = 2;
# number of blanks before a field descriptor
my $LeadingBlanks = 3;
# position of the equal sign, counted from the first letter of the field descriptor
my $EqualPosition = 15;
# separator for the cite keys (e.g. Author:Year:Page)
my $LabelSeparator = ":";

# Papers and JabRef use different local URL formats to link to PDFs. These two variables define the
# conversion between the file paths: to convert from Papers to JabRef the RegEx
# s/$LocalURL/$FileURL/ will be executed.
my $LocalURL = "file://localhost/Volumes/Home/username/";
my $FileURL = "/Users/username/";

# back up the library file in case the input file is to be overwritten (without -o outfile)
my $BackupLibrary = 1;

# fields to be ignored if the -s switch is given
my %IgnoreFields = (
   general => [
      qw/Note Abstract Keywords month uri owner timestamp label date-modified
         adsnote adsurl priority posted-at sn tc ut citeulike-article-id isbn
         doi domains location read rating annote pmid date-added Pii language
         affiliation local-url url/
   ]
);

# default lists with journal abbreviations, more than one can be added, separated by commas
my @AbbFiles = (
   "$ScriptLocation/chemical_2007.txt",
   "$ScriptLocation/abbreviations.cfg"
);

# A user-defined list of regular expressions which are executed on each line of the library.
# This serves user-specific needs, e.g. nPi* => $n \rightarrow \pi^*$
my $Substitutions = "$ScriptLocation/substitutions.cfg";

# A list of author names which have been exported by Papers in the wrong way and have to be
# corrected, e.g.
# J C Johnson Jr => Johnson, Jr, J C
# Ludwig van Beethoven => van Beethoven, Ludwig
my $AuthorNames = "$ScriptLocation/authors.cfg";

# show the absolute sum of occurrences of journals and authors when using -autlist and -joulist
my $ShowSum = 0;

# Absolute or relative path to the directory containing files linked to in the file and/or
# local-url fields. -filecheck adds this value before the path given in these fields.
my $FileDir = "";
####################################################################################################
# Parameter Configuration
####################################################################################################
# Definition of all command line parameters and the kind of value each one expects. The hash is
# handed to GetParameters together with the default values.
my $Parameters = {
   # --- output ---
   'o'          => 'string',      # output file
   'new'        => 'switch',      # shortcut for -o .new
   'log'        => 'switch',      # write to .log instead of STDOUT
   'conf'       => 'string',      # use a different configuration file

   # --- layout of the library ---
   's'          => 'switch',      # short library, remove fields defined in %IgnoreFields
   'format'     => 'switch',      # reformat the library
   'nl'         => 'integer',     # new lines in between entries
   'lb'         => 'integer',     # leading blanks (i.e. indentation for reformatting)
   'ep'         => 'integer',     # position of the equal sign for reformatting
   'combine'    => 'switch',      # combine multi-line entries in one line
   'wrap'       => 'integer',     # wrap lines in the library at a certain column
   'lc'         => 'switch',      # switch case of types and field keywords to lowercase
   'uc'         => 'switch',      # switch case of types and field keywords to uppercase
   'sort'       => 'switch',      # sort the library according to the cite keys
   'quotes'     => 'switch',      # convert field delimiters to "quotes"
   'braces'     => 'switch',      # convert field delimiters to {braces}

   # --- journal abbreviations ---
   'abb'        => 'list',        # define an additional abbreviation file
   'abb1'       => 'switch',      # use abbreviation 1
   'abb2'       => 'switch',      # use abbreviation 2
   'full'       => 'switch',      # use full journal name

   # --- cite keys ---
   'labels'     => 'switch',      # create the cite keys
   'sep'        => 'string',      # the separator used in cite keys
   'pn'         => 'switch',      # use the page number in the cite keys
   'fy'         => 'switch',      # use the full year in cite keys instead of just two digits
   'f'          => 'switch',      # force the cite key generation, overwrite preexisting ones
   'fpapers'    => 'switch',      # force to overwrite Papers default keys only

   # --- corrections and checks ---
   'rangefix'   => 'switch',      # expand page ranges
   'typereset'  => 'switch',      # change all entry types to @article
   'typefix'    => 'switch',      # substitutions of reference types
   'typecheck'  => 'switch',      # check the required and optional fields of the reference types
   'autfix'     => 'switch',      # substitutions of the author names
   'autcheck'   => 'switch',      # check the author name format and display warnings
   'protitle'   => 'switch',      # protect the title using an additional brace level
   'autmax'     => 'integer',     # limit the number of authors to a certain number
   'aopfix'     => 'switch',      # fix ahead-of-print publications without page numbers
   'defsubst'   => 'switch',      # perform default substitutions of symbols and special characters
   'subst'      => 'switch',      # perform user-defined substitutions
   'fieldregex' => 'stringlist',  # use regular expressions on particular fields

   # --- lists ---
   'autlist'    => 'switch',      # display the list of authors
   'typelist'   => 'switch',      # display the list of reference types
   'joulist'    => 'switch',      # display the list of journals

   # --- links to external files ---
   'local2file' => 'switch',      # convert file links to 'file' entries
   'file2local' => 'switch',      # convert file links to 'local-url' entries
   'filecheck'  => 'switch',      # check that all referenced files exist
   'filedir'    => 'string',      # directory where the files are located
   'fileregex'  => 'stringlist',  # use regular expressions on entries in file fields

   # --- old parameters kept for backwards compatibility ---
   'jabpdf'     => 'switch',      # renamed to local2file
   'pappdf'     => 'switch',      # renamed to file2local
};
# Default values of the options that carry a value: new lines between items (nl), leading blanks
# (lb), position of the equal sign (ep) and the cite key separator (sep).
our $Options = {};
@{$Options}{ qw/nl lb ep sep/ } = ($NewLines, $LeadingBlanks, $EqualPosition, $LabelSeparator);
# The usage message of the script, built as one long string.
# The double-quoted parts are interpolated when this statement is executed, i.e. $Configuration and
# the defaults in $Options->{nl}, $Options->{lb} and $Options->{ep} are inserted with the values
# they hold at this point of the script.
my $Help = "\n" .
"bibtexformat - automated substitutions and format changes of BibTeX library files\n" .
"\n" .
# single quotes keep the version control keywords from being interpolated as Perl variables
'$Revision: 4900 $' . "\n" .
'$Date: 2011-03-09 23:37:00 +0100 (Wed, 09 Mar 2011) $' . "\n" .
"\n" .
"Generates and adds the labels (cite keys) to a BibTeX library file, abbreviates journal\n" .
"titles, filters out unneeded items, performs string replacements and checks author names\n" .
"for format errors that may lead to incorrect citations.\n" .
"\n" .
"Usage: bibtexformat infile(s) [options] [-o outfile]\n" .
"\n" .
# output file and short library
" -o Define an output file (recommended).\n" .
" A leading dot defines an extension squeezed between the original extension\n" .
" and the filename (e.g. -o .new => Library.new.bib, as shortcut and needed\n" .
" for multiple file processing)\n " .
"\n" .
" -s short library, leaves out abstract, keywords, etc. (see configuration file)\n" .
"\n" .
# generation of the cite keys
" -labels create the labels (cite keys) for items without one\n" .
" -pn use the first page number to create unambiguous labels of the type\n" .
" Author:Year:Page\n" .
" -fy use the full year instead of only the last two digits (mind that it may\n" .
" not always be given)\n" .
" -sep define the separator for the labels (default is \":\")\n" .
" -f force the generation of labels, even if already defined\n" .
" (this overwrites all existing labels)\n" .
" -fpapers force the generation of labels for items with a Papers default label\n" .
" (Author:2000p1234) while keeping all other preexisting ones\n" .
"\n" .
# corrections of fields and reference types
" -rangefix expand page numbers, e.g. 723-7 to 723-727\n" .
" -protitle protect the case of the title by enclosing it with double braces {{ }}\n" .
"\n" .
" -typecheck check all items for mandatory fields of the respective BibTeX type\n" .
" -typereset change all entry types to \@article (for an export by Papers)\n" .
" -typefix change BibTeX types depending on certain trigger words\n" .
" (requires configuration in $Configuration)\n" .
"\n" .
" -aopfix correct ahead-of-print publications without page numbers by using the DOI instead\n" .
" (requires configuration in $Configuration)\n" .
"\n" .
# author handling
" -autcheck check the authors for correct division into first, last, von and Jr part\n" .
" -autfix perform the user-defined corrections of author names\n" .
" -autmax define a maximum number of authors before the list is shortened to et al.\n" .
"\n" .
# statistics
" -autlist print the number and a list of all authors\n" .
" -joulist print the number and a list of all journals\n" .
" -typelist print a list of all found BibTeX types\n" .
"\n" .
# substitutions and regular expressions
" -defsubst perform the default substitutions in the titles\n" .
" -subst perform user-defined substitutions in the titles\n" .
"\n" .
" -fieldregex uses a regular expression on particular fields:\n" .
" -fieldregex fieldname \"from\" \"to\" fieldname \"from\" \"to\" [...]\n" .
"\n" .
" -fileregex uses a number of regular expressions on each file in the 'file' fields:\n" .
" -fileregex \"from1\" \"to1\" \"from2\" \"to2\" [...]\n\n" .
" \"^\" matches the beginning of the filenames\n" .
" \"\" to replace something with an empty string\n" .
"\n" .
# sorting and formatting
" -sort sort the items alphabetically according to their BibTeX label\n" .
" -nl newlines between BibTeX items, default is $Options->{nl}\n" .
"\n" .
" -quotes convert the field delimiters to quotes\n" .
" -braces convert the field delimiters to braces\n" .
"\n" .
" -format re-format the library to improve readability\n" .
" -lb leading blanks before a field descriptor, default is $Options->{lb}\n" .
" -ep position of equal signs (including leading blanks), default is $Options->{ep}\n" .
" -lc format field keywords (\"author\") and reference types (\"\@book\") lowercase\n" .
" -uc format field keywords (\"author\") and reference types (\"\@book\") uppercase\n" .
" -combine combine multiline entries in one line\n" .
" -wrap 80 wraps the line at, for example, column 80 (indentation is considered)\n" .
"\n" .
# journal abbreviations
" -abb abbreviate the journal titles, one or more files containing the abbreviations\n" .
" can be given and also defined by default in the script\n" .
" -abb1 use the first abbreviation given in the abbreviation files (default)\n" .
" -abb2 use the second abbreviation given in the abbreviation files\n" .
" (usually without periods)\n" .
" -full use the full journal title and replace all abbreviated titles with it\n" .
"\n" .
# links to external files
" -local2file convert all file links from 'local-url' to 'file' entries (e.g. JabRef)\n" .
" -file2local convert all file links from 'file' to 'local-url' entries (e.g. Papers)\n" .
" -filecheck check the existence of all referenced files (file =)\n" .
" -filedir base directory of the files for -filecheck\n" .
"\n" .
# configuration and logging
" -conf read in a configuration file different from the one defined in the source\n" .
" -log write all output to .log instead of STDOUT/STDERR.\n" .
"\n" .
"\n" .
# closing remarks on the label generation
"Important information for the label generation using -labels:\n" .
" If -pn is omitted, then the labels are generated using the first author, a colon,\n" .
" the year (two digits) and a lowercase letter to avoid duplicates, e.g.\n" .
" Author:99\n" .
" Author:03a\n" .
" Author:03b\n" .
"\n" .
" If :03 exists and a second match is found, then the first one is renamed :03a\n" .
" and the second becomes :03b.\n" .
" In order to always have the labels of an Endnote library assigned in the same \n" .
" sequence, even if new references are added and the library is exported again, it\n" .
" is IMPORTANT that the Endnote library is sorted according to the record numbers!\n" .
" Generally, using the page number is strongly recommended (-pn option).\n" .
"\n";
####################################################################################################
# Required (Req) and optional (Opt) fields of each BibTeX reference type. Names joined with "-or-"
# or "-andor-" (e.g. author-or-editor, chapter-andor-pages) denote alternatives within one entry.
my $RefTypes = {
   'article' => {
      'Req' => [ qw(author title journal year) ],
      'Opt' => [ qw(volume number pages month note) ],
   },
   'book' => {
      'Req' => [ qw(author-or-editor title publisher year) ],
      'Opt' => [ qw(volume-or-number series address edition month note) ],
   },
   'booklet' => {
      'Req' => [ qw(title) ],
      'Opt' => [ qw(author howpublished address month year note) ],
   },
   'conference' => {
      'Req' => [ qw(author title booktitle year) ],
      'Opt' => [ qw(editor volume-or-number series pages address month organization publisher) ],
   },
   'inbook' => {
      'Req' => [ qw(author-or-editor title chapter-andor-pages publisher year) ],
      'Opt' => [ qw(volume-or-number series type address edition month note) ],
   },
   'incollection' => {
      'Req' => [ qw(author title booktitle publisher year) ],
      'Opt' => [ qw(editor volume-or-number series type chapter pages address edition month note) ],
   },
   'inproceedings' => {
      'Req' => [ qw(author title booktitle year) ],
      'Opt' => [ qw(editor volume-or-number series pages address month organization publisher) ],
   },
   'manual' => {
      'Req' => [ qw(title) ],
      'Opt' => [ qw(author organization address edition month year note) ],
   },
   'mastersthesis' => {
      'Req' => [ qw(author title school year) ],
      'Opt' => [ qw(type address month note) ],
   },
   'misc' => {
      'Req' => [],   # no mandatory fields at all
      'Opt' => [ qw(author title howpublished month year note) ],
   },
   'phdthesis' => {
      'Req' => [ qw(author title school year) ],
      'Opt' => [ qw(type address month note) ],
   },
   'proceedings' => {
      'Req' => [ qw(title year) ],
      'Opt' => [ qw(editor volume-or-number series address month organization publisher note) ],
   },
   'techreport' => {
      'Req' => [ qw(author title institution year) ],
      'Opt' => [ qw(type number address month note) ],
   },
   'unpublished' => {
      'Req' => [ qw(author title note) ],
      'Opt' => [ qw(month year) ],
   },
};
####################################################################################################
# Parse the command line parameters and sort them into the $Options hash; the parameter
# definitions and the help text are passed along.
GetParameters ($Parameters, $Options, $Help);
# file-level working variables used while the libraries are processed
my ($InFile, $Content, $MultipleFiles, $Line, $Item, @Fields);
# this is the configuration file defined at the beginning of the code.
&ReadConfigurationFile;
####################################################################################################
# Sanity checks of the given parameters. All error messages are sent to STDERR via &Output and the
# script terminates with a distinct exit code for each problem.
####################################################################################################
# check whether an input file was given
if (not defined $Options->{rest}) {
   &Output ("STDERR", "$ERROR: No input file(s) given!\n$Help");
   exit 1;
}
# -combine and -wrap contradict each other, this only causes a note but no termination
if ($Options->{combine} and $Options->{wrap}) {
   &Output ("STDERR", "\nBoth the -combine and the -wrap option were given. This is slightly "
                    . "schizophrenic... ;-)\n"
                    . "Doesn't hurt, though, the wrapping is done last and -combine, therefore, "
                    . "ignored,\n\n");
}
if ($Options->{o} and $Options->{new}) {
   &Output ("STDERR", "$ERROR: Either -new or -o may be given.\n\n");
   exit 3;
}
if ($Options->{quotes} and $Options->{braces}) {
   &Output ("STDERR", "$ERROR: Either -quotes or -braces may be requested.\n\n");
   exit 5;
}
####################################################################################################
# translate some obsolete parameters that were kept for backward-compatibility
if ($Options->{jabpdf}) { # -jabpdf option was renamed to -local2file
   $Options->{local2file} = $Options->{jabpdf};
   delete $Options->{jabpdf};
}
if ($PapURL) { # $PapURL was renamed to $LocalURL
   $LocalURL = $PapURL;
   $PapURL = undef;
}
if ($Options->{pappdf}) { # -pappdf was renamed to -file2local
   $Options->{file2local} = $Options->{pappdf};
   delete $Options->{pappdf};
}
if ($JabURL) { # $JabURL was renamed to $FileURL
   $FileURL = $JabURL;
   $JabURL = undef;
}
####################################################################################################
if ($Options->{abb} or $Options->{full} or $Options->{abb1} or $Options->{abb2}) {
   # Add the default files to the ones given via the command line. The default ones are added to
   # the end of the list, that is the ones given by the user are searched first
   push @{$Options->{abb}}, @AbbFiles;
}
# the statistics are taken of all processed files (if multiple ones are processed)
my $Statistics = {}; # create a hash reference for some statistics
if (scalar @{$Options->{rest}} > 1) {
   # Several input files cannot be written into one single output file, only a secondary
   # extension (leading dot) is accepted in this case.
   if ($Options->{o} and $Options->{o} !~ m/^\./) {
      my $Count = scalar @{$Options->{rest}};
      # reported via STDERR like all other errors (this message used to be printed to STDOUT)
      &Output ("STDERR", "\n"
                       . "$ERROR: $Count files are processed but a single output file is given ($Options->{o}).\n"
                       . " You can define a new, secondary extension by a leading dot, e.g. .new for that.\n");
      exit 7;
   }
   $MultipleFiles = 1;
}
else {
   $MultipleFiles = 0;
}
####################################################################################################
# Main loop: every input file is read, parsed, processed according to the requested options and
# written to its output file.
#
# The user's original requests are saved before the loop. $Options->{o} is replaced by the resolved
# output file name and $Options->{sort} may be disabled while a library is processed. Both have to
# start from the original request for every input file, otherwise the settings of the first library
# leak into all following ones (e.g. every later library would be written into the output file of
# the first one, which is the first INPUT file if no -o was given).
# The -new option is just a shortcut for -o .new (laziness of the author who needed that very
# regularly).
my $RequestedOutput = $Options->{new} ? ".new" : $Options->{o};
my $RequestedSort   = $Options->{sort};

# Splits a file name into the base name and the extension (including the leading dot) and returns
# both. The extension is empty if the name contains no dot or if the last dot belongs to a
# directory name, i.e. is followed by a path separator. substr is used instead of a regular
# expression since the extension may contain RegEx metacharacters.
my $SplitExtension = sub {
   my $File = shift;
   my $DotPosition = rindex ($File, "."); # determine the position of the last dot
   if ($DotPosition == -1) { return ($File, ""); }
   my $Extension = substr ($File, $DotPosition); # take everything from the last dot on
   if ($Extension =~ m{[/\\]}) { return ($File, ""); }
   return (substr ($File, 0, $DotPosition), $Extension);
};

while ( @{$Options->{rest}} ) { # process all input files
   my $Library = []; # create a new array reference for the BibTeX items for each processed library
   my ($OutputMessage, $InBaseName, $OutBaseName, $Extension);

   # sorting may have been disabled for the previous library, restore the user's request
   $Options->{sort} = $RequestedSort;

   $InFile = shift @{$Options->{rest}};

   # check for the existence of the given file, and try the .bib extension
   if (not -f $InFile) {
      if (-f "$InFile.bib") { $InFile = "$InFile.bib"; }
      else {
         &Output ("STDERR", "$ERROR: Library database $InFile not found!\n\n");
         exit 10;
      }
   }

   # determine the base name of the output file and the log file (if necessary)
   ($InBaseName, $Extension) = $SplitExtension->($InFile);

   # check whether an output file was defined or take the input filename
   if ($RequestedOutput) { # if an output file was given
      if ($RequestedOutput =~ m/^\./) { # if the output filename starts with a dot (e.g. .new)
         my $Temp = $RequestedOutput; # the secondary extension
         $Temp =~ s/\.$//; # remove a potentially trailing dot
         $Options->{o} = $InBaseName . $Temp . $Extension; # add it before the extension, e.g. .new.bib
         $OutBaseName = $InBaseName . $Temp;
      }
      else { # if the output file does not start with a dot
         $Options->{o} = $RequestedOutput;
         ($OutBaseName) = $SplitExtension->($RequestedOutput); # only the base name is needed
      }
      $OutputMessage = "(new file)";
   }
   else { # if no output file was given
      if ($Extension) { # if there was an extension
         $Options->{o} = $InBaseName . $Extension;
      }
      else {
         $Options->{o} = $InBaseName . ".new";
      }
      # the log file (if requested) is named after the input file in this case
      $OutBaseName = $InBaseName;

      if ($BackupLibrary) { # whether a backup is to be created if the input file is overwritten
         my $Backup = $InBaseName . ".old" . $Extension;
         # never overwrite the input file if the promised backup could not be created
         copy ($InFile, $Backup) or die "$ERROR: Could not save backup of $InFile to $Backup: $!";
         $OutputMessage = "(overwriting input file, backup saved to $Backup)";
      }
      else { # if no backup is to be created
         $OutputMessage = "(overwriting input file)";
      }
   }

   if ($Options->{log}) {
      # $Options->{log} holds the name of the log file from now on; the file is created empty here
      $Options->{log} = $OutBaseName . ".log";
      open (my $LogHandle, ">", $Options->{log}) or die "$ERROR: Could not open logfile $Options->{log}: $!";
      close $LogHandle;
   }
   # die "\nInBaseName: $InBaseName\nOutBaseName: $OutBaseName\nOutFile: $Options->{o}\nLogFile: $Options->{log}\n\n";

   # the heading, underlined with equal signs
   $Line = "\n Processing library $InFile\n ";
   $Line = $Line . "=" x (length ($Line) - 4);
   $Line = $Line . "\n\n";
   &Output ($Line);

   # read the complete input file into $Content
   open (my $LibraryHandle, "<", $InFile) or die "$ERROR: Could not open library $InFile: $!";
   @{$Content} = <$LibraryHandle>;
   close $LibraryHandle;

   if (not @{$Content} ) {
      &Output ("STDERR", "$ERROR: The file $InFile appears to be empty!\n\n");
      exit 15;
   }

   &TaskHeader ("Parsing library\n");

   # check each line for necessary substitutions
   &ReplaceSymbols ($Content, $Substitutions);

   # creates an array with one item for each BibTeX item
   ParseBibTeX ($Library, $Content);
   # dp $Library;

   # perform the corrections defined in the "authors" file or shorten the list to a defined number
   if ($Options->{autfix} or $Options->{autmax}) {
      &AuthorCorrections ($Library);
   }

   # expand abbreviated page ranges
   if ($Options->{rangefix}) {
      &RangeCorrection ($Library);
   }

   # protect the title from case changes by BibTeX (by enclosing it in {...})
   if ($Options->{protitle}) {
      &ProtectTitle ($Library);
   }

   # change all reference types to @article
   if ($Options->{typereset}) {
      &TypeReset ($Library);
   }

   # replace reference types in a Papers export
   if ($Options->{typefix}) {
      &TypeConversion ($Library);
   }

   # abbreviate the journal titles (should happen after TypeConversion)
   if ($Options->{full} or $Options->{abb1} or $Options->{abb2}) {
      &AbbreviateJournals ($Library, $Options->{abb});
   }

   # create the label (cite key) for each item (has to happen after TypeConversion)
   if ($Options->{labels}) {
      &CreateLabels ($Library);
   }

   # convert between JabRef (file) and Papers/BibDesk (local-url) format for links to external files
   if ($Options->{local2file} or $Options->{file2local}) {
      &LinkConversion ($Library);
   }

   # to perform regular expressions on particular fields
   if ($Options->{fieldregex}) {
      &FieldRegEx ($Library);
   }

   # to perform regular expressions on each entry in a "file" field
   if ($Options->{fileregex}) {
      &FileRegEx ($Library);
   }

   # check if all files exist
   if ($Options->{filecheck}) {
      &FileCheck ($Library);
   }

   # if no pages entry but a doi is present, use the doi value as "pages"-entry
   if ($Options->{aopfix}) {
      &AOPFix ($Library);
   }

   # remove unwanted fields defined in %IgnoreFields in the configuration file
   # (this has to happen after TypeConversion!)
   if ($Options->{s}) {
      &ShortenLibrary ($Library);
   }

   # reformat the entries (align equal signs, etc.)
   if ($Options->{format}) {
      &FormatLibrary ($Library);
   }

   # check whether mandatory fields for each BibTeX entry are present
   if ($Options->{typecheck}) {
      &CheckTypes ($Library);
   }

   # convert the field delimiters to quotes or braces
   if ($Options->{quotes} or $Options->{braces}) {
      &ConvertDelimiters ($Library);
   }

   # if any list is requested, collect all data of this library
   if ($Options->{autlist} or $Options->{autcheck} or $Options->{joulist} or $Options->{typelist}) {
      &CreateStatistics ($Library, $Statistics);
   }
   # dp ($Library);
   # dp ($Library, $Statistics);

   # check for double labels or entries without labels (@string and @comment items need no label)
   my (@NoLabel, @DoubleLabel);
   my $Labels = {};

   foreach $Item ( @{$Library} ) {
      if ($Item->{Label} and defined $Labels->{$Item->{Label}}) {
         push @DoubleLabel, $Item->{Label};
      }

      if (not $Item->{Label} and $Item->{RefType} !~ m/string/i and $Item->{RefType} !~ m/comment/i) {
         push @NoLabel, &RefSummary ($Item);
      }

      if ($Item->{Label}) {
         $Labels->{$Item->{Label}} = 1;
      }
   } # of foreach $Item

   # sorting is disabled for this library if the labels are ambiguous or missing
   if (@DoubleLabel) {
      &Output (" WARNING: One or more labels have been found more than once.");
      if ($Options->{sort}) { &Output (" Sorting disabled.\n "); }
      else { &Output ("\n "); }
      &Output ( join (" ", @DoubleLabel), "\n\n ");
      $Options->{sort} = 0;
   }

   if (@NoLabel) {
      &Output (" WARNING: No label defined for the entries below.");
      if ($Options->{sort}) { &Output (" Sorting disabled.\n"); }
      else { &Output ("\n"); }
      &Output ( join ("", @NoLabel), "\n\n");
      $Options->{sort} = 0;
   }

   if ($Options->{sort}) {
      $Library = &SortLibrary ($Library); # sort the library alphabetically after the cite keys
   }

   # save the result to the output file
   &TaskHeader ("Writing library to $Options->{o} $OutputMessage\n");
   WriteBibTeX ($Library, $Options->{o}, $Options->{nl});
} # of while ( @{$Options->{rest}} ) { # process all input files
# After all libraries have been processed, the requested lists are printed. The statistics were
# collected over all processed files, which is announced if more than one file was given.
&Output ("\n\nCombined results from all processed libraries:\n\n") if $MultipleFiles;

# print out a list of all authors
&PrintAuthorList ($Statistics) if $Options->{autlist};

# print out a list of all journals
&PrintJournalList ($Statistics) if $Options->{joulist};

# print out a list of all reference types
&PrintTypeList ($Statistics) if $Options->{typelist};

# check the list of authors, whether the different parts (first, last, von, Jr) are separated correctly
&CheckAuthors ($Statistics, $AuthorNames) if $Options->{autcheck};

# just a good-bye message
&SmallHeader ("Done!\n\n");
#######################################################################################################################
#######################################################################################################################
# S U B R O U T I N E S
#######################################################################################################################
#######################################################################################################################
sub ReplaceSymbols { # checks each line for necessary substitutions
   # Parameters:
   #    $Content        reference to the array with the lines of the library, the lines are
   #                    changed in place
   #    $Substitutions  name of the file with the user-defined substitutions (one Perl statement
   #                    per line, such as $Line =~ s/foo/bar/g;), only read if -subst was given
   # Dies if the substitutions file cannot be opened. A substitution that fails to compile or run
   # is reported once via warn and does not stop the processing.
   my $Content = shift;
   my $Substitutions = shift;
   my (@Substitutions, %Reported);

   &TaskHeader ("Replacing symbols\n");

   # prepare user-defined substitutions, if given
   if ($Substitutions and $Options->{subst}) {
      open (my $SubstHandle, "<", $Substitutions) or die "$ERROR: Could not open file $Substitutions: $!";
      @Substitutions = <$SubstHandle>;
      close $SubstHandle;
   }

   # Check each line for needed replacements. The loop variable MUST be named $Line, since the
   # user-defined substitutions refer to it by this name.
   foreach my $Line ( @{$Content} ) {
      if ($Options->{defsubst}) {
         $Line = &PredefinedSubstitutions ($Line);
      }

      # Execute user-defined substitutions if given. This is done after all other replacements
      # have been carried out, that is, all special symbols will already be in LaTeX code.
      # NOTE(review): the statements are executed via string eval, i.e. the substitutions file is
      # run as Perl code and must only come from a trusted source.
      foreach my $Substitution ( @Substitutions ) {
         eval $Substitution;

         # a broken statement would otherwise be skipped silently; report it once only and not
         # again for every single line of the library
         if ($@ and not $Reported{$Substitution}++) {
            my $Statement = $Substitution;
            $Statement =~ s/\s+$//;
            warn "WARNING (bibtexformat): user-defined substitution failed: $Statement\n   $@\n";
         }
      }
   }

   return;
} # of sub ReplaceSymbols
#######################################################################################################################
sub PredefinedSubstitutions {
my $Line = shift;
# If the line is empty (only blanks and a line feed), the line feed would
# be removed by the clean-up regexes below (\s also matches \n).
# In order to preserve the line feed, such a line is not processed;
# a bare line feed is returned instead.
if ($Line =~ m/^[ ]*\n$/) { return "\n" }
$Line =~ s/\n$//; # remove all newlines
$Line =~ s/\r$//; # remove all DOS linefeeds
$Line =~ s/\s+$//; # remove all trailing blanks
$Line =~ s/–/-/g; # this happens when a hyphen (dash) is in an Endnote field
$Line =~ s/Å/\{\\AA\}/g; # replace Angstrom
$Line =~ s/å/\{\\aa\}/g; # replace angstrom
$Line =~ s/([^\\])%/$1\\%/g; # escape percent signs
# $Line =~ s/([^\\])&/$1\\&/g; # escape ampersands (commented due to problems with math environments in multiple runs)
# Escape dollar signs, this is not trivial as it needs to be checked whether this is actually a LaTeX math
# delimiter which was added in a previous run. It is only done when a single dollar sign is found as this
# cannot be a maths environment then. In all other cases it's left to the user.
my $Dollars = ($Line =~ tr/\$/\$/); # count the number of dollar signs in the line
if ($Dollars == 1) { $Line =~ s/([^\\])\$/$1\\\$/g; } # only escape the sign if it hasn't been escaped before
$Line =~ s/Ǎ/\\v\{A\}/g; # umlaut caron vA
$Line =~ s/ǎ/\\v\{a\}/g; # umlaut caron va
$Line =~ s/Č/\\v\{C\}/g; # umlaut caron vC
$Line =~ s/č/\\v\{c\}/g; # umlaut caron vc
$Line =~ s/Ď/\\v\{D\}/g; # umlaut caron vD
$Line =~ s/Ě/\\v\{E\}/g; # umlaut caron vE
$Line =~ s/ě/\\v\{e\}/g; # umlaut caron ve
$Line =~ s/Ǧ/\\v\{G\}/g; # umlaut caron vG
$Line =~ s/ǧ/\\v\{g\}/g; # umlaut caron vg
$Line =~ s/Ȟ/\\v\{H\}/g; # umlaut caron vH
$Line =~ s/ȟ/\\v\{h\}/g; # umlaut caron vh
$Line =~ s/Ǐ/\\v\{I\}/g; # umlaut caron vI
$Line =~ s/ǐ/\\v\{i\}/g; # umlaut caron vi
$Line =~ s/ǰ/\\v\{j\}/g; # umlaut caron vj
$Line =~ s/Ǩ/\\v\{K\}/g; # umlaut caron vK
$Line =~ s/ǩ/\\v\{k\}/g; # umlaut caron vk
$Line =~ s/Ň/\\v\{N\}/g; # umlaut caron vN
$Line =~ s/ň/\\v\{n\}/g; # umlaut caron vn
$Line =~ s/Ǒ/\\v\{O\}/g; # umlaut caron vO
$Line =~ s/ǒ/\\v\{o\}/g; # umlaut caron vo
$Line =~ s/Ř/\\v\{R\}/g; # umlaut caron vR
$Line =~ s/ř/\\v\{r\}/g; # umlaut caron vr
$Line =~ s/Š/\\v\{S\}/g; # umlaut caron vS
$Line =~ s/š/\\v\{s\}/g; # umlaut caron vs
$Line =~ s/Ť/\\v\{T\}/g; # umlaut caron vT
$Line =~ s/ť/\\v\{t\}/g; # umlaut caron vt
$Line =~ s/Ǔ/\\v\{U\}/g; # umlaut caron vU
$Line =~ s/ǔ/\\v\{u\}/g; # umlaut caron vu
$Line =~ s/Ž/\\v\{Z\}/g; # umlaut caron vZ
$Line =~ s/ž/\\v\{z\}/g; # umlaut caron vz
$Line =~ s/Ä/\\"\{A\}/g; # umlaut "A
$Line =~ s/ä/\\"\{a\}/g; # umlaut "a
$Line =~ s/Ö/\\"\{O\}/g; # umlaut "O
$Line =~ s/ö/\\"\{o\}/g; # umlaut "o
$Line =~ s/Ü/\\"\{U\}/g; # umlaut "U
$Line =~ s/ü/\\"\{u\}/g; # umlaut "u
$Line =~ s/ß/\\ss/g; # umlaut \ss
$Line =~ s/Á/\\'\{A\}/g; # A accent acute
$Line =~ s/á/\\'\{a\}/g; # a accent acute
$Line =~ s/À/\\`\{A\}/g; # A accent grave
$Line =~ s/à/\\`\{a\}/g; # a accent grave
$Line =~ s/É/\\'\{E\}/g; # E accent acute
$Line =~ s/é/\\'\{e\}/g; # e accent acute
$Line =~ s/È/\\`\{E\}/g; # E accent grave
$Line =~ s/è/\\`\{e\}/g; # e accent grave
$Line =~ s/Í/\\'\{I\}/g; # I accent acute
$Line =~ s/í/\\'\{i\}/g; # i accent acute
$Line =~ s/Ì/\\`\{I\}/g; # I accent grave
$Line =~ s/ì/\\`\{i\}/g; # i accent grave
$Line =~ s/Ó/\\'\{O\}/g; # O accent acute
$Line =~ s/ó/\\'\{o\}/g; # o accent acute
$Line =~ s/Ò/\\`\{O\}/g; # O accent grave
$Line =~ s/ò/\\`\{o\}/g; # o accent grave
$Line =~ s/Ú/\\'\{U\}/g; # U accent acute
$Line =~ s/ú/\\'\{u\}/g; # u accent
$Line =~ s/Ù/\\`\{U\}/g; # U grave
$Line =~ s/ù/\\`\{u\}/g; # u accent grave
$Line =~ s/Â/\\^\{A\}/g; # A circumflex
$Line =~ s/â/\\^\{a\}/g; # a circumflex
$Line =~ s/Ê/\\^\{E\}/g; # E circumflex
$Line =~ s/ê/\\^\{e\}/g; # e circumflex
$Line =~ s/Î/\\^\{I\}/g; # I circumflex
$Line =~ s/î/\\^\{i\}/g; # i circumflex
$Line =~ s/Ô/\\^\{O\}/g; # O circumflex
$Line =~ s/ô/\\^\{o\}/g; # o circumflex
$Line =~ s/Û/\\^\{U\}/g; # U circumflex
$Line =~ s/û/\\^\{u\}/g; # u circumflex
$Line =~ s/Ç/\\c\{C\}/g; # C cedilla
$Line =~ s/ç/\\c\{c\}/g; # c cedilla
$Line =~ s/Ø/\\c\{O\}/g; # O cedilla
$Line =~ s/ø/\\c\{o\}/g; # o cedilla
$Line =~ s/Ş/\\c\{S\}/g; # S sedilla
$Line =~ s/ş/\\c\{s\}/g; # s sedilla
$Line =~ s/Ñ/\\~\{N\}/g; # N tilde
$Line =~ s/ñ/\\~\{n\}/g; # n tilde
$Line =~ s/Õ/\\~\{O\}/g; # O tilde
$Line =~ s/õ/\\~\{o\}/g; # o tilde
$Line =~ s/Ǎ/\\v\{A\}/g; # umlaut caron vA
$Line =~ s/ǎ/\\v\{a\}/g; # umlaut caron va
$Line =~ s/Č/\\v\{C\}/g; # umlaut caron vC
$Line =~ s/č/\\v\{c\}/g; # umlaut caron vc
$Line =~ s/Ď/\\v\{D\}/g; # umlaut caron vD
$Line =~ s/Ě/\\v\{E\}/g; # umlaut caron vE
$Line =~ s/ě/\\v\{e\}/g; # umlaut caron ve
$Line =~ s/Ǧ/\\v\{G\}/g; # umlaut caron vG
$Line =~ s/ǧ/\\v\{g\}/g; # umlaut caron vg
$Line =~ s/Ȟ/\\v\{H\}/g; # umlaut caron vH
$Line =~ s/ȟ/\\v\{h\}/g; # umlaut caron vh
$Line =~ s/Ǐ/\\v\{I\}/g; # umlaut caron vI
$Line =~ s/ǐ/\\v\{i\}/g; # umlaut caron vi
$Line =~ s/ǰ/\\v\{j\}/g; # umlaut caron vj
$Line =~ s/Ǩ/\\v\{K\}/g; # umlaut caron vK
$Line =~ s/ǩ/\\v\{k\}/g; # umlaut caron vk
$Line =~ s/Ň/\\v\{N\}/g; # umlaut caron vN
$Line =~ s/ň/\\v\{n\}/g; # umlaut caron vn
$Line =~ s/Ǒ/\\v\{O\}/g; # umlaut caron vO
$Line =~ s/ǒ/\\v\{o\}/g; # umlaut caron vo
$Line =~ s/Ř/\\v\{R\}/g; # umlaut caron vR
$Line =~ s/ř/\\v\{r\}/g; # umlaut caron vr
$Line =~ s/Š/\\v\{S\}/g; # umlaut caron vS
$Line =~ s/š/\\v\{s\}/g; # umlaut caron vs
$Line =~ s/Ť/\\v\{T\}/g; # umlaut caron vT
$Line =~ s/ť/\\v\{t\}/g; # umlaut caron vt
$Line =~ s/Ǔ/\\v\{U\}/g; # umlaut caron vU
$Line =~ s/ǔ/\\v\{u\}/g; # umlaut caron vu
$Line =~ s/Ž/\\v\{Z\}/g; # umlaut caron vZ
$Line =~ s/ž/\\v\{z\}/g; # umlaut caron vz
$Line =~ s/Ø/\\O/g; # danish O with dash
$Line =~ s/ø/\\o/g; # danish o with dash
$Line =~ s/æ/\\ae/g; # french ae
$Line =~ s/œ/\\oe/g; # french oe
$Line =~ s/Ÿ/\\"\{Y\}/g; # Y with two dots
$Line =~ s/ÿ/\\"\{y\}/g; # y with two dots
# Now all possible symbols which may appear in a name have been replaced. If this
# line is a BibTeX item line, the LaTeX codes have to be removed, e.g. \'{e} => e,
# which is done by removing everything that is not in a certain character class.
if ($Line =~ m/^@/ and $Line !~ m/string/i and $Line !~ m/comment/i) {
$Item = $Line;
$Item =~ s/^(@[^{]+{)(.+)$/$1/; # save e.g. @article{
$Line =~ s/^(@[^{]+{)(.+)$/$2/; # save everthing else
# *.** added by Chaohui. If there is no $2, then there will be still something in $Line
if ( $Line eq $Item ) {
$Line = "";
}
# $Line =~ s/[^[email protected]_+:,]//g; # remove all chars not in a certain character class
# search for
# \\ a backslash
# ["'`^~cv] followed by one character of "'`^~cv
# \{ followed by {
# \w then a word character => backreference $1
# \} followed by }
# and replace it with
# $1 the saved word character
$Line =~ s/\\["'`^~cv]\{(\w)\}/$1/g; # remove several symbols
$Line =~ s/[\\'~"^`]//g; # remove several symbols which don't belong in there
$Line = $Item . $Line; # glue them together again
return $Line;
}
# uppercase greek letters
# $Line =~ s/Α/A/g;
# $Line =~ s/Β/B/g;
$Line =~ s/Γ/\$\\Gamma\$/g;
$Line =~ s/Δ/\$\\Delta\$/g;
# $Line =~ s/Ε/E/g;
# $Line =~ s/Ζ/Z/g;
# $Line =~ s/Η/H/g;
$Line =~ s/Θ/\$\\Theta\$/g;
# $Line =~ s/Ι/I/g;
# $Line =~ s/Κ/K/g;
$Line =~ s/Λ/\$\\Lambda\$/g;
# $Line =~ s/Μ/M/g;
# $Line =~ s/Ν/N/g;
$Line =~ s/Ξ/\$\\Xi\$/g;
# $Line =~ s/Ο/O/g;
$Line =~ s/Π/\$\\Pi\$/g;
# $Line =~ s/Ρ/P/g;
$Line =~ s/Σ/\$\\Sigma\$/g;
# $Line =~ s/Τ/T/g;
$Line =~ s/Υ/\$\\Upsilon\$/g;
$Line =~ s/Φ/\$\\Phi\$/g;
# $Line =~ s/Χ/Y/g;
$Line =~ s/Ψ/\$\\Psi\$/g;
$Line =~ s/Ω/\$\\Omega\$/g;
# lowercase greek letters
$Line =~ s/α/\$\\alpha\$/g;
$Line =~ s/β/\$\\beta\$/g;
$Line =~ s/γ/\$\\gamma\$/g;
$Line =~ s/δ/\$\\delta\$/g;
$Line =~ s/ε/\$\\epsilon\$/g;
$Line =~ s/ζ/\$\\zeta\$/g;
$Line =~ s/η/\$\\eta\$/g;
$Line =~ s/θ/\$\\theta\$/g;
$Line =~ s/ι/\$\\iota\$/g;
$Line =~ s/κ/\$\\kappa\$/g;
$Line =~ s/λ/\$\\lambda\$/g;
$Line =~ s/μ/\$\\mu\$/g;
$Line =~ s/ν/\$\\nu\$/g;
$Line =~ s/ξ/\$\\xi\$/g;
$Line =~ s/ο/\$o\$/g;
$Line =~ s/π/\$\\pi\$/g;
$Line =~ s/ρ/\$\\rho\$/g;
$Line =~ s/σ/\$\\sigma\$/g;
$Line =~ s/ς/\$\\varsigma\$/g;
$Line =~ s/τ/\$\\tau\$/g;
$Line =~ s/υ/\$\\upsilon\$/g;
$Line =~ s/φ/\$\\varphi\$/g;
$Line =~ s/χ/\$\\chi\$/g;
$Line =~ s/ψ/\$\\psi\$/g;
$Line =~ s/ω/\$\\omega\$/g;
$Line =~ s/ϑ/\$\\vartheta\$/g;