-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibformat
executable file
·461 lines (429 loc) · 14.4 KB
/
bibformat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
#!/usr/bin/env perl -w
# -*-cperl-*-
$| = 1;
# use sigtrap 'handler' => sub { die "Signal caught. Exiting.\n" }, qw<normal-signals>;
use utf8;
use open qw(:std :encoding(UTF-8));
use FileHandle;
use Getopt::Long qw(:config no_ignore_case);
use HTML::Entities;
use File::Find;
use LaTeX::ToUnicode qw(convert $endcw);
use Regexp::Common;
## ---- configuration (package globals; defaults may be overridden on the command line) ----
$Output     = "-";       # output file name ("-" = print on stdout)
$Format     = "bib";     # output format; the default "bib" prints the raw bibtex entries
                         # (alternatives are selected with --text, --latex, --html, --pod, --md, --creole)
$Style      = "natbib";  # bibstyle used when an entry has to be formatted by bibtex
@BIB        = ();        # bibtex database files (empty = all .bib files in search path)
$SingleLine = 0;         # print every formatted entry on a single text line
$List       = 0;         # only list the matching bibtex keys
$ListBST    = 0;         # only list all known bibstyles
$Quiet      = 0;         # suppress non-fatal warnings
$Verbose    = 0;         # progress information & warnings about duplicate keys
$Debug      = 0;         # debugging messages & output of external programs
$Help       = 0;         # usage message requested

# builds the option handler that switches the output format to the given name
my $select_format = sub {
    my $format_name = shift;
    return sub { $Format = $format_name };
};

$ok = GetOptions(
    # where to read from / write to
    "database|D=s"         => \@BIB,
    "output|o=s"           => \$Output,
    "style|s=s"            => \$Style,
    # output format
    "latex|tex"            => $select_format->("tex"),
    "html"                 => $select_format->("html"),
    "pod"                  => $select_format->("pod"),
    "markdown|md"          => $select_format->("markdown"),
    "text"                 => $select_format->("ascii"),
    "creole"               => $select_format->("creole"),
    # modes and verbosity
    "S|single|single-line" => \$SingleLine,
    "list|l"               => \$List,
    "list-bst"             => \$ListBST,
    "quiet|q"              => \$Quiet,
    "verbose|v"            => \$Verbose,
    "debug|d"              => \$Debug,
    "help|h"               => \$Help,
);

if ($Debug) {
    $Verbose = 1;    # -d implies -v
}

# The usage message is shown (and the program aborted) on invalid options,
# on --help, and when there is nothing to do (no keys and no --list-bst).
my @usage = (
    "\nUsage: bibformat [options] <entry> ... \n\n",
    "This program extracts one or more bibtex entries from a .bib file\n",
    "and formats them using a specified LaTeX bibstyle (default: natbib).\n",
    "Entries are selected by specifying one or more bibtex keys either\n",
    "as literal strings or as regular expressions enclosed in slashes.\n",
    "Note that formatting (except for latex) will not be perfect and may\n",
    "need some post-editing. If no .bib file is specified, all bibtex\n",
    "databases in the TeX search path will be loaded (currently, recursive\n",
    "search in subdirectories is NOT performed).\n\n",
    "Options:\n",
    " --latex format entries as TeX/LaTeX\n",
    " --html format entries as HTML\n",
    " --pod format entries as POD\n",
    " --md format entries as Markdown\n",
    " --text format entries as plain text\n",
    " --creole format entries in Creole-style wiki notation\n",
    " -D <file>, use bibtex database <file> (can be repeated)\n",
    " --database=<file>\n",
    " -o <file>, save formatted entries to <file> (default: stdout)\n",
    " --output=<file>\n",
    " -s <name>, use bibstyle <name> for formatting (default: natbib)\n",
    " --style=<name>\n",
    " -S, print each formatted entry on single text line\n",
    " --single-line\n",
    " -l, list matching bibtex entries (no output)\n",
    " --list\n",
    " --list-bst list all available bibstyles, then exit\n",
    " -q, suppress all non-fatal warnings\n",
    " --quiet\n",
    " -v, print some progress information\n",
    " --verbose\n",
    " -d, show debugging output\n",
    " --debug\n",
    " -h, this help page\n",
    " --help\n\n",
);
if (!$ok || $Help || !(@ARGV || $ListBST)) {
    die @usage;
}
## --list-bst: print the names of all bibstyles (.bst files) found below the
## directories of the bst search path reported by kpsewhich, then exit
if ($ListBST) {
    chomp(my $search_path = `kpsewhich --show-path=bst`);

    # path elements are separated by ":" (optionally followed by "!" markers);
    # strip trailing slashes and keep only existing directories
    my @bst_dirs;
    foreach my $dir (split /:!*/, $search_path) {
        $dir =~ s{/+$}{};
        push @bst_dirs, $dir
            if -d $dir;
    }

    # collect style names (file name without .bst extension), without repetitions
    my %is_style;
    find(sub {
        return unless /\.bst$/;
        (my $style_name = $_) =~ s{\.bst$}{};
        $is_style{$style_name} = 1;
    }, @bst_dirs);

    print join(" ", sort keys %is_style), "\n";
    exit 0;
}
## ---- determine the list of bibtex databases and load them into %BIB ----
@BIB = split /\s*,\s*/, join(",", @BIB);    # accepts both -D a.bib -D b.bib and -D a.bib,b.bib
if (not @BIB) {
    # no database specified: use every .bib file located directly in one of
    # the directories of the bib search path reported by kpsewhich
    chomp(my $search_path = `kpsewhich --show-path=bib`);
    foreach my $dir (split /:!*/, $search_path) {
        $dir =~ s{/+$}{};                   # strip trailing slashes
        next unless -d $dir;
        push @BIB, glob "$dir/*.bib";
    }
    # read most recent files first (so old copies don't hide newer duplicate entries)
    @BIB = sort { -M $a <=> -M $b } @BIB;
}

%BIB = ();          # bibtex key => full text of the entry
$bibsize = 0;       # total number of entries loaded so far
$duplicates = 0;    # number of duplicate entries reported by read_bib()
foreach $Bibfile (@BIB) {
    $duplicates += read_bib($Bibfile, \%BIB);
    # entries contributed by this file = growth of the hash since the previous
    # file (the running total must be remembered, not the previous difference)
    my $n_loaded = scalar(keys %BIB);
    my $n_new = $n_loaded - $bibsize;
    $bibsize = $n_loaded;
    print STDERR "Read bibtex database $Bibfile ($n_new entries)\n"
        if $Verbose;
}
if ($duplicates > 0 && !$Quiet) {
    printf STDERR "Warning: %d duplicate entries ignored %s\n", $duplicates,
        ($Verbose) ? "" : "(specify -v to display keys, -d for full entries)";
}
## ---- translate the command-line arguments into a list of bibtex keys ----
## An argument enclosed in slashes is a regular expression that is matched
## case-insensitively against all keys; any other argument is a literal key,
## for which an exact match is preferred over a unique case-insensitive match.
@Keys = ();
foreach my $spec (@ARGV) {
    if (my ($pattern) = $spec =~ m{^/(.*)/$}) {
        my $key_re = qr/$pattern/i;
        my @hits = grep { $_ =~ $key_re } keys %BIB;
        die "Error: no match for $spec in database @BIB\n"
            unless @hits;
        push @Keys, sort @hits;
    }
    elsif (exists $BIB{$spec}) {
        push @Keys, $spec;                  # exact match
    }
    else {
        my $key_re = qr/^\Q$spec\E$/i;      # fall back to case-insensitive comparison
        my @hits = grep { $_ =~ $key_re } keys %BIB;
        die "Error: key '$spec' has no exact match, but multiple case-insensitive ones.\n"
            if @hits > 1;
        die "Error: no match for key '$spec' in database @BIB\n"
            unless @hits;
        push @Keys, @hits;
    }
}

# keep only the first occurrence of every key (order is preserved)
my %already_selected;
@Keys = grep { !$already_selected{$_}++ } @Keys;

printf STDERR "%d matching entries found.\n", scalar(@Keys)
    if $Verbose;

if ($List) {
    # --list: print matching keys only, one per line
    print join("\n", @Keys), "\n"
        if @Keys;
    exit 0;
}
## ---- write the selected entries to the output file in the requested format ----
my $out = FileHandle->new("> $Output")      # "-" selects stdout
    or die "Can't open output file '$Output': $!";

# converters from the LaTeX-formatted entry (list of lines) to the other formats
my %converter_for = (
    tex      => sub { return @_ },          # LaTeX is passed through unchanged
    ascii    => \&latex2ascii,
    pod      => \&latex2pod,
    html     => \&latex2html,
    creole   => \&latex2creole,
    markdown => \&latex2markdown,
);

KEY:
foreach my $bibkey (@Keys) {
    if ($Format eq "bib") {
        # default: print the bibtex entry itself
        print $out $BIB{$bibkey}, "\n";
        next KEY;
    }

    # all other formats start from the entry as formatted by bibtex
    print STDERR "- $bibkey\n"
        if $Verbose;
    my @latex = format_entry_latex($bibkey, $Format eq "tex");

    my $converter = $converter_for{$Format}
        or die "Internal error: unsupported format '$Format'\n";
    my @formatted = $converter->(@latex);

    if ($SingleLine) {
        # formatted entry as one line
        print $out join(" ", @formatted), "\n";
    }
    else {
        # formatted entry as a block of lines, followed by a blank line
        print $out "$_\n" for @formatted;
        print $out "\n";
    }
}

$out->close;
print STDERR "Done.\n"
    if $Verbose;
exit 0;
## Run an external command through the shell with system().
##   $cmd - complete shell command line
## Without the global $Debug flag, stdout and stderr of the command are
## discarded; with $Debug, the command is echoed in an ASCII frame on STDERR
## and its output is left visible.
## Dies if the command could not be started, was terminated by a signal, or
## finished with a non-zero exit code.
sub sys_cmd {
    my $cmd = shift;
    my $len = length($cmd) + 14;    # width of the debug frame
    if ($Debug) {
        print STDERR " ", "_" x $len, "\n";
        print STDERR "/__ system: $cmd __\n";
    }
    else {
        $cmd .= ' >/dev/null 2>&1';
    }
    my $status = system $cmd;
    if ($status == -1) {
        # system() itself failed; this is the only case in which $! is meaningful
        die "Error: failed to execute command\n\t$cmd\nbecause $!\n";
    }
    if (my $signal = $status & 0x7f) {
        # low 7 bits of the wait status hold the number of the terminating signal
        die "Error: the system command\n\t$cmd\nwas terminated by signal $signal\n";
    }
    if (my $exit_code = $status >> 8) {
        die "Error: the system command\n\t$cmd\nfailed with exit code $exit_code\n";
    }
    print STDERR "\\", "_" x $len, "\n"
        if $Debug;
    return;
}
## Read a file into an array of chomped lines.
##   $file - path of the file to read
##   $aref - optional array reference; lines are appended to this array
##           (a new anonymous array is used if omitted)
## Returns the array reference.  Dies if the file cannot be opened.
## The caller's $_ is left untouched.
sub slurp_file {
    my ($file, $aref) = @_;
    $aref = []
        unless defined $aref;
    # explicit read mode: the file name is taken literally
    my $fh = FileHandle->new($file, "r")
        or die "Error: can't open file '$file': $!";
    # lexical loop variable instead of the global $_
    while (defined(my $line = <$fh>)) {
        chomp $line;
        push @$aref, $line;
    }
    $fh->close;
    return $aref;
}
## Write one or more strings to a file, each followed by a newline.
##   $file - path of the file to create (an existing file is overwritten)
##   @_    - remaining arguments: the lines to write (without trailing newline)
## Returns a true value.  Dies if the file cannot be created or if writing
## fails (buffered write errors are only reported when the file is closed).
sub write_file {
    my $file = shift;
    # explicit write mode: the file name is taken literally, including blanks
    my $fh = FileHandle->new($file, "w")
        or die "Error: can't create file '$file': $!";
    foreach my $line (@_) {
        print $fh "$line\n";
    }
    $fh->close
        or die "Error: can't write file '$file': $!";
    return 1;
}
## Format a bibtex entry in LaTeX by running bibtex on a fake .aux file.
##   $key             - key of the entry in the global %BIB database
##   $include_bibitem - if true, the \bibitem line of the entry is kept
## Uses the global $Style as bibstyle.  Returns the formatted entry as a list
## of chomped lines, with the thebibliography environment, \newblock commands
## and blank lines removed and indented continuation lines joined to the
## preceding line.
## Dies if a temporary file cannot be written or read, or if bibtex fails; the
## temporary files /tmp/bibformat.<pid>.* are removed in either case.
sub format_entry_latex {
    my $key = shift;
    my $include_bibitem = shift;
    my $basename = "/tmp/bibformat.$$";
    my $bibfile = "$basename.bib";
    my $auxfile = "$basename.aux";

    my @lines = ();
    my $ok = eval {
        write_file($bibfile, $BIB{$key});   # write .bib file with current entry only
        print STDERR "Wrote temporary file $bibfile\n"
            if $Debug;
        write_file($auxfile,                # write fake .aux file
                   "\\citation{$key}",
                   "\\bibstyle{$Style}",
                   "\\bibdata{$basename}"
                  );
        print STDERR "Wrote temporary file $auxfile\n"
            if $Debug;
        sys_cmd("bibtex $auxfile");         # execute bibtex and read resulting .bbl file
        slurp_file("$basename.bbl", \@lines);
        1;
    };
    my $error = $@;
    unlink glob("$basename.*");             # delete temporary files, also after a failure
    print STDERR "Removed temporary files $basename.*\n"
        if $Debug;
    die($error || "Error: formatting of entry '$key' failed\n")
        unless $ok;

    my @latex = ();
    my $in_skipped_bibitem = 0;             # true while inside a \bibitem that is being dropped
    foreach my $line (@lines) {             # remove bibliography environment, \bibitem, \newblock, and blank lines
        $line =~ s/\\newblock\s*//;
        next if $line =~ /^\s*\\(begin|end)\{thebibliography\}/;
        if ($line =~ /^\s*\\bibitem/ and not $include_bibitem) {
            $in_skipped_bibitem = 1;
            next;
        }
        next if $line =~ /^\s*$/;
        if ($line =~ s/^\s+//) {
            # indented line = continuation of the previous line
            next if $in_skipped_bibitem;    # wrapped part of the dropped \bibitem label
            if (@latex) {
                $latex[-1] .= " $line";
            }
            else {
                push @latex, $line;         # nothing to append to yet
            }
        }
        else {
            $in_skipped_bibitem = 0;
            push @latex, $line;
        }
    }
    return @latex;
}
## Replace LaTeX quotation-mark macros that LaTeX::ToUnicode does not handle yet
## by the corresponding Unicode characters.  The arguments are modified in
## place; an empty group {} directly after a macro is removed with it.
sub extra_conversions {
    my @quote_macros = (
        [ qr/\\guillem[oe]tleft(\{\})?/,  "«" ],
        [ qr/\\guillem[oe]tright(\{\})?/, "»" ],
        [ qr/\\guilsinglleft(\{\})?/,     "‹" ],
        [ qr/\\guilsinglright(\{\})?/,    "›" ],
        [ qr/\\quotedblbase(\{\})?/,      "„" ],
        [ qr/\\quotesinglbase(\{\})?/,    "‚" ],
    );
    for my $text (@_) {                     # $text aliases the caller's variable
        for my $rule (@quote_macros) {
            my ($macro, $character) = @$rule;
            $text =~ s/$macro/$character/g;
        }
    }
}
## Rewrite LaTeX font and URL markup as simple bracket tags that survive the
## LaTeX decoding step: [i(...)i] for {\em ...} and \textit{...},
## [b(...)b] for {\bf ...} and \textbf{...}, [url(...)url] for \url{...}.
## The arguments are modified in place.
sub convert_markup {
    # macro argument: any text without braces or with balanced nested braces
    my $argument = qr/([^{}]|$RE{balanced}{-parens => '{}'})*/;
    # rules are applied in this order
    my @rules = (
        [ qr/\{\\em\s+($argument)\}/,  "i"   ],
        [ qr/\{\\bf\s+($argument)\}/,  "b"   ],
        [ qr/\\textit\{($argument)\}/, "i"   ],
        [ qr/\\textbf\{($argument)\}/, "b"   ],
        [ qr/\\url\{($argument)\}/,    "url" ],
    );
    for my $text (@_) {                     # $text aliases the caller's variable
        for my $rule (@rules) {
            my ($pattern, $tag) = @$rule;
            $text =~ s/$pattern/[${tag}($1)${tag}]/g;
        }
    }
}
## Convert a LaTeX-formatted entry (list of lines) to plain text without markup.
## Returns a single string: the converted lines joined by blanks.
sub latex2ascii {
    my @plain = map {
        my $text = $_;                      # work on a copy of the input line
        extra_conversions($text);
        scalar convert($text);
    } @_;
    return join(" ", @plain);               # join into single line
}
## Convert a LaTeX-formatted entry (list of lines) to POD.
## Italics become I<...>, bold text B<...> and URLs L<...>.
## Returns the list of converted lines.
sub latex2pod {
    my @pod;
    for my $source_line (@_) {
        my $text = $source_line;            # work on a copy of the input line
        extra_conversions($text);
        convert_markup($text);
        $text = convert($text);
        # translate the intermediate bracket tags into POD formatting codes
        $text =~ s/\[i\((.*?)\)i\]/I<$1>/g;
        $text =~ s/\[b\((.*?)\)b\]/B<$1>/g;
        $text =~ s/\[url\((.*?)\)url\]/L<$1>/g;
        push @pod, $text;
    }
    return @pod;
}
## Convert a LaTeX-formatted entry (list of lines) to HTML.
## Italics become <i>...</i>, bold text <b>...</b>, URLs are turned into links
## and a tie (~) between two non-blank characters becomes a non-breaking
## space.  The characters < > & " of the text are encoded as entities.
## Returns the converted lines wrapped in a <p> ... </p> paragraph.
sub latex2html {
    my @lines = @_;
    foreach my $line (@lines) {
        extra_conversions($line);
        convert_markup($line);
        # protect ties with a placeholder so they survive the LaTeX decoding
        $line =~ s/(?<=\S)~+(?=\S)/[(nbsp)]/g;
        $line = convert($line);
        encode_entities($line, '<>&""');
        # convert simple markup to HTML (after entity encoding, so that the
        # generated tags and entities are not escaped themselves)
        $line =~ s/\[\(nbsp\)\]/&nbsp;/g;   # explicit entity instead of a raw whitespace character
        $line =~ s/\[i\((.*?)\)i\]/<i>$1<\/i>/g;
        $line =~ s/\[b\((.*?)\)b\]/<b>$1<\/b>/g;
        $line =~ s/\[url\((.*?)\)url\]/<a href="$1">$1<\/a>/g;
    }
    return("<p>", @lines, "</p>");
}
## Convert a LaTeX-formatted entry (list of lines) to Markdown.
## Italics become _..._, bold text **...**, URLs [url](url); en and em dashes
## are written as -- and ---.  Returns the list of converted lines.
sub latex2markdown {
    my @markdown;
    for my $source_line (@_) {
        my $text = $source_line;            # work on a copy of the input line
        extra_conversions($text);
        convert_markup($text);
        $text = convert($text);
        # translate the intermediate bracket tags into Markdown
        $text =~ s/\[i\((.*?)\)i\]/_$1_/g;
        $text =~ s/\[b\((.*?)\)b\]/**$1**/g;
        $text =~ s/\[url\((.*?)\)url\]/[$1]($1)/g;
        # ASCII replacements for dashes
        $text =~ s/–/--/g;
        $text =~ s/—/---/g;
        push @markdown, $text;
    }
    return(@markdown);
}
## Convert a LaTeX-formatted entry (list of lines) to Creole-style wiki syntax.
## Italics become //...//, bold text **...**, URLs [[...]]; en and em dashes
## are written as -- and ---.  Returns the list of converted lines.
sub latex2creole {
    my @lines = @_;
    foreach my $line (@lines) {
        extra_conversions($line);
        convert_markup($line);
        $line = convert($line);
        # translate the intermediate bracket tags into Creole markup
        $line =~ s{\[i\((.*?)\)i\]}{//$1//}g;   # Creole italics are //text//, without angle brackets
        $line =~ s/\[b\((.*?)\)b\]/**$1**/g;
        $line =~ s/\[url\((.*?)\)url\]/[[$1]]/g;
        # ASCII replacements for dashes
        $line =~ s/–/--/g;
        $line =~ s/—/---/g;
    }
    return(@lines);
}
## Read a .bib file and add its entries to a hash (bibtex key => text of entry).
##   $file - name or path of the database; it is located with the external
##           kpsewhich tool
##   $href - reference to the hash that collects the entries
## Returns the number of duplicate entries, i.e. entries whose key is already
## present in the hash.  Duplicates are skipped, so the entry that was read
## first for a key is the one that is kept.
## Dies if the file cannot be found or opened, or if a line that should start
## an entry does not have the form "@type{key,".
## Entries must be separated by blank lines; a blank line only ends an entry
## (or @comment block) if the text collected so far ends with a closing brace.
sub read_bib {
    my $file = shift;
    my $href = shift;
    my $n_duplicates = 0;
    my $cmd = "kpsewhich $file";
    $cmd .= " 2>/dev/null"
        unless $Debug;
    my $path = `$cmd`;
    die "Error: can't find bibliography file '$file'\n"
        unless $path;
    chomp $path;
    die "Error: can't find bibliography file '$file' at path\n\t$path\nreturned by kpsewhich tool\n"
        unless -f $path;
    my $fh = FileHandle->new($path, "r")
        or die "Error: can't open file '$path'\n";
    print STDERR "Reading bibtex database: $path\n"
        if $Debug;
  LINE:
    while (defined(my $line = <$fh>)) {
        next LINE                           # skip comments and empty lines
            if $line =~ /^\s*$/ or $line =~ /^\s*\%/;
        if ($line =~ /^\s*\@comment/i) {    # skip @comment entries
            my $comment = $line;
            while (defined($line = <$fh>)) {
                # the blank line after the closing brace ends the comment;
                # continue with the next entry instead of parsing the blank line
                next LINE
                    if $line =~ /^\s*$/ and $comment =~ /\}\s*$/;
                $comment .= $line;
            }
            last LINE;                      # end of file inside the comment
        }
        die "Format error: expected start of bibtex entry, but got:\n\t$line(line #$. of $path)\n"
            unless $line =~ /^\s*\@[A-Za-z]+\s*\{\s*(\S+)\s*,/;
        my $key = $1;                       # extract key of entry
        my $entry = $line;
      FIELD:
        while (defined($line = <$fh>)) {
            last FIELD                      # blank line (or EOF) marks end of entry
                if $line =~ /^\s*$/ and $entry =~ /\}\s*$/; # entry must end with closing brace, so blank lines inside an entry don't confuse us
            $entry .= $line;
        }
        if (exists $href->{$key}) {
            # duplicate key: keep the entry that was read first
            $n_duplicates++;
            if ($Verbose) {
                print STDERR "Warning: duplicate bibtex entry for key '$key' (ignored)\n";
                if ($Debug) {
                    print STDERR "=" x 60, "\n",
                        "First entry for '$key':\n", $href->{$key}, "-" x 60, "\n",
                        "This entry for '$key':\n", $entry, "=" x 60, "\n";
                }
            }
            next LINE;
        }
        $href->{$key} = $entry;
        print STDERR "INSERT BIBENTRY: $key\n"  # diagnostics must not end up in the formatted output
            if $Debug;
    }
    $fh->close;
    return $n_duplicates;
}