#!/usr/bin/perl
# Written by Dr. Ken Lunde ([email protected])
# Senior Computer Scientist 2, Adobe Inc.
# Version 2019-03-27
#
# This tool lists the Unicode code points in the specified OpenType
# font. By default, only the Unicode code points are listed, one per
# line, and if the OpenType font includes both a Format 4 (BMP-only
# UTF-16) and Format 12 (UTF-32) 'cmap' subtable, the latter is used.
#
# The "-g" command-line option will include the glyph names (for name-
# keyed fonts) or CIDs (for CID-keyed fonts) in a second column.
#
# The "-r" command-line option will turn the list of Unicode code
# points into ranges.
#
# If both command-line options are specified, the "-r" command-line
# option is ignored.
#
# Tool Dependencies: spot (AFDKO)
use strict;
use warnings;

# Print the usage line to STDERR and terminate with the given exit status.
sub usage {
    my ($status) = @_;
    print STDERR "Usage: unicode-list.pl [-g|-r] <font>\n";
    exit $status;
}

# Start "spot" with the given argument list and return a handle that reads
# its standard output. The list form of pipe open is used so that the font
# path reaches spot as a single argument without passing through a shell;
# file names containing spaces, quotes, "$" or backticks are therefore safe.
# NOTE(review): list-form pipe open on Windows presumably needs Perl 5.22
# or later -- confirm if older Windows Perls must be supported.
sub open_spot {
    my @spot_args = @_;
    open(my $fh, '-|', 'spot', @spot_args)
        or die "Cannot run spot (AFDKO): $!\n";
    return $fh;
}

# Format one run of consecutive code points: a single value when the run
# has length one, otherwise "FIRST-LAST". The result ends with a newline.
sub format_range {
    my ($first, $last) = @_;
    return $first eq $last ? "$first\n" : "$first-$last\n";
}

# --- Command-line handling ------------------------------------------------

my $addglyph = 0;    # -g: add the glyph name or CID as a second column
my $range    = 0;    # -r: collapse consecutive code points into ranges
my $file;            # path of the font to inspect (last non-option wins)

# Iterate on the argument count rather than on the truth of $ARGV[0], so
# that an argument such as "0" does not end option processing early.
while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg =~ /^-[huHU]/) {
        usage(0);
    } elsif ($arg =~ /^-[gG]/) {
        $addglyph = 1;
    } elsif ($arg =~ /^-[rR]/) {
        $range = 1;
    } else {
        $file = $arg;
    }
}

# "-g" takes precedence: ranges cannot carry a per-code-point glyph column.
$range = 0 if $addglyph;

# Without a font there is nothing to hand to spot.
usage(1) unless defined $file && length $file;
die "Cannot open \"$file\" input file!\n" unless -e $file;

# --- Pass 1: locate the Unicode 'cmap' subtable ---------------------------
#
# The "-tcmap=11" output is scanned for lines of the form
# "[ index]={...}" that identify a UTF-16/BMP or a UTF-32/UCS-4 subtable;
# the bracketed number is remembered as that subtable's index.

my ($utf16, $utf32);
my $spot = open_spot('-tcmap=11', $file);
while (defined(my $line = <$spot>)) {
    chomp $line;
    next unless $line =~ /=/;
    # Literal braces are escaped: an unescaped "{" in a pattern is
    # deprecated or rejected by newer Perl releases.
    if ($line =~ /^\[\s*(\d+)\]=\{.+(?:UTF-(16|32)).+\}$/) {
        my ($subtable, $bits) = ($1, $2);
        if ($bits == 16) {
            $utf16 = $subtable;
        } else {
            $utf32 = $subtable;
        }
    } elsif ($line =~ /^\[\s*(\d+)\]=\{Microsoft,Unicode\s(BMP|UCS[-]4).+\}$/) {
        my ($subtable, $kind) = ($1, $2);
        if ($kind eq "BMP") {
            $utf16 = $subtable;
        } else {
            $utf32 = $subtable;
        }
    }
}
# spot's exit status is deliberately not inspected here; its conventions
# are not known from this script.
close($spot);

# Prefer the UTF-32 subtable when both are present.
my $index = defined $utf32 ? $utf32 : $utf16;
die "No Unicode (UTF-16 or UTF-32) 'cmap' subtable found in \"$file\"\n"
    unless defined $index;

# --- Pass 2: list the code points of the chosen subtable ------------------
#
# Mapping lines are expected to look like "[HEX]=<glyph>", with an optional
# backslash before the glyph name or CID.

my $data = "";
my ($run_first, $run_last);    # current run of consecutive code points (-r)

$spot = open_spot('-tcmap=7', "-C$index", $file);
while (defined(my $line = <$spot>)) {
    chomp $line;
    next unless $line =~ /^\[([0-9A-F]+)\]=<\\?(.+)>/;
    my ($uni, $glyph) = ($1, $2);

    if ($range) {
        if (not defined $run_first) {
            # First code point seen: it opens the first run.
            $run_first = $run_last = $uni;
        } elsif (hex($uni) == hex($run_last) + 1) {
            # Directly follows the previous code point: extend the run.
            $run_last = $uni;
        } else {
            # Gap: emit the finished run and start a new one.
            $data .= format_range($run_first, $run_last);
            $run_first = $run_last = $uni;
        }
    } elsif ($addglyph) {
        $data .= "$uni\t$glyph\n";
    } else {
        $data .= "$uni\n";
    }
}
close($spot);

# Emit the last pending run, but only if at least one code point was seen;
# otherwise an empty listing would produce a stray blank line.
$data .= format_range($run_first, $run_last) if defined $run_first;

print STDOUT $data;