From 08ba10e26ca7c2679e62c46f1e2f4bd2d538b250 Mon Sep 17 00:00:00 2001 From: Alexander Mankuta Date: Wed, 6 Dec 2023 12:50:06 +0200 Subject: [PATCH] A whole bunch of CFF fixes ## Corrupted CFF index data There was a subtle bug in the CFF Index implementation that resulted in data corruption. In certain circumstances some items didn't get properly encoded. This happened when items were not previously accessed. This resulted, for instance, in missing glyphs. But only sometimes because indexes might still have contained data that shouldn't have been there. In combination with incorrect encoding (see further) this resulted in some glyphs still being rendered, sometimes even correctly. Along with the fix a rather large API change landed. This resulted in quite a big diff. ## Incorrect CFF encoding in subsets TTFunk used to reuse encoding from the original font. This mapping was incorrect for subset fonts which used not just a subset of glyphs but also a different encoding. A separate issue was that some fonts have empty CFF encoding. This incorrect mapping resulted in encoding that mapped all codes to glyph 0. This had an impact on Prawn in particular. The PDF spec explicitly says that CFF encoding is not to be used in OpenType fonts. The `cmap` table should directly index charstrings in the CFF table. Despite this PDF renderers still use CFF encoding to retrieve glyphs. So TTFunk has to discard the original CFF encoding and supply its own.
--- CHANGELOG.md | 35 +++++ lib/ttfunk/otf_encoder.rb | 11 +- lib/ttfunk/subset/code_page.rb | 1 + lib/ttfunk/table/cff.rb | 12 +- lib/ttfunk/table/cff/charset.rb | 31 +++-- lib/ttfunk/table/cff/charstring.rb | 4 - lib/ttfunk/table/cff/charstrings_index.rb | 18 +-- lib/ttfunk/table/cff/encoding.rb | 46 +++---- lib/ttfunk/table/cff/fd_selector.rb | 20 +-- lib/ttfunk/table/cff/font_dict.rb | 6 +- lib/ttfunk/table/cff/font_index.rb | 23 ++-- lib/ttfunk/table/cff/index.rb | 137 +++++++++++--------- lib/ttfunk/table/cff/one_based_index.rb | 2 +- lib/ttfunk/table/cff/private_dict.rb | 4 +- lib/ttfunk/table/cff/subr_index.rb | 4 +- lib/ttfunk/table/cff/top_dict.rb | 18 ++- lib/ttfunk/table/cff/top_index.rb | 15 ++- spec/ttfunk/table/cff/charset_spec.rb | 33 +++-- spec/ttfunk/table/cff/encoding_spec.rb | 31 ++--- spec/ttfunk/table/cff/fd_selector_spec.rb | 20 +-- spec/ttfunk/table/cff/font_dict_spec.rb | 6 +- spec/ttfunk/table/cff/font_index_spec.rb | 2 +- spec/ttfunk/table/cff/index_spec.rb | 143 ++++++++++++++++++--- spec/ttfunk/table/cff/private_dict_spec.rb | 4 +- spec/ttfunk/table/cff/top_dict_spec.rb | 5 +- spec/ttfunk/table/cff/top_index_spec.rb | 2 +- 26 files changed, 401 insertions(+), 232 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 546ede4a..113c68ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,41 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/). ## [Unreleased] +### Fixed + +* Corrupted CFF index data + + there was a subtle bug in cff index implementation that resulted in + a data corruption. in certain circumstances some items didn't get + properly encoded. this happened when items were not previously accessed. + + this resulted, for instance, in missing glyphs. but only sometimes + because indexes might've still contain data that shouldn't've been + there. in combination with incorrect encoding (see further) this + resulted in some glyphs still being rendered, sometimes even correctly. 
+ + along with the fix a rather large api change landed. this resulted in + quite a big diff. + + Alexander Mankuta + +* Incorrect CFF encoding in subsets + + TTFunk used to reuse encoding from the original font. This mapping was + incorrect for subset fonts which used not just a subset of glyphs but + also a different encoding. + + A separate issue was that some fonts have empty CFF encoding. This + incorrect mapping resulted in encoding that mapped all codes to glyph 0. + + This had impact on Prawn in particular. PDF spec explicitly says that + CFF encoding is not to be used in OpenType fonts. `cmap` table should + directly index charstrings in the CFF table. Despite this PDF renderers + still use CFF encoding to retrieve glyphs. So TTFunk has to discard the + original CFF encoding and supply its own. + + Alexander Mankuta + ## 1.7.0 ### Changes diff --git a/lib/ttfunk/otf_encoder.rb b/lib/ttfunk/otf_encoder.rb index 408e1a52..3bddfde6 100644 --- a/lib/ttfunk/otf_encoder.rb +++ b/lib/ttfunk/otf_encoder.rb @@ -27,7 +27,7 @@ def base_table end def cff_table - @cff_table ||= original.cff.encode(new_to_old_glyph, old_to_new_glyph) + @cff_table ||= original.cff.encode(subset) end def vorg_table @@ -48,14 +48,5 @@ def optimal_table_order (tables.keys - ['DSIG'] - OPTIMAL_TABLE_ORDER) + ['DSIG'] end - - def collect_glyphs(glyph_ids) - # CFF top indexes are supposed to contain only one font, although they're - # capable of supporting many (no idea why this is true, maybe for CFF - # v2??). Anyway it's cool to do top_index[0], don't worry about it. 
- glyph_ids.each_with_object({}) do |id, h| - h[id] = original.cff.top_index[0].charstrings_index[id] - end - end end end diff --git a/lib/ttfunk/subset/code_page.rb b/lib/ttfunk/subset/code_page.rb index b943b18c..d2ad9976 100644 --- a/lib/ttfunk/subset/code_page.rb +++ b/lib/ttfunk/subset/code_page.rb @@ -40,6 +40,7 @@ def initialize(original, code_page, encoding) def to_unicode_map self.class.unicode_mapping_for(encoding) + .select { |codepoint, _unicode| @subset[codepoint] } end def use(character) diff --git a/lib/ttfunk/table/cff.rb b/lib/ttfunk/table/cff.rb index 3f40a082..4ecd459b 100644 --- a/lib/ttfunk/table/cff.rb +++ b/lib/ttfunk/table/cff.rb @@ -31,18 +31,18 @@ def tag TAG end - def encode(new_to_old, old_to_new) + def encode(subset) EncodedString.new do |result| - sub_tables = [ + result.concat( header.encode, name_index.encode, - top_index.encode(&:encode), + top_index.encode, string_index.encode, global_subr_index.encode - ] + ) - sub_tables.each { |tb| result << tb } - top_index[0].finalize(result, new_to_old, old_to_new) + charmap = subset.new_cmap_table[:charmap] + top_index[0].finalize(result, charmap) end end diff --git a/lib/ttfunk/table/cff/charset.rb b/lib/ttfunk/table/cff/charset.rb index 3c78441f..fcaded54 100644 --- a/lib/ttfunk/table/cff/charset.rb +++ b/lib/ttfunk/table/cff/charset.rb @@ -35,7 +35,7 @@ def strings_for_charset_id(charset_id) end attr_reader :entries, :length - attr_reader :top_dict, :format, :count, :offset_or_id + attr_reader :top_dict, :format, :items_count, :offset_or_id def initialize(top_dict, file, offset_or_id = nil, length = nil) @top_dict = top_dict @@ -44,7 +44,7 @@ def initialize(top_dict, file, offset_or_id = nil, length = nil) if offset super(file, offset, length) else - @count = self.class.strings_for_charset_id(offset_or_id).size + @items_count = self.class.strings_for_charset_id(offset_or_id).size end end @@ -52,7 +52,7 @@ def each return to_enum(__method__) unless block_given? 
# +1 adjusts for the implicit .notdef glyph - (count + 1).times { |i| yield self[i] } + (items_count + 1).times { |i| yield self[i] } end def [](glyph_id) @@ -73,13 +73,18 @@ def offset end end - # mapping is new -> old glyph ids - def encode(mapping) + def encode(charmap) # no offset means no charset was specified (i.e. we're supposed to # use a predefined charset) so there's nothing to encode return '' unless offset - sids = mapping.keys.sort.map { |new_gid| sid_for(mapping[new_gid]) } + sids = + charmap + .values + .reject { |mapping| mapping[:new].zero? } + .sort_by { |mapping| mapping[:new] } + .map { |mapping| sid_for(mapping[:old]) } + ranges = TTFunk::BinUtils.rangify(sids) range_max = ranges.map(&:last).max @@ -138,7 +143,7 @@ def find_string(sid) idx = sid - 390 - if idx < file.cff.string_index.count + if idx < file.cff.string_index.items_count file.cff.string_index[idx] end else @@ -153,23 +158,23 @@ def parse! case format_sym when :array_format - @count = top_dict.charstrings_index.count - 1 - @length = count * element_width + @items_count = top_dict.charstrings_index.items_count - 1 + @length = @items_count * element_width @entries = OneBasedArray.new(read(length, 'n*')) when :range_format8, :range_format16 # The number of ranges is not explicitly specified in the font. # Instead, software utilizing this data simply processes ranges # until all glyphs in the font are covered. 
- @count = 0 + @items_count = 0 @entries = [] @length = 0 - until count >= top_dict.charstrings_index.count - 1 + until @items_count >= top_dict.charstrings_index.items_count - 1 @length += 1 + element_width sid, num_left = read(element_width, element_format) - entries << (sid..(sid + num_left)) - @count += num_left + 1 + @entries << (sid..(sid + num_left)) + @items_count += num_left + 1 end end end diff --git a/lib/ttfunk/table/cff/charstring.rb b/lib/ttfunk/table/cff/charstring.rb index d7cb05ff..c3b58e89 100644 --- a/lib/ttfunk/table/cff/charstring.rb +++ b/lib/ttfunk/table/cff/charstring.rb @@ -91,10 +91,6 @@ def render(x: 0, y: 0, font_size: 72) ) end - def encode - raw - end - private def parse! diff --git a/lib/ttfunk/table/cff/charstrings_index.rb b/lib/ttfunk/table/cff/charstrings_index.rb index be86824a..e3be83b8 100644 --- a/lib/ttfunk/table/cff/charstrings_index.rb +++ b/lib/ttfunk/table/cff/charstrings_index.rb @@ -11,21 +11,21 @@ def initialize(top_dict, *remaining_args) @top_dict = top_dict end - def [](index) - entry_cache[index] ||= TTFunk::Table::Cff::Charstring.new( + private + + def decode_item(index, _offset, _length) + TTFunk::Table::Cff::Charstring.new( index, top_dict, font_dict_for(index), super ) end - # gets passed a mapping of new => old glyph ids - def encode(mapping) - super() do |_entry, index| - self[mapping[index]].encode if mapping.include?(index) - end + def encode_items(charmap) + charmap + .reject { |code, mapping| mapping[:new].zero? && !code.zero? } + .sort_by { |_code, mapping| mapping[:new] } + .map { |(_code, mapping)| items[mapping[:old]] } end - private - def font_dict_for(index) # only CID-keyed fonts contain an FD selector and font dicts if top_dict.is_cid_font? 
diff --git a/lib/ttfunk/table/cff/encoding.rb b/lib/ttfunk/table/cff/encoding.rb index bd4c76b3..6ab0ba3f 100644 --- a/lib/ttfunk/table/cff/encoding.rb +++ b/lib/ttfunk/table/cff/encoding.rb @@ -22,7 +22,7 @@ def codes_for_encoding_id(encoding_id) end end - attr_reader :top_dict, :format, :count, :offset_or_id + attr_reader :top_dict, :format, :items_count, :offset_or_id def initialize(top_dict, file, offset_or_id = nil, length = nil) @top_dict = top_dict @@ -30,8 +30,10 @@ def initialize(top_dict, file, offset_or_id = nil, length = nil) if offset super(file, offset, length) + @supplemental = format >> 7 == 1 else - @count = self.class.codes_for_encoding_id(offset_or_id).size + @items_count = self.class.codes_for_encoding_id(offset_or_id).size + @supplemental = false end end @@ -39,7 +41,7 @@ def each return to_enum(__method__) unless block_given? # +1 adjusts for the implicit .notdef glyph - (count + 1).times { |i| yield self[i] } + (items_count + 1).times { |i| yield self[i] } end def [](glyph_id) @@ -62,16 +64,18 @@ def offset end end - def encode(new_to_old, old_to_new) - # no offset means no encoding was specified (i.e. we're supposed to - # use a predefined encoding) so there's nothing to encode - return '' unless offset - return encode_supplemental(new_to_old, old_to_new) if supplemental? + def encode(charmap) + # Any subset encoding is all but guaranteed to be different from the + # standard encoding so we don't even attempt to see if it matches. We + # assume it's different and just encode it anew. + + return encode_supplemental(charmap) if supplemental? codes = - new_to_old.keys.sort.map do |new_gid| - code_for(new_to_old[new_gid]) - end + charmap + .reject { |_code, mapping| mapping[:new].zero? } + .sort_by { |_code, mapping| mapping[:new] } + .map { |(code, _m)| code } ranges = TTFunk::BinUtils.rangify(codes) @@ -95,18 +99,16 @@ def encode(new_to_old, old_to_new) def supplemental? 
# high-order bit set to 1 indicates supplemental encoding - @format >> 7 == 1 + @supplemental end private - def encode_supplemental(_new_to_old, old_to_new) + def encode_supplemental(charmap) new_entries = - @entries.each_with_object({}) do |(code, old_gid), ret| - if (new_gid = old_to_new[old_gid]) - ret[code] = new_gid - end - end + charmap + .reject { |_code, mapping| mapping[:new].zero? } + .transform_values { |mapping| mapping[:new] } result = [format_int(:supplemental), new_entries.size].pack('CC') fmt = element_format(:supplemental) @@ -150,22 +152,22 @@ def parse! case format_sym when :array_format - @count = entry_count + @items_count = entry_count @entries = OneBasedArray.new(read(length, 'C*')) when :range_format @entries = [] - @count = 0 + @items_count = 0 entry_count.times do code, num_left = read(element_width, element_format) @entries << (code..(code + num_left)) - @count += num_left + 1 + @items_count += num_left + 1 end when :supplemental @entries = {} - @count = entry_count + @items_count = entry_count entry_count.times do code, glyph = read(element_width, element_format) diff --git a/lib/ttfunk/table/cff/fd_selector.rb b/lib/ttfunk/table/cff/fd_selector.rb index 22fde67f..639465b7 100644 --- a/lib/ttfunk/table/cff/fd_selector.rb +++ b/lib/ttfunk/table/cff/fd_selector.rb @@ -12,7 +12,7 @@ class FdSelector < TTFunk::SubTable RANGE_ENTRY_SIZE = 3 ARRAY_ENTRY_SIZE = 1 - attr_reader :top_dict, :count, :entries, :n_glyphs + attr_reader :top_dict, :items_count, :entries, :n_glyphs def initialize(top_dict, file, offset, length = nil) @top_dict = top_dict @@ -48,16 +48,16 @@ def [](glyph_id) def each return to_enum(__method__) unless block_given? 
- count.times { |i| yield self[i] } + items_count.times { |i| yield self[i] } end - # mapping is new -> old glyph ids - def encode(mapping) + def encode(charmap) # get list of [new_gid, fd_index] pairs new_indices = - mapping.keys.sort.map do |new_gid| - [new_gid, self[mapping[new_gid]]] - end + charmap + .reject { |code, mapping| mapping[:new].zero? && !code.zero? } + .sort_by { |_code, mapping| mapping[:new] } + .map { |(_code, mapping)| [mapping[:new], self[mapping[:old]]] } ranges = rangify_gids(new_indices) total_range_size = ranges.size * RANGE_ENTRY_SIZE @@ -108,10 +108,10 @@ def parse! case format_sym when :array_format - @n_glyphs = top_dict.charstrings_index.count + @n_glyphs = top_dict.charstrings_index.items_count data = io.read(n_glyphs) @length += data.bytesize - @count = data.bytesize + @items_count = data.bytesize @entries = data.bytes when :range_format @@ -135,7 +135,7 @@ def parse! last_start_gid, last_fd_index = ranges.last @entries << [(last_start_gid...(n_glyphs + 1)), last_fd_index] - @count = entries.reduce(0) { |sum, entry| sum + entry.first.size } + @items_count = entries.reduce(0) { |sum, entry| sum + entry.first.size } end end diff --git a/lib/ttfunk/table/cff/font_dict.rb b/lib/ttfunk/table/cff/font_dict.rb index 39e4e8ec..5ac1c349 100644 --- a/lib/ttfunk/table/cff/font_dict.rb +++ b/lib/ttfunk/table/cff/font_dict.rb @@ -15,7 +15,7 @@ def initialize(top_dict, file, offset, length = nil) super(file, offset, length) end - def encode(_mapping) + def encode EncodedString.new do |result| each do |operator, operands| case OPERATOR_CODES[operator] @@ -30,8 +30,8 @@ def encode(_mapping) end end - def finalize(new_cff_data, mapping) - encoded_private_dict = private_dict.encode(mapping) + def finalize(new_cff_data) + encoded_private_dict = private_dict.encode encoded_offset = encode_integer32(new_cff_data.length) encoded_length = encode_integer32(encoded_private_dict.length) diff --git a/lib/ttfunk/table/cff/font_index.rb 
b/lib/ttfunk/table/cff/font_index.rb index c5f0dc4c..d282ba58 100644 --- a/lib/ttfunk/table/cff/font_index.rb +++ b/lib/ttfunk/table/cff/font_index.rb @@ -11,18 +11,21 @@ def initialize(top_dict, file, offset, length = nil) @top_dict = top_dict end - def [](index) - entry_cache[index] ||= - begin - start, finish = absolute_offsets_for(index) - TTFunk::Table::Cff::FontDict.new( - top_dict, file, start, (finish - start) + 1 - ) - end + def finalize(new_cff_data) + each { |font_dict| font_dict.finalize(new_cff_data) } end - def finalize(new_cff_data, mapping) - each { |font_dict| font_dict.finalize(new_cff_data, mapping) } + private + + def decode_item(_index, offset, length) + TTFunk::Table::Cff::FontDict.new( + top_dict, file, offset, length + ) + end + + def encode_items(*) + # Re-encode font dicts + map(&:encode) end end end diff --git a/lib/ttfunk/table/cff/index.rb b/lib/ttfunk/table/cff/index.rb index d570f4d4..fb0dfec9 100644 --- a/lib/ttfunk/table/cff/index.rb +++ b/lib/ttfunk/table/cff/index.rb @@ -6,72 +6,80 @@ class Cff < TTFunk::Table class Index < TTFunk::SubTable include Enumerable - # number of objects in the index - attr_reader :count - - # offset array element size - attr_reader :offset_size - - attr_reader :raw_offset_length, :offsets, :raw_data - attr_reader :data_start_pos - def [](index) - entry_cache[index] ||= raw_data[ - offsets[index]...offsets[index + 1] - ] + return if index >= items_count + + entry_cache[index] ||= + decode_item( + index, + data_reference_offset + offsets[index], + offsets[index + 1] - offsets[index] + ) end - def each - return to_enum(__method__) unless block_given? + def each(&block) + return to_enum(__method__) unless block - count.times { |i| yield self[i] } + items_count.times do |i| + yield self[i] + end end - def encode - result = EncodedString.new + def items_count + items.length + end - entries = - each_with_object([]).with_index do |(entry, ret), index| - new_entry = block_given? ? 
yield(entry, index) : entry - ret << new_entry if new_entry - end + def encode(*args) + new_items = encode_items(*args) - # "An empty INDEX is represented by a count field with a 0 value and - # no additional fields. Thus, the total size of an empty INDEX is 2 - # bytes." - result << [entries.size].pack('n') - return result if entries.empty? + if new_items.empty? + return [0].pack('n') + end - offset_size = (Math.log2(entries.size) / 8.0).round + 1 - result << [offset_size].pack('C') - data_offset = 1 + if new_items.length > 0xffff + raise Error, 'Too many items in a CFF index' + end - data = EncodedString.new + offsets_array = + new_items + .each_with_object([1]) do |item, offsets| + offsets << offsets.last + item.length + end - entries.each do |entry| - result << encode_offset(data_offset, offset_size) - data << entry - data_offset += entry.length - end + offset_size = (offsets_array.last.bit_length / 8.0).ceil - unless entries.empty? - result << encode_offset(data_offset, offset_size) - end + offsets_array.map! { |offset| encode_offset(offset, offset_size) } - result << data + EncodedString.new.concat( + [new_items.length, offset_size].pack('nC'), + *offsets_array, + *new_items + ) end private + attr_reader :items, :offsets, :data_reference_offset + def entry_cache @entry_cache ||= {} end - def absolute_offsets_for(index) - [ - table_offset + offsets[index] + data_start_pos, - table_offset + offsets[index + 1] + data_start_pos - ] + # Returns an array of EncodedString elements (plain strings, + # placeholders, or EncodedString instances). Each element is supposed to + # represent an encoded item. + # + # This is the place to do all the filtering, reordering, or individual + # item encoding. + # + # It gets all the arguments `encode` gets. 
+ def encode_items(*) + items + end + + # By default do nothing + def decode_item(index, _offset, _length) + items[index] end def encode_offset(offset, offset_size) @@ -88,35 +96,38 @@ def encode_offset(offset, offset_size) end def parse! - @count = read(2, 'n').first + @entry_cache = {} - if count.zero? + num_entries = read(2, 'n').first + + if num_entries.zero? @length = 2 - @data = [] + @items = [] return end - @offset_size = read(1, 'C').first + offset_size = read(1, 'C').first - # read an extra offset_size bytes to get rid of the first offset, - # which is always 1 - io.read(offset_size) + @offsets = + Array.new(num_entries + 1) do + unpack_offset(io.read(offset_size), offset_size) + end - @raw_offset_length = count * offset_size - raw_offsets = io.read(raw_offset_length) + @data_reference_offset = table_offset + 3 + offsets.length * offset_size - 1 - @offsets = [0] + Array.new(count) do |idx| - start = offset_size * idx - finish = offset_size * (idx + 1) - unpack_offset(raw_offsets[start...finish]) - 1 - end + @length = + 2 + # num entries + 1 + # offset size + offsets.length * offset_size + # offsets + offsets.last - 1 # items - @raw_data = io.read(offsets.last) - @data_start_pos = 3 + offset_size + raw_offset_length - @length = data_start_pos + raw_data.size + @items = + offsets.each_cons(2).map do |offset, next_offset| + io.read(next_offset - offset) + end end - def unpack_offset(offset_data) + def unpack_offset(offset_data, offset_size) padding = "\x00" * (4 - offset_size) (padding + offset_data).unpack1('N') end diff --git a/lib/ttfunk/table/cff/one_based_index.rb b/lib/ttfunk/table/cff/one_based_index.rb index 147e287a..f2ebe4a2 100644 --- a/lib/ttfunk/table/cff/one_based_index.rb +++ b/lib/ttfunk/table/cff/one_based_index.rb @@ -11,7 +11,7 @@ class OneBasedIndex def_delegators :base_index, :each, :table_offset, - :count, + :items_count, :length, :encode diff --git a/lib/ttfunk/table/cff/private_dict.rb b/lib/ttfunk/table/cff/private_dict.rb index 
1dc0a06e..ef364290 100644 --- a/lib/ttfunk/table/cff/private_dict.rb +++ b/lib/ttfunk/table/cff/private_dict.rb @@ -18,7 +18,7 @@ class PrivateDict < TTFunk::Table::Cff::Dict # @TODO: use mapping to determine which subroutines are still used. # For now, just encode them all. - def encode(_mapping) + def encode EncodedString.new do |result| each do |operator, operands| case OPERATOR_CODES[operator] @@ -72,7 +72,7 @@ def nominal_width_x private def encode_subrs - EncodedString.new.tap do |result| + EncodedString.new do |result| result << Placeholder.new( :"subrs_#{@table_offset}", length: PLACEHOLDER_LENGTH ) diff --git a/lib/ttfunk/table/cff/subr_index.rb b/lib/ttfunk/table/cff/subr_index.rb index 6cf8065e..6a1ec73a 100644 --- a/lib/ttfunk/table/cff/subr_index.rb +++ b/lib/ttfunk/table/cff/subr_index.rb @@ -5,9 +5,9 @@ class Table class Cff < TTFunk::Table class SubrIndex < TTFunk::Table::Cff::Index def bias - if count < 1240 + if items.length < 1240 107 - elsif count < 33_900 + elsif items.length < 33_900 1131 else 32_768 diff --git a/lib/ttfunk/table/cff/top_dict.rb b/lib/ttfunk/table/cff/top_dict.rb index 40148edf..50e0a29d 100644 --- a/lib/ttfunk/table/cff/top_dict.rb +++ b/lib/ttfunk/table/cff/top_dict.rb @@ -47,16 +47,16 @@ def encode(*) end end - def finalize(new_cff_data, new_to_old, old_to_new) + def finalize(new_cff_data, charmap) if charset finalize_subtable( - new_cff_data, :charset, charset.encode(new_to_old) + new_cff_data, :charset, charset.encode(charmap) ) end if encoding finalize_subtable( - new_cff_data, :encoding, encoding.encode(new_to_old, old_to_new) + new_cff_data, :encoding, encoding.encode(charmap) ) end @@ -64,7 +64,7 @@ def finalize(new_cff_data, new_to_old, old_to_new) finalize_subtable( new_cff_data, :charstrings_index, - charstrings_index.encode(new_to_old, &:encode) + charstrings_index.encode(charmap) ) end @@ -72,24 +72,22 @@ def finalize(new_cff_data, new_to_old, old_to_new) finalize_subtable( new_cff_data, :font_index, - 
font_index.encode do |font_dict| - font_dict.encode(new_to_old) - end + font_index.encode ) - font_index.finalize(new_cff_data, new_to_old) + font_index.finalize(new_cff_data) end if font_dict_selector finalize_subtable( new_cff_data, :font_dict_selector, - font_dict_selector.encode(new_to_old) + font_dict_selector.encode(charmap) ) end if private_dict - encoded_private_dict = private_dict.encode(new_to_old) + encoded_private_dict = private_dict.encode encoded_offset = encode_integer32(new_cff_data.length) encoded_length = encode_integer32(encoded_private_dict.length) diff --git a/lib/ttfunk/table/cff/top_index.rb b/lib/ttfunk/table/cff/top_index.rb index 08efbc4d..8c120c97 100644 --- a/lib/ttfunk/table/cff/top_index.rb +++ b/lib/ttfunk/table/cff/top_index.rb @@ -4,12 +4,15 @@ module TTFunk class Table class Cff < TTFunk::Table class TopIndex < TTFunk::Table::Cff::Index - def [](index) - entry_cache[index] ||= - begin - start, finish = absolute_offsets_for(index) - TTFunk::Table::Cff::TopDict.new(file, start, (finish - start) + 1) - end + private + + def decode_item(_index, offset, length) + TTFunk::Table::Cff::TopDict.new(file, offset, length) + end + + def encode_items(*) + # Re-encode the top dict + map(&:encode) end end end diff --git a/spec/ttfunk/table/cff/charset_spec.rb b/spec/ttfunk/table/cff/charset_spec.rb index 175ce3f7..91c609e5 100644 --- a/spec/ttfunk/table/cff/charset_spec.rb +++ b/spec/ttfunk/table/cff/charset_spec.rb @@ -28,8 +28,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. - expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -58,8 +58,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. 
- expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -85,8 +85,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. - expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -115,14 +115,21 @@ describe '#encode' do let(:font_path) { test_font('NotoSansCJKsc-Thin', :otf) } - let(:encoded) { charset.encode(subset_mapping) } + let(:encoded) { charset.encode(charmap) } context 'when the subset contains non-sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # the idea here is to demonstrate that non-sequental SIDs can sometimes # be more compactly represented as individual elements as opposed to # ranges (supposed to be new => old glyph IDs) - { 1 => 1, 4 => 4, 10 => 10, 14 => 14, 15 => 15, 21 => 21 } + { + 0x20 => { old: 1, new: 1 }, + 0x23 => { old: 4, new: 4 }, + 0x29 => { old: 10, new: 10 }, + 0x2d => { old: 14, new: 14 }, + 0x2e => { old: 15, new: 15 }, + 0x34 => { old: 21, new: 21 } + } end it 'encodes using the array-based format' do @@ -141,10 +148,10 @@ end context 'when the subset contains few sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # i.e. 
the first 20 characters, in order # (supposed to be new => old glyph IDs) - Hash[(1..20).map { |i| [i, i] }] + Hash[(1..20).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the 8-bit range-based format' do @@ -158,10 +165,10 @@ end context 'when the subset contains many sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # we want to get a 2-byte range to demonstrate the 16-bit format # (supposed to be new => old glyph IDs) - Hash[(1..2**10).map { |i| [i, i] }] + Hash[(1..2**10).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the 16-bit range-based format' do diff --git a/spec/ttfunk/table/cff/encoding_spec.rb b/spec/ttfunk/table/cff/encoding_spec.rb index 9dd80d21..710a4cd5 100644 --- a/spec/ttfunk/table/cff/encoding_spec.rb +++ b/spec/ttfunk/table/cff/encoding_spec.rb @@ -32,14 +32,21 @@ describe '#encode' do let(:font_path) { test_font('AlbertTextBold', :otf) } - let(:encoded) { encoding.encode(subset_mapping, subset_mapping.invert) } + let(:encoded) { encoding.encode(charmap) } context 'when the subset contains non-sequential codes' do - let(:subset_mapping) do + let(:charmap) do # the idea here is to demonstrate that non-sequental codes can # sometimes be more compactly represented as individual elements # as opposed to ranges (supposed to be new => old glyph IDs) - { 1 => 1, 4 => 4, 10 => 10, 14 => 14, 15 => 15, 21 => 21 } + { + 0x20 => { old: 1, new: 1 }, + 0x23 => { old: 4, new: 4 }, + 0x29 => { old: 10, new: 10 }, + 0x2d => { old: 14, new: 14 }, + 0x2e => { old: 15, new: 15 }, + 0x34 => { old: 21, new: 13 } + } end it 'encodes using the array-based format' do @@ -48,13 +55,7 @@ it 'encodes correctly' do # format (0x00), codes (1 byte each) - expect(encoded.bytes).to eq( - [ - 0, - subset_mapping.count, - *subset_mapping.map { |old_gid, _| encoding[old_gid] } - ] - ) + expect(encoded).to eq("\x00\x06\x20\x23\x29\x34\x2d\x2e") end # unfortunately I haven't been able to find an example font that defines @@ 
-83,16 +84,16 @@ font.cff.top_index[0], file, fake_offset, encoded.length ) - expect(new_encoding.to_a).to eq([0, 26, 29, 35, 39, 40, 46]) + expect(new_encoding.to_a).to eq([0, 0x20, 0x23, 0x29, 0x34, 0x2d, 0x2e]) end # rubocop: enable RSpec/AnyInstance end context 'when the subset contains sequential codes' do - let(:subset_mapping) do + let(:charmap) do # i.e. the first 20 characters, in order # (supposed to be new => old glyph IDs) - Hash[(1..20).map { |i| [i, i] }] + Hash[(1..20).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the range-based format' do @@ -100,9 +101,9 @@ end it 'encodes correctly' do - # format (0x01), count (0x01, start code (0x1D, i.e. 26), + # format (0x01), count (0x01, start code (0x21, i.e. 33), # rest (0x13, i.e. 19) - expect(encoded.bytes).to eq([0x01, 0x01, 0x1A, 0x13]) + expect(encoded.bytes).to eq([0x01, 0x01, 0x21, 0x13]) end end end diff --git a/spec/ttfunk/table/cff/fd_selector_spec.rb b/spec/ttfunk/table/cff/fd_selector_spec.rb index c6adb0a8..e83bde77 100644 --- a/spec/ttfunk/table/cff/fd_selector_spec.rb +++ b/spec/ttfunk/table/cff/fd_selector_spec.rb @@ -24,7 +24,7 @@ instance_double( TTFunk::Table::Cff::CharstringsIndex, :charstrings_index, - count: entry_count + items_count: entry_count ) end let(:fd_selector) do @@ -38,7 +38,7 @@ end it 'includes entries for all the glyphs in the font' do - expect(fd_selector.count).to eq(entry_count) + expect(fd_selector.items_count).to eq(entry_count) end it 'parses the entries correctly' do @@ -46,8 +46,12 @@ end it 'encodes correctly' do - mapping = { 1 => 1, 3 => 3, 5 => 5 } - expect(fd_selector.encode(mapping)).to eq("\x00\x02\x04\x06") + charmap = { + 0x20 => { old: 1, new: 1 }, + 0x22 => { old: 3, new: 3 }, + 0x24 => { old: 5, new: 5 } + } + expect(fd_selector.encode(charmap)).to eq("\x00\x02\x04\x06") end end @@ -56,8 +60,8 @@ it 'includes entries for all the glyphs in the font' do # the charstrings index doesn't contain an entry for the .notdef glyph - 
expect(fd_selector.count).to( - eq(font.cff.top_index[0].charstrings_index.count + 1) + expect(fd_selector.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count + 1) ) end @@ -78,8 +82,8 @@ end it 'encodes correctly' do - mapping = Hash[(0..15).map { |i| [i, i] }] - result = fd_selector.encode(mapping) + charmap = Hash[(0..15).map { |i| [i, { old: i, new: i }] }] + result = fd_selector.encode(charmap) expect(result).to( # fmt | count | range 1 | range 2 | n glyphs eq("\x03\x00\x02\x00\x00\x05\x00\x01\x0F\x00\x10") diff --git a/spec/ttfunk/table/cff/font_dict_spec.rb b/spec/ttfunk/table/cff/font_dict_spec.rb index 153e0773..6a29b029 100644 --- a/spec/ttfunk/table/cff/font_dict_spec.rb +++ b/spec/ttfunk/table/cff/font_dict_spec.rb @@ -24,11 +24,11 @@ end it 'produces an encoded dict that can be re-parsed successfully' do - result = font_dict.encode({}) + result = font_dict.encode dict_length = result.length - private_dict_length = font_dict.private_dict.encode({}).length + private_dict_length = font_dict.private_dict.encode.length - font_dict.finalize(result, {}) + font_dict.finalize(result) io = StringIO.new(result.string) file = TestFile.new(io) new_dict = described_class.new(top_dict, file, 0, dict_length) diff --git a/spec/ttfunk/table/cff/font_index_spec.rb b/spec/ttfunk/table/cff/font_index_spec.rb index 5f1d4d85..3c141b84 100644 --- a/spec/ttfunk/table/cff/font_index_spec.rb +++ b/spec/ttfunk/table/cff/font_index_spec.rb @@ -9,7 +9,7 @@ let(:font_path) { test_font('NotoSansCJKsc-Thin', :otf) } it 'provides access to font dicts by index' do - expect(font_index.count).to eq(19) + expect(font_index.items_count).to eq(19) expect(font_index[0]).to be_a(TTFunk::Table::Cff::FontDict) end end diff --git a/spec/ttfunk/table/cff/index_spec.rb b/spec/ttfunk/table/cff/index_spec.rb index 3abe0a34..9d206bc1 100644 --- a/spec/ttfunk/table/cff/index_spec.rb +++ b/spec/ttfunk/table/cff/index_spec.rb @@ -34,26 +34,139 @@ [0x00, 0x00] => [] } - 
test_cases.each_with_index do |(bytes, decoded_values), idx| - context "test case #{idx}" do - subject(:index) do - io = StringIO.new(bytes.pack('C*')) - described_class.new( - TestFile.new(io), 0, bytes.size - ) - end + describe 'decoding' do + test_cases.each_with_index do |(bytes, decoded_values), idx| + context "test case #{idx}" do + subject(:index) do + io = StringIO.new(bytes.pack('C*')) + described_class.new( + TestFile.new(io), 0, bytes.size + ) + end - it 'parses correctly' do - expect(index.map(&:bytes)).to eq(decoded_values) - end + it 'parses correctly' do + expect(index.map(&:bytes)).to eq(decoded_values) + end - it 'encodes correctly' do - expect(index.encode.bytes).to eq(bytes) + it 'encodes correctly' do + expect(index.encode.bytes).to eq(bytes) + end + + it 'calculates the length correctly' do + expect(index.length).to eq(bytes.size) + end end + end + end + + describe 'encoding' do + it 'properly encodes items (change)' do + inc_index_class = + Class.new(described_class) do + private + + def encode_items(*) + # Increase each byte by 1 + items.map { |i| [i.unpack1('C') + 1].pack('C') } + end + end + + data = [ + # count + 0x00, 0x03, + # offset len + 0x01, + # offsets + 0x01, 0x02, 0x03, 0x04, + # data + 0x01, 0x02, 0x03 + ].pack('C*') - it 'calculates the length correctly' do - expect(index.length).to eq(bytes.size) + index = + inc_index_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.encode.string).to eq("\00\03\01\01\02\03\04\02\03\04") + end + + it 'properly encodes items (filter)' do + dup_index_class = + Class.new(described_class) do + private + + def encode_items(*) + # duplicate each item + items.flat_map { |i| [i, i] } + end + end + + data = [ + # count + 0x00, 0x03, + # offset len + 0x01, + # offsets + 0x01, 0x02, 0x03, 0x04, + # data + 0x01, 0x02, 0x03 + ].pack('C*') + + index = + dup_index_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.encode.string).to 
eq("\00\06\01\01\02\03\04\05\06\07\01\01\02\02\03\03") + end + + [ + { item_size: 1, data_size: 6, offset_size: 1 }, + { item_size: 0xff, data_size: 262, offset_size: 2 }, + { item_size: 0xffff, data_size: 65_544, offset_size: 3 }, + { item_size: 0xffffff, data_size: 16_777_226, offset_size: 4 } + ].each do |params| + it "properly encodes offset size #{params[:offset_size]}" do + gen_index_class = + Class.new(described_class) do + attr_accessor :item_size + + private + + def encode_items(*) + ["\00" * item_size] + end + end + + gen_index = gen_index_class.new(TestFile.new(StringIO.new("\00\00")), 0, 2) + gen_index.item_size = params[:item_size] + + data = gen_index.encode.string + + expect(data.length).to eq params[:data_size] + + index = + described_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.items_count).to eq 1 end end + + it 'raises on more items than is possible to encode' do + gen_index_class = + Class.new(described_class) do + private + + def encode_items(*) + ["\00"] * 0x10000 + end + end + + gen_index = gen_index_class.new(TestFile.new(StringIO.new("\00\00")), 0, 2) + + expect { gen_index.encode }.to raise_error(/too many items/i) + end end end diff --git a/spec/ttfunk/table/cff/private_dict_spec.rb b/spec/ttfunk/table/cff/private_dict_spec.rb index b6f806b7..fb767a5b 100644 --- a/spec/ttfunk/table/cff/private_dict_spec.rb +++ b/spec/ttfunk/table/cff/private_dict_spec.rb @@ -41,7 +41,7 @@ describe '#encode' do it 'produces an encoded dict that can be re-parsed successfully' do - result = private_dict.encode({}) + result = private_dict.encode dict_length = result.length private_dict.finalize(result) @@ -57,7 +57,7 @@ ) ) - expect(new_dict.subr_index.count).to eq(private_dict.subr_index.count) + expect(new_dict.subr_index.items_count).to eq(private_dict.subr_index.items_count) end end end diff --git a/spec/ttfunk/table/cff/top_dict_spec.rb b/spec/ttfunk/table/cff/top_dict_spec.rb index abc5b4e3..e50c6eb0 100644 --- 
a/spec/ttfunk/table/cff/top_dict_spec.rb +++ b/spec/ttfunk/table/cff/top_dict_spec.rb @@ -18,13 +18,12 @@ describe '#encode' do it 'produces an encoded dict that can be re-parsed successfully' do - new_to_old = font.cmap.unicode.first.code_map - old_to_new = new_to_old.invert + charmap = font.cmap.unicode.first.code_map.transform_values { |v| { old: v, new: v } } encoded = top_dict.encode top_dict_length = encoded.length top_dict_hash = top_dict.to_h placeholders = encoded.placeholders.dup - top_dict.finalize(encoded, new_to_old, old_to_new) + top_dict.finalize(encoded, charmap) file = TestFile.new(StringIO.new(encoded.string)) new_top_dict = described_class.new(file, 0, top_dict_length) diff --git a/spec/ttfunk/table/cff/top_index_spec.rb b/spec/ttfunk/table/cff/top_index_spec.rb index fd243c02..4ed2c41e 100644 --- a/spec/ttfunk/table/cff/top_index_spec.rb +++ b/spec/ttfunk/table/cff/top_index_spec.rb @@ -13,7 +13,7 @@ end it 'always contains a single top dict' do - expect(top_index.count).to eq(1) + expect(top_index.items_count).to eq(1) end end