diff --git a/CHANGELOG.md b/CHANGELOG.md index 546ede4..113c68a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,41 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/). ## [Unreleased] +### Fixed + +* Corrupted CFF index data + + There was a subtle bug in the CFF index implementation that resulted in + data corruption. In certain circumstances some items didn't get + properly encoded. This happened when items were not previously accessed. + + This resulted, for instance, in missing glyphs, but only sometimes, + because indexes might still have contained data that shouldn't have been + there. In combination with incorrect encoding (see below) this + resulted in some glyphs still being rendered, sometimes even correctly. + + Along with the fix a rather large API change landed. This resulted in + quite a big diff. + + Alexander Mankuta + +* Incorrect CFF encoding in subsets + + TTFunk used to reuse encoding from the original font. This mapping was + incorrect for subset fonts which used not just a subset of glyphs but + also a different encoding. + + A separate issue was that some fonts have empty CFF encoding. This + incorrect mapping resulted in encoding that mapped all codes to glyph 0. + + This had an impact on Prawn in particular. The PDF spec explicitly says that + CFF encoding is not to be used in OpenType fonts. `cmap` table should + directly index charstrings in the CFF table. Despite this, PDF renderers + still use CFF encoding to retrieve glyphs. So TTFunk has to discard the + original CFF encoding and supply its own.
+ + Alexander Mankuta + ## 1.7.0 ### Changes diff --git a/lib/ttfunk/otf_encoder.rb b/lib/ttfunk/otf_encoder.rb index 408e1a5..3bddfde 100644 --- a/lib/ttfunk/otf_encoder.rb +++ b/lib/ttfunk/otf_encoder.rb @@ -27,7 +27,7 @@ def base_table end def cff_table - @cff_table ||= original.cff.encode(new_to_old_glyph, old_to_new_glyph) + @cff_table ||= original.cff.encode(subset) end def vorg_table @@ -48,14 +48,5 @@ def optimal_table_order (tables.keys - ['DSIG'] - OPTIMAL_TABLE_ORDER) + ['DSIG'] end - - def collect_glyphs(glyph_ids) - # CFF top indexes are supposed to contain only one font, although they're - # capable of supporting many (no idea why this is true, maybe for CFF - # v2??). Anyway it's cool to do top_index[0], don't worry about it. - glyph_ids.each_with_object({}) do |id, h| - h[id] = original.cff.top_index[0].charstrings_index[id] - end - end end end diff --git a/lib/ttfunk/subset/code_page.rb b/lib/ttfunk/subset/code_page.rb index b943b18..d2ad997 100644 --- a/lib/ttfunk/subset/code_page.rb +++ b/lib/ttfunk/subset/code_page.rb @@ -40,6 +40,7 @@ def initialize(original, code_page, encoding) def to_unicode_map self.class.unicode_mapping_for(encoding) + .select { |codepoint, _unicode| @subset[codepoint] } end def use(character) diff --git a/lib/ttfunk/table/cff.rb b/lib/ttfunk/table/cff.rb index 3f40a08..4ecd459 100644 --- a/lib/ttfunk/table/cff.rb +++ b/lib/ttfunk/table/cff.rb @@ -31,18 +31,18 @@ def tag TAG end - def encode(new_to_old, old_to_new) + def encode(subset) EncodedString.new do |result| - sub_tables = [ + result.concat( header.encode, name_index.encode, - top_index.encode(&:encode), + top_index.encode, string_index.encode, global_subr_index.encode - ] + ) - sub_tables.each { |tb| result << tb } - top_index[0].finalize(result, new_to_old, old_to_new) + charmap = subset.new_cmap_table[:charmap] + top_index[0].finalize(result, charmap) end end diff --git a/lib/ttfunk/table/cff/charset.rb b/lib/ttfunk/table/cff/charset.rb index 3c78441..fcaded5 
100644 --- a/lib/ttfunk/table/cff/charset.rb +++ b/lib/ttfunk/table/cff/charset.rb @@ -35,7 +35,7 @@ def strings_for_charset_id(charset_id) end attr_reader :entries, :length - attr_reader :top_dict, :format, :count, :offset_or_id + attr_reader :top_dict, :format, :items_count, :offset_or_id def initialize(top_dict, file, offset_or_id = nil, length = nil) @top_dict = top_dict @@ -44,7 +44,7 @@ def initialize(top_dict, file, offset_or_id = nil, length = nil) if offset super(file, offset, length) else - @count = self.class.strings_for_charset_id(offset_or_id).size + @items_count = self.class.strings_for_charset_id(offset_or_id).size end end @@ -52,7 +52,7 @@ def each return to_enum(__method__) unless block_given? # +1 adjusts for the implicit .notdef glyph - (count + 1).times { |i| yield self[i] } + (items_count + 1).times { |i| yield self[i] } end def [](glyph_id) @@ -73,13 +73,18 @@ def offset end end - # mapping is new -> old glyph ids - def encode(mapping) + def encode(charmap) # no offset means no charset was specified (i.e. we're supposed to # use a predefined charset) so there's nothing to encode return '' unless offset - sids = mapping.keys.sort.map { |new_gid| sid_for(mapping[new_gid]) } + sids = + charmap + .values + .reject { |mapping| mapping[:new].zero? } + .sort_by { |mapping| mapping[:new] } + .map { |mapping| sid_for(mapping[:old]) } + ranges = TTFunk::BinUtils.rangify(sids) range_max = ranges.map(&:last).max @@ -138,7 +143,7 @@ def find_string(sid) idx = sid - 390 - if idx < file.cff.string_index.count + if idx < file.cff.string_index.items_count file.cff.string_index[idx] end else @@ -153,23 +158,23 @@ def parse! 
case format_sym when :array_format - @count = top_dict.charstrings_index.count - 1 - @length = count * element_width + @items_count = top_dict.charstrings_index.items_count - 1 + @length = @items_count * element_width @entries = OneBasedArray.new(read(length, 'n*')) when :range_format8, :range_format16 # The number of ranges is not explicitly specified in the font. # Instead, software utilizing this data simply processes ranges # until all glyphs in the font are covered. - @count = 0 + @items_count = 0 @entries = [] @length = 0 - until count >= top_dict.charstrings_index.count - 1 + until @items_count >= top_dict.charstrings_index.items_count - 1 @length += 1 + element_width sid, num_left = read(element_width, element_format) - entries << (sid..(sid + num_left)) - @count += num_left + 1 + @entries << (sid..(sid + num_left)) + @items_count += num_left + 1 end end end diff --git a/lib/ttfunk/table/cff/charstring.rb b/lib/ttfunk/table/cff/charstring.rb index d7cb05f..c3b58e8 100644 --- a/lib/ttfunk/table/cff/charstring.rb +++ b/lib/ttfunk/table/cff/charstring.rb @@ -91,10 +91,6 @@ def render(x: 0, y: 0, font_size: 72) ) end - def encode - raw - end - private def parse! diff --git a/lib/ttfunk/table/cff/charstrings_index.rb b/lib/ttfunk/table/cff/charstrings_index.rb index be86824..e3be83b 100644 --- a/lib/ttfunk/table/cff/charstrings_index.rb +++ b/lib/ttfunk/table/cff/charstrings_index.rb @@ -11,21 +11,21 @@ def initialize(top_dict, *remaining_args) @top_dict = top_dict end - def [](index) - entry_cache[index] ||= TTFunk::Table::Cff::Charstring.new( + private + + def decode_item(index, _offset, _length) + TTFunk::Table::Cff::Charstring.new( index, top_dict, font_dict_for(index), super ) end - # gets passed a mapping of new => old glyph ids - def encode(mapping) - super() do |_entry, index| - self[mapping[index]].encode if mapping.include?(index) - end + def encode_items(charmap) + charmap + .reject { |code, mapping| mapping[:new].zero? && !code.zero? 
} + .sort_by { |_code, mapping| mapping[:new] } + .map { |(_code, mapping)| items[mapping[:old]] } end - private - def font_dict_for(index) # only CID-keyed fonts contain an FD selector and font dicts if top_dict.is_cid_font? diff --git a/lib/ttfunk/table/cff/encoding.rb b/lib/ttfunk/table/cff/encoding.rb index bd4c76b..6ab0ba3 100644 --- a/lib/ttfunk/table/cff/encoding.rb +++ b/lib/ttfunk/table/cff/encoding.rb @@ -22,7 +22,7 @@ def codes_for_encoding_id(encoding_id) end end - attr_reader :top_dict, :format, :count, :offset_or_id + attr_reader :top_dict, :format, :items_count, :offset_or_id def initialize(top_dict, file, offset_or_id = nil, length = nil) @top_dict = top_dict @@ -30,8 +30,10 @@ def initialize(top_dict, file, offset_or_id = nil, length = nil) if offset super(file, offset, length) + @supplemental = format >> 7 == 1 else - @count = self.class.codes_for_encoding_id(offset_or_id).size + @items_count = self.class.codes_for_encoding_id(offset_or_id).size + @supplemental = false end end @@ -39,7 +41,7 @@ def each return to_enum(__method__) unless block_given? # +1 adjusts for the implicit .notdef glyph - (count + 1).times { |i| yield self[i] } + (items_count + 1).times { |i| yield self[i] } end def [](glyph_id) @@ -62,16 +64,18 @@ def offset end end - def encode(new_to_old, old_to_new) - # no offset means no encoding was specified (i.e. we're supposed to - # use a predefined encoding) so there's nothing to encode - return '' unless offset - return encode_supplemental(new_to_old, old_to_new) if supplemental? + def encode(charmap) + # Any subset encoding is all but guaranteed to be different from the + # standard encoding so we don't even attempt to see if it matches. We + # assume it's different and just encode it anew. + + return encode_supplemental(charmap) if supplemental? codes = - new_to_old.keys.sort.map do |new_gid| - code_for(new_to_old[new_gid]) - end + charmap + .reject { |_code, mapping| mapping[:new].zero? 
} + .sort_by { |_code, mapping| mapping[:new] } + .map { |(code, _m)| code } ranges = TTFunk::BinUtils.rangify(codes) @@ -95,18 +99,16 @@ def encode(new_to_old, old_to_new) def supplemental? # high-order bit set to 1 indicates supplemental encoding - @format >> 7 == 1 + @supplemental end private - def encode_supplemental(_new_to_old, old_to_new) + def encode_supplemental(charmap) new_entries = - @entries.each_with_object({}) do |(code, old_gid), ret| - if (new_gid = old_to_new[old_gid]) - ret[code] = new_gid - end - end + charmap + .reject { |_code, mapping| mapping[:new].zero? } + .transform_values { |mapping| mapping[:new] } result = [format_int(:supplemental), new_entries.size].pack('CC') fmt = element_format(:supplemental) @@ -150,22 +152,22 @@ def parse! case format_sym when :array_format - @count = entry_count + @items_count = entry_count @entries = OneBasedArray.new(read(length, 'C*')) when :range_format @entries = [] - @count = 0 + @items_count = 0 entry_count.times do code, num_left = read(element_width, element_format) @entries << (code..(code + num_left)) - @count += num_left + 1 + @items_count += num_left + 1 end when :supplemental @entries = {} - @count = entry_count + @items_count = entry_count entry_count.times do code, glyph = read(element_width, element_format) diff --git a/lib/ttfunk/table/cff/fd_selector.rb b/lib/ttfunk/table/cff/fd_selector.rb index 22fde67..639465b 100644 --- a/lib/ttfunk/table/cff/fd_selector.rb +++ b/lib/ttfunk/table/cff/fd_selector.rb @@ -12,7 +12,7 @@ class FdSelector < TTFunk::SubTable RANGE_ENTRY_SIZE = 3 ARRAY_ENTRY_SIZE = 1 - attr_reader :top_dict, :count, :entries, :n_glyphs + attr_reader :top_dict, :items_count, :entries, :n_glyphs def initialize(top_dict, file, offset, length = nil) @top_dict = top_dict @@ -48,16 +48,16 @@ def [](glyph_id) def each return to_enum(__method__) unless block_given? 
- count.times { |i| yield self[i] } + items_count.times { |i| yield self[i] } end - # mapping is new -> old glyph ids - def encode(mapping) + def encode(charmap) # get list of [new_gid, fd_index] pairs new_indices = - mapping.keys.sort.map do |new_gid| - [new_gid, self[mapping[new_gid]]] - end + charmap + .reject { |code, mapping| mapping[:new].zero? && !code.zero? } + .sort_by { |_code, mapping| mapping[:new] } + .map { |(_code, mapping)| [mapping[:new], self[mapping[:old]]] } ranges = rangify_gids(new_indices) total_range_size = ranges.size * RANGE_ENTRY_SIZE @@ -108,10 +108,10 @@ def parse! case format_sym when :array_format - @n_glyphs = top_dict.charstrings_index.count + @n_glyphs = top_dict.charstrings_index.items_count data = io.read(n_glyphs) @length += data.bytesize - @count = data.bytesize + @items_count = data.bytesize @entries = data.bytes when :range_format @@ -135,7 +135,7 @@ def parse! last_start_gid, last_fd_index = ranges.last @entries << [(last_start_gid...(n_glyphs + 1)), last_fd_index] - @count = entries.reduce(0) { |sum, entry| sum + entry.first.size } + @items_count = entries.reduce(0) { |sum, entry| sum + entry.first.size } end end diff --git a/lib/ttfunk/table/cff/font_dict.rb b/lib/ttfunk/table/cff/font_dict.rb index 39e4e8e..5ac1c34 100644 --- a/lib/ttfunk/table/cff/font_dict.rb +++ b/lib/ttfunk/table/cff/font_dict.rb @@ -15,7 +15,7 @@ def initialize(top_dict, file, offset, length = nil) super(file, offset, length) end - def encode(_mapping) + def encode EncodedString.new do |result| each do |operator, operands| case OPERATOR_CODES[operator] @@ -30,8 +30,8 @@ def encode(_mapping) end end - def finalize(new_cff_data, mapping) - encoded_private_dict = private_dict.encode(mapping) + def finalize(new_cff_data) + encoded_private_dict = private_dict.encode encoded_offset = encode_integer32(new_cff_data.length) encoded_length = encode_integer32(encoded_private_dict.length) diff --git a/lib/ttfunk/table/cff/font_index.rb 
b/lib/ttfunk/table/cff/font_index.rb index c5f0dc4..d282ba5 100644 --- a/lib/ttfunk/table/cff/font_index.rb +++ b/lib/ttfunk/table/cff/font_index.rb @@ -11,18 +11,21 @@ def initialize(top_dict, file, offset, length = nil) @top_dict = top_dict end - def [](index) - entry_cache[index] ||= - begin - start, finish = absolute_offsets_for(index) - TTFunk::Table::Cff::FontDict.new( - top_dict, file, start, (finish - start) + 1 - ) - end + def finalize(new_cff_data) + each { |font_dict| font_dict.finalize(new_cff_data) } end - def finalize(new_cff_data, mapping) - each { |font_dict| font_dict.finalize(new_cff_data, mapping) } + private + + def decode_item(_index, offset, length) + TTFunk::Table::Cff::FontDict.new( + top_dict, file, offset, length + ) + end + + def encode_items(*) + # Re-encode font dicts + map(&:encode) end end end diff --git a/lib/ttfunk/table/cff/index.rb b/lib/ttfunk/table/cff/index.rb index d570f4d..fb0dfec 100644 --- a/lib/ttfunk/table/cff/index.rb +++ b/lib/ttfunk/table/cff/index.rb @@ -6,72 +6,80 @@ class Cff < TTFunk::Table class Index < TTFunk::SubTable include Enumerable - # number of objects in the index - attr_reader :count - - # offset array element size - attr_reader :offset_size - - attr_reader :raw_offset_length, :offsets, :raw_data - attr_reader :data_start_pos - def [](index) - entry_cache[index] ||= raw_data[ - offsets[index]...offsets[index + 1] - ] + return if index >= items_count + + entry_cache[index] ||= + decode_item( + index, + data_reference_offset + offsets[index], + offsets[index + 1] - offsets[index] + ) end - def each - return to_enum(__method__) unless block_given? + def each(&block) + return to_enum(__method__) unless block - count.times { |i| yield self[i] } + items_count.times do |i| + yield self[i] + end end - def encode - result = EncodedString.new + def items_count + items.length + end - entries = - each_with_object([]).with_index do |(entry, ret), index| - new_entry = block_given? ? 
yield(entry, index) : entry - ret << new_entry if new_entry - end + def encode(*args) + new_items = encode_items(*args) - # "An empty INDEX is represented by a count field with a 0 value and - # no additional fields. Thus, the total size of an empty INDEX is 2 - # bytes." - result << [entries.size].pack('n') - return result if entries.empty? + if new_items.empty? + return [0].pack('n') + end - offset_size = (Math.log2(entries.size) / 8.0).round + 1 - result << [offset_size].pack('C') - data_offset = 1 + if new_items.length > 0xffff + raise Error, 'Too many items in a CFF index' + end - data = EncodedString.new + offsets_array = + new_items + .each_with_object([1]) do |item, offsets| + offsets << offsets.last + item.length + end - entries.each do |entry| - result << encode_offset(data_offset, offset_size) - data << entry - data_offset += entry.length - end + offset_size = (offsets_array.last.bit_length / 8.0).ceil - unless entries.empty? - result << encode_offset(data_offset, offset_size) - end + offsets_array.map! { |offset| encode_offset(offset, offset_size) } - result << data + EncodedString.new.concat( + [new_items.length, offset_size].pack('nC'), + *offsets_array, + *new_items + ) end private + attr_reader :items, :offsets, :data_reference_offset + def entry_cache @entry_cache ||= {} end - def absolute_offsets_for(index) - [ - table_offset + offsets[index] + data_start_pos, - table_offset + offsets[index + 1] + data_start_pos - ] + # Returns an array of EncodedString elements (plain strings, + # placeholders, or EncodedString instances). Each element is supposed to + # represent an encoded item. + # + # This is the place to do all the filtering, reordering, or individual + # item encoding. + # + # It gets all the arguments `encode` gets. 
+ def encode_items(*) + items + end + + # By default do nothing + def decode_item(index, _offset, _length) + items[index] end def encode_offset(offset, offset_size) @@ -88,35 +96,38 @@ def encode_offset(offset, offset_size) end def parse! - @count = read(2, 'n').first + @entry_cache = {} - if count.zero? + num_entries = read(2, 'n').first + + if num_entries.zero? @length = 2 - @data = [] + @items = [] return end - @offset_size = read(1, 'C').first + offset_size = read(1, 'C').first - # read an extra offset_size bytes to get rid of the first offset, - # which is always 1 - io.read(offset_size) + @offsets = + Array.new(num_entries + 1) do + unpack_offset(io.read(offset_size), offset_size) + end - @raw_offset_length = count * offset_size - raw_offsets = io.read(raw_offset_length) + @data_reference_offset = table_offset + 3 + offsets.length * offset_size - 1 - @offsets = [0] + Array.new(count) do |idx| - start = offset_size * idx - finish = offset_size * (idx + 1) - unpack_offset(raw_offsets[start...finish]) - 1 - end + @length = + 2 + # num entries + 1 + # offset size + offsets.length * offset_size + # offsets + offsets.last - 1 # items - @raw_data = io.read(offsets.last) - @data_start_pos = 3 + offset_size + raw_offset_length - @length = data_start_pos + raw_data.size + @items = + offsets.each_cons(2).map do |offset, next_offset| + io.read(next_offset - offset) + end end - def unpack_offset(offset_data) + def unpack_offset(offset_data, offset_size) padding = "\x00" * (4 - offset_size) (padding + offset_data).unpack1('N') end diff --git a/lib/ttfunk/table/cff/one_based_index.rb b/lib/ttfunk/table/cff/one_based_index.rb index 147e287..f2ebe4a 100644 --- a/lib/ttfunk/table/cff/one_based_index.rb +++ b/lib/ttfunk/table/cff/one_based_index.rb @@ -11,7 +11,7 @@ class OneBasedIndex def_delegators :base_index, :each, :table_offset, - :count, + :items_count, :length, :encode diff --git a/lib/ttfunk/table/cff/private_dict.rb b/lib/ttfunk/table/cff/private_dict.rb index 
1dc0a06..ef36429 100644 --- a/lib/ttfunk/table/cff/private_dict.rb +++ b/lib/ttfunk/table/cff/private_dict.rb @@ -18,7 +18,7 @@ class PrivateDict < TTFunk::Table::Cff::Dict # @TODO: use mapping to determine which subroutines are still used. # For now, just encode them all. - def encode(_mapping) + def encode EncodedString.new do |result| each do |operator, operands| case OPERATOR_CODES[operator] @@ -72,7 +72,7 @@ def nominal_width_x private def encode_subrs - EncodedString.new.tap do |result| + EncodedString.new do |result| result << Placeholder.new( :"subrs_#{@table_offset}", length: PLACEHOLDER_LENGTH ) diff --git a/lib/ttfunk/table/cff/subr_index.rb b/lib/ttfunk/table/cff/subr_index.rb index 6cf8065..6a1ec73 100644 --- a/lib/ttfunk/table/cff/subr_index.rb +++ b/lib/ttfunk/table/cff/subr_index.rb @@ -5,9 +5,9 @@ class Table class Cff < TTFunk::Table class SubrIndex < TTFunk::Table::Cff::Index def bias - if count < 1240 + if items.length < 1240 107 - elsif count < 33_900 + elsif items.length < 33_900 1131 else 32_768 diff --git a/lib/ttfunk/table/cff/top_dict.rb b/lib/ttfunk/table/cff/top_dict.rb index 40148ed..50e0a29 100644 --- a/lib/ttfunk/table/cff/top_dict.rb +++ b/lib/ttfunk/table/cff/top_dict.rb @@ -47,16 +47,16 @@ def encode(*) end end - def finalize(new_cff_data, new_to_old, old_to_new) + def finalize(new_cff_data, charmap) if charset finalize_subtable( - new_cff_data, :charset, charset.encode(new_to_old) + new_cff_data, :charset, charset.encode(charmap) ) end if encoding finalize_subtable( - new_cff_data, :encoding, encoding.encode(new_to_old, old_to_new) + new_cff_data, :encoding, encoding.encode(charmap) ) end @@ -64,7 +64,7 @@ def finalize(new_cff_data, new_to_old, old_to_new) finalize_subtable( new_cff_data, :charstrings_index, - charstrings_index.encode(new_to_old, &:encode) + charstrings_index.encode(charmap) ) end @@ -72,24 +72,22 @@ def finalize(new_cff_data, new_to_old, old_to_new) finalize_subtable( new_cff_data, :font_index, - 
font_index.encode do |font_dict| - font_dict.encode(new_to_old) - end + font_index.encode ) - font_index.finalize(new_cff_data, new_to_old) + font_index.finalize(new_cff_data) end if font_dict_selector finalize_subtable( new_cff_data, :font_dict_selector, - font_dict_selector.encode(new_to_old) + font_dict_selector.encode(charmap) ) end if private_dict - encoded_private_dict = private_dict.encode(new_to_old) + encoded_private_dict = private_dict.encode encoded_offset = encode_integer32(new_cff_data.length) encoded_length = encode_integer32(encoded_private_dict.length) diff --git a/lib/ttfunk/table/cff/top_index.rb b/lib/ttfunk/table/cff/top_index.rb index 08efbc4..8c120c9 100644 --- a/lib/ttfunk/table/cff/top_index.rb +++ b/lib/ttfunk/table/cff/top_index.rb @@ -4,12 +4,15 @@ module TTFunk class Table class Cff < TTFunk::Table class TopIndex < TTFunk::Table::Cff::Index - def [](index) - entry_cache[index] ||= - begin - start, finish = absolute_offsets_for(index) - TTFunk::Table::Cff::TopDict.new(file, start, (finish - start) + 1) - end + private + + def decode_item(_index, offset, length) + TTFunk::Table::Cff::TopDict.new(file, offset, length) + end + + def encode_items(*) + # Re-encode the top dict + map(&:encode) end end end diff --git a/spec/ttfunk/table/cff/charset_spec.rb b/spec/ttfunk/table/cff/charset_spec.rb index 175ce3f..91c609e 100644 --- a/spec/ttfunk/table/cff/charset_spec.rb +++ b/spec/ttfunk/table/cff/charset_spec.rb @@ -28,8 +28,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. - expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -58,8 +58,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. 
- expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -85,8 +85,8 @@ # From the spec: There is one less element in the glyph name array than # nGlyphs (i.e. charstrings count) because the .notdef glyph name is # omitted. - expect(charset.count).to( - eq(font.cff.top_index[0].charstrings_index.count - 1) + expect(charset.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count - 1) ) end @@ -115,14 +115,21 @@ describe '#encode' do let(:font_path) { test_font('NotoSansCJKsc-Thin', :otf) } - let(:encoded) { charset.encode(subset_mapping) } + let(:encoded) { charset.encode(charmap) } context 'when the subset contains non-sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # the idea here is to demonstrate that non-sequental SIDs can sometimes # be more compactly represented as individual elements as opposed to # ranges (supposed to be new => old glyph IDs) - { 1 => 1, 4 => 4, 10 => 10, 14 => 14, 15 => 15, 21 => 21 } + { + 0x20 => { old: 1, new: 1 }, + 0x23 => { old: 4, new: 4 }, + 0x29 => { old: 10, new: 10 }, + 0x2d => { old: 14, new: 14 }, + 0x2e => { old: 15, new: 15 }, + 0x34 => { old: 21, new: 21 } + } end it 'encodes using the array-based format' do @@ -141,10 +148,10 @@ end context 'when the subset contains few sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # i.e. 
the first 20 characters, in order # (supposed to be new => old glyph IDs) - Hash[(1..20).map { |i| [i, i] }] + Hash[(1..20).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the 8-bit range-based format' do @@ -158,10 +165,10 @@ end context 'when the subset contains many sequential SIDs' do - let(:subset_mapping) do + let(:charmap) do # we want to get a 2-byte range to demonstrate the 16-bit format # (supposed to be new => old glyph IDs) - Hash[(1..2**10).map { |i| [i, i] }] + Hash[(1..2**10).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the 16-bit range-based format' do diff --git a/spec/ttfunk/table/cff/encoding_spec.rb b/spec/ttfunk/table/cff/encoding_spec.rb index 9dd80d2..710a4cd 100644 --- a/spec/ttfunk/table/cff/encoding_spec.rb +++ b/spec/ttfunk/table/cff/encoding_spec.rb @@ -32,14 +32,21 @@ describe '#encode' do let(:font_path) { test_font('AlbertTextBold', :otf) } - let(:encoded) { encoding.encode(subset_mapping, subset_mapping.invert) } + let(:encoded) { encoding.encode(charmap) } context 'when the subset contains non-sequential codes' do - let(:subset_mapping) do + let(:charmap) do # the idea here is to demonstrate that non-sequental codes can # sometimes be more compactly represented as individual elements # as opposed to ranges (supposed to be new => old glyph IDs) - { 1 => 1, 4 => 4, 10 => 10, 14 => 14, 15 => 15, 21 => 21 } + { + 0x20 => { old: 1, new: 1 }, + 0x23 => { old: 4, new: 4 }, + 0x29 => { old: 10, new: 10 }, + 0x2d => { old: 14, new: 14 }, + 0x2e => { old: 15, new: 15 }, + 0x34 => { old: 21, new: 13 } + } end it 'encodes using the array-based format' do @@ -48,13 +55,7 @@ it 'encodes correctly' do # format (0x00), codes (1 byte each) - expect(encoded.bytes).to eq( - [ - 0, - subset_mapping.count, - *subset_mapping.map { |old_gid, _| encoding[old_gid] } - ] - ) + expect(encoded).to eq("\x00\x06\x20\x23\x29\x34\x2d\x2e") end # unfortunately I haven't been able to find an example font that defines @@ -83,16 
+84,16 @@ font.cff.top_index[0], file, fake_offset, encoded.length ) - expect(new_encoding.to_a).to eq([0, 26, 29, 35, 39, 40, 46]) + expect(new_encoding.to_a).to eq([0, 0x20, 0x23, 0x29, 0x34, 0x2d, 0x2e]) end # rubocop: enable RSpec/AnyInstance end context 'when the subset contains sequential codes' do - let(:subset_mapping) do + let(:charmap) do # i.e. the first 20 characters, in order # (supposed to be new => old glyph IDs) - Hash[(1..20).map { |i| [i, i] }] + Hash[(1..20).map { |i| [0x20 + i, { old: i, new: i }] }] end it 'encodes using the range-based format' do @@ -100,9 +101,9 @@ end it 'encodes correctly' do - # format (0x01), count (0x01, start code (0x1D, i.e. 26), + # format (0x01), count (0x01, start code (0x21, i.e. 33), # rest (0x13, i.e. 19) - expect(encoded.bytes).to eq([0x01, 0x01, 0x1A, 0x13]) + expect(encoded.bytes).to eq([0x01, 0x01, 0x21, 0x13]) end end end diff --git a/spec/ttfunk/table/cff/fd_selector_spec.rb b/spec/ttfunk/table/cff/fd_selector_spec.rb index c6adb0a..e83bde7 100644 --- a/spec/ttfunk/table/cff/fd_selector_spec.rb +++ b/spec/ttfunk/table/cff/fd_selector_spec.rb @@ -24,7 +24,7 @@ instance_double( TTFunk::Table::Cff::CharstringsIndex, :charstrings_index, - count: entry_count + items_count: entry_count ) end let(:fd_selector) do @@ -38,7 +38,7 @@ end it 'includes entries for all the glyphs in the font' do - expect(fd_selector.count).to eq(entry_count) + expect(fd_selector.items_count).to eq(entry_count) end it 'parses the entries correctly' do @@ -46,8 +46,12 @@ end it 'encodes correctly' do - mapping = { 1 => 1, 3 => 3, 5 => 5 } - expect(fd_selector.encode(mapping)).to eq("\x00\x02\x04\x06") + charmap = { + 0x20 => { old: 1, new: 1 }, + 0x22 => { old: 3, new: 3 }, + 0x24 => { old: 5, new: 5 } + } + expect(fd_selector.encode(charmap)).to eq("\x00\x02\x04\x06") end end @@ -56,8 +60,8 @@ it 'includes entries for all the glyphs in the font' do # the charstrings index doesn't contain an entry for the .notdef glyph - 
expect(fd_selector.count).to( - eq(font.cff.top_index[0].charstrings_index.count + 1) + expect(fd_selector.items_count).to( + eq(font.cff.top_index[0].charstrings_index.items_count + 1) ) end @@ -78,8 +82,8 @@ end it 'encodes correctly' do - mapping = Hash[(0..15).map { |i| [i, i] }] - result = fd_selector.encode(mapping) + charmap = Hash[(0..15).map { |i| [i, { old: i, new: i }] }] + result = fd_selector.encode(charmap) expect(result).to( # fmt | count | range 1 | range 2 | n glyphs eq("\x03\x00\x02\x00\x00\x05\x00\x01\x0F\x00\x10") diff --git a/spec/ttfunk/table/cff/font_dict_spec.rb b/spec/ttfunk/table/cff/font_dict_spec.rb index 153e077..6a29b02 100644 --- a/spec/ttfunk/table/cff/font_dict_spec.rb +++ b/spec/ttfunk/table/cff/font_dict_spec.rb @@ -24,11 +24,11 @@ end it 'produces an encoded dict that can be re-parsed successfully' do - result = font_dict.encode({}) + result = font_dict.encode dict_length = result.length - private_dict_length = font_dict.private_dict.encode({}).length + private_dict_length = font_dict.private_dict.encode.length - font_dict.finalize(result, {}) + font_dict.finalize(result) io = StringIO.new(result.string) file = TestFile.new(io) new_dict = described_class.new(top_dict, file, 0, dict_length) diff --git a/spec/ttfunk/table/cff/font_index_spec.rb b/spec/ttfunk/table/cff/font_index_spec.rb index 5f1d4d8..3c141b8 100644 --- a/spec/ttfunk/table/cff/font_index_spec.rb +++ b/spec/ttfunk/table/cff/font_index_spec.rb @@ -9,7 +9,7 @@ let(:font_path) { test_font('NotoSansCJKsc-Thin', :otf) } it 'provides access to font dicts by index' do - expect(font_index.count).to eq(19) + expect(font_index.items_count).to eq(19) expect(font_index[0]).to be_a(TTFunk::Table::Cff::FontDict) end end diff --git a/spec/ttfunk/table/cff/index_spec.rb b/spec/ttfunk/table/cff/index_spec.rb index 3abe0a3..b89cca1 100644 --- a/spec/ttfunk/table/cff/index_spec.rb +++ b/spec/ttfunk/table/cff/index_spec.rb @@ -34,26 +34,139 @@ [0x00, 0x00] => [] } - 
test_cases.each_with_index do |(bytes, decoded_values), idx| - context "test case #{idx}" do - subject(:index) do - io = StringIO.new(bytes.pack('C*')) - described_class.new( - TestFile.new(io), 0, bytes.size - ) - end + describe 'decoding' do + test_cases.each_with_index do |(bytes, decoded_values), idx| + context "test case #{idx}" do + subject(:index) do + io = StringIO.new(bytes.pack('C*')) + described_class.new( + TestFile.new(io), 0, bytes.size + ) + end - it 'parses correctly' do - expect(index.map(&:bytes)).to eq(decoded_values) - end + it 'parses correctly' do + expect(index.map(&:bytes)).to eq(decoded_values) + end - it 'encodes correctly' do - expect(index.encode.bytes).to eq(bytes) + it 'encodes correctly' do + expect(index.encode.bytes).to eq(bytes) + end + + it 'calculates the length correctly' do + expect(index.length).to eq(bytes.size) + end end + end + end + + describe 'encoding' do + it 'properly encodes items (change)' do + inc_index_class = + Class.new(described_class) do + private + + def encode_items(*) + # Increase each byte by 1 + items.map { |i| [i.unpack1('C') + 1].pack('C') } + end + end + + data = [ + # count + 0x00, 0x03, + # offset len + 0x01, + # offsets + 0x01, 0x02, 0x03, 0x04, + # data + 0x01, 0x02, 0x03 + ].pack('C*') - it 'calculates the length correctly' do - expect(index.length).to eq(bytes.size) + index = + inc_index_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.encode.string).to eq("\00\03\01\01\02\03\04\02\03\04") + end + + it 'properly encodes items (filter)' do + dup_index_class = + Class.new(described_class) do + private + + def encode_items(*) + # duplicate each item + items.flat_map { |i| [i, i] } + end + end + + data = [ + # count + 0x00, 0x03, + # offset len + 0x01, + # offsets + 0x01, 0x02, 0x03, 0x04, + # data + 0x01, 0x02, 0x03 + ].pack('C*') + + index = + dup_index_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.encode.string).to 
eq("\00\06\01\01\02\03\04\05\06\07\01\01\02\02\03\03") + end + + [ + { item_size: 1, data_size: 6, offset_size: 1 }, + { item_size: 0xff, data_size: 262, offset_size: 2 }, + { item_size: 0xffff, data_size: 65_544, offset_size: 3 }, + { item_size: 0xffffff, data_size: 16_777_226, offset_size: 4 } + ].each do |params| + it "properly encodes offset size #{params[:offset_size]}" do + gen_index_class = + Class.new(described_class) do + attr_accessor :item_size + + private + + def encode_items(*) + ["\00" * item_size] + end + end + + gen_index = gen_index_class.new(TestFile.new(StringIO.new("\00\00")), 0, 2) + gen_index.item_size = params[:item_size] + + data = gen_index.encode.string + + expect(data.length).to eq params[:data_size] + + index = + described_class.new( + TestFile.new(StringIO.new(data)), 0, data.length + ) + + expect(index.items_count).to eq 1 end end + + it "raises on more items than is possible to encode" do + gen_index_class = + Class.new(described_class) do + private + + def encode_items(*) + ["\00"] * 0x10000 + end + end + + gen_index = gen_index_class.new(TestFile.new(StringIO.new("\00\00")), 0, 2) + + expect { gen_index.encode }.to raise_error(/too many items/i) + end end end diff --git a/spec/ttfunk/table/cff/private_dict_spec.rb b/spec/ttfunk/table/cff/private_dict_spec.rb index b6f806b..fb767a5 100644 --- a/spec/ttfunk/table/cff/private_dict_spec.rb +++ b/spec/ttfunk/table/cff/private_dict_spec.rb @@ -41,7 +41,7 @@ describe '#encode' do it 'produces an encoded dict that can be re-parsed successfully' do - result = private_dict.encode({}) + result = private_dict.encode dict_length = result.length private_dict.finalize(result) @@ -57,7 +57,7 @@ ) ) - expect(new_dict.subr_index.count).to eq(private_dict.subr_index.count) + expect(new_dict.subr_index.items_count).to eq(private_dict.subr_index.items_count) end end end diff --git a/spec/ttfunk/table/cff/top_dict_spec.rb b/spec/ttfunk/table/cff/top_dict_spec.rb index abc5b4e..e50c6eb 100644 --- 
a/spec/ttfunk/table/cff/top_dict_spec.rb +++ b/spec/ttfunk/table/cff/top_dict_spec.rb @@ -18,13 +18,12 @@ describe '#encode' do it 'produces an encoded dict that can be re-parsed successfully' do - new_to_old = font.cmap.unicode.first.code_map - old_to_new = new_to_old.invert + charmap = font.cmap.unicode.first.code_map.transform_values { |v| { old: v, new: v } } encoded = top_dict.encode top_dict_length = encoded.length top_dict_hash = top_dict.to_h placeholders = encoded.placeholders.dup - top_dict.finalize(encoded, new_to_old, old_to_new) + top_dict.finalize(encoded, charmap) file = TestFile.new(StringIO.new(encoded.string)) new_top_dict = described_class.new(file, 0, top_dict_length) diff --git a/spec/ttfunk/table/cff/top_index_spec.rb b/spec/ttfunk/table/cff/top_index_spec.rb index fd243c0..4ed2c41 100644 --- a/spec/ttfunk/table/cff/top_index_spec.rb +++ b/spec/ttfunk/table/cff/top_index_spec.rb @@ -13,7 +13,7 @@ end it 'always contains a single top dict' do - expect(top_index.count).to eq(1) + expect(top_index.items_count).to eq(1) end end