From 951f12bfac42037444a9db4f7c4f91257a27cf36 Mon Sep 17 00:00:00 2001 From: James Healy Date: Fri, 18 Oct 2019 15:30:11 +1100 Subject: [PATCH] HACK track the graphics state for each character how does it vary over the page? Are there some characters that we should ignore? --- lib/pdf/reader/page_text_receiver.rb | 8 +++++++- lib/pdf/reader/text_run.rb | 11 ++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/pdf/reader/page_text_receiver.rb b/lib/pdf/reader/page_text_receiver.rb index ee1e1748..816e2ff3 100644 --- a/lib/pdf/reader/page_text_receiver.rb +++ b/lib/pdf/reader/page_text_receiver.rb @@ -26,6 +26,12 @@ class PageTextReceiver def_delegators :@state, :set_line_cap_style, :set_line_dash, :set_line_join_style def_delegators :@state, :set_line_width, :set_miter_limit + # Graphics State Operators (colour) + def_delegators :@state, :set_cmyk_color_for_stroking, :set_cmyk_color_for_nonstroking + def_delegators :@state, :set_gray_color_for_stroking, :set_gray_color_for_nonstroking + def_delegators :@state, :set_rgb_color_for_stroking, :set_rgb_color_for_nonstroking + def_delegators :@state, :set_stroke_color_space, :set_nonstroke_color_space + # Matrix Operators def_delegators :@state, :concatenate_matrix @@ -140,7 +146,7 @@ def internal_show_text(string) th = 1 scaled_glyph_width = glyph_width * @state.font_size * th unless utf8_chars == SPACE - @characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars) + @characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars, @state.clone_state) end @state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE) end diff --git a/lib/pdf/reader/text_run.rb b/lib/pdf/reader/text_run.rb index e47311a9..b2d62923 100644 --- a/lib/pdf/reader/text_run.rb +++ b/lib/pdf/reader/text_run.rb @@ -7,15 +7,16 @@ class PDF::Reader class TextRun include Comparable - attr_reader :origin, :width, :font_size, :text + attr_reader :origin, :width, :font_size, :text, :state alias :to_s :text - def initialize(x, y, width, font_size, text) + def initialize(x, y, width, font_size, text, state) @origin = PDF::Reader::Point.new(x, y) @width = width @font_size = font_size @text = text + @state = state end # Allows collections of TextRun objects to be sorted. They will be sorted @@ -62,14 +63,14 @@ def +(other) raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other) if (other.x - endx) <( font_size * 0.2) - TextRun.new(x, y, other.endx - x, font_size, text + other.text) + TextRun.new(x, y, other.endx - x, font_size, text + other.text, {}) else - TextRun.new(x, y, other.endx - x, font_size, "#{text} #{other.text}") + TextRun.new(x, y, other.endx - x, font_size, "#{text} #{other.text}", {}) end end def inspect - "#{text} w:#{width} f:#{font_size} @#{x},#{y}" + "#{text} w:#{width} f:#{font_size} @#{x},#{y} #{@state.inspect}" end def intersect?(other_run)