Skip to content

Commit

Permalink
HACK: track the graphics state for each character
Browse files Browse the repository at this point in the history
How does the graphics state vary over the page? Are there some characters that we should
ignore?
  • Loading branch information
yob committed Dec 29, 2021
1 parent e027096 commit 951f12b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
8 changes: 7 additions & 1 deletion lib/pdf/reader/page_text_receiver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ class PageTextReceiver
def_delegators :@state, :set_line_cap_style, :set_line_dash, :set_line_join_style
def_delegators :@state, :set_line_width, :set_miter_limit

# Graphics State Operators (colour)
def_delegators :@state, :set_cmyk_color_for_stroking, :set_cmyk_color_for_nonstroking
def_delegators :@state, :set_gray_color_for_stroking, :set_gray_color_for_nonstroking
def_delegators :@state, :set_rgb_color_for_stroking, :set_rgb_color_for_nonstroking
def_delegators :@state, :set_stroke_color_space, :set_nonstroke_color_space

# Matrix Operators
def_delegators :@state, :concatenate_matrix

Expand Down Expand Up @@ -140,7 +146,7 @@ def internal_show_text(string)
th = 1
scaled_glyph_width = glyph_width * @state.font_size * th
unless utf8_chars == SPACE
@characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
@characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars, @state.clone_state)
end
@state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
end
Expand Down
11 changes: 6 additions & 5 deletions lib/pdf/reader/text_run.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@ class PDF::Reader
class TextRun
include Comparable

attr_reader :origin, :width, :font_size, :text
attr_reader :origin, :width, :font_size, :text, :state

alias :to_s :text

def initialize(x, y, width, font_size, text)
def initialize(x, y, width, font_size, text, state)
@origin = PDF::Reader::Point.new(x, y)
@width = width
@font_size = font_size
@text = text
@state = state
end

# Allows collections of TextRun objects to be sorted. They will be sorted
Expand Down Expand Up @@ -62,14 +63,14 @@ def +(other)
raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)

if (other.x - endx) <( font_size * 0.2)
TextRun.new(x, y, other.endx - x, font_size, text + other.text)
TextRun.new(x, y, other.endx - x, font_size, text + other.text, {})
else
TextRun.new(x, y, other.endx - x, font_size, "#{text} #{other.text}")
TextRun.new(x, y, other.endx - x, font_size, "#{text} #{other.text}", {})
end
end

def inspect
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
"#{text} w:#{width} f:#{font_size} @#{x},#{y} #{@state.inspect}"
end

def intersect?(other_run)
Expand Down

0 comments on commit 951f12b

Please sign in to comment.