Parent

PDF::Reader::TextReceiver

An example receiver class that processes all text found in a PDF file. All text that is found will be printed to the IO object specified in the constructor.

Usage:

receiver = PDF::Reader::TextReceiver.new($stdout)
PDF::Reader.file("somefile.pdf", receiver)

DEPRECATED: this class was deprecated in version 0.11.0 and will eventually be removed.

Public Class Methods

new(main_receiver) click to toggle source

Initialize with the library user’s receiver

# File lib/pdf/reader/text_receiver.rb, line 41
# Builds a receiver that forwards extracted page text to +main_receiver+.
#
# main_receiver - the object supplied by the library user; finished page text
#                 is appended to it with <<.
#
# Also starts with an empty stack of page upper-right corners.
def initialize(main_receiver)
  @main_receiver = main_receiver
  @upper_corners = []
end

Public Instance Methods

begin_document(root) click to toggle source

Called when the document parsing begins

# File lib/pdf/reader/text_receiver.rb, line 47
# Callback fired when parsing of a document starts.
#
# root - passed in by the parser; not used here.
#
# Resets the stack of page upper-right corners so nothing leaks over from a
# previously parsed document.
def begin_document(root)
  @upper_corners = []
end
begin_page(info) click to toggle source

Called when new page parsing begins

# File lib/pdf/reader/text_receiver.rb, line 65
# Callback fired when parsing of a new page starts.
#
# info - the page dictionary; kept in @page and inspected for a :MediaBox.
#
# Resets every piece of per-page bookkeeping: the text-state stack, the
# collected output lines, the current line/location and the map from
# vertical location to output line.
def begin_page(info)
  @page = info

  # Text state every page starts from.
  initial_text_state = {
    :char_spacing  => 0,
    :word_spacing  => 0,
    :hori_scaling  => 100,
    :leading       => 0,
    :tj_adjustment => 0
  }
  @state = [initial_text_state]

  page_corners = media_box_check(info)
  @upper_corners.push(page_corners)

  @output       = []
  @line         = 0
  @location     = 0
  @displacement = {}
  # Nothing has been placed yet, so the lowest location seen so far is the
  # top edge of the page.
  @smallest_y_loc = page_corners[:ury]
  @written_to = false
end
begin_page_container(page) click to toggle source
# File lib/pdf/reader/text_receiver.rb, line 56
# Callback fired when a page container is entered.
#
# page - the container's dictionary; inspected for a :MediaBox.
#
# Pushes the container's upper-right corner (or a copy of the enclosing one
# when the container has no :MediaBox) onto the corner stack.
def begin_page_container(page)
  container_corners = media_box_check(page)
  @upper_corners.push(container_corners)
end
begin_text_object() click to toggle source

PDF operator BT

# File lib/pdf/reader/text_receiver.rb, line 93
# PDF operator BT.
#
# Pushes a shallow copy of the current text state, so changes made inside
# the text object can be discarded again by end_text_object.
def begin_text_object
  snapshot = @state.last.dup
  @state.push(snapshot)
end
calculate_line_and_location(new_loc) click to toggle source
# File lib/pdf/reader/text_receiver.rb, line 235
# Maps a vertical text location to an output line and makes it current.
#
# new_loc - numeric vertical location of the text about to be shown.
#
# Updates @location, @line, @smallest_y_loc and the @displacement map
# (vertical location => index into @output).
#
# Rules, in order:
# * Nothing written on this page yet: the location maps to line 0.
# * Location already known: reuse the line recorded for it.
# * Location below the current one, or below everything seen so far:
#   it becomes a new line directly after the current line.
# * Otherwise: snap to the closest known location below it. When no such
#   location exists, fall back to line 0 for the new location. The previous
#   code stored a nil key in that case and then raised NoMethodError on the
#   nil comparison.
def calculate_line_and_location(new_loc)
  key = new_loc

  if !@written_to
    @displacement[key] = 0
  elsif !@displacement.has_key?(key)
    if key < @location || key < @smallest_y_loc
      @displacement[key] = @line + 1
    else
      nearest_below = @displacement.keys.select { |known| key > known }.max
      if nearest_below.nil?
        @displacement[key] = 0
      else
        key = nearest_below
      end
    end
  end

  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
end
end_document() click to toggle source

Called when the document parsing ends

# File lib/pdf/reader/text_receiver.rb, line 52
# Callback fired when parsing of a document ends.
#
# Empties the text-state stack. @state is only created by begin_page, so a
# document that never started a page leaves it nil; that case is a no-op
# instead of raising NoMethodError.
def end_document
  @state.clear if @state
end
end_page() click to toggle source

Called when page parsing ends

# File lib/pdf/reader/text_receiver.rb, line 87
# Callback fired when parsing of a page ends.
#
# Joins the collected output lines with newlines, appends the result to the
# user's receiver, then drops this page's entry from the corner stack.
def end_page
  page_text = @output.join("\n")
  @main_receiver << page_text
  @upper_corners.pop
end
end_page_container() click to toggle source
# File lib/pdf/reader/text_receiver.rb, line 60
# Callback fired when a page container is left.
#
# Removes the corner entry that begin_page_container pushed.
def end_page_container
  @upper_corners.pop
end
end_text_object() click to toggle source

PDF operator ET

# File lib/pdf/reader/text_receiver.rb, line 98
# PDF operator ET.
#
# Discards the text state that begin_text_object pushed.
def end_text_object
  @state.pop
end
media_box_check(dict) click to toggle source
# File lib/pdf/reader/text_receiver.rb, line 223
# Works out the upper-right corner that applies to +dict+.
#
# dict - a page or page-container dictionary.
#
# Starts from a copy of the innermost corner on the stack (or 0/0 when the
# stack is empty). When +dict+ has a :MediaBox, the corner is replaced by the
# box's width and height.
#
# Returns a new Hash with :urx and :ury keys; the stack itself is not changed.
def media_box_check(dict)
  inherited = @upper_corners.last || {:urx => 0, :ury => 0}
  corners = inherited.dup
  return corners unless dict.has_key?(:MediaBox)

  box = dict[:MediaBox]
  corners[:urx] = box[2] - box[0]
  corners[:ury] = box[3] - box[1]
  corners
end
move_text_position(tx, ty) click to toggle source

PDF operator Td

# File lib/pdf/reader/text_receiver.rb, line 135
# PDF operator Td.
#
# tx - horizontal offset; ignored, only vertical movement is tracked.
# ty - vertical offset, added to the current location.
def move_text_position(tx, ty)
  target = @location + ty
  calculate_line_and_location(target)
end
move_text_position_and_set_leading(tx, ty) click to toggle source

PDF operator TD

# File lib/pdf/reader/text_receiver.rb, line 141
# PDF operator TD.
#
# tx - horizontal offset, passed through to move_text_position.
# ty - vertical offset; also stored as the text leading.
#
# NOTE(review): the leading is stored as +ty+ without negating it. The PDF
# reference appears to define TD as setting the leading to -ty; confirm the
# sign convention together with move_to_start_of_next_line before changing.
def move_text_position_and_set_leading(tx, ty)
  set_text_leading(ty)
  move_text_position(tx, ty)
end
move_to_next_line_and_show_text(string) click to toggle source

PDF operator '

# File lib/pdf/reader/text_receiver.rb, line 211
# PDF operator ' (single quote).
#
# string - the text to show.
#
# Moves to the start of the next line first, then shows +string+ there.
def move_to_next_line_and_show_text(string)
  move_to_start_of_next_line
  show_text(string)
end
move_to_start_of_next_line() click to toggle source

PDF operator T*

# File lib/pdf/reader/text_receiver.rb, line 130
# PDF operator T*.
#
# Moves vertically by the leading stored in the current text state, with no
# horizontal offset.
#
# NOTE(review): the leading is applied un-negated. The PDF reference appears
# to define T* as "0 -leading Td"; confirm the sign convention against how the
# leading is stored before changing.
def move_to_start_of_next_line
  leading = @state.last[:leading]
  move_text_position(0, leading)
end
set_character_spacing(n) click to toggle source

PDF operator Tc

# File lib/pdf/reader/text_receiver.rb, line 110
# PDF operator Tc.
#
# n - the character spacing to record in the current text state.
def set_character_spacing(n)
  current = @state.last
  current[:char_spacing] = n
end
set_horizontal_text_scaling(n) click to toggle source

PDF operator Tz

# File lib/pdf/reader/text_receiver.rb, line 120
# PDF operator Tz.
#
# n - horizontal scaling as a percentage (100 means unscaled).
#
# Records the scaling in the current text state as a ratio (n / 100).
# The division is done in floating point: with the previous integer division
# any Integer percentage below 100 was stored as 0.
def set_horizontal_text_scaling(n)
  @state.last[:hori_scaling] = n / 100.0
end
set_spacing_next_line_show_text(aw, ac, string) click to toggle source

PDF operator "

# File lib/pdf/reader/text_receiver.rb, line 217
# PDF operator " (double quote).
#
# aw     - word spacing to set.
# ac     - character spacing to set.
# string - the text to show on the next line.
#
# Applies both spacings, then moves to the next line and shows +string+.
def set_spacing_next_line_show_text(aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
set_text_leading(n) click to toggle source

PDF operator TL

# File lib/pdf/reader/text_receiver.rb, line 125
# PDF operator TL.
#
# n - the leading to record in the current text state.
def set_text_leading(n)
  current = @state.last
  current[:leading] = n
end
set_text_matrix_and_text_line_matrix(*args) click to toggle source

PDF operator Tm

# File lib/pdf/reader/text_receiver.rb, line 103
# PDF operator Tm.
#
# args - the six matrix operands, named a b c d e f in the PDF spec.
#
# Only the sixth operand (f) is used: it becomes the new vertical location.
# It is nil when fewer than six operands are supplied.
def set_text_matrix_and_text_line_matrix(*args)
  vertical_offset = args[5]
  calculate_line_and_location(vertical_offset)
end
set_word_spacing(n) click to toggle source

PDF operator Tw

# File lib/pdf/reader/text_receiver.rb, line 115
# PDF operator Tw.
#
# n - the word spacing to record in the current text state.
def set_word_spacing(n)
  current = @state.last
  current[:word_spacing] = n
end
show_text(string) click to toggle source

PDF operator Tj

# File lib/pdf/reader/text_receiver.rb, line 147
# PDF operator Tj.
#
# string - the text to append to the current output line.
#
# The output line is created on first use. When the current TJ adjustment is
# below -1000, one space per 900 units of adjustment is inserted before the
# text. Marks the page as written to.
def show_text(string)
  current_line = (@output[@line] ||= "")

  adjustment = @state.last[:tj_adjustment]
  if adjustment < -1000
    current_line << " " * (adjustment.abs/900)
  end

  current_line << string
  @written_to = true
end
show_text_with_positioning(params) click to toggle source

PDF operator TJ

# File lib/pdf/reader/text_receiver.rb, line 195
# PDF operator TJ.
#
# params - a list mixing text with numeric position adjustments.
#
# Each number becomes the current TJ adjustment for the text that follows it;
# every other element is handed to show_text. The adjustment that was active
# before the call is restored afterwards.
#
# Numbers are matched with Numeric. The previous Float/Fixnum check raised
# NameError on Ruby 3.2 and later, where the Fixnum constant no longer exists.
def show_text_with_positioning(params)
  prev_adjustment = @state.last[:tj_adjustment]

  params.each do |param|
    case param
    when Numeric
      @state.last[:tj_adjustment] = param
    else
      show_text(param)
    end
  end

  @state.last[:tj_adjustment] = prev_adjustment
end
super_show_text(string) click to toggle source
# File lib/pdf/reader/text_receiver.rb, line 159
# Alternative text renderer that writes +string+ into a fixed-width line of
# @output, choosing the line and starting column from @tm.
#
# string - the text to place.
#
# NOTE(review): @tm, Matrix, TS_UNITS_PER_H_CHAR and TS_UNITS_PER_V_CHAR are
# not defined anywhere in the visible source, and no visible method calls
# this one - confirm they exist before relying on it.
def super_show_text (string)
  # Upper-right corner of the current page, divided by the per-character unit
  # constants.
  urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
  ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR

  # Starting column, and output row counted from the top (ury minus the scaled
  # vertical value). Presumably @tm[2,0] / @tm[2,1] are the translation
  # entries of a text matrix - TODO confirm.
  x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
  y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

  #puts "rendering '#{string}' to #{x}x#{y}"

  # A row is created on first use as urx.to_i spaces.
  place = (@output[y] ||= (" " * urx.to_i))
  #puts "#{urx} #{place.size} #{string.size} #{x}"
  # Give up when the raw string would reach or pass the right edge.
  return if x+string.size >= urx

  string.split(//).each do |c|
    # Number of columns this character occupies.
    chars = 1

    case c
    when " "
      # A space is widened by the current word spacing.
      chars += @state.last[:word_spacing].to_i
      place[x-1, chars] = (" " * chars)
    else
      # Other characters are widened by the character spacing and narrowed by
      # the TJ adjustment (in thousands), but never take less than one column.
      chars += @state.last[:char_spacing].to_i
      chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
      chars = 1 if chars < 1

      # Write the character at column x-1 and blank out the rest of its width.
      place[x-1] = c
      place[x, chars-1] = (" " * (chars-1)) if chars > 1
    end

    x += chars
  end

  # Adds a matrix built from the final column and the row (both scaled back by
  # the unit constants) to @tm.
  @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.