Skip to content

Commit

Permalink
Adds ability to specify pdftotext options
Browse files Browse the repository at this point in the history
* Adds test for `-table` option that produces tabular output
  • Loading branch information
Phil Gooch authored and Phil Gooch committed May 3, 2017
1 parent 50e973d commit ef7a6dd
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 4 deletions.
7 changes: 4 additions & 3 deletions lib/grim/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,18 @@ def save(path, options={})
Grim.processor.save(@pdf, @index, path, options)
end

# Extracts the text from the selected page.
# Extracts the text from the selected page, optionally passing extra command-line options to pdftotext.
#
# For example:
#
# pdf[1].text
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
#
# pdf[1].text(["-table"])
#
# Returns a String.
#
def text
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')
def text(options=[])
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, options.join(", "), Shellwords.escape(@pdf.path), "-"].join(' ')
Grim.logger.debug { "Running pdftotext command" }
Grim.logger.debug { command }
`#{command}`
Expand Down
2 changes: 1 addition & 1 deletion lib/grim/version.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# encoding: UTF-8
module Grim
VERSION = "1.2.0" unless defined?(::Grim::VERSION)
VERSION = "1.2.1" unless defined?(::Grim::VERSION)
end
Binary file added spec/fixtures/table.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions spec/lib/grim/page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
end

# Checks that option flags handed to `text` take effect: with "-table",
# the text extracted from pdf[0] of table.pdf must contain the two rows
# asserted below.
it "should extract tabular data with the -table option" do
  pdf = Grim::Pdf.new(fixture_path("table.pdf"))
  # The options array is an ordinary positional argument.
  expect(pdf[0].text(["-table"])).to \
    include(
      " Male 979 (85) 968 (85)\n\n" +
      " Female 169 (15) 169 (15)\n")
end

it "works with full path to pdftotext" do
pdftotext_path = `which pdftotext`.chomp
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
Expand Down

0 comments on commit ef7a6dd

Please sign in to comment.