diff --git a/src/infoparser.cr b/src/infoparser.cr index f428105..d7c9b1b 100644 --- a/src/infoparser.cr +++ b/src/infoparser.cr @@ -21,7 +21,7 @@ module Muse::Dl end def self.author(myhtml : Myhtml::Parser) - myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip + myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip.gsub("
", ", ").gsub("\n", " ") end def self.date(myhtml : Myhtml::Parser) @@ -33,7 +33,12 @@ module Muse::Dl end def self.summary(myhtml : Myhtml::Parser) - myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip + begin + return myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip + rescue e : Exception + STDERR.puts "Could not fetch summary" + return "NA" + end end def self.summary_html(myhtml : Myhtml::Parser) diff --git a/src/muse-dl.cr b/src/muse-dl.cr index 764fcd1..07dcefa 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -17,17 +17,30 @@ module Muse::Dl # Will have no effect if parser has a custom title parser.output = Util.slug_filename "#{thing.title}.pdf" - # Save each chapter - thing.chapters.each do |chapter| - Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks) + # If file exists and we can't clobber + if File.exists?(parser.output) && parser.clobber == false + STDERR.puts "File already exists, not doing anything" + Process.exit(1) end - chapter_ids = thing.chapters.map { |c| c[0] } - - # Stitch the PDFs together + temp_stitched_file = nil pdf_builder = Pdftk.new(parser.tmp) - temp_stitched_file = pdf_builder.stitch chapter_ids - pdf_builder.add_metadata(temp_stitched_file, parser.output, thing) - temp_stitched_file.delete + + unless parser.input_pdf + # Save each chapter + thing.chapters.each do |chapter| + Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks) + end + chapter_ids = thing.chapters.map { |c| c[0] } + + # Stitch the PDFs together + temp_stitched_file = pdf_builder.stitch chapter_ids + pdf_builder.add_metadata(temp_stitched_file, parser.output, thing) + else + x = parser.input_pdf + pdf_builder.add_metadata(File.open(x), parser.output, thing) if x + end + + temp_stitched_file.delete if temp_stitched_file puts "Saved final output to #{parser.output}" end end diff --git a/src/parser.cr b/src/parser.cr index 752ad32..d2c2374 100644 --- a/src/parser.cr +++ b/src/parser.cr @@ -8,10 +8,12 @@ module Muse::Dl @cleanup = true @output = DEFAULT_FILE_NAME @url = "INVALID_URL" + @input_pdf : String | Nil + @clobber = false DEFAULT_FILE_NAME = "tempfilename.pdf" - getter :bookmarks, :tmp, :cleanup, :output, :url + getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber # Update the output filename unless we have a custom one passed def output=(output_file : String) @@ -29,6 +31,7 @@ module Muse::Dl def initialize(arg : Array(String) = [] of String) @tmp = Dir.tempdir + @input_pdf = nil parser = OptionParser.new parser.banner = "Usage: muse-dl [--flags] URL" @@ -36,6 +39,8 @@ module Muse::Dl parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path } parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file } parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false } + parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input } + parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists") { @clobber = true } parser.on("-h", "--help", "Show this help") { puts parser } parser.unknown_args do |args| diff --git a/src/pdftk.cr b/src/pdftk.cr index 0cd0355..5ab02a4 100644 --- a/src/pdftk.cr +++ b/src/pdftk.cr @@ -54,7 +54,15 @@ module Muse::Dl def add_metadata(input_file : File, output_file : String, book : Book) # First we have to dump the current metadata metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt") - # TODO: Add version info in the Creator/Producer + keywords = "Publisher:#{book.publisher}, Published:#{book.date}" + + # Known Info keys, if they are present + ["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label| + if book.info.has_key? label + keywords += ", #{label}:#{book.info[label]}" + end + end + text = <<-EOT InfoBegin InfoKey: Creator @@ -67,27 +75,21 @@ module Muse::Dl InfoValue: #{book.title} InfoBegin InfoKey: Keywords - InfoValue: Publisher:#{book.publisher}, Published:#{book.date} + InfoValue: #{keywords} InfoBegin InfoKey: Author InfoValue: #{book.author} InfoBegin InfoKey: Subject InfoValue: #{book.summary.gsub(/\n\s+/, " ")} + InfoBegin + InfoKey: ModDate + InfoValue: + InfoBegin + InfoKey: CreationDate + InfoValue: EOT - # Known Info keys, if they are present - - ["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label| - if book.info.has_key? label - text += <<-EOT - InfoBegin - InfoKey: #{label} - InfoValue: #{book.info[label]} - EOT - end - end - File.write(metadata_text_file.path, text) execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file] metadata_text_file.delete