# Patch summary: adds an opt-in "--skip-open-access" command-line flag and the
# `open_access` checks it relies on. (Text ahead of the first "diff --git"
# header is commentary, not part of any hunk.)
#
# src/parser.cr  - new @skip_oa instance variable, initialised to false, added
#                  to the getter list as :skip_oa; the "--skip-open-access"
#                  option handler sets it to true.
# src/muse-dl.cr - right after `return unless thing`, when both
#                  thing.open_access and parser.skip_oa are truthy, prints a
#                  "Skipping ..., available under Open Access" message to
#                  STDERR and returns early, ahead of the existing
#                  Book/PDF-format check.
# src/issue.cr, src/journal.cr, src/thing.cr
#                - each gains the same `open_access` method: true only when
#                  @info has an "Open Access" key whose value equals "Yes";
#                  false otherwise (including when the key is missing).
# src/article.cr - `open_access` is a stub that always returns false; the
#                  patch itself marks it "TODO: Fix this".
#
# NOTE(review): with the article.cr stub, --skip-open-access can never skip
#   whatever type src/article.cr defines -- presumably articles; confirm.
# NOTE(review): the three identical `open_access` bodies look like candidates
#   for one shared helper -- verify how these types relate before merging.
# NOTE(review): every hunk in this copy sits on one physical line, so the
#   original line breaks and indentation are not recoverable from here;
#   presumably the patch must be re-exported before it can be applied.
diff --git a/src/article.cr b/src/article.cr index 64a7c39..bf6ac1c 100644 --- a/src/article.cr +++ b/src/article.cr @@ -10,5 +10,10 @@ module Muse::Dl @id = id @url = "https://muse.jhu.edu/article/#{id}" end + + # TODO: Fix this + def open_access + return false + end end end diff --git a/src/issue.cr b/src/issue.cr index 22c8dae..8f759d2 100644 --- a/src/issue.cr +++ b/src/issue.cr @@ -24,6 +24,13 @@ module Muse::Dl @info = Hash(String, String).new end + def open_access + if @info.has_key? "Open Access" + return @info["Open Access"] == "Yes" + end + false + end + def parse(html : String) h = Myhtml::Parser.new html @info = InfoParser.infobox(h) diff --git a/src/journal.cr b/src/journal.cr index 9c828f6..45b6214 100644 --- a/src/journal.cr +++ b/src/journal.cr @@ -19,6 +19,13 @@ module Muse::Dl parse_volumes(h) end + def open_access + if @info.has_key? "Open Access" + return @info["Open Access"] == "Yes" + end + false + end + def parse_volumes(myhtml : Myhtml::Parser) myhtml.css("#available_issues_list_text a").each do |a| link = a.attribute_by("href").to_s diff --git a/src/muse-dl.cr b/src/muse-dl.cr index 07e6f25..47095ab 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -15,6 +15,11 @@ module Muse::Dl thing = Fetch.get_info(url) if url return unless thing + if (thing.open_access) && (parser.skip_oa) + STDERR.puts "Skipping #{url}, available under Open Access" + return + end + if thing.is_a? Muse::Dl::Book unless thing.formats.includes? 
:pdf STDERR.puts "Book not available in PDF format, skipping: #{url}" diff --git a/src/parser.cr b/src/parser.cr index ebfd2d7..a98d27e 100644 --- a/src/parser.cr +++ b/src/parser.cr @@ -14,10 +14,11 @@ module Muse::Dl @input_list : String | Nil @cookie : String | Nil @h : Bool | Nil + @skip_oa = false DEFAULT_FILE_NAME = "tempfilename.pdf" - getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first + getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first, :skip_oa setter :url # Update the output filename unless we have a custom one passed @@ -57,6 +58,7 @@ module Muse::Dl parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true } parser.on(long_flag = "--dont-strip-first-page", description = "Disables first page from being stripped. Use carefully") { @strip_first = false } parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie } + parser.on(long_flag = "--skip-open-access", description = "Don't download open access content") { @skip_oa = true } parser.on("-h", "--help", "Show this help") { @h = true; puts parser } parser.unknown_args do |args| diff --git a/src/thing.cr b/src/thing.cr index 96a105a..2b0e3fd 100644 --- a/src/thing.cr +++ b/src/thing.cr @@ -19,6 +19,13 @@ module Muse::Dl private getter :h + def open_access + if @info.has_key? "Open Access" + return @info["Open Access"] == "Yes" + end + false + end + def initialize(html : String) @h = Myhtml::Parser.new html @info = InfoParser.infobox(h)