🏡 index : github.com/captn3m0/muse-dl.git

author Nemo <me@captnemo.in> 2020-06-30 17:59:56.0 +05:30:00
committer Nemo <me@captnemo.in> 2020-06-30 18:09:38.0 +05:30:00
commit
3a2d45fb6ee2f727e5f839f1323bbff53a81691f [patch]
tree
0ae2375ba0117276fd01897e6d5b7c13df946fe3
parent
62e6a21c84695786e64f7aa8ab51866e2e5c99a7
download
3a2d45fb6ee2f727e5f839f1323bbff53a81691f.tar.gz

Adds a skip-open-access flag



Diff

 src/article.cr | 5 +++++
 src/issue.cr   | 7 +++++++
 src/journal.cr | 7 +++++++
 src/muse-dl.cr | 5 +++++
 src/parser.cr  | 4 +++-
 src/thing.cr   | 7 +++++++
 6 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/article.cr b/src/article.cr
index 64a7c39..bf6ac1c 100644
--- a/src/article.cr
+++ a/src/article.cr
@@ -10,5 +10,10 @@
      @id = id
      @url = "https://muse.jhu.edu/article/#{id}"
    end

    # TODO: Fix this
    # Reports whether this article is Open Access.
    # Detection is not implemented here yet, so the answer is always false.
    def open_access
      false
    end
  end
end
diff --git a/src/issue.cr b/src/issue.cr
index 22c8dae..8f759d2 100644
--- a/src/issue.cr
+++ a/src/issue.cr
@@ -24,6 +24,13 @@
      @info = Hash(String, String).new
    end

    # Reports whether the parsed info box marks this item as Open Access.
    # True only when the "Open Access" entry exists and equals "Yes";
    # a missing entry or any other value yields false.
    def open_access
      @info.fetch("Open Access", nil) == "Yes"
    end

    def parse(html : String)
      h = Myhtml::Parser.new html
      @info = InfoParser.infobox(h)
diff --git a/src/journal.cr b/src/journal.cr
index 9c828f6..45b6214 100644
--- a/src/journal.cr
+++ a/src/journal.cr
@@ -19,6 +19,13 @@
      parse_volumes(h)
    end

    # Reports whether the parsed info box marks this item as Open Access.
    # True only when the "Open Access" entry exists and equals "Yes";
    # a missing entry or any other value yields false.
    def open_access
      @info.fetch("Open Access", nil) == "Yes"
    end

    def parse_volumes(myhtml : Myhtml::Parser)
      myhtml.css("#available_issues_list_text a").each do |a|
        link = a.attribute_by("href").to_s
diff --git a/src/muse-dl.cr b/src/muse-dl.cr
index 07e6f25..47095ab 100644
--- a/src/muse-dl.cr
+++ a/src/muse-dl.cr
@@ -15,6 +15,11 @@
      thing = Fetch.get_info(url) if url
      return unless thing

      if (thing.open_access) && (parser.skip_oa)
        STDERR.puts "Skipping #{url}, available under Open Access"
        return
      end

      if thing.is_a? Muse::Dl::Book
        unless thing.formats.includes? :pdf
          STDERR.puts "Book not available in PDF format, skipping: #{url}"
diff --git a/src/parser.cr b/src/parser.cr
index ebfd2d7..a98d27e 100644
--- a/src/parser.cr
+++ a/src/parser.cr
@@ -14,10 +14,11 @@
    @input_list : String | Nil
    @cookie : String | Nil
    @h : Bool | Nil
    @skip_oa = false

    DEFAULT_FILE_NAME = "tempfilename.pdf"

    getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first
    getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first, :skip_oa
    setter :url

    # Update the output filename unless we have a custom one passed
@@ -57,6 +58,7 @@
      parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true }
      parser.on(long_flag = "--dont-strip-first-page", description = "Disables first page from being stripped. Use carefully") { @strip_first = false }
      parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie }
      parser.on(long_flag = "--skip-open-access", description = "Don't download open access content") { @skip_oa = true }
      parser.on("-h", "--help", "Show this help") { @h = true; puts parser }

      parser.unknown_args do |args|
diff --git a/src/thing.cr b/src/thing.cr
index 96a105a..2b0e3fd 100644
--- a/src/thing.cr
+++ a/src/thing.cr
@@ -19,6 +19,13 @@

    private getter :h

    # Reports whether the parsed info box marks this item as Open Access.
    # True only when the "Open Access" entry exists and equals "Yes";
    # a missing entry or any other value yields false.
    def open_access
      @info.fetch("Open Access", nil) == "Yes"
    end

    def initialize(html : String)
      @h = Myhtml::Parser.new html
      @info = InfoParser.infobox(h)