From 955aec3a1b46fd6ecdaab5ea06b45c452afdcc75 Mon Sep 17 00:00:00 2001 From: Nemo Date: Mon, 30 Mar 2020 02:33:55 +0530 Subject: [PATCH] Adds support for cookies --- README.md | 16 ++++++++++++++-- src/fetch.cr | 22 ++++++++++++---------- src/muse-dl.cr | 2 +- src/parser.cr | 4 +++- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e167ea6..5df47cc 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,18 @@ Please ensure you have `pdftk` installed, and run the `muse-dl` binary. To build ## Usage ``` -Usage: muse-dl [--flags] URL +Usage: muse-dl [--flags] [URL|INPUT_FILE] + +URL: A link to a book on the Project MUSE website, eg https://muse.jhu.edu/book/875 +INPUT_FILE: Path to a file containing a list of links + --no-cleanup Don't cleanup temporary files --tmp-dir PATH Temporary Directory to use --output FILE Output Filename --no-bookmarks Don't add bookmarks in the PDF - --clobber Overwrite the output file, if it already exists + --input-pdf INPUT Input Stitched PDF. Will not download anything + --clobber Overwrite the output file, if it already exists. Not compatible with input-pdf + --cookie COOKIE Cookie-header -h, --help Show this help ``` @@ -35,6 +41,12 @@ muse-dl https://muse.jhu.edu/book/875 Saved final output to Accommodating Revolutions- Virginia's Northern Neck in an Era of Transformations, 1760-1810.pdf ``` +Alternatively, if you have an `input-file.txt` ([sample](https://paste.ubuntu.com/p/myBkNn6DSP/)), you can pass it as the sole parameter. + +`muse-dl input.txt` + +And it will download all the links in that file. + ## License Licensed under the [MIT License](https://nemo.mit-license.org/). See LICENSE file for details. 
\ No newline at end of file diff --git a/src/fetch.cr b/src/fetch.cr index 840a4c8..41973b3 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -4,22 +4,19 @@ require "./errors/*" module Muse::Dl class Fetch USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" - HEADERS = { - "User-Agent" => USER_AGENT, - "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", - "Accept-Language" => "en-US,en;q=0.5", - "DNT" => "1", - "Cookie" => "session=124.123.104.8.1585420925750331; session=25719682.5a1ef8cb90ec8", - "Connection" => "keep-alive", - "Upgrade-Insecure-Requests" => "1", - "Cache-Control" => "max-age=0", + + HEADERS = { + "User-Agent" => USER_AGENT, + "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language" => "en-US,en;q=0.5", + "Connection" => "keep-alive", } def self.chapter_file_name(id : String, tmp_path : String) "#{tmp_path}/chapter-#{id}.pdf" end - def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, add_bookmark = true) + def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, cookie : String | Nil = nil, add_bookmark = true) final_pdf_file = chapter_file_name chapter_id, tmp_path tmp_pdf_file = "#{final_pdf_file}.tmp" @@ -33,6 +30,11 @@ module Muse::Dl "Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf", }) + if cookie + headers["Cookie"] = cookie + end + + # TODO: Add validation for the downloaded file (should be PDF) Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response| File.open(tmp_pdf_file, "w") do |file| IO.copy(response.body_io, file) diff --git a/src/muse-dl.cr b/src/muse-dl.cr index 087748e..25f30d4 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -29,7 +29,7 @@ module Muse::Dl unless parser.input_pdf # Save each chapter thing.chapters.each do |chapter| - 
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks) + Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks) end chapter_ids = thing.chapters.map { |c| c[0] } diff --git a/src/parser.cr b/src/parser.cr index 6e79527..7a41be4 100644 --- a/src/parser.cr +++ b/src/parser.cr @@ -11,10 +11,11 @@ module Muse::Dl @input_pdf : String | Nil @clobber = false @input_list : String | Nil + @cookie : String | Nil DEFAULT_FILE_NAME = "tempfilename.pdf" - getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list + getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list, :cookie setter :url # Update the output filename unless we have a custom one passed @@ -53,6 +54,7 @@ module Muse::Dl parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false } parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input } parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true } + parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie } parser.on("-h", "--help", "Show this help") { puts parser } parser.unknown_args do |args|