Adds support for cookies

This commit is contained in:
Nemo 2020-03-30 02:33:55 +05:30
parent 923e589413
commit 955aec3a1b
4 changed files with 30 additions and 14 deletions

View File

@ -19,12 +19,18 @@ Please ensure you have `pdftk` installed, and run the `muse-dl` binary. To build
## Usage
```
Usage: muse-dl [--flags] URL
Usage: muse-dl [--flags] [URL|INPUT_FILE]
URL: A link to a book on the Project MUSE website, eg https://muse.jhu.edu/book/875
INPUT_FILE: Path to a file containing a list of links
--no-cleanup Don't cleanup temporary files
--tmp-dir PATH Temporary Directory to use
--output FILE Output Filename
--no-bookmarks Don't add bookmarks in the PDF
--clobber Overwrite the output file, if it already exists
--input-pdf INPUT Input Stitched PDF. Will not download anything
--clobber Overwrite the output file, if it already exists. Not compatible with input-pdf
--cookie COOKIE Cookie-header
-h, --help Show this help
```
@ -35,6 +41,12 @@ muse-dl https://muse.jhu.edu/book/875
Saved final output to Accommodating Revolutions- Virginia's Northern Neck in an Era of Transformations, 1760-1810.pdf
```
Alternatively, if you have a file containing a list of links, such as `input.txt` ([sample](https://paste.ubuntu.com/p/myBkNn6DSP/)), you can pass it as the sole parameter:
`muse-dl input.txt`
It will then download all the links in that file.
## License
Licensed under the [MIT License](https://nemo.mit-license.org/). See LICENSE file for details.

View File

@ -4,22 +4,19 @@ require "./errors/*"
module Muse::Dl
class Fetch
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
HEADERS = {
"User-Agent" => USER_AGENT,
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language" => "en-US,en;q=0.5",
"DNT" => "1",
"Cookie" => "session=124.123.104.8.1585420925750331; session=25719682.5a1ef8cb90ec8",
"Connection" => "keep-alive",
"Upgrade-Insecure-Requests" => "1",
"Cache-Control" => "max-age=0",
HEADERS = {
"User-Agent" => USER_AGENT,
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language" => "en-US,en;q=0.5",
"Connection" => "keep-alive",
}
# Builds the path of the PDF file for chapter `id` under `tmp_path`,
# in the form "<tmp_path>/chapter-<id>.pdf".
def self.chapter_file_name(id : String, tmp_path : String)
"#{tmp_path}/chapter-#{id}.pdf"
end
def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, add_bookmark = true)
def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, cookie : String | Nil = nil, add_bookmark = true)
final_pdf_file = chapter_file_name chapter_id, tmp_path
tmp_pdf_file = "#{final_pdf_file}.tmp"
@ -33,6 +30,11 @@ module Muse::Dl
"Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
})
if cookie
headers["Cookie"] = cookie
end
# TODO: Add validation for the downloaded file (should be PDF)
Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
File.open(tmp_pdf_file, "w") do |file|
IO.copy(response.body_io, file)

View File

@ -29,7 +29,7 @@ module Muse::Dl
unless parser.input_pdf
# Save each chapter
thing.chapters.each do |chapter|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks)
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
end
chapter_ids = thing.chapters.map { |c| c[0] }

View File

@ -11,10 +11,11 @@ module Muse::Dl
@input_pdf : String | Nil
@clobber = false
@input_list : String | Nil
@cookie : String | Nil
DEFAULT_FILE_NAME = "tempfilename.pdf"
getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list
getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list, :cookie
setter :url
# Update the output filename unless we have a custom one passed
@ -53,6 +54,7 @@ module Muse::Dl
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input }
parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true }
parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie }
parser.on("-h", "--help", "Show this help") { puts parser }
parser.unknown_args do |args|