diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4df2ec4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git +LICENSE +Dockerfile +spec/ +bin/ +Makefile +.dockerignore +*.Dockerfile \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index f8f9d5e..6941f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,19 @@ language: crystal +env: + # Path to 'hadolint' binary + HADOLINT: "${HOME}/hadolint" + +install: + # Download hadolint binary and set it as executable + - curl -sL -o ${HADOLINT} "https://github.com/hadolint/hadolint/releases/download/v1.17.5/hadolint-$(uname -s)-$(uname -m)" + && chmod 700 ${HADOLINT} + - shards install script: - crystal spec - crystal tool format --check + # Lint every tracked file matching 'Dockerfile*' with hadolint; --cached is spelled out because recent git rejects --ignored without --cached or --others + - git ls-files --cached --exclude='Dockerfile*' --ignored | xargs --max-lines=1 ${HADOLINT} addons: apt: diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2c2d883 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,43 @@ +FROM debian:10-slim + +WORKDIR /build + +COPY . . 
+ +# Add the key for the crystal debian repo +ADD https://keybase.io/crystal/pgp_keys.asc /tmp/crystal.gpg + +# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 for why mkdir is needed +RUN mkdir -p /usr/share/man/man1 && \ + apt-get update && \ + apt-get install --yes --no-install-recommends \ + # Install gnupg for the apt-key operation + gnupg=2.2.12-1+deb10u1 \ + # libssl for faster TLS in Crystal + libssl-dev=1.1.1d-0+deb10u2 \ + # pdftk as a dependency for muse-dl + pdftk=2.02-5 \ + # ca-certificates for talking to crystal-lang.org + ca-certificates=20190110 \ + # git to let shards install happen + git=1:2.20.1-2+deb10u1 \ + # build --release + zlib1g-dev=1:1.2.11.dfsg-1 && \ + # See https://crystal-lang.org/install/ + apt-key add /tmp/crystal.gpg && \ + echo "deb https://dist.crystal-lang.org/apt crystal main" > /etc/apt/sources.list.d/crystal.list && \ + apt-get update && \ + apt-get install --no-install-recommends --yes crystal=0.33.0-1 && \ + # Cleanup + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN shards install && shards build --release && \ + ln /build/bin/muse-dl /usr/bin/muse-dl + +RUN apt-get --yes remove git gnupg + +WORKDIR /data +VOLUME /data + +ENTRYPOINT ["/usr/bin/muse-dl"] \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..98a984e --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) +current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path)))) + +release: + # Build a static binary and save it in muse-dl-static + docker build --tag muse-dl-static --file static.Dockerfile . 
+ # Then extract the image | extract the layer.tar file (we only have one layer) | extract the muse-dl-static file + docker image save muse-dl-static | tar xf - --wildcards "*/layer.tar" -O | tar xf - "muse-dl-static" + # And move it to the bin/ directory + mv -f muse-dl-static bin/ \ No newline at end of file diff --git a/README.md b/README.md index dc219bb..32598c6 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ Any downloads you perform with this tool are for your own usage. I personally ha # Installation +## Linux / Build + ``` git clone https://github.com/captn3m0/muse-dl.git cd muse-dl @@ -16,9 +18,28 @@ shards build ./bin/muse-dl --help ``` +## Linux / Download + +A Linux x86_64 static build is available in the latest release: https://github.com/captn3m0/muse-dl/releases/latest. Save the file as `muse-dl` and remember to mark it as executable (`chmod +x`). + +## Docker + +A Docker image is available at `captn3m0/muse-dl` on Docker Hub. The working directory for the image is set as `/data`, so you'll need to mount your output directory as `/data` for it to work. Sample invocations: + +``` +# Download the book, and put it in your Downloads directory +docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl https://muse.jhu.edu/book/875 + +# If you have a list.txt file in your Downloads directory, then you can run +docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl /data/list.txt + +# If you want to keep the temporary files on your host, and not delete them +docker run -it -v /home/nemo/Downloads:/data -v /tmp:/musetmp captn3m0/muse-dl --tmp-dir /musetmp --no-cleanup https://muse.jhu.edu/book/875 +``` + ## Requirements -Please ensure you have `pdftk` installed, and run the `muse-dl` binary. To build the binary, please run the steps in Installation. +Please ensure you have `pdftk` installed, unless you're running via Docker. 
## Usage diff --git a/spec/fetch_spec.cr b/spec/fetch_spec.cr new file mode 100644 index 0000000..6e97dfd --- /dev/null +++ b/spec/fetch_spec.cr @@ -0,0 +1,12 @@ +require "./spec_helper" +# NOTE(review): presumably hits the live MUSE site for chapter 2379787; needs network access - confirm + +describe Muse::Dl::Fetch do + it "should notice the unable to construct chapter PDF error" do + f = "/tmp/chapter-2379787.pdf" + File.delete(f) if File.exists? f + expect_raises Muse::Dl::Errors::MuseCorruptPDF do + Muse::Dl::Fetch.save_chapter("/tmp", "2379787", "NA") + end + end +end diff --git a/src/errors/muse_corrupt_pdf.cr b/src/errors/muse_corrupt_pdf.cr new file mode 100644 index 0000000..1c534ea --- /dev/null +++ b/src/errors/muse_corrupt_pdf.cr @@ -0,0 +1,4 @@ +module Muse::Dl::Errors + class MuseCorruptPDF < Exception + end +end diff --git a/src/fetch.cr b/src/fetch.cr index 240ab8b..524afef 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -1,5 +1,6 @@ require "crest" require "./errors/*" +require "myhtml" module Muse::Dl class Fetch @@ -42,6 +43,18 @@ module Muse::Dl # TODO: Add validation for the downloaded file (should be PDF) Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response| + # An HTML (rather than PDF) response is scanned for the 'Unable to construct chapter PDF' message; []? keeps a missing header from raising + content_type = response.headers["Content-Type"]? + if content_type.is_a? 
String + if /html/.match content_type + # NOTE(review): this scan drains body_io, so an HTML response without the marker is written out as an empty file below + response.body_io.each_line do |line| + if /Unable to construct chapter PDF/.match line + raise Muse::Dl::Errors::MuseCorruptPDF.new + end + end + end + end File.open(tmp_pdf_file, "w") do |file| IO.copy(response.body_io, file) end diff --git a/src/muse-dl.cr b/src/muse-dl.cr index b5b9519..496cbc6 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -33,7 +33,12 @@ module Muse::Dl unless parser.input_pdf # Save each chapter thing.chapters.each do |chapter| - Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks) + begin + Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks) + rescue e : Muse::Dl::Errors::MuseCorruptPDF + STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}" + return + end end chapter_ids = thing.chapters.map { |c| c[0] } diff --git a/static.Dockerfile b/static.Dockerfile new file mode 100644 index 0000000..6ddd650 --- /dev/null +++ b/static.Dockerfile @@ -0,0 +1,12 @@ +FROM crystallang/crystal:0.33.0 AS builder + +WORKDIR /build + +COPY . . + +RUN shards install && \ + shards build --release --static + +FROM scratch + +COPY --from=builder /build/bin/muse-dl /muse-dl-static \ No newline at end of file