From 82ce97cbc8b0349212075739e50772f7769e647b Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 2 Apr 2020 03:09:32 +0530 Subject: [PATCH 01/11] Adds fix for corrupt PDF error from MUSE --- spec/fetch_spec.cr | 12 ++++++++++++ src/errors/muse_corrupt_pdf.cr | 4 ++++ src/fetch.cr | 13 +++++++++++++ src/muse-dl.cr | 7 ++++++- 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 spec/fetch_spec.cr create mode 100644 src/errors/muse_corrupt_pdf.cr diff --git a/spec/fetch_spec.cr b/spec/fetch_spec.cr new file mode 100644 index 0000000..6e97dfd --- /dev/null +++ b/spec/fetch_spec.cr @@ -0,0 +1,12 @@ +require "./spec_helper" +# require "errors/muse_corrupt_pdf.cr" + +describe Muse::Dl::Book do + it "should notice the unable to construct chapter PDF error" do + f = "/tmp/chapter-2379787.pdf" + File.delete(f) if File.exists? f + expect_raises Muse::Dl::Errors::MuseCorruptPDF do + Muse::Dl::Fetch.save_chapter("/tmp", "2379787", "NA") + end + end +end diff --git a/src/errors/muse_corrupt_pdf.cr b/src/errors/muse_corrupt_pdf.cr new file mode 100644 index 0000000..1c534ea --- /dev/null +++ b/src/errors/muse_corrupt_pdf.cr @@ -0,0 +1,4 @@ +module Muse::Dl::Errors + class MuseCorruptPDF < Exception + end +end diff --git a/src/fetch.cr b/src/fetch.cr index 240ab8b..524afef 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -1,5 +1,6 @@ require "crest" require "./errors/*" +require "myhtml" module Muse::Dl class Fetch @@ -42,6 +43,18 @@ module Muse::Dl # TODO: Add validation for the downloaded file (should be PDF) Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response| + # puts response.headers["Content-Type"] + content_type = response.headers["Content-Type"] + if content_type.is_a? 
String + if /html/.match content_type + puts response + response.body_io.each_line do |line| + if /Unable to construct chapter PDF/.match line + raise Muse::Dl::Errors::MuseCorruptPDF.new + end + end + end + end File.open(tmp_pdf_file, "w") do |file| IO.copy(response.body_io, file) end diff --git a/src/muse-dl.cr b/src/muse-dl.cr index b5b9519..496cbc6 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -33,7 +33,12 @@ module Muse::Dl unless parser.input_pdf # Save each chapter thing.chapters.each do |chapter| - Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks) + begin + Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks) + rescue e : Muse::Dl::Errors::MuseCorruptPDF + STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}" + return + end end chapter_ids = thing.chapters.map { |c| c[0] } From ce0a901b47b7f3feaf8fe86749302304b84a1279 Mon Sep 17 00:00:00 2001 From: Nemo Date: Wed, 1 Apr 2020 13:25:50 +0530 Subject: [PATCH 02/11] Initial Dockerfile --- .dockerignore | 5 +++++ Dockerfile | 13 +++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0c20ebb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git +LICENSE +Dockerfile +spec/ +bin/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..626bc68 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM jrei/crystal-alpine as builder + +WORKDIR /build + +COPY . . 
+ +RUN shards install && shards build --release --static + +FROM scratch + +COPY --from=builder /build/bin/muse-dl / + +ENTRYPOINT ["/muse-dl"] \ No newline at end of file From b354440d60b9d3a42942d84a52d67f321b589027 Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 2 Apr 2020 17:46:45 +0530 Subject: [PATCH 03/11] Adds ubuntu based Dockerfile --- .travis.yml | 9 +++++++++ Dockerfile | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index f8f9d5e..549a6bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,17 @@ language: crystal +env: + # Path to 'hadolint' binary + HADOLINT: "${HOME}/hadolint" + +install: + # Download hadolint binary and set it as executable + - curl -sL -o ${HADOLINT} "https://github.com/hadolint/hadolint/releases/download/v1.17.5/hadolint-$(uname -s)-$(uname -m)" + && chmod 700 ${HADOLINT} script: - crystal spec - crystal tool format --check + - git ls-files --exclude='Dockerfile*' --ignored | xargs --max-lines=1 ${HADOLINT} addons: apt: diff --git a/Dockerfile b/Dockerfile index 626bc68..fd06a59 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,24 @@ -FROM jrei/crystal-alpine as builder +FROM ubuntu:19.04 WORKDIR /build COPY . . 
-RUN shards install && shards build --release --static +# Add the key for the crystal debian repo +ADD https://keybase.io/crystal/pgp_keys.asc /tmp/crystal.gpg -FROM scratch +# Install gnupg for the apt-key operation and openssl for our TLS stuff +RUN apt-get update && \ + apt-get install --yes --no-install-recommends gnupg=2.2.12-1ubuntu3 libssl-dev=1.1.1b-1ubuntu2.4 && \ + # See https://crystal-lang.org/install/ + apt-key add /tmp/crystal.gpg && \ + echo "deb https://dist.crystal-lang.org/apt crystal main" > /etc/apt/sources.list.d/crystal.list && \ + apt-get update && \ + apt-get install --no-install-recommends --yes crystal=0.33.0-1 pdftk=2.02-5 && \ + # Cleanup + apt-get clean && \ + rm -rf /var/lib/apt/lists/* -COPY --from=builder /build/bin/muse-dl / +RUN shards install && shards build --release -ENTRYPOINT ["/muse-dl"] \ No newline at end of file +ENTRYPOINT ["/build/bin/muse-dl"] \ No newline at end of file From 252e89678580d6100d02f1a39c873b1f1231a8d0 Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 2 Apr 2020 17:49:23 +0530 Subject: [PATCH 04/11] Adds a static builder --- static.Dockerfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 static.Dockerfile diff --git a/static.Dockerfile b/static.Dockerfile new file mode 100644 index 0000000..5201417 --- /dev/null +++ b/static.Dockerfile @@ -0,0 +1,12 @@ +FROM jrei/crystal-alpine:edge as builder + +WORKDIR /build + +COPY . . 
+ +RUN shards install && \ + shards build --release --static + +FROM scratch + +COPY --from=builder /build/bin/muse-dl /muse-dl \ No newline at end of file From 81dc6e0a76dbbee103ea9624fd248160acecd601 Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 2 Apr 2020 20:08:45 +0530 Subject: [PATCH 05/11] Run shards install --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 549a6bc..5d2f4fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ install: - curl -sL -o ${HADOLINT} "https://github.com/hadolint/hadolint/releases/download/v1.17.5/hadolint-$(uname -s)-$(uname -m)" && chmod 700 ${HADOLINT} script: + - shards install - crystal spec - crystal tool format --check - git ls-files --exclude='Dockerfile*' --ignored | xargs --max-lines=1 ${HADOLINT} From 71325be236015d780d46f59e29ebc1b5d4acf2e0 Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 2 Apr 2020 20:20:55 +0530 Subject: [PATCH 06/11] move shards install to install stage --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5d2f4fb..6941f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,8 @@ install: # Download hadolint binary and set it as executable - curl -sL -o ${HADOLINT} "https://github.com/hadolint/hadolint/releases/download/v1.17.5/hadolint-$(uname -s)-$(uname -m)" && chmod 700 ${HADOLINT} -script: - shards install +script: - crystal spec - crystal tool format --check - git ls-files --exclude='Dockerfile*' --ignored | xargs --max-lines=1 ${HADOLINT} From 1de2b56f4bc18d6703cc16a20ec2b009a9348bd9 Mon Sep 17 00:00:00 2001 From: Nemo Date: Sat, 4 Apr 2020 01:09:53 +0530 Subject: [PATCH 07/11] docker --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index fd06a59..352c5d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,4 +21,6 @@ RUN apt-get update && \ RUN shards install && shards build --release +VOLUME /output + ENTRYPOINT ["/build/bin/muse-dl"] \ No 
newline at end of file From 0b2867cf2fe2f89c4bf7339b7762a88bb84004e3 Mon Sep 17 00:00:00 2001 From: Nemo Date: Sat, 4 Apr 2020 01:53:23 +0530 Subject: [PATCH 08/11] Adds a release build script --- .dockerignore | 5 ++++- Makefile | 10 ++++++++++ static.Dockerfile | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Makefile diff --git a/.dockerignore b/.dockerignore index 0c20ebb..4df2ec4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,4 +2,7 @@ LICENSE Dockerfile spec/ -bin/ \ No newline at end of file +bin/ +Makefile +.dockerignore +*.Dockerfile \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..98a984e --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) +current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path)))) + +release: + # Build a static binary and save it in muse-dl-static + docker build --tag muse-dl-static --file static.Dockerfile . + # Then extract the image | extract the layer.tar file (we only have one layer) | extract the muse-dl-static file + docker image save muse-dl-static | tar xf - --wildcards "*/layer.tar" -O | tar xf - "muse-dl-static" + # And move it to the bin/ directory + mv -f muse-dl-static bin/ \ No newline at end of file diff --git a/static.Dockerfile b/static.Dockerfile index 5201417..ac47121 100644 --- a/static.Dockerfile +++ b/static.Dockerfile @@ -9,4 +9,4 @@ RUN shards install && \ FROM scratch -COPY --from=builder /build/bin/muse-dl /muse-dl \ No newline at end of file +COPY --from=builder /build/bin/muse-dl /muse-dl-static \ No newline at end of file From 2d7430e78098c32bbc0b07f7619e30099951706f Mon Sep 17 00:00:00 2001 From: Nemo Date: Sat, 4 Apr 2020 03:39:19 +0530 Subject: [PATCH 09/11] Switch to official crystal image --- static.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static.Dockerfile b/static.Dockerfile index ac47121..6ddd650 100644 --- a/static.Dockerfile +++ 
b/static.Dockerfile @@ -1,4 +1,4 @@ -FROM jrei/crystal-alpine:edge as builder +FROM crystallang/crystal:latest as builder WORKDIR /build From 88a23f637047e8af4b2897f18178c7210583fd01 Mon Sep 17 00:00:00 2001 From: Nemo Date: Sat, 4 Apr 2020 03:39:35 +0530 Subject: [PATCH 10/11] Finishes Dockerfile for running --- Dockerfile | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 352c5d1..2c2d883 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:19.04 +FROM debian:10-slim WORKDIR /build @@ -7,20 +7,37 @@ COPY . . # Add the key for the crystal debian repo ADD https://keybase.io/crystal/pgp_keys.asc /tmp/crystal.gpg -# Install gnupg for the apt-key operation and openssl for our TLS stuff -RUN apt-get update && \ - apt-get install --yes --no-install-recommends gnupg=2.2.12-1ubuntu3 libssl-dev=1.1.1b-1ubuntu2.4 && \ +# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 for why mkdir is needed +RUN mkdir -p /usr/share/man/man1 && \ + apt-get update && \ + apt-get install --yes --no-install-recommends \ + # Install gnupg for the apt-key operation + gnupg=2.2.12-1+deb10u1 \ + # libssl for faster TLS in Crystal + libssl-dev=1.1.1d-0+deb10u2 \ + # pdftk as a dependency for muse-dl + pdftk=2.02-5 \ + # ca-certificates for talking to crystal-lang.org + ca-certificates=20190110 \ + # git to let shards install happen + git=1:2.20.1-2+deb10u1 \ + # build --release + zlib1g-dev=1:1.2.11.dfsg-1 && \ # See https://crystal-lang.org/install/ apt-key add /tmp/crystal.gpg && \ echo "deb https://dist.crystal-lang.org/apt crystal main" > /etc/apt/sources.list.d/crystal.list && \ apt-get update && \ - apt-get install --no-install-recommends --yes crystal=0.33.0-1 pdftk=2.02-5 && \ + apt-get install --no-install-recommends --yes crystal=0.33.0-1 && \ # Cleanup apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN shards install && shards build --release +RUN shards install && shards build 
--release && \ + ln /build/bin/muse-dl /usr/bin/muse-dl -VOLUME /output +RUN apt-get --yes remove git gnupg -ENTRYPOINT ["/build/bin/muse-dl"] \ No newline at end of file +WORKDIR /data +VOLUME /data + +ENTRYPOINT ["/usr/bin/muse-dl"] \ No newline at end of file From e1cab9afe2f9624e56b7df5f98d65fb95efd80c2 Mon Sep 17 00:00:00 2001 From: Nemo Date: Sat, 4 Apr 2020 03:39:46 +0530 Subject: [PATCH 11/11] [docs] Docker --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dc219bb..32598c6 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ Any downloads you perform with this tool are for your own usage. I personally ha # Installation +## Linux / Build + ``` git clone https://github.com/captn3m0/muse-dl.git cd muse-dl @@ -16,9 +18,28 @@ shards build ./bin/muse-dl --help ``` +## Linux / Download + +A linux x86_64 static build is available in the latest release: https://github.com/captn3m0/muse-dl/releases/latest. Save the file as `muse-dl` and remember to mark it as executable (`chmod +x`). + +## Docker + +A docker image is available at `captn3m0/muse-dl` on Docker Hub. The working directory for the image is set as `/data`, so you'll need to mount your output-directory as `/data` for it to work. Sample invocations: + +``` +# Download the book, and put it in your Downloads directory +docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl https://muse.jhu.edu/book/875 + +# If you have a list.txt file in your Downloads directory, then you can run +docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl /data/list.txt + +# If you want to keep the temporary files with your host, and not delete them +docker run -it -v /home/nemo/Downloads:/data -v /tmp:/musetmp captn3m0/muse-dl --tmp-dir /musetmp --no-cleanup https://muse.jhu.edu/book/875 +``` + ## Requirements -Please ensure you have `pdftk` installed, and run the `muse-dl` binary. To build the binary, please run the steps in Installation.
+Please ensure you have `pdftk` installed, unless you're running via docker. ## Usage