Merge pull request #7 from captn3m0/docker

Docker support
This commit is contained in:
Nemo 2020-04-04 03:43:29 +05:30 committed by GitHub
commit 60b8581b73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 140 additions and 2 deletions

8
.dockerignore Normal file
View File

@ -0,0 +1,8 @@
# Paths listed here are left out of the Docker build context, so the
# `COPY . .` step in the Dockerfiles never sees them.
.git
LICENSE
Dockerfile
spec/
bin/
Makefile
.dockerignore
# Any alternate Dockerfile, e.g. static.Dockerfile
*.Dockerfile

View File

@ -1,8 +1,18 @@
language: crystal
env:
# Path to 'hadolint' binary
HADOLINT: "${HOME}/hadolint"
install:
# Download hadolint binary and set it as executable
- curl -sL -o ${HADOLINT} "https://github.com/hadolint/hadolint/releases/download/v1.17.5/hadolint-$(uname -s)-$(uname -m)"
&& chmod 700 ${HADOLINT}
# Install the Crystal dependencies of the project
- shards install
script:
# Run the spec suite
- crystal spec
# Fail the build when any source file is not formatted
- crystal tool format --check
# Lint every tracked file matching 'Dockerfile*' with hadolint, one file per invocation.
# NOTE(review): the 'Dockerfile*' pattern does not match 'static.Dockerfile', so that
# file is presumably never linted - confirm whether this is intended.
- git ls-files --exclude='Dockerfile*' --ignored | xargs --max-lines=1 ${HADOLINT}
addons:
apt:

43
Dockerfile Normal file
View File

@ -0,0 +1,43 @@
# Image that builds muse-dl from source on Debian 10 and runs it as the entrypoint.
# The working directory at runtime is /data, which is also declared as a volume.
FROM debian:10-slim

# Add the key for the crystal debian repo
ADD https://keybase.io/crystal/pgp_keys.asc /tmp/crystal.gpg

# Install the toolchain and runtime dependencies BEFORE copying the sources,
# so that editing the source tree does not invalidate this expensive layer.
# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 for why mkdir is needed
RUN mkdir -p /usr/share/man/man1 && \
    apt-get update && \
    apt-get install --yes --no-install-recommends \
    # Install gnupg for the apt-key operation
    gnupg=2.2.12-1+deb10u1 \
    # libssl for faster TLS in Crystal
    libssl-dev=1.1.1d-0+deb10u2 \
    # pdftk as a dependency for muse-dl
    pdftk=2.02-5 \
    # ca-certificates for talking to crystal-lang.org
    ca-certificates=20190110 \
    # git to let shards install happen
    git=1:2.20.1-2+deb10u1 \
    # build --release
    zlib1g-dev=1:1.2.11.dfsg-1 && \
    # See https://crystal-lang.org/install/
    apt-key add /tmp/crystal.gpg && \
    echo "deb https://dist.crystal-lang.org/apt crystal main" > /etc/apt/sources.list.d/crystal.list && \
    apt-get update && \
    apt-get install --no-install-recommends --yes crystal=0.33.0-1 && \
    # Cleanup
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Copy the sources (minus whatever .dockerignore excludes) and build the release binary.
WORKDIR /build
COPY . .
RUN shards install && shards build --release && \
    ln /build/bin/muse-dl /usr/bin/muse-dl

# Drop packages that were only needed while building.
# NOTE(review): this runs in its own layer, so it presumably does not shrink the
# final image (the packages still exist in the earlier layer) - consider a
# multi-stage build if image size matters.
RUN apt-get --yes remove git gnupg

# Downloads are written relative to /data; mount the output directory there.
WORKDIR /data
VOLUME /data
ENTRYPOINT ["/usr/bin/muse-dl"]

10
Makefile Normal file
View File

@ -0,0 +1,10 @@
# Absolute path of this Makefile and the name of the directory that contains it.
mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path))))

# `release` is a command, not a file: always run it, even if a file or
# directory called `release` happens to exist.
.PHONY: release

# Build a statically linked muse-dl inside Docker and place it at bin/muse-dl-static.
release:
# Build a static binary and save it in muse-dl-static
	docker build --tag muse-dl-static --file static.Dockerfile .
# Then extract the image | extract the layer.tar file (we only have one layer) | extract the muse-dl-static file
	docker image save muse-dl-static | tar xf - --wildcards "*/layer.tar" -O | tar xf - "muse-dl-static"
# And move it to the bin/ directory (created first: bin/ is build output and may not exist yet)
	mkdir -p bin
	mv -f muse-dl-static bin/

View File

@ -8,6 +8,8 @@ Any downloads you perform with this tool are for your own usage. I personally ha
# Installation
## Linux / Build
```
git clone https://github.com/captn3m0/muse-dl.git
cd muse-dl
@ -16,9 +18,28 @@ shards build
./bin/muse-dl --help
```
## Linux / Download
A linux x86_64 static build is available in the latest release: <https://github.com/captn3m0/muse-dl/releases/latest>. Save the file as `muse-dl` and remember to mark it as executable (`chmod +x`).
## Docker
A docker image is available at `captn3m0/muse-dl` on Docker Hub. The working directory for the image is set as `/data`, so you'll need to mount your output directory as `/data` for it to work. Sample invocations:
```
# Download the book, and put it in your Downloads directory
docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl https://muse.jhu.edu/book/875
# If you have a list.txt file in your Downloads directory, then you can run
docker run -it -v /home/nemo/Downloads:/data captn3m0/muse-dl /data/list.txt
# If you want to keep the temporary files with your host, and not delete them
docker run -it -v /home/nemo/Downloads:/data -v /tmp:/musetmp captn3m0/muse-dl --tmp-dir /musetmp --no-cleanup https://muse.jhu.edu/book/875
```
## Requirements
Please ensure you have `pdftk` installed, and run the `muse-dl` binary. To build the binary, please run the steps in Installation.
Please ensure you have `pdftk` installed, unless you're running via docker.
## Usage

12
spec/fetch_spec.cr Normal file
View File

@ -0,0 +1,12 @@
require "./spec_helper"
# require "errors/muse_corrupt_pdf.cr"
# Checks that saving a chapter raises MuseCorruptPDF.
# Although the group is described as Muse::Dl::Book, the call under test is
# Muse::Dl::Fetch.save_chapter.
describe Muse::Dl::Book do
  it "should notice the unable to construct chapter PDF error" do
    stale_pdf = "/tmp/chapter-2379787.pdf"

    # Remove any copy of the chapter file left behind by an earlier run.
    if File.exists?(stale_pdf)
      File.delete(stale_pdf)
    end

    expect_raises(Muse::Dl::Errors::MuseCorruptPDF) do
      Muse::Dl::Fetch.save_chapter("/tmp", "2379787", "NA")
    end
  end
end

View File

@ -0,0 +1,4 @@
# Error types raised by muse-dl.
module Muse::Dl::Errors
  # Signals that a chapter PDF could not be obtained from MUSE.
  # Carries no extra state; it is a plain Exception subclass.
  class MuseCorruptPDF < Exception; end
end

View File

@ -1,5 +1,6 @@
require "crest"
require "./errors/*"
require "myhtml"
module Muse::Dl
class Fetch
@ -42,6 +43,18 @@ module Muse::Dl
# TODO: Add validation for the downloaded file (should be PDF)
Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
# puts response.headers["Content-Type"]
# Inspect the Content-Type header: an HTML response may be an error page
# instead of the requested PDF.
content_type = response.headers["Content-Type"]
# Only proceed when the header value is a String.
# NOTE(review): presumably the lookup can yield a non-String value - confirm against Crest.
if content_type.is_a? String
if /html/.match content_type
# NOTE(review): looks like leftover debug output - confirm whether it should stay.
puts response
# Scan the HTML body line by line for the MUSE error marker.
# NOTE(review): this reads from the same body_io that is copied into the PDF file
# right after this check, so for an HTML response without the marker the copy
# presumably sees only what is left of the stream - verify.
response.body_io.each_line do |line|
if /Unable to construct chapter PDF/.match line
raise Muse::Dl::Errors::MuseCorruptPDF.new
end
end
end
end
File.open(tmp_pdf_file, "w") do |file|
IO.copy(response.body_io, file)
end

View File

@ -33,7 +33,12 @@ module Muse::Dl
unless parser.input_pdf
# Save each chapter
thing.chapters.each do |chapter|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
begin
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
rescue e : Muse::Dl::Errors::MuseCorruptPDF
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
return
end
end
chapter_ids = thing.chapters.map { |c| c[0] }

12
static.Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Stage 1: compile a statically linked muse-dl.
# The compiler image is pinned to the same Crystal version that the main
# Dockerfile installs (0.33.0) instead of the moving `latest` tag, so release
# builds are reproducible.
FROM crystallang/crystal:0.33.0 AS builder
WORKDIR /build
COPY . .
RUN shards install && \
    shards build --release --static

# Stage 2: an otherwise empty image holding only the binary.
# The single COPY below yields a single layer containing /muse-dl-static,
# which is what the Makefile `release` recipe extracts.
FROM scratch
COPY --from=builder /build/bin/muse-dl /muse-dl-static