Improve generation by adapting code from @lesensei

- Auto generation of chapters, no manual updates - Internationalization support, thanks to @lesensei - Made qpdf, kindlegen, calibre optional dependencies - Changed the cover image size to exactly A4 - Generated cover.pdf, and switched to qpdf from pdftk. I like pdftk, but the installation is stupidly hard these days and qpdf seems to be much more lightweight. The disadvantage is losing the bookmarks in the PDF. Also faced a few issues with pandoc while working on this; created issues on the pandoc repo: https://github.com/jgm/pandoc/issues/created_by/captn3m0
2020-06-09 18:07:35 +05:30 · 2020-06-09 18:07:35 +05:30 · 9b7b085805
parent 11f69c5370
commit 9b7b085805
7 changed files with 103 additions and 65 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,5 @@
 *.epub
 *.pdf
 !cover.pdf
 out/
 metadata.xml
--- a/README.md
+++ b/README.md
@ -6,8 +6,9 @@ Generates ebooks for The Ickabog by J.K Rowling. Original text from https://www.
 - `wget`
 - [`pup`](https://github.com/ericchiang/pup)
- [`pandoc`](https://pandoc.org/)
+- [`pandoc`](https://pandoc.org/) to generate EPUB and PDF files
- [`pdftk`]
+- `qpdf` to add cover to PDF files. (optional)
 - `kindlegen` or `calibre` installed to generate MOBI files. (optional)
 ## How to run
@ -29,6 +30,8 @@ The cover art is [Avanyu](http://edan.si.edu/saam/id/object/1979.144.85) by Juli
 > Julian Martinez, Avanyu, ca. 1923, watercolor, ink, and pencil on paper, Smithsonian American Art Museum, Corbin-Henderson Collection, gift of Alice H. Rossin, 1979.144.85
 Code for internationalization and automatic chapter updates via [@lesensei](https://github.com/lesensei/ickabog-ebook/commits/master)'s fork.
 ## License
 The little code in this repository is licensed under the [MIT License](https://nemo.mit-license.org/). See LICENSE file for details.
--- a/cover.jpg
+++ b/cover.jpg
--- a/cover.ora
+++ b/cover.ora
--- a/cover.pdf
+++ b/cover.pdf
--- a/generate.sh
+++ b/generate.sh
@ -1,77 +1,114 @@
 #!/bin/bash
 set -euo pipefail
 IFS=$'\n\t'
 OUTPUT_DIR=out
 mkdir -p html
-
+mkdir -p "$OUTPUT_DIR"
 MAIN_STORY_OUTPUT_FILE="$OUTPUT_DIR/read-the-story.html"
 HTML_FILE=ickabog.html
 echo "<html><head><title>The Ickabog</title></head><body>" > "$HTML_FILE"
 LC=${LC:-""}
 if [[ "$LC" != "" ]]; then
    LC="/$LC"
 fi
 MAIN_STORY_URL="https://www.theickabog.com$LC/read-the-story/"
 echo "[+] Fetching $MAIN_STORY_URL"
 wget --quiet "$MAIN_STORY_URL" --output-document "$MAIN_STORY_OUTPUT_FILE"
 LANG=$(cat "$MAIN_STORY_OUTPUT_FILE"| pup 'html attr{lang}')
 echo "[+] Language set to $LANG"
 MAIN_TITLE=$(cat "$MAIN_STORY_OUTPUT_FILE" | pup 'ul.chapters__list a json{}' | jq -r '[.[] | {url: .href, chapter: .children[0].children[0].children[0].children[0].text, title: .children[0].children[0].children[0].children[1].text}] | sort_by(.chapter) | .[]|[.chapter, .title, .url] | @tsv' | grep $' 2\t' | while IFS=$'\t' read -r chapter title url; do echo "$title"; done)
 echo "[+] Title set to $MAIN_TITLE"
 echo "<html lang=$LANG><head><meta charset=UTF-8><title>$MAIN_TITLE</title></head><body>" > "$HTML_FILE"
 # args = "$url" "$chapter" "$title"
 function download_chapter() {
-    [ -s "html/$2.html" ] || wget --quiet "https://www.theickabog.com/$1" -O "html/$2.html"
+    [[ $2 =~ 1$ ]] && MAIN_TITLE=$3
    URL=$( [[ $1 =~ ^http ]] && echo "$1" || echo "https://www.theickabog.com$1" )
    [ -s "html/$2.html" ] || wget --quiet "$URL" -O "html/$2.html"
    echo "<h1>$3</h1>" >> "$HTML_FILE"
    cat "html/$2.html" | pup 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE"
 }
-download_chapter king-fred-the-fearless/ ch1
+cat "$MAIN_STORY_OUTPUT_FILE" |
-download_chapter the-ickabog/ ch2
+pup 'ul.chapters__list a json{}' |
-download_chapter death-of-a-seamstress/ ch3
+jq -r '[.[] | {url: .href, chapter: .children[0].children[0].children[0].children[0].text, title: .children[0].children[0].children[0].children[1].text}] | sort_by(.chapter | match("[0-9]+$")) | .[]|[.chapter, .title, .url] | @tsv' |
-download_chapter the-quiet-house/ ch4
+while IFS=$'\t' read -r chapter title url; do download_chapter "$url" "$chapter" "$title"; done
 download_chapter daisy-dovetail/ ch5
 download_chapter the-fight-in-the-courtyard/ ch6
 download_chapter lord-spittleworth-tells-tales/ ch7
 download_chapter the-day-of-petition/ ch8
 download_chapter the-shepherds-story/ ch9
 download_chapter king-freds-quest/ ch10
 download_chapter the-journey-north/ ch11
 download_chapter the-kings-lost-sword/ ch12
 download_chapter the-accident/ ch13
 download_chapter lord-spittleworths-plan/ ch14
 download_chapter the-king-returns/ ch15
 download_chapter bert-says-goodbye/ ch16
 download_chapter goodfellow-makes-a-stand/ ch17
 download_chapter end-of-an-advisor/ ch18
 download_chapter lady-eslanda/ ch19
 download_chapter medals-for-beamish-and-buttons/ ch20
 download_chapter professor-fraudysham/ ch21
 for i in $(seq 1 21); do
  CHAPTER_TITLE=$(cat "html/ch$i.html" | pup 'h1.entry-title:nth-child(2) text{}')
  echo "<h2>$CHAPTER_TITLE</h2>" >> "$HTML_FILE"
  cat "html/ch$i.html" | pup 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE"
 done
 echo "</body></html>" >> "$HTML_FILE"
-pandoc --from=html --to=pdf \
+cat <<__METADATA__ > metadata.xml
-    --output=ickabog1.pdf \
+<dc:creator opf:role="aut">J.K Rowling</dc:creator>
-    --metadata title="The Ickabog" \
+__METADATA__
-    --metadata author="J.K Rowling" \
+
 pandoc --from=html \
    --output="$OUTPUT_DIR/ickabog.epub" \
    --epub-metadata=metadata.xml \
    --epub-cover-image=cover.jpg \
    --epub-chapter-level=1 \
    "$HTML_FILE"
 echo "[+] Generated $OUTPUT_DIR/ickabog.epub"
 if command -v kindlegen > /dev/null; then
    kindlegen "$OUTPUT_DIR/ickabog.epub" > /dev/null 2>&1
    echo "[+] Generated MOBI using kindlegen: $OUTPUT_DIR/ickabog.mobi"
 elif command -v ebook-convert > /dev/null; then
    ebook-convert "$OUTPUT_DIR/ickabog.epub" \
        "$OUTPUT_DIR/ickabog.mobi" \
        --metadata title="$MAIN_TITLE" \
        > /dev/null 2>&1
    echo "[+] Generated MOBI using ebook-convert: $OUTPUT_DIR/ickabog.mobi"
 else
    echo "[-] Could not generate MOBI, install kindlegen or calibre"
 fi
 command -v xelatex >/dev/null && \
 pandoc --from=html \
    --pdf-engine=xelatex \
-    --dpi=300 \
+    --metadata title="$MAIN_TITLE" \
-    -V book \
+    --metadata author="J.K Rowling" \
-    -V lang=en-US \
+    --output="$OUTPUT_DIR/ickabog-no-cover.pdf" \
    -V lang="$LANG" \
    -V geometry=margin=1.5cm \
    "$HTML_FILE"
-pdftk cover.pdf ickabog1.pdf cat output ickabog.pdf
+if command -v qpdf > /dev/null; then
    qpdf --empty --pages cover.pdf "$OUTPUT_DIR/ickabog-no-cover.pdf" -- "$OUTPUT_DIR/ickabog.pdf"
 else
    mv "$OUTPUT_DIR/ickabog-no-cover.pdf" "$OUTPUT_DIR/ickabog.pdf"
 fi
-pandoc --from=html --to=epub \
+echo "[+] Generated PDF using xelatex: $OUTPUT_DIR/ickabog.pdf"
    --output=ickabog.epub \
    --epub-metadata=metadata.xml \
    --epub-cover-image=cover.jpg \
    --metadata title="The Ickabog" \
    "$HTML_FILE"
-pandoc --from=html --to=pdf \
+# Run only if context is available
-    -V fontsize=18pt \
+if command -v context>/dev/null; then
-    --output=ickabog2.pdf \
+    pandoc --from=html --to=pdf \
-    --metadata title="The Ickabog" \
+        -V fontsize=18pt \
-    --metadata author="J.K Rowling" \
+        --output="$OUTPUT_DIR/ickabog-large-no-cover.pdf" \
-    --pdf-engine=context \
+        --metadata title="$MAIN_TITLE" \
-    -V margin-left=0cm \
+        --metadata author="J.K Rowling" \
-    -V margin-right=0cm \
+        --pdf-engine=context \
-    -V margin-top=0cm \
+        -V margin-left=0cm \
-    -V margin-bottom=0cm \
+        -V margin-right=0cm \
-    -V geometry=margin=0cm \
+        -V margin-top=0cm \
-    -V lang=en-US \
+        -V margin-bottom=0cm \
-    "$HTML_FILE"
+        -V geometry=margin=0cm \
        -V lang="$LANG" \
        "$HTML_FILE"
-pdftk cover.pdf ickabog2.pdf cat output ickabog-large.pdf
+    if command -v qpdf > /dev/null; then
        qpdf --empty --pages cover.pdf "$OUTPUT_DIR/ickabog-large-no-cover.pdf" -- "$OUTPUT_DIR/ickabog-large.pdf"
    else
        mv "$OUTPUT_DIR/ickabog-no-cover.pdf" "$OUTPUT_DIR/ickabog-large.pdf"
    fi
 fi
 echo "[+] Generated PDF using context: $OUTPUT_DIR/ickabog-large.pdf"
--- a/metadata.xml
+++ b/metadata.xml
@ -1,4 +0,0 @@
 <dc:title id="epub-title-1">The Ickabog</dc:title>
 <dc:date>2020-05-20</dc:date>
 <dc:language>en-US</dc:language>
 <dc:creator id="epub-creator-1" opf:role="aut">The Ickabog</dc:creator>