diff --git a/.gitignore b/.gitignore
index 00e3e33..8cff6b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
*.epub
*.pdf
!cover.pdf
+out/
+metadata.xml
diff --git a/README.md b/README.md
index 2438dcb..a911088 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,9 @@ Generates ebooks for The Ickabog by J.K Rowling. Original text from https://www.
- `wget`
- [`pup`](https://github.com/ericchiang/pup)
-- [`pandoc`](https://pandoc.org/)
-- [`pdftk`]
+- [`pandoc`](https://pandoc.org/) to generate EPUB and PDF files
+- `qpdf` to add cover to PDF files. (optional)
+- `kindlegen` or `calibre` installed to generate MOBI files. (optional)
## How to run
@@ -29,6 +30,8 @@ The cover art is [Avanyu](http://edan.si.edu/saam/id/object/1979.144.85) by Juli
> Julian Martinez, Avanyu, ca. 1923, watercolor, ink, and pencil on paper, Smithsonian American Art Museum, Corbin-Henderson Collection, gift of Alice H. Rossin, 1979.144.85
+Code for internationalization and automatic chapter updates via [@lesensei](https://github.com/lesensei/ickabog-ebook/commits/master)'s fork.
+
## License
The little code in this repository is licensed under the [MIT License](https://nemo.mit-license.org/). See LICENSE file for details.
diff --git a/cover.jpg b/cover.jpg
index b00ea81..0001821 100644
Binary files a/cover.jpg and b/cover.jpg differ
diff --git a/cover.ora b/cover.ora
index 59714f6..b423876 100644
Binary files a/cover.ora and b/cover.ora differ
diff --git a/cover.pdf b/cover.pdf
index 28fcf85..bd42002 100644
Binary files a/cover.pdf and b/cover.pdf differ
diff --git a/generate.sh b/generate.sh
index cc2940a..ed976af 100755
--- a/generate.sh
+++ b/generate.sh
@@ -1,77 +1,114 @@
#!/bin/bash
+set -euo pipefail
+IFS=$'\n\t'
+
+OUTPUT_DIR=out
mkdir -p html
-
+mkdir -p "$OUTPUT_DIR"
+MAIN_STORY_OUTPUT_FILE="$OUTPUT_DIR/read-the-story.html"
HTML_FILE=ickabog.html
-echo "
The Ickabog" > "$HTML_FILE"
+LC=${LC:-""}
+if [[ "$LC" != "" ]]; then
+ LC="/$LC"
+fi
+MAIN_STORY_URL="https://www.theickabog.com$LC/read-the-story/"
+
+echo "[+] Fetching $MAIN_STORY_URL"
+
+wget --quiet "$MAIN_STORY_URL" --output-document "$MAIN_STORY_OUTPUT_FILE"
+
+LANG=$(cat "$MAIN_STORY_OUTPUT_FILE"| pup 'html attr{lang}')
+echo "[+] Language set to $LANG"
+
+MAIN_TITLE=$(cat "$MAIN_STORY_OUTPUT_FILE" | pup 'ul.chapters__list a json{}' | jq -r '[.[] | {url: .href, chapter: .children[0].children[0].children[0].children[0].text, title: .children[0].children[0].children[0].children[1].text}] | sort_by(.chapter) | .[]|[.chapter, .title, .url] | @tsv' | grep $' 2\t' | while IFS=$'\t' read -r chapter title url; do echo "$title"; done)
+
+echo "[+] Title set to $MAIN_TITLE"
+
+echo "$MAIN_TITLE" > "$HTML_FILE"
+
+# args = "$url" "$chapter" "$title"
function download_chapter() {
- [ -s "html/$2.html" ] || wget --quiet "https://www.theickabog.com/$1" -O "html/$2.html"
+ [[ $2 =~ 1$ ]] && MAIN_TITLE=$3
+ URL=$( [[ $1 =~ ^http ]] && echo "$1" || echo "https://www.theickabog.com$1" )
+ [ -s "html/$2.html" ] || wget --quiet "$URL" -O "html/$2.html"
+ echo "$3
" >> "$HTML_FILE"
+ cat "html/$2.html" | pup 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE"
}
-download_chapter king-fred-the-fearless/ ch1
-download_chapter the-ickabog/ ch2
-download_chapter death-of-a-seamstress/ ch3
-download_chapter the-quiet-house/ ch4
-download_chapter daisy-dovetail/ ch5
-download_chapter the-fight-in-the-courtyard/ ch6
-download_chapter lord-spittleworth-tells-tales/ ch7
-download_chapter the-day-of-petition/ ch8
-download_chapter the-shepherds-story/ ch9
-download_chapter king-freds-quest/ ch10
-download_chapter the-journey-north/ ch11
-download_chapter the-kings-lost-sword/ ch12
-download_chapter the-accident/ ch13
-download_chapter lord-spittleworths-plan/ ch14
-download_chapter the-king-returns/ ch15
-download_chapter bert-says-goodbye/ ch16
-download_chapter goodfellow-makes-a-stand/ ch17
-download_chapter end-of-an-advisor/ ch18
-download_chapter lady-eslanda/ ch19
-download_chapter medals-for-beamish-and-buttons/ ch20
-download_chapter professor-fraudysham/ ch21
-
-
-for i in $(seq 1 21); do
- CHAPTER_TITLE=$(cat "html/ch$i.html" | pup 'h1.entry-title:nth-child(2) text{}')
- echo "$CHAPTER_TITLE
" >> "$HTML_FILE"
- cat "html/ch$i.html" | pup 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE"
-done
+cat "$MAIN_STORY_OUTPUT_FILE" |
+pup 'ul.chapters__list a json{}' |
+jq -r '[.[] | {url: .href, chapter: .children[0].children[0].children[0].children[0].text, title: .children[0].children[0].children[0].children[1].text}] | sort_by(.chapter | match("[0-9]+$")) | .[]|[.chapter, .title, .url] | @tsv' |
+while IFS=$'\t' read -r chapter title url; do download_chapter "$url" "$chapter" "$title"; done
echo "" >> "$HTML_FILE"
-pandoc --from=html --to=pdf \
- --output=ickabog1.pdf \
- --metadata title="The Ickabog" \
- --metadata author="J.K Rowling" \
+cat <<__METADATA__ > metadata.xml
+J.K Rowling
+__METADATA__
+
+pandoc --from=html \
+ --output="$OUTPUT_DIR/ickabog.epub" \
+ --epub-metadata=metadata.xml \
+ --epub-cover-image=cover.jpg \
+ --epub-chapter-level=1 \
+ "$HTML_FILE"
+
+echo "[+] Generated $OUTPUT_DIR/ickabog.epub"
+
+if command -v kindlegen > /dev/null; then
+ kindlegen "$OUTPUT_DIR/ickabog.epub" > /dev/null 2>&1
+ echo "[+] Generated MOBI using kindlegen: $OUTPUT_DIR/ickabog.mobi"
+elif command -v ebook-convert > /dev/null; then
+ ebook-convert "$OUTPUT_DIR/ickabog.epub" \
+ "$OUTPUT_DIR/ickabog.mobi" \
+ --metadata title="$MAIN_TITLE" \
+ > /dev/null 2>&1
+ echo "[+] Generated MOBI using ebook-convert: $OUTPUT_DIR/ickabog.mobi"
+else
+ echo "[-] Could not generate MOBI, install kindlegen or calibre"
+fi
+
+command -v xelatex >/dev/null && \
+pandoc --from=html \
--pdf-engine=xelatex \
- --dpi=300 \
- -V book \
- -V lang=en-US \
+ --metadata title="$MAIN_TITLE" \
+ --metadata author="J.K Rowling" \
+ --output="$OUTPUT_DIR/ickabog-no-cover.pdf" \
+ -V lang="$LANG" \
-V geometry=margin=1.5cm \
"$HTML_FILE"
-pdftk cover.pdf ickabog1.pdf cat output ickabog.pdf
+if command -v qpdf > /dev/null; then
+ qpdf --empty --pages cover.pdf "$OUTPUT_DIR/ickabog-no-cover.pdf" -- "$OUTPUT_DIR/ickabog.pdf"
+else
+ mv "$OUTPUT_DIR/ickabog-no-cover.pdf" "$OUTPUT_DIR/ickabog.pdf"
+fi
-pandoc --from=html --to=epub \
- --output=ickabog.epub \
- --epub-metadata=metadata.xml \
- --epub-cover-image=cover.jpg \
- --metadata title="The Ickabog" \
- "$HTML_FILE"
+echo "[+] Generated PDF using xelatex: $OUTPUT_DIR/ickabog.pdf"
-pandoc --from=html --to=pdf \
- -V fontsize=18pt \
- --output=ickabog2.pdf \
- --metadata title="The Ickabog" \
- --metadata author="J.K Rowling" \
- --pdf-engine=context \
- -V margin-left=0cm \
- -V margin-right=0cm \
- -V margin-top=0cm \
- -V margin-bottom=0cm \
- -V geometry=margin=0cm \
- -V lang=en-US \
- "$HTML_FILE"
+# Run only if context is available
+if command -v context>/dev/null; then
+ pandoc --from=html --to=pdf \
+ -V fontsize=18pt \
+ --output="$OUTPUT_DIR/ickabog-large-no-cover.pdf" \
+ --metadata title="$MAIN_TITLE" \
+ --metadata author="J.K Rowling" \
+ --pdf-engine=context \
+ -V margin-left=0cm \
+ -V margin-right=0cm \
+ -V margin-top=0cm \
+ -V margin-bottom=0cm \
+ -V geometry=margin=0cm \
+ -V lang="$LANG" \
+ "$HTML_FILE"
-pdftk cover.pdf ickabog2.pdf cat output ickabog-large.pdf
+ if command -v qpdf > /dev/null; then
+ qpdf --empty --pages cover.pdf "$OUTPUT_DIR/ickabog-large-no-cover.pdf" -- "$OUTPUT_DIR/ickabog-large.pdf"
+ else
+ mv "$OUTPUT_DIR/ickabog-no-cover.pdf" "$OUTPUT_DIR/ickabog-large.pdf"
+ fi
+fi
+
+echo "[+] Generated PDF using context: $OUTPUT_DIR/ickabog-large.pdf"
diff --git a/metadata.xml b/metadata.xml
deleted file mode 100644
index 8028220..0000000
--- a/metadata.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-The Ickabog
-2020-05-20
-en-US
-The Ickabog