From a4b44684e4fb343c726651c979372eeae770408b Mon Sep 17 00:00:00 2001 From: John Wood Date: Wed, 29 Jul 2020 01:00:24 +0900 Subject: [PATCH] Switches to Web Archive, since book is no longer accessible (#2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * The author has removed the book from her website. Gone, but not forgotten... by archive.org :) * Specify UTF-8 with pup to work around it replacing ' with ’ * Oops, pup gets called twice. Let's fix them both. --- generate.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/generate.sh b/generate.sh index 0297d41..3a7ee53 100755 --- a/generate.sh +++ b/generate.sh @@ -13,7 +13,7 @@ LC=${LC:-""} if [[ "$LC" != "" ]]; then LC="/$LC" fi -MAIN_STORY_URL="https://www.theickabog.com$LC/read-the-story/" +MAIN_STORY_URL="https://web.archive.org/web/20200713135719/https://www.theickabog.com$LC/read-the-story/" echo "[+] Fetching $MAIN_STORY_URL" @@ -31,14 +31,14 @@ echo "$MAIN_TITLE$3" >> "$HTML_FILE" - cat "html/$2.html" | pup 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE" + cat "html/$2.html" | pup -p --charset UTF-8 'article div.row:nth-child(2) div.entry-content' >> "$HTML_FILE" } cat "$MAIN_STORY_OUTPUT_FILE" | -pup 'ul.chapters__list a json{}' | +pup -p --charset UTF-8 'ul.chapters__list a json{}' | jq -r '[.[] | {url: .href, chapter: .children[0].children[0].children[0].children[0].text, title: .children[0].children[0].children[0].children[1].text}] | sort_by(.chapter | match("[0-9]+$")) | .[]|[.chapter, .title, .url] | @tsv' | while IFS=$'\t' read -r chapter title url; do download_chapter "$url" "$chapter" "$title"; done