Improve wget invocation to get images as well

2018-12-02 20:17:52 +05:30 · 2018-12-02 20:17:52 +05:30 · 823b6d20e1
parent 56dd68f63d
commit 823b6d20e1
2 changed files with 25 additions and 16 deletions
--- a/bootstrap.sh
+++ b/bootstrap.sh
@ -1,17 +1,27 @@
 #!/bin/bash
+TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html"
+# Make sure that links are relative
+# Remove the /sre/ directories
+# Save stuff in html/ directory
+# Do not create a landing.google.com directory
+# Enable recursion, timestamping (--mirror)
+# Images are hosted elsewhere, download them as well.
+# We need to go up a level from /toc/ where we start
+wget \
+    --convert-links \
+    --directory-prefix=html \
+    --page-requisites \
+    --adjust-extension \
+    --span-hosts \
+    --trust-server-names \
+    --backup-converted \
+    --mirror \
+    --no-verbose \
+    --recursive \
+    --domains=lh3.googleusercontent.com,landing.google.com \
+    "$TOC_URL"

-# Cleanup
-rm -rf html
-mkdir -p html/index
-mkdir -p html/sre-book
-cd html
-
-# Download
-wget --convert-links --mirror https://landing.google.com/sre/book/
-mv landing.google.com/sre/sre-book/* ./sre-book
-mv landing.google.com/sre/book/index.html ./index
-rm -rf landing.google.com
-cd ..
+exit

 if [ $1 != "docker" ];then
    bundle install
--- a/generate.rb
+++ b/generate.rb
@ -3,7 +3,7 @@ require 'pp'
 require 'fileutils'
 # First we get the list of all the book sections:

-chapter_links = Nokogiri::HTML(open("html/index/index.html"))
+chapter_links = Nokogiri::HTML(open("html/toc/index.html"))
  .css('#drop-down a')
  .map {|l| l.attribute('href').value}

@ -11,7 +11,7 @@ html = ''
 chapter_links.each do |chapter_link|
  chapter_file = File.basename chapter_link
  html += "<span class=\"hidden\" name=\"#{chapter_file}\"></span>"
-  doc = Nokogiri::HTML(open("html/index/#{chapter_link}"))
+  doc = Nokogiri::HTML(open("html/toc/#{chapter_link}"))
  content = doc.css('.content')

  # this title is with additional 'chapter X' in front
@ -40,7 +40,6 @@ chapter_links.each do |chapter_link|
  content.css(headers.join(',')).each do |e|
    # If chapter heading
    if e == chapter_header
-      puts "Chapter Header"
      e.name = 'h1'
    else
      # Reduce everything by 1
@ -75,5 +74,5 @@ chapter_links.each do |chapter_link|
  html += content.inner_html
 end

-File.open("html/sre-book/chapters/sre.html", 'w') { |file| file.write(html) }
+File.open("html/sre-book/toc/complete.html", 'w') { |file| file.write(html) }
 puts "[html] Generated HTML file"