Improve wget invocation to get images as well

This commit is contained in:
Nemo 2018-12-02 20:17:52 +05:30
parent 56dd68f63d
commit 823b6d20e1
2 changed files with 25 additions and 16 deletions

View File

@ -1,17 +1,27 @@
#!/bin/bash
TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html"
# Make sure that links are relative
# # Remove the /sre/ directories
# Save stuff in html/ directory
# Do not create a landing.google.com directory
# Enable recursion, timestamping (--mirror)
# Images are hosted elsewhere, download them as well.
# We need to go up a level from /toc/ where we start
wget \
--convert-links \
--directory-prefix=html \
--page-requisites \
--adjust-extension \
--span-hosts \
--trust-server-names \
--backup-converted \
--mirror \
--no-verbose \
--recursive \
--domains=lh3.googleusercontent.com,landing.google.com \
"$TOC_URL"
# Cleanup
rm -rf html
mkdir -p html/index
mkdir -p html/sre-book
cd html
# Download
wget --convert-links --mirror https://landing.google.com/sre/book/
mv landing.google.com/sre/sre-book/* ./sre-book
mv landing.google.com/sre/book/index.html ./index
rm -rf landing.google.com
cd ..
exit
if [ $1 != "docker" ];then
bundle install

View File

@ -3,7 +3,7 @@ require 'pp'
require 'fileutils'
# First we get the list of all the book sections:
chapter_links = Nokogiri::HTML(open("html/index/index.html"))
chapter_links = Nokogiri::HTML(open("html/toc/index.html"))
.css('#drop-down a')
.map {|l| l.attribute('href').value}
@ -11,7 +11,7 @@ html = ''
chapter_links.each do |chapter_link|
chapter_file = File.basename chapter_link
html += "<span class=\"hidden\" name=\"#{chapter_file}\"></span>"
doc = Nokogiri::HTML(open("html/index/#{chapter_link}"))
doc = Nokogiri::HTML(open("html/toc/#{chapter_link}"))
content = doc.css('.content')
# this title is with additional 'chapter X' in front
@ -40,7 +40,6 @@ chapter_links.each do |chapter_link|
content.css(headers.join(',')).each do |e|
# If chapter heading
if e == chapter_header
puts "Chapter Header"
e.name = 'h1'
else
# Reduce everything by 1
@ -75,5 +74,5 @@ chapter_links.each do |chapter_link|
html += content.inner_html
end
File.open("html/sre-book/chapters/sre.html", 'w') { |file| file.write(html) }
File.open("html/sre-book/toc/complete.html", 'w') { |file| file.write(html) }
puts "[html] Generated HTML file"