Improve wget invocation to get images as well
This commit is contained in:
parent
56dd68f63d
commit
823b6d20e1
34
bootstrap.sh
34
bootstrap.sh
|
@ -1,17 +1,27 @@
|
|||
#!/bin/bash
|
||||
TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html"
|
||||
# Make sure that links are relative
|
||||
# # Remove the /sre/ directories
|
||||
# Save stuff in html/ directory
|
||||
# Do not create a landing.google.com directory
|
||||
# Enable recursion, timestamping (--mirror)
|
||||
# Images are hosted elsewhere; download them as well.
|
||||
# We need to go up a level from /toc/ where we start
|
||||
wget \
|
||||
--convert-links \
|
||||
--directory-prefix=html \
|
||||
--page-requisites \
|
||||
--adjust-extension \
|
||||
--span-hosts \
|
||||
--trust-server-names \
|
||||
--backup-converted \
|
||||
--mirror \
|
||||
--no-verbose \
|
||||
--recursive \
|
||||
--domains=lh3.googleusercontent.com,landing.google.com \
|
||||
"$TOC_URL"
|
||||
|
||||
# Cleanup
|
||||
rm -rf html
|
||||
mkdir -p html/index
|
||||
mkdir -p html/sre-book
|
||||
cd html
|
||||
|
||||
# Download
|
||||
wget --convert-links --mirror https://landing.google.com/sre/book/
|
||||
mv landing.google.com/sre/sre-book/* ./sre-book
|
||||
mv landing.google.com/sre/book/index.html ./index
|
||||
rm -rf landing.google.com
|
||||
cd ..
|
||||
exit
|
||||
|
||||
if [ $1 != "docker" ];then
|
||||
bundle install
|
||||
|
|
|
@ -3,7 +3,7 @@ require 'pp'
|
|||
require 'fileutils'
|
||||
# First we get the list of all the book sections:
|
||||
|
||||
chapter_links = Nokogiri::HTML(open("html/index/index.html"))
|
||||
chapter_links = Nokogiri::HTML(open("html/toc/index.html"))
|
||||
.css('#drop-down a')
|
||||
.map {|l| l.attribute('href').value}
|
||||
|
||||
|
@ -11,7 +11,7 @@ html = ''
|
|||
chapter_links.each do |chapter_link|
|
||||
chapter_file = File.basename chapter_link
|
||||
html += "<span class=\"hidden\" name=\"#{chapter_file}\"></span>"
|
||||
doc = Nokogiri::HTML(open("html/index/#{chapter_link}"))
|
||||
doc = Nokogiri::HTML(open("html/toc/#{chapter_link}"))
|
||||
content = doc.css('.content')
|
||||
|
||||
# this title has an additional 'chapter X' prefix in front
|
||||
|
@ -40,7 +40,6 @@ chapter_links.each do |chapter_link|
|
|||
content.css(headers.join(',')).each do |e|
|
||||
# If chapter heading
|
||||
if e == chapter_header
|
||||
puts "Chapter Header"
|
||||
e.name = 'h1'
|
||||
else
|
||||
# Reduce everything by 1
|
||||
|
@ -75,5 +74,5 @@ chapter_links.each do |chapter_link|
|
|||
html += content.inner_html
|
||||
end
|
||||
|
||||
File.open("html/sre-book/chapters/sre.html", 'w') { |file| file.write(html) }
|
||||
File.open("html/sre-book/toc/complete.html", 'w') { |file| file.write(html) }
|
||||
puts "[html] Generated HTML file"
|
||||
|
|
Loading…
Reference in New Issue