diff --git a/.gitignore b/.gitignore index 2ae273e..56348ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ html/ .direnv *.epub +*.pdf *.mobi vendor/ diff --git a/bootstrap.sh b/bootstrap.sh index 653edc3..05e57b3 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -1,26 +1,38 @@ #!/bin/bash +set -euo pipefail +IFS=$'\n\t' -# Cleanup -rm -rf html -mkdir -p html/index -mkdir -p html/sre-book -cd html +TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html" +# Make sure that links are relative \ +# # Remove the /sre/ directories +# Save stuff in html/ directory +# Do not create a landing.google.com directory +# Enable recursion, timestamping (--mirror) +# Images are hosted elsewhere, download them as well. +# We need to go up a level from /toc/ where we start +wget \ + --convert-links \ + --directory-prefix=html \ + --page-requisites \ + --adjust-extension \ + --span-hosts \ + --trust-server-names \ + --backup-converted \ + --mirror \ + --no-verbose \ + --recursive \ + --domains=lh3.googleusercontent.com,landing.google.com https://landing.google.com/sre/sre-book/toc/index.html -# Download -wget --convert-links --mirror https://landing.google.com/sre/book/ -mv landing.google.com/sre/sre-book/* ./sre-book -mv landing.google.com/sre/book/index.html ./index -rm -rf landing.google.com -cd .. +MODE=${1:-} -if [ $1 != "docker" ];then +if [ "$MODE" != "docker" ];then bundle install fi ruby generate.rb -pushd html/sre-book/chapters -pandoc -f html -t epub -o ../../../google-sre.epub --epub-metadata=../../../metadata.xml --epub-cover-image=../../../cover.jpg sre.html +pushd html/landing.google.com/sre/sre-book/toc +pandoc -f html -t epub -o ../../../../../google-sre.epub --epub-metadata=../../../../../metadata.xml --epub-cover-image=../../../../../cover.jpg complete.html popd ebook-convert google-sre.epub google-sre.mobi ebook-convert google-sre.epub google-sre.pdf diff --git a/generate.rb b/generate.rb index b924e28..d98ea63 100644 --- a/generate.rb +++ b/generate.rb @@ -1,17 +1,27 @@ require 'nokogiri' -require 'pp' +require 'pathname' require 'fileutils' + # First we get the list of all the book sections: -chapter_links = Nokogiri::HTML(open("html/index/index.html")) +Dir.chdir("html/landing.google.com/sre/sre-book/toc") +chapter_links = Nokogiri::HTML(open("index.html")) .css('#drop-down a') .map {|l| l.attribute('href').value} -html = '' +html = < + + + Site Reliability Engineering + + + +EOT chapter_links.each do |chapter_link| - chapter_file = File.basename chapter_link - html += "" - doc = Nokogiri::HTML(open("html/index/#{chapter_link}")) + chapter_file = File.basename File.dirname chapter_link + html += "" + doc = Nokogiri::HTML(open(chapter_link)) content = doc.css('.content') # this title is with additional 'chapter X' in front @@ -25,9 +35,18 @@ chapter_links.each do |chapter_link| content.css('a').each do |a| link = a.attribute('href') if link - matches = link.value.scan /^([\w-]+.html)#([\w-]+)$/ - if matches.length == 1 - a['href'] = '#' + matches[0][1] + + matches = link.value.scan /^(\S*index.html)+(#[\w-]+)?/ + # pp [link.value, matches] if link.value and link.value.include? 'lessons-learned' + if matches.length == 1 and matches[0].length == 2 + # Self Links + if matches[0][0] =="index.html" and matches[0][1] + a['href'] = matches[0][1] + # If it points to start of a different chapter + else + chapter_slug = File.basename File.dirname matches[0][0] + a['href'] = "##{chapter_slug}" + end end end end @@ -36,11 +55,9 @@ chapter_links.each do |chapter_link| headers = (1..6).map {|x| "h#{x}"} - # headers.each_with_index content.css(headers.join(',')).each do |e| # If chapter heading if e == chapter_header - puts "Chapter Header" e.name = 'h1' else # Reduce everything by 1 @@ -50,17 +67,17 @@ chapter_links.each do |chapter_link| end end - content.css('a').each do |a| - link = a.attribute('href') - if link - # Link to a direct chapter - matches = link.value.scan /^([\w-]+.html)$/ - if matches.length == 1 - a['href'] = '#' + matches[0][0] - end + content.css('img').each do |img| + img_file = img.attribute('src') + if img_file + chapter_directory = File.dirname chapter_link + absolute_image_path = Pathname.new File.absolute_path img_file, chapter_directory + cwd = Pathname.new Dir.pwd + img['src'] = absolute_image_path.relative_path_from cwd end end + if content.children.css('section > h1').length > 0 # remove additional parent section tag content = content.children.at_css('section') @@ -69,11 +86,15 @@ chapter_links.each do |chapter_link| content = content.children.at_css('div') end + + # replace h1 title content.at_css('h1').inner_html = title html += content.inner_html end -File.open("html/sre-book/chapters/sre.html", 'w') { |file| file.write(html) } +html+="" + +File.open("complete.html", 'w') { |file| file.write(html) } puts "[html] Generated HTML file"