From ee45e65a5833daf4514f21d452553236ac73d689 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Sat, 16 Sep 2017 23:15:59 +0530
Subject: [PATCH] Working script in ruby

---
 .gitignore   |  2 ++
 README.md    | 14 ++++++------
 bootstrap.sh |  9 +++++++-
 generate.rb  | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 metadata.xml | 18 +++++++++++++++
 5 files changed, 94 insertions(+), 8 deletions(-)
 create mode 100644 generate.rb
 create mode 100644 metadata.xml

diff --git a/.gitignore b/.gitignore
index f5e1ea6..5d966a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 html/
 .direnv
+*.epub
+*.mobi
diff --git a/README.md b/README.md
index 6e637a6..f209d15 100644
--- a/README.md
+++ b/README.md
@@ -2,14 +2,14 @@
 
 ![Cover](cover.jpg)
 
-Generates a EPUB for the Google SRE Book.
+Generates an EPUB/MOBI for the Google SRE Book.
 
 Original sources are downloaded from https://landing.google.com/sre/
 
-Review and run the `bootstrap.sh` script to generate the EPUB.
+Review and run the `bootstrap.sh` script to generate the EPUB and MOBI files.
 
-**Note**: Currently relies on python2 and pip to install dependencies
-and does not setup a virtualenv. Please review the script and edit
-as per your needs. If you have [direnv](https://direnv.net/)
-installed, it will set up the virtualenv for you after you run
-`direnv allow`.
+Requirements:
+
+- Ruby and bundler (`bundle install` installs nokogiri)
+- wget
+- pandoc and `ebook-convert` (from Calibre)
diff --git a/bootstrap.sh b/bootstrap.sh
index b653882..c07e461 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -1,8 +1,9 @@
 #!/bin/bash
 
+rm -rf html
 mkdir -p html
 cd html
-wget --mirror https://landing.google.com/sre/book/
+wget --convert-links --mirror https://landing.google.com/sre/book/
 mv landing.google.com/sre/book/* .
 rm -rf landing.google.com
 
@@ -10,3 +11,9 @@ cd ..
 
 bundle install
 ruby generate.rb
+
+cd html/chapters
+
+pandoc -S -o ../../google-sre.epub --epub-metadata=../../metadata.xml --epub-cover-image=../../cover.jpg sre.html
+# pandoc wrote the EPUB two directories up, so convert it from there.
+ebook-convert ../../google-sre.epub ../../google-sre.mobi
diff --git a/generate.rb b/generate.rb
new file mode 100644
index 0000000..d3a613d
--- /dev/null
+++ b/generate.rb
@@ -0,0 +1,59 @@
+# Merges the mirrored chapters of the Google SRE book (under html/) into a
+# single HTML file, which bootstrap.sh then hands to pandoc.
+require 'nokogiri'
+require 'pp'
+require 'fileutils'
+
+HEADERS = (1..6).map { |x| "h#{x}" }.freeze
+# "chapter.html#anchor": link to an anchor inside a chapter file.
+ANCHOR_LINK = /\A[\w-]+\.html#([\w-]+)\z/
+# "chapter.html": link to a chapter as a whole.
+CHAPTER_LINK = /\A([\w-]+\.html)\z/
+OUTPUT_FILE = 'html/chapters/sre.html'
+
+# Parses the HTML file at +path+, closing the file handle afterwards.
+def parse_html(path)
+  File.open(path) { |file| Nokogiri::HTML(file) }
+end
+
+# First we get the list of all the book sections:
+chapter_links = parse_html('html/index.html')
+                .css('#drop-down a[href]')
+                .map { |l| l['href'] }
+
+html = +''
+chapter_links.each do |chapter_link|
+  chapter_file = File.basename(chapter_link)
+  # Anchor named after the chapter file, so that the "#<chapter>.html" links
+  # produced below have a target inside the merged document.
+  html << %(<span id="#{chapter_file}"></span>)
+  content = parse_html("html/#{chapter_link}").css('.content')
+
+  content.css('.cont').each(&:remove)
+
+  # Ensure that all links are to the same file: "chapter.html#anchor" becomes
+  # "#anchor" and a bare "chapter.html" becomes "#chapter.html".
+  content.css('a[href]').each do |a|
+    case a['href']
+    when ANCHOR_LINK, CHAPTER_LINK
+      a['href'] = "##{Regexp.last_match(1)}"
+    end
+  end
+
+  chapter_header = content.at('.heading')
+
+  content.css(HEADERS.join(',')).each do |e|
+    e.name = if e == chapter_header
+               'h1'
+             else
+               # Demote every other heading by one level, bottoming out at h6.
+               HEADERS.fetch(HEADERS.index(e.name) + 1, 'h6')
+             end
+  end
+
+  html << content.inner_html
+end
+
+FileUtils.mkdir_p(File.dirname(OUTPUT_FILE))
+File.write(OUTPUT_FILE, html)
+puts "[html] Generated HTML file"
diff --git a/metadata.xml b/metadata.xml
new file mode 100644
index 0000000..96f54c9
--- /dev/null
+++ b/metadata.xml
@@ -0,0 +1,18 @@
+149192912X
+978-1491929124
+Site Reliability Engineering: How Google Runs Production Systems
+2016-04-01
+en-US
+Betsy Beyer
+Chris Jones
+Jennifer Petoff
+Niall Richard Murphy
+Kavita Guliani
+Carmela Quinito
+Benjamin Treynor Sloss
+JC van Winkel
+Marc Alvidrez
+Mark Roth
+Cody Smith
+John Wilkes
+Marc Alvidrez