Working script in ruby

This commit is contained in:
Nemo 2017-09-16 23:15:59 +05:30
parent 935b636d23
commit ee45e65a58
5 changed files with 98 additions and 8 deletions

.gitignore vendored
View File

@ -1,2 +1,4 @@

View File

@ -2,14 +2,14 @@
Generates an EPUB for the Google SRE Book.
Generates an EPUB/MOBI for the Google SRE Book.
Original sources are downloaded from
Review and run the build script to generate the EPUB.
Review and run the build script to generate the EPUB and MOBI files.
**Note**: Currently relies on python2 and pip to install dependencies
and does not set up a virtualenv. Please review the script and edit it
as per your needs. If you have [direnv](https://direnv.net)
installed, it will set up the virtualenv for you after you run
`direnv allow`.
- Ruby
- bundler
- Installs nokogiri

View File

@ -1,8 +1,9 @@
rm -rf html
mkdir -p html
cd html
wget --mirror
wget --convert-links --mirror
mv* .
rm -rf
@ -10,3 +11,8 @@ cd ..
bundle install
ruby generate.rb
cd html/chapters
pandoc -S -o ../../google-sre.epub --epub-metadata=../../metadata.xml --epub-cover-image=../../cover.jpg sre.html
ebook-convert google-sre.epub

generate.rb Normal file
View File

@ -0,0 +1,64 @@
require 'nokogiri'
require 'pp'
require 'fileutils'
# Builds one combined HTML document from the mirrored book pages under html/.
#
# NOTE(review): several statements below look truncated in this view (they are
# marked individually) and the closing `end` keywords are not visible, so the
# nesting described in these comments is a best reading — confirm against the
# original file before relying on it.
#
# First we get the list of all the book sections:
# parse html/index.html, select every anchor under the element with id
# "drop-down", and keep each anchor's href value.
chapter_links = Nokogiri::HTML(open("html/index.html"))
.css('#drop-down a')
.map {|l| l.attribute('href').value}
# Accumulates the combined markup; it is passed to file.write at the end.
html = ''
chapter_links.each do |chapter_link|
# File name of the chapter without any directory part.
chapter_file = File.basename chapter_link
# Emit an empty hidden span whose name is the chapter file name. The second
# link pass below rewrites bare "<file>.html" links to "#<file>.html", so this
# span is presumably the target of those links — TODO confirm.
html += "<span class=\"hidden\" name=\"#{chapter_file}\"></span>"
# Parse the chapter page and keep only the nodes matching '.content'.
content = Nokogiri::HTML(open("html/#{chapter_link}")).css('.content')
# NOTE(review): no body is visible for this '.cont' loop; either it was lost
# from this view or the statements that follow belong to it — confirm.
content.css('.cont').each do |e|
# Ensure that all links are to the same file
content.css('a').each do |a|
link = a.attribute('href')
# Anchors without an href attribute are left untouched.
if link
# Matches "<file>.html#<fragment>". The '.' before "html" is unescaped, so it
# matches any single character, not only a literal dot.
matches = link.value.scan /^([\w-]+.html)#([\w-]+)$/
if matches.length == 1
# Keep only the fragment (second capture group) so the link stays in-document.
a['href'] = '#' + matches[0][1]
# NOTE(review): the right-hand side of this assignment appears truncated
# (probably a lookup of the '.heading' element inside content) — confirm.
chapter_header ='.heading')
# Tag names "h1" through "h6".
headers = (1..6).map {|x| "h#{x}"}
# headers.each_with_index
# Visit every h1..h6 element found in the chapter content.
content.css(headers.join(',')).each do |e|
# If chapter heading
if e == chapter_header
# NOTE(review): this line looks like two statements fused together (a puts and
# an assignment of 'h1' whose receiver is missing) — confirm.
puts "Chapter Header" = 'h1'
# Reduce everything by 1
# NOTE(review): the argument to headers.index is not visible; presumably the
# current element's tag name — confirm.
i = headers.index
# Pick the next-lower heading level, falling back to 'h6' when there is none.
# NOTE(review): the trailing "= new_name" looks like the remainder of a
# separate assignment whose left-hand side is missing — confirm.
new_name = headers[i+1] ? headers[i+1] : 'h6' = new_name
# Second link pass: links that point at a whole chapter file.
content.css('a').each do |a|
link = a.attribute('href')
if link
# Link to a direct chapter
# Matches a bare "<file>.html" with no fragment (same unescaped '.' as above).
matches = link.value.scan /^([\w-]+.html)$/
if matches.length == 1
# Point the link at "#<file>.html" inside the combined document.
a['href'] = '#' + matches[0][0]
# Append this chapter's (rewritten) inner HTML to the combined document.
html += content.inner_html
# NOTE(review): the call that opens "html/chapters/sre.html" for writing appears
# to be missing between `end` and the path (presumably a File.open) — confirm.
# The visible block writes the accumulated html string to that file.
end"html/chapters/sre.html", 'w') { |file| file.write(html) }
puts "[html] Generated HTML file"

metadata.xml Normal file
View File

@ -0,0 +1,18 @@
<dc:identifier id="epub-id-1" opf:scheme="ISBN-10">149192912X</dc:identifier>
<dc:identifier id="epub-id-1" opf:scheme="ISBN-13">978-1491929124</dc:identifier>
<dc:title id="epub-title-1">Site Reliability Engineering: How Google Runs Production Systems</dc:title>
<dc:creator id="epub-creator-1" opf:role="edt">Betsy Beyer</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Chris Jones</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Jennifer Petoff</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Niall Richard Murphy</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Kavita Guliani</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Carmela Quinito</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Benjamin Treynor Sloss</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">JC van Winkel</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Mark Roth</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Cody Smith</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">John Wilkes</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>