Working script in ruby

This commit is contained in:
Nemo 2017-09-16 23:15:59 +05:30
parent 935b636d23
commit ee45e65a58
5 changed files with 98 additions and 8 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
html/
.direnv
*.epub
*.mobi

View File

@ -2,14 +2,14 @@
![Cover](cover.jpg)
Generates a EPUB for the Google SRE Book.
Generates an EPUB/MOBI for the Google SRE Book.
Original sources are downloaded from https://landing.google.com/sre/
Review and run the `bootstrap.sh` script to generate the EPUB.
Review and run the `bootstrap.sh` script to generate the EPUB and MOBI files.
**Note**: Currently relies on python2 and pip to install dependencies
and does not setup a virtualenv. Please review the script and edit
as per your needs. If you have [direnv](https://direnv.net/)
installed, it will set up the virtualenv for you after you run
`direnv allow`.
Requirements:
- Ruby
- bundler
- Installs nokogiri

View File

@ -1,8 +1,9 @@
#!/bin/bash
rm -rf html
mkdir -p html
cd html
wget --mirror https://landing.google.com/sre/book/
wget --convert-links --mirror https://landing.google.com/sre/book/
mv landing.google.com/sre/book/* .
rm -rf landing.google.com
@ -10,3 +11,8 @@ cd ..
# Install the Ruby dependencies and merge all chapters into html/chapters/sre.html.
bundle install
ruby generate.rb
# pandoc is run from inside html/chapters, next to the generated sre.html;
# every other path is therefore given relative to that directory.
cd html/chapters
pandoc -S -o ../../google-sre.epub --epub-metadata=../../metadata.xml --epub-cover-image=../../cover.jpg sre.html
# The EPUB was written two levels up (the repository root), not into the
# current directory, so point ebook-convert at that location for both the
# input and the resulting MOBI.
ebook-convert ../../google-sre.epub ../../google-sre.mobi

64
generate.rb Normal file
View File

@ -0,0 +1,64 @@
require 'nokogiri'
require 'pp'
require 'fileutils'
# Merges every mirrored chapter page into a single HTML document,
# html/chapters/sre.html, rewriting headings and cross-chapter links so they
# keep working inside that one file.

# Parses an HTML file and closes the file handle as soon as parsing is done.
# The document is still built from an IO object (not a String) so Nokogiri's
# own handling of the page encoding is left untouched.
parse_html_file = lambda do |path|
  File.open(path) { |file| Nokogiri::HTML(file) }
end

# First we get the list of all the book sections, in the order in which they
# are listed inside the '#drop-down' element of the index page. Anchors that
# carry no href are skipped instead of crashing the script.
chapter_links = parse_html_file.call('html/index.html')
                               .css('#drop-down a')
                               .map { |l| l['href'] }
                               .compact

# Heading tag names from the highest level (h1) to the lowest (h6).
heading_tags = (1..6).map { |level| "h#{level}" }
heading_selector = heading_tags.join(',')

html = ''
chapter_links.each do |chapter_link|
  chapter_file = File.basename(chapter_link)

  # Marker placed in front of each chapter. Links that pointed at the chapter
  # file are rewritten below to '#<chapter_file>'; a fragment resolves against
  # an element's id, so the marker carries an id in addition to the original
  # name attribute.
  html << "<span class=\"hidden\" id=\"#{chapter_file}\" name=\"#{chapter_file}\"></span>"

  content = parse_html_file.call("html/#{chapter_link}").css('.content')
  content.css('.cont').each(&:remove)

  # The chapter title becomes the only h1 of the chapter; every other heading
  # is pushed one level down (h6 stays h6, there is nothing below it).
  chapter_header = content.at('.heading')
  content.css(heading_selector).each do |heading|
    if heading == chapter_header
      puts "Chapter Header"
      heading.name = 'h1'
    else
      level = heading_tags.index(heading.name)
      heading.name = heading_tags.fetch(level + 1, 'h6')
    end
  end

  # Ensure that all links are to the same file.
  content.css('a').each do |a|
    href = a['href']
    next unless href

    if (anchored = href.match(/\A[\w-]+\.html#([\w-]+)\z/))
      # 'chapter.html#section' -> '#section'
      a['href'] = "##{anchored[1]}"
    elsif href.match(/\A[\w-]+\.html\z/)
      # Link to a direct chapter: 'chapter.html' -> '#chapter.html', which is
      # the id of the marker span emitted in front of that chapter.
      a['href'] = "##{href}"
    end
  end

  html << content.inner_html
end

# Make sure the target directory exists before writing the merged document.
FileUtils.mkdir_p('html/chapters')
File.write('html/chapters/sre.html', html)
puts "[html] Generated HTML file"

18
metadata.xml Normal file
View File

@ -0,0 +1,18 @@
<dc:identifier id="epub-id-1" opf:scheme="ISBN-10">149192912X</dc:identifier>
<dc:identifier id="epub-id-1" opf:scheme="ISBN-13">978-1491929124</dc:identifier>
<dc:title id="epub-title-1">Site Reliability Engineering: How Google Runs Production Systems</dc:title>
<dc:date>2016-04-01</dc:date>
<dc:language>en-US</dc:language>
<dc:creator id="epub-creator-1" opf:role="edt">Betsy Beyer</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Chris Jones</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Jennifer Petoff</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Niall Richard Murphy</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Kavita Guliani</dc:creator>
<dc:creator id="epub-creator-1" opf:role="edt">Carmela Quinito</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Benjamin Treynor Sloss</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">JC van Winkel</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Mark Roth</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Cody Smith</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">John Wilkes</dc:creator>
<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>