Working script in ruby

2017-09-16 23:15:59 +05:30 · 2017-09-16 23:15:59 +05:30 · ee45e65a58
parent 935b636d23
commit ee45e65a58
5 changed files with 98 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 html/
 .direnv
+*.epub
+*.mobi
--- a/README.md
+++ b/README.md
@ -2,14 +2,14 @@

 ![Cover](cover.jpg)

-Generates a EPUB for the Google SRE Book.
+Generates an EPUB/MOBI for the Google SRE Book.

 Original sources are downloaded from https://landing.google.com/sre/

-Review and run the `bootstrap.sh` script to generate the EPUB.
+Review and run the `bootstrap.sh` script to generate the EPUB and MOBI files.

-**Note**: Currently relies on python2 and pip to install dependencies
-and does not setup a virtualenv. Please review the script and edit
-as per your needs. If you have [direnv](https://direnv.net/)
-installed, it will set up the virtualenv for you after you run
-`direnv allow`.
+Requirements:
+
+- Ruby
+- bundler
+- nokogiri (installed by `bundle install`)
--- a/bootstrap.sh
+++ b/bootstrap.sh
@ -1,8 +1,9 @@
 #!/bin/bash

+rm -rf html
 mkdir -p html
 cd html
-wget --mirror https://landing.google.com/sre/book/
+wget --convert-links --mirror https://landing.google.com/sre/book/

 mv landing.google.com/sre/book/* .
 rm -rf landing.google.com
@ -10,3 +11,8 @@ cd ..

 bundle install
 ruby generate.rb
+
+cd html/chapters
+# Work inside html/chapters, where generate.rb wrote sre.html; both e-books are written to the repository root, two levels up.
+pandoc -S -o ../../google-sre.epub --epub-metadata=../../metadata.xml --epub-cover-image=../../cover.jpg sre.html
+ebook-convert ../../google-sre.epub ../../google-sre.mobi
--- a/generate.rb
+++ b/generate.rb
@ -0,0 +1,64 @@
+require 'nokogiri'
+require 'pp'
+require 'fileutils'
+# Stitches the mirrored book pages under html/ into one file,
+# html/chapters/sre.html, which bootstrap.sh then feeds to pandoc.
+# Links between chapters are rewritten to anchors inside that single file.
+
+# Parses an HTML file; the block form of File.open closes the handle afterwards.
+parse = ->(path) { File.open(path) { |f| Nokogiri::HTML(f) } }
+# Heading tag names in order: h1 .. h6.
+headers = (1..6).map { |x| "h#{x}" }
+
+# First we get the list of all the book sections, in '#drop-down' menu order:
+chapter_links = parse.call('html/index.html')
+  .css('#drop-down a')
+  .map { |l| l['href'] }
+  .compact
+
+html = ''
+chapter_links.each do |chapter_link|
+  chapter_file = File.basename chapter_link
+  # Marker span named after the chapter file; rewritten whole-chapter links
+  # ("#<file>.html", see below) refer to this name.
+  html << "<span class=\"hidden\" name=\"#{chapter_file}\"></span>"
+  content = parse.call("html/#{chapter_link}").css('.content')
+
+  # Drop every '.cont' element from the chapter body.
+  content.css('.cont').each(&:remove)
+
+  # Ensure that all links are to the same file
+  content.css('a').each do |a|
+    href = a['href'].to_s.strip
+    next if href.empty?
+
+    # The dot is escaped so that only a literal ".html" suffix matches.
+    if (m = href.match(/\A[\w-]+\.html#([\w-]+)\z/))
+      # "chapter.html#section" -> "#section"
+      a['href'] = "##{m[1]}"
+    elsif (m = href.match(/\A([\w-]+\.html)\z/))
+      # Link to a direct chapter: "chapter.html" -> "#chapter.html"
+      a['href'] = "##{m[1]}"
+    end
+  end
+
+  chapter_header = content.at('.heading')
+
+  content.css(headers.join(',')).each do |e|
+    # If chapter heading
+    if e == chapter_header
+      puts "Chapter Header"
+      e.name = 'h1'
+    else
+      # Demote every other heading by one level (h1 -> h2, ...); h6 stays h6.
+      e.name = headers.fetch(headers.index(e.name) + 1, 'h6')
+    end
+  end
+
+  html << content.inner_html
+end
+
+# Make sure the output directory exists before writing (no-op when it already does).
+FileUtils.mkdir_p('html/chapters')
+File.write('html/chapters/sre.html', html)
+puts "[html] Generated HTML file"
--- a/metadata.xml
+++ b/metadata.xml
@ -0,0 +1,18 @@
+<dc:identifier id="epub-id-1" opf:scheme="ISBN-10">149192912X</dc:identifier>
+<dc:identifier id="epub-id-1" opf:scheme="ISBN-13">978-1491929124</dc:identifier>
+<dc:title id="epub-title-1">Site Reliability Engineering: How Google Runs Production Systems</dc:title>
+<dc:date>2016-04-01</dc:date>
+<dc:language>en-US</dc:language>
+<dc:creator id="epub-creator-1" opf:role="edt">Betsy Beyer</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="edt">Chris Jones</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="edt">Jennifer Petoff</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="edt">Niall Richard Murphy</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="edt">Kavita Guliani</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="edt">Carmela Quinito</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">Benjamin Treynor Sloss</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">JC van Winkel</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">Mark Roth</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">Cody Smith</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">John Wilkes</dc:creator>
+<dc:creator id="epub-creator-1" opf:role="aut">Marc Alvidrez</dc:creator>