commit 1f2deebe39852fc4db2a311d21d07cfc888cdcc1 Author: Nemo Date: Fri Sep 8 19:57:36 2017 +0530 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f67f6b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +Oathbringer.* +Oathbringer_* +html/ +cover.pdf diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..56fce5e --- /dev/null +++ b/COPYING @@ -0,0 +1,14 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2015 Abhay Rana + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..9e43154 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Oathbringer + +Tor.com is publishing Oathbringer in serialized form till Chapter 32. This script +downloads all of these posts and converts them into a publishable format, including +epub, mobi, pdf and html. You can find the tor.com announcement at https://www.tor.com/2017/08/15/brandon-sanderson-oathbringer-serialization-announcement/ + +For obvious reasons, the converted ebook is not part of this repo. You must download +and run the script on your own machine to generate the copies. + +The code for this is mostly adapted from [hoshruba](https://github.captnemo.in/hoshruba). + +## Requirements + +- Ruby +- Nokogiri gem installed (`gem install nokogiri`) +- Unix system with `wget` installed +- `pandoc` installed and available +- `ebook-convert` (from calibre) available to generate the mobi file +- `wkhtmltopdf` for converting html to pdf +- `pdftk` to stitch the final PDF file +- `imagemagick` to convert jpg to PDF + +The final 3 tools can be skipped if you don't care about the PDF generation. + +You can also skip calibre if you only want the EPUB file. + +# Setup + +After downloading the repo and installing the requirements, just run + + ruby setup.rb + +All the generated files will be saved with the filename `Oathbringer.{epub|pdf|mobi|html}` + +# LICENSE + +This is licensed under WTFPL. See COPYING file for the full text. diff --git a/cover.jpg b/cover.jpg new file mode 100644 index 0000000..a319a95 Binary files /dev/null and b/cover.jpg differ diff --git a/metadata.xml b/metadata.xml new file mode 100644 index 0000000..dea2bb8 --- /dev/null +++ b/metadata.xml @@ -0,0 +1,6 @@ +076532637X +978-0765326379 +Oathbringer: Book Three of the Stormlight Archive +2017-11-14 +en-US +Brandon Sanderson \ No newline at end of file diff --git a/setup.rb b/setup.rb new file mode 100644 index 0000000..feec61c --- /dev/null +++ b/setup.rb @@ -0,0 +1,75 @@ +require 'date' +require 'fileutils' +require 'nokogiri' + +FileUtils.mkdir_p("html") + +BASE = 'https://www.tor.com/2017/' + +links = [ + '08/22/oathbringer-brandon-sanderson-prologue/', + '08/29/oathbringer-brandon-sanderson-chapter-1-3/', + '09/05/oathbringer-by-brandon-sanderson-chapters-4-6/' +] + +episode = 1 + +for link in links + url = BASE + link + puts "Download #{url}" + if !File.exists? "html/#{episode}.html" + `wget --no-clobber "#{url}" --output-document "html/#{episode}.html" -o /dev/null` + end + episode +=1 +end + +# Now we have all the files +html = "" +for i in 1..3 + page = Nokogiri::HTML(open("html/#{i}.html")).css('.entry-content') + start = ending = false + page.children.each do |e| + if e.name == 'h3' + e.name = 'h1' + start = true + end + + if e.attribute('class') and e['class'].include? 'frontmatter' and start + ending = true + end + + if !start or ending + e.remove + end + end + html += page.inner_html +end + +# Write it in the book +File.open("Oathbringer.html", 'w') { |file| file.write(html) } +puts "[html] Generated HTML file" + +# Convert it to epub +`pandoc -S -o Oathbringer.epub --epub-metadata=metadata.xml --epub-cover-image=cover.jpg Oathbringer.html` +puts "[epub] Generated EPUB file" + +# Convert epub to a mobi +`ebook-convert Oathbringer.epub Oathbringer.mobi` +puts "[mobi] Generated MOBI file" + +# Generate PDF as well +# First, lets make a better css version of the html +`pandoc Oathbringer.html -s -c style.css -o Oathbringer_pdf.html` +puts "[pdf] Generated html for pdf" + +# Now we convert the cover to a pdf +`convert cover.jpg cover.pdf` +puts "[pdf] Generated cover for pdf" + +# Print the pdf_html file to pdf +`wkhtmltopdf Oathbringer_pdf.html /tmp/Oathbringer.pdf` +puts "[pdf] Generated PDF without cover" + +# Join the cover and pdf together +`pdftk cover.pdf /tmp/Oathbringer.pdf cat output Oathbringer.pdf` +puts "[pdf] Generated PDF file" diff --git a/style.css b/style.css new file mode 100644 index 0000000..bfed1dd --- /dev/null +++ b/style.css @@ -0,0 +1,450 @@ +/* + Buttondown + A Markdown/MultiMarkdown/Pandoc HTML output CSS stylesheet + Author: Ryan Gray + Date: 15 Feb 2011 + Revised: 21 Feb 2012 + + General style is clean, with minimal re-definition of the defaults or + overrides of user font settings. The body text and header styles are + left alone except title, author and date classes are centered. A Pandoc TOC + is not printed, URLs are printed after hyperlinks in parentheses. + Block quotes are italicized. Tables are lightly styled with lines above + and below the table and below the header with a boldface header. Code + blocks are line wrapped. + + All elements that Pandoc and MultiMarkdown use should be listed here, even + if the style is empty so you can easily add styling to anything. + + There are some elements in here for HTML5 output of Pandoc, but I have not + gotten around to testing that yet. +*/ + +/* NOTES: + + Stuff tried and failed: + + It seems that specifying font-family:serif in Safari will always use + Times New Roman rather than the user's preferences setting. + + Making the font size different or a fixed value for print in case the screen + font size is making the print font too big: Making font-size different for + print than for screen causes horizontal lines to disappear in math when using + MathJax under Safari. +*/ + +/* ---- Front Matter ---- */ + +/* Pandoc header DIV. Contains .title, .author and .date. Comes before div#TOC. + Only appears if one of those three are in the document. +*/ + +div#header, header + { + /* Put border on bottom. Separates it from TOC or body that comes after it. */ + border-bottom: 1px solid #aaa; + margin-bottom: 0.5em; + } + +.title /* Pandoc title header (h1.title) */ + { + text-align: center; + } + +.author, .date /* Pandoc author(s) and date headers (h2.author and h3.date) */ + { + text-align: center; + } + +/* Pandoc table of contents DIV when using the --toc option. + NOTE: this doesn't support Pandoc's --id-prefix option for #TOC and #header. + Probably would need to use div[id$='TOC'] and div[id$='header'] as selectors. +*/ + +div#TOC, nav#TOC + { + /* Put border on bottom to separate it from body. */ + border-bottom: 1px solid #aaa; + margin-bottom: 0.5em; + } + +@media print + { + div#TOC, nav#TOC + { + /* Don't display TOC in print */ + display: none; + } + } + +/* ---- Headers and sections ---- */ + +h1, h2, h3, h4, h5, h6 +{ + font-family: "Helvetica Neue", Helvetica, "Liberation Sans", Calibri, Arial, sans-serif; /* Sans-serif headers */ + + /* font-family: "Liberation Serif", "Georgia", "Times New Roman", serif; /* Serif headers */ + + page-break-after: avoid; /* Firefox, Chrome, and Safari do not support the property value "avoid" */ +} + +/* Pandoc with --section-divs option */ + +div div, section section /* Nested sections */ + { + margin-left: 2em; /* This will increasingly indent nested header sections */ + } + +p {} + +blockquote + { + font-style: italic; + } + +li /* All list items */ + { + } + +li > p /* Loosely spaced list item */ + { + margin-top: 1em; /* IE: lack of space above a
  • when the item is inside a

    */ + } + +ul /* Whole unordered list */ + { + } + +ul li /* Unordered list item */ + { + } + +ol /* Whole ordered list */ + { + } + +ol li /* Ordered list item */ + { + } + +hr {} + +/* ---- Some span elements --- */ + +sub /* Subscripts. Pandoc: H~2~O */ + { + } + +sup /* Superscripts. Pandoc: The 2^nd^ try. */ + { + } + +em /* Emphasis. Markdown: *emphasis* or _emphasis_ */ + { + } + +em > em /* Emphasis within emphasis: *This is all *emphasized* except that* */ + { + font-style: normal; + } + +strong /* Markdown **strong** or __strong__ */ + { + } + +/* ---- Links (anchors) ---- */ + +a /* All links */ + { + /* Keep links clean. On screen, they are colored; in print, they do nothing anyway. */ + text-decoration: none; + } + +@media screen + { + a:hover + { + /* On hover, we indicate a bit more that it is a link. */ + text-decoration: underline; + } + } + +@media print + { + a { + /* In print, a colored link is useless, so un-style it. */ + color: black; + background: transparent; + } + + a[href^="http://"]:after, a[href^="https://"]:after + { + /* However, links that go somewhere else, might be useful to the reader, + so for http and https links, print the URL after what was the link + text in parens + */ + content: " (" attr(href) ") "; + font-size: 90%; + } + } + +/* ---- Images ---- */ + +img + { + /* Let it be inline left/right where it wants to be, but verticality make + it in the middle to look nicer, but opinions differ, and if in a multi-line + paragraph, it might not be so great. + */ + vertical-align: middle; + } + +div.figure /* Pandoc figure-style image */ + { + /* Center the image and caption */ + margin-left: auto; + margin-right: auto; + text-align: center; + font-style: italic; + } + +p.caption /* Pandoc figure-style caption within div.figure */ + { + /* Inherits div.figure props by default */ + } + +/* ---- Code blocks and spans ---- */ + +pre, code + { + background-color: #fdf7ee; + /* BEGIN word wrap */ + /* Need all the following to word wrap instead of scroll box */ + /* This will override the overflow:auto if present */ + white-space: pre-wrap; /* css-3 */ + white-space: -moz-pre-wrap !important; /* Mozilla, since 1999 */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + word-wrap: break-word; /* Internet Explorer 5.5+ */ + /* END word wrap */ + } + +pre /* Code blocks */ + { + /* Distinguish pre blocks from other text by more than the font with a background tint. */ + padding: 0.5em; /* Since we have a background color */ + border-radius: 5px; /* Softens it */ + /* Give it a some definition */ + border: 1px solid #aaa; + /* Set it off left and right, seems to look a bit nicer when we have a background */ + margin-left: 0.5em; + margin-right: 0.5em; + } + +@media screen + { + pre + { + /* On screen, use an auto scroll box for long lines, unless word-wrap is enabled */ + white-space: pre; + overflow: auto; + /* Dotted looks better on screen and solid seems to print better. */ + border: 1px dotted #777; + } + } + +code /* All inline code spans */ + { + } + +p > code, li > code /* Code spans in paragraphs and tight lists */ + { + /* Pad a little from adjacent text */ + padding-left: 2px; + padding-right: 2px; + } + +li > p code /* Code span in a loose list */ + { + /* We have room for some more background color above and below */ + padding: 2px; + } + +/* ---- Math ---- */ + +span.math /* Pandoc inline math default and --jsmath inline math */ + { + /* Tried font-style:italic here, and it messed up MathJax rendering in some browsers. Maybe don't mess with at all. */ + } + +div.math /* Pandoc --jsmath display math */ + { + } + +span.LaTeX /* Pandoc --latexmathml math */ + { + } + +eq /* Pandoc --gladtex math */ + { + } + +/* ---- Tables ---- */ + +/* A clean textbook-like style with horizontal lines above and below and under + the header. Rows highlight on hover to help scanning the table on screen. +*/ + +table + { + border-collapse: collapse; + border-spacing: 0; /* IE 6 */ + + border-bottom: 2pt solid #000; + border-top: 2pt solid #000; /* The caption on top will not have a bottom-border */ + + /* Center */ + margin-left: auto; + margin-right: auto; + } + +thead /* Entire table header */ + { + border-bottom: 1pt solid #000; + background-color: #eee; /* Does this BG print well? */ + } + +tr.header /* Each header row */ + { + } + +tbody /* Entire table body */ + { + } + +/* Table body rows */ + +tr { + } +tr.odd:hover, tr.even:hover /* Use .odd and .even classes to avoid styling rows in other tables */ + { + background-color: #eee; + } + +/* Odd and even rows */ +tr.odd {} +tr.even {} + +td, th /* Table cells and table header cells */ + { + vertical-align: top; /* Word */ + vertical-align: baseline; /* Others */ + padding-left: 0.5em; + padding-right: 0.5em; + padding-top: 0.2em; + padding-bottom: 0.2em; + } + +/* Removes padding on left and right of table for a tight look. Good if thead has no background color*/ +/* +tr td:last-child, tr th:last-child + { + padding-right: 0; + } +tr td:first-child, tr th:first-child + { + padding-left: 0; + } +*/ + +th /* Table header cells */ + { + font-weight: bold; + } + +tfoot /* Table footer (what appears here if caption is on top?) */ + { + } + +caption /* This is for a table caption tag, not the p.caption Pandoc uses in a div.figure */ + { + caption-side: top; + border: none; + font-size: 0.9em; + font-style: italic; + text-align: center; + margin-bottom: 0.3em; /* Good for when on top */ + padding-bottom: 0.2em; + } + +/* ---- Definition lists ---- */ + +dl /* The whole list */ + { + border-top: 2pt solid black; + padding-top: 0.5em; + border-bottom: 2pt solid black; + } + +dt /* Definition term */ + { + font-weight: bold; + } + +dd+dt /* 2nd or greater term in the list */ + { + border-top: 1pt solid black; + padding-top: 0.5em; + } + +dd /* A definition */ + { + margin-bottom: 0.5em; + } + +dd+dd /* 2nd or greater definition of a term */ + { + border-top: 1px solid black; /* To separate multiple definitions */ + } + +/* ---- Footnotes ---- */ + +a.footnote, a.footnoteRef { /* Pandoc, MultiMarkdown footnote links */ + font-size: small; + vertical-align: text-top; +} + +a[href^="#fnref"], a.reversefootnote /* Pandoc, MultiMarkdown, ?? footnote back links */ + { + } + +@media print + { + a[href^="#fnref"], a.reversefootnote /* Pandoc, MultiMarkdown */ + { + /* Don't display these at all in print since the arrow is only something to click on */ + display: none; + } + } + +div.footnotes /* Pandoc footnotes div at end of the document */ + { + } + +div.footnotes li[id^="fn"] /* A footnote item within that div */ + { + } + +/* You can class stuff as "noprint" to not print. + Useful since you can't set this media conditional inside an HTML element's + style attribute (I think), and you don't want to make another stylesheet that + imports this one and adds a class just to do this. +*/ + +@media print + { + .noprint + { + display:none; + } + }