never-say-you-cant-survive/generate.rb

70 lines
2.0 KiB
Ruby
Raw Permalink Normal View History

2020-10-09 10:14:15 +00:00
require 'nokogiri'
# CSS classes whose elements are dropped from the generated chapters.
skip_classes = ['ebook-link-wrapper']

# Section titles keyed by the zero-based position of the chapter (its line in
# urls.txt) that opens the section. The array is intentionally sparse: every
# index that is not assigned below stays nil, meaning "no new section here".
section_headers = []
section_headers[0] = "Section I: Introduction"
section_headers[4] = "Section II: Whats A Story, and How Do You Find One?"
section_headers[10] = "Section III: Your Feelings are Valid—and Powerful"
section_headers[15] = "Section IV: What We Write About When We Write About Spaceships"
section_headers[20] = "Section V: How to Use Writerly Tricks to Gain Unstoppable Powers"
2020-10-09 10:40:47 +00:00
2020-10-09 10:14:15 +00:00
# Reduce every downloaded page in html/ to just its chapter body and write the
# result to chapters/ under the same file name.
Dir.glob('html/*.html').each do |path|
  # Parse inside a block so the file handle is closed as soon as Nokogiri has
  # read the document.
  document = File.open(path) { |file| Nokogiri::HTML(file) }
  html = +""
  started = false

  document.css('.entry-content').children.each do |node|
    # Output begins at the first non-text node that does not carry the
    # 'frontmatter' class; everything before it is ignored.
    started ||= !node.instance_of?(Nokogiri::XML::Text) && !node.classes.include?('frontmatter')
    next unless started

    # we don't need any empty tags
    next if node.inner_text.gsub(/[[:space:]]/, '').empty?

    if node.name == "h3"
      # Branch order matters: a heading mentioning both "Section" and
      # "Introduction" is promoted, not dropped.
      case node.inner_text
      when /Introduction/ then node.name = "h2"
      when /Section/ then next
      when /Chapter/ then node.name = "h2"
      end
    end

    # The first '.squib' node ends the chapter: neither it nor anything after
    # it is emitted, so iteration can stop here.
    break if node.matches?('.squib')

    # skip_classes is defined earlier in the script.
    next unless (node.classes & skip_classes).empty?

    html << node.to_s
  end

  File.open("chapters/#{File.basename(path)}", "w:UTF-8") do |f|
    f.write html
  end
end
# Stitch the per-chapter files into one HTML document, in the order the
# chapters are listed in urls.txt, then hand that document to pandoc.
html = +""

# File.foreach closes urls.txt once iteration finishes. Blank lines are dropped
# before numbering so that a stray or trailing empty line does not turn into a
# lookup of "chapters/.html".
chapter_urls = File.foreach("urls.txt").map(&:strip).reject(&:empty?)

chapter_urls.each_with_index do |url, index|
  # The last path segment of the URL is the chapter's file name (sans .html).
  title = url.split('/')[-1]

  # section_headers is sparse; nil means this chapter does not open a section.
  header = section_headers[index]
  html << "<h1>#{header}</h1>\n\n" if header

  html << File.read("chapters/#{title}.html", encoding: "UTF-8")
end

File.write("never-say-you-cant-survive.html", html)

# Pass pandoc an argument list instead of a shell string so no shell quoting
# is involved, and report a failed or missing pandoc instead of discarding it.
built = system(
  'pandoc',
  '--metadata', 'title=Never Say You Cant Survive',
  '-o', 'never-say-you-cant-survive.epub',
  '--epub-metadata=metadata.xml',
  '--epub-cover-image=cover.jpg',
  'never-say-you-cant-survive.html'
)
warn "pandoc did not produce never-say-you-cant-survive.epub" unless built