never-say-you-cant-survive/generate.rb

57 lines
1.5 KiB
Ruby
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

require 'nokogiri'
skip_classes = ['ebook-link-wrapper']
for i in Dir.glob('html/*.html')
html = ""
puts i
complete_html = Nokogiri::HTML(open(i))
page = complete_html.css('.entry-content')
title = complete_html.css('.entry-title>a').inner_html[29..]
started = ended = false
page.children.each do |e|
unless started
if e.class != Nokogiri::XML::Text
unless e.classes.include? 'frontmatter'
puts e.classes
started = true
end
end
end
if started
trimmed = e.inner_text.gsub(/[[:space:]]/, '')
if trimmed == "" and not ['hr'].include? e.name
next
end
if e.name == "h3"
if e.inner_text.strip[0..7] == "Section" or e.inner_text.strip[0..12] == "Introduction"
e.name = "h1"
else
e.name = "h2"
end
end
skip = (e.classes & skip_classes).size > 0
ended = true if e.matches?('.squib')
if skip == false and ended == false
html += e.to_s
end
end
end
fn = File.basename(i)
File.write("chapters/#{fn}", html)
end
html = ""
File.open("urls.txt").each do |url|
title = url.strip.split('/')[-1]
html += File.read("chapters/#{title}.html")
html += "\n\n"
end
File.write("never-say-you-cant-survive.html", html)
`pandoc --metadata title="Never Say You Cant Survive" -o never-say-you-cant-survive.epub --epub-metadata=metadata.xml --epub-cover-image=cover.jpg never-say-you-cant-survive.html`