cosmere-books/oathbringer.rb

71 lines
1.8 KiB
Ruby
Raw Permalink Normal View History

2019-12-30 18:30:53 +00:00
# frozen_string_literal: true
2017-09-08 14:27:36 +00:00
require 'date'
require 'fileutils'
require 'nokogiri'
require_relative './methods'
2017-09-17 04:30:34 +00:00
# Make sure the local directory that receives the downloaded chapter pages
# exists before anything is written into it.
FileUtils.mkdir_p('oathbringer')
2017-09-08 14:27:36 +00:00
2019-12-30 18:30:53 +00:00
# Common URL prefix for every chapter page; each entry in `links` supplies
# only the month/day/slug part that is appended to it.
BASE = 'https://www.tor.com/2017/'
2017-09-08 14:27:36 +00:00
# Hand-maintained chapter pages, as paths relative to BASE (month/day/slug).
# Later instalments are derived from the last entry of this list.
links = [
  '08/22/oathbringer-brandon-sanderson-prologue/',
  '08/29/oathbringer-brandon-sanderson-chapter-1-3/',
  '09/05/oathbringer-by-brandon-sanderson-chapters-4-6/'
]

# Automatically adds all recent chapters: one link per week after the last
# hand-maintained entry, three chapters per link.
puts 'Downloading all found links'

# Highest chapter number a generated link may reference.
final_chapter = 32

# First chapter of the next instalment = last chapter number in the slug + 1.
chapter = Integer(links.last.split('-').last.delete('^0-9'), 10) + 1

# Base 10 is explicit because the path segments are zero-padded ("08", "09").
month, day = links.last.split('/').first(2).map { |part| Integer(part, 10) }

# The schedule is carried forward as a real Date instead of being re-parsed
# from the previous link on every pass: the links hold no year, so re-parsing
# wrapped back to January 2017 after December and the loop never ended.
next_date = Date.new(2017, month, day) + 7

# Stop when every chapter up to final_chapter is covered, or when the next
# instalment would be dated in the future. Checking before appending means no
# link is ever produced for a date that has not arrived yet.
while chapter <= final_chapter && next_date <= Date.today
  ending_chapter = [chapter + 2, final_chapter].min
  links << "#{next_date.strftime('%m/%d')}/oathbringer-by-brandon-sanderson-chapters-#{chapter}-#{ending_chapter}/"
  chapter += 3
  next_date += 7
end
# After the loop, next_date is the date one week after the last link added
# (or after the last hand-maintained link when nothing was added).
2017-09-08 14:27:36 +00:00
# Fetch each chapter page into oathbringer/<n>.html, numbering from 1 in the
# order of `links`. A page whose file is already on disk is not fetched again.
episode = 1
links.each do |link|
  url = "#{BASE}#{link}"
  target = "oathbringer/#{episode}.html"
  puts "Download #{url}"
  # wget's own log output is sent to /dev/null via -o.
  `wget --no-clobber "#{url}" --output-document "#{target}" -o /dev/null` unless File.exist?(target)
  episode += 1
end
# Now we have all the files: stitch the chapter bodies into one HTML string.
# The buffer is explicitly unfrozen (+'') so it can be appended to in place
# even under `frozen_string_literal: true`.
html = +''
links.each_with_index do |link, index|
  # Files were saved as 1.html, 2.html, ... in the order of `links`.
  # The block form of File.open closes the handle once parsing is done.
  document = File.open("oathbringer/#{index + 1}.html") { |file| Nokogiri::HTML(file) }
  page = document.css('.entry-content')

  start = ending = false
  page.children.each do |e|
    # Every <h3> is promoted to <h1>; the first one seen also marks where the
    # content to keep begins.
    if e.name == 'h3'
      e.name = 'h1'
      start = true
    end

    # After the start marker, the first node for which class?('frontmatter')
    # is true marks the end of the content to keep.
    # NOTE(review): `class?` is not defined in this file; presumably it comes
    # from ./methods or Nokogiri, confirm.
    ending = true if e.class?('frontmatter') && start

    # Drop everything before the first heading and from the end marker on.
    e.remove if !start || ending
  end

  html << page.inner_html
  html << "<p>Visit <a href='#{BASE}#{link}'>tor.com</a> for discussion.</p>"
end
2017-09-27 16:44:55 +00:00
# Append a closing note with the date computed for the next batch of chapters.
closing_note = "<p>Next 3 chapters out on #{next_date}</p>"
html += closing_note

# Write the assembled book to disk in one call.
File.write('books/Oathbringer.html', html)
puts '[html] Generated HTML file'

# NOTE(review): `generate` is not defined in this file; presumably it is
# loaded from ./methods, confirm what the :all argument selects.
generate('Oathbringer', :all)