# frozen_string_literal: true

# Downloads the tor.com pages listed in `links`, extracts the `.entry-content`
# section from each one, trims it down to the excerpt body, and concatenates
# everything into books/lost-metal.html before handing off to `generate`.

require 'uri'
require 'date'
require 'fileutils'
require 'nokogiri'
require_relative './methods'

FileUtils.mkdir_p('lost-metal')

BASE = 'https://www.tor.com/2022/'
links = [
  '09/19/read-the-lost-metal-by-brandon-sanderson-prologue-and-chapters-1-2/',
  '09/26/read-the-lost-metal-by-brandon-sanderson-chapters-3-4/',
  '10/03/read-the-lost-metal-by-brandon-sanderson-chapters-5-8/',
  '10/10/read-the-lost-metal-by-brandon-sanderson-chapter-nine/',
  '10/17/read-the-lost-metal-by-brandon-sanderson-chapters-ten-and-eleven/',
  '10/24/read-the-lost-metal-by-brandon-sanderson-chapters-twelve-and-thirteen/',
  '10/31/read-the-lost-metal-by-brandon-sanderson-chapters-fourteen-and-fifteen/',
  '11/07/read-the-lost-metal-by-brandon-sanderson-chapters-sixteen-through-eighteen/',
  '11/14/read-the-lost-metal-by-brandon-sanderson-chapter-nineteen/',
]

# Step 1: fetch every page once. Pages are cached as lost-metal/<n>.html
# (1-based, in list order) and an existing file is never downloaded again.
links.each.with_index(1) do |link, episode|
  url = BASE + link
  target = "lost-metal/#{episode}.html"
  puts "Download #{url}"
  next if File.exist?(target)

  # Pass the arguments as an argv list so nothing goes through a shell.
  fetched = system('wget', '--no-clobber', url, '--output-document', target, '-o', '/dev/null')
  warn "wget failed for #{url}" unless fetched
end

# Step 2: all pages are on disk; build one combined HTML document.
stripped_tags = %w[h1 h2 h3 h4 hr].freeze
noisy_img_attrs = %w[srcset class loading sizes data-recalc-dims].freeze

html = +''
chapter = 0 # running count across all pages; 0 means the prologue

(1..links.length).each do |episode|
  # Block form closes the file handle as soon as Nokogiri has parsed it.
  document = File.open("lost-metal/#{episode}.html") { |io| Nokogiri::HTML(io) }
  content = document.css('.entry-content')

  # Only nodes after the 'ebook-link-wrapper' element and before the next
  # 'frontmatter' element survive; both markers are reset for every page.
  started = finished = false

  content.children.each do |node|
    node.remove if stripped_tags.include?(node.name)
    node.remove if node.text == ' '

    if node.name == 'p'
      node.children.each do |child|
        next unless child.name == 'img'

        # Every image inside a paragraph is treated as the start of a chapter.
        # NOTE(review): the heading is inserted as plain text here; if it was
        # meant to carry markup, confirm against the upstream script.
        heading = chapter.zero? ? "\n\nPrologue\n\n" : "\n\nChapter #{chapter}"
        node.add_previous_sibling(heading)
        chapter += 1

        noisy_img_attrs.each { |attr| child.delete(attr) }
      end
    end

    started = true if node.class?('ebook-link-wrapper')
    finished = true if node.class?('frontmatter') && started
    node.remove if !started || finished || node.class?('ebook-link-wrapper')
  end

  html << content.inner_html
  # NOTE(review): the original computed this page's URL right here without
  # using it; presumably the footer was meant to link to it -- confirm.
  html << "\n\nVisit tor.com for discussion.\n\n"
end

# The output directory is not created anywhere else in this script.
FileUtils.mkdir_p('books')
File.write('books/lost-metal.html', html)
puts '[html] Generated HTML file'

generate('lost-metal', :all)