diff --git a/lost-metal.rb b/lost-metal.rb index 8f24ee3..d2ac677 100644 --- a/lost-metal.rb +++ b/lost-metal.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require 'uri' require 'date' require 'fileutils' require 'nokogiri' @@ -10,10 +11,13 @@ BASE = 'https://www.tor.com/2022/' links = [ '09/19/read-the-lost-metal-by-brandon-sanderson-prologue-and-chapters-1-2/', + '09/26/read-the-lost-metal-by-brandon-sanderson-chapters-3-4/' ] episode = 1 +counter = 0 + links.each do |link| url = BASE + link puts "Download #{url}" @@ -24,17 +28,36 @@ links.each do |link| end # Now we have all the files -html = '

Prologue

' +html = '' for i in 1..(links.length) page = Nokogiri::HTML(open("lost-metal/#{i}.html")).css('.entry-content') start = ending = false page.children.each do |e| - if e.name == 'h4' - e.name = 'h1' + if ['h1', 'h2', 'h3', 'h4', 'hr'].include? e.name + e.remove end - if e.name == 'h3' - e.name = 'div' + if e.text ==' ' + e.remove + end + + if e.name == 'p' + e.children.each do |ee| + if ee.name == 'img' + u = URI::parse ee['src'] + if counter == 0 + e.add_previous_sibling "

Prologue

" + else + e.add_previous_sibling "

Chapter #{counter}" + end + counter += 1 + ee.delete 'srcset' + ee.delete 'class' + ee.delete 'loading' + ee.delete 'sizes' + ee.delete 'data-recalc-dims' + end + end end start = true if e.class?('ebook-link-wrapper')