Fix formatting and cleanup Wok Prime
This commit is contained in:
parent
93d337218f
commit
5e6d5b9da5
|
@ -48,6 +48,7 @@ def gen_epub(name, format)
|
|||
to 'epub'
|
||||
epub_metadata "metadata/#{name}.xml"
|
||||
epub_cover_image "covers/#{name}.jpg"
|
||||
metadata title: name
|
||||
data_dir Dir.pwd
|
||||
output "books/#{name}.epub"
|
||||
end.convert File.read("books/#{name}.html")
|
||||
|
|
31
wok-prime.rb
31
wok-prime.rb
|
@ -11,7 +11,6 @@ BASE = 'https://brandonsanderson.com/'
|
|||
|
||||
links = [
|
||||
'the-way-of-kings-prime-jeksonsonvallano/',
|
||||
'altered-perceptions/',
|
||||
'way-of-kings-prime-chapter-1-dalenar-1/',
|
||||
'way-of-kings-prime-chapter-3-merin-1/',
|
||||
'way-of-kings-prime-chapter-5-merin-2/',
|
||||
|
@ -28,13 +27,13 @@ links = [
|
|||
'the-way-of-kings-chapter-13-d/',
|
||||
'the-way-of-kings-chapter-15-d/',
|
||||
'the-way-of-kings-chapter-16-d/',
|
||||
'the-way-of-kings-chapter-18-d/'
|
||||
'the-way-of-kings-chapter-18-d/',
|
||||
'the-way-of-kings-chapter-20-d/',
|
||||
'the-way-of-kings-chapters-23-and-24-d/',
|
||||
'the-way-of-kings-chapter-26-d/',
|
||||
'the-way-of-kings-chapter-28-d/',
|
||||
'the-way-of-kings-early-brainstorms-outlines/',
|
||||
'the-way-of-kings-tiens-death-attempt-1/',
|
||||
'the-way-of-kings-tiens-death-attempt-1/'
|
||||
]
|
||||
|
||||
episode = 1
|
||||
|
@ -48,26 +47,34 @@ links.each do |link|
|
|||
episode += 1
|
||||
end
|
||||
|
||||
html = ''
|
||||
html = '<html lang=en><head><title>Way of Kings Prime</title></head><body>'
|
||||
|
||||
(1..(links.length)).each do |i|
|
||||
complete_html = Nokogiri::HTML(open("wok-prime/#{i}.html"))
|
||||
page = complete_html.css('article')[0]
|
||||
page = complete_html.css('.vc_col-sm-7 .vc_column-inner .wpb_content_element .wpb_wrapper')[0]
|
||||
|
||||
ending = false
|
||||
|
||||
page.traverse do |e|
|
||||
whitelist = %w[p div span article h1 h2 h3 h4 a h5 h6 i text]
|
||||
blacklist = ['.post-meta', '.addthis_toolbox', '.book-links', 'post-nav']
|
||||
e.remove if whitelist.include?(e.name) == false
|
||||
begin
|
||||
page.traverse do |e|
|
||||
whitelist = %w[p div span article h1 h2 h3 h4 a h5 h6 i text]
|
||||
blacklist = ['.post-meta', '.addthis_toolbox', '.book-links', 'post-nav']
|
||||
e.remove if whitelist.include?(e.name) == false
|
||||
|
||||
blacklist.each do |selector|
|
||||
page.css(selector).each(&:remove)
|
||||
blacklist.each do |selector|
|
||||
page.css(selector).each(&:remove)
|
||||
end
|
||||
end
|
||||
rescue Exception => e
|
||||
puts e
|
||||
puts page.class
|
||||
end
|
||||
|
||||
html += page.inner_html
|
||||
html += "<h1>#{links[i - 1][0...-1]}</h1>" + page.inner_html
|
||||
end
|
||||
|
||||
html += '</body></html>'
|
||||
|
||||
File.open('books/wok-prime.html', 'w') { |file| file.write(html) }
|
||||
puts '[html] Generated HTML file'
|
||||
|
||||
|
|
Loading…
Reference in New Issue