Fix formatting and clean up Wok Prime

This commit is contained in:
Nemo 2020-04-26 06:56:36 +05:30
parent 93d337218f
commit 5e6d5b9da5
2 changed files with 20 additions and 12 deletions

View File

@@ -48,6 +48,7 @@ def gen_epub(name, format)
to 'epub'
epub_metadata "metadata/#{name}.xml"
epub_cover_image "covers/#{name}.jpg"
metadata title: name
data_dir Dir.pwd
output "books/#{name}.epub"
end.convert File.read("books/#{name}.html")

View File

@@ -11,7 +11,6 @@ BASE = 'https://brandonsanderson.com/'
links = [
'the-way-of-kings-prime-jeksonsonvallano/',
'altered-perceptions/',
'way-of-kings-prime-chapter-1-dalenar-1/',
'way-of-kings-prime-chapter-3-merin-1/',
'way-of-kings-prime-chapter-5-merin-2/',
@@ -28,13 +27,13 @@ links = [
'the-way-of-kings-chapter-13-d/',
'the-way-of-kings-chapter-15-d/',
'the-way-of-kings-chapter-16-d/',
'the-way-of-kings-chapter-18-d/'
'the-way-of-kings-chapter-18-d/',
'the-way-of-kings-chapter-20-d/',
'the-way-of-kings-chapters-23-and-24-d/',
'the-way-of-kings-chapter-26-d/',
'the-way-of-kings-chapter-28-d/',
'the-way-of-kings-early-brainstorms-outlines/',
'the-way-of-kings-tiens-death-attempt-1/',
'the-way-of-kings-tiens-death-attempt-1/'
]
episode = 1
@@ -48,26 +47,34 @@ links.each do |link|
episode += 1
end
html = ''
html = '<html lang=en><head><title>Way of Kings Prime</title></head><body>'
(1..(links.length)).each do |i|
complete_html = Nokogiri::HTML(open("wok-prime/#{i}.html"))
page = complete_html.css('article')[0]
page = complete_html.css('.vc_col-sm-7 .vc_column-inner .wpb_content_element .wpb_wrapper')[0]
ending = false
page.traverse do |e|
whitelist = %w[p div span article h1 h2 h3 h4 a h5 h6 i text]
blacklist = ['.post-meta', '.addthis_toolbox', '.book-links', 'post-nav']
e.remove if whitelist.include?(e.name) == false
begin
page.traverse do |e|
whitelist = %w[p div span article h1 h2 h3 h4 a h5 h6 i text]
blacklist = ['.post-meta', '.addthis_toolbox', '.book-links', 'post-nav']
e.remove if whitelist.include?(e.name) == false
blacklist.each do |selector|
page.css(selector).each(&:remove)
blacklist.each do |selector|
page.css(selector).each(&:remove)
end
end
rescue Exception => e
puts e
puts page.class
end
html += page.inner_html
html += "<h1>#{links[i - 1][0...-1]}</h1>" + page.inner_html
end
html += '</body></html>'
File.open('books/wok-prime.html', 'w') { |file| file.write(html) }
puts '[html] Generated HTML file'