hn-classics/add-metadata.rb

67 lines
1.3 KiB
Ruby
Raw Normal View History

require 'json'
require 'yaml'
2018-02-25 10:48:00 +00:00
require 'curb'
require 'front_matter_parser'
# Read stories.json from the working directory and parse it; the result is
# iterated below as year => list-of-stories pairs.
stories_json = File.read('stories.json')
stories = JSON.parse(stories_json)
2018-02-25 20:11:00 +00:00
# Tells whether a URL is one we do not want to handle.
#
# A URL is considered "hard" when it ends in ".pdf" (compared
# case-insensitively, so ".PDF" counts too) or when it contains the
# substring "fermatslibrary".
#
# @param url [String] the URL to inspect; must not be nil
# @return [Boolean] true if the URL is a PDF or fermatslibrary link,
#   false otherwise
def is_hard_url(url)
  # We don't want PDFs for now
  return true if url.downcase.end_with?('.pdf')

  # Or fermatslibrary links
  url.include?('fermatslibrary')
end
# For every story (grouped by year) maintain a Markdown file at
# _stories/<year>/<objectID>.md:
#   * stories without a URL are skipped;
#   * stories with a "hard" URL get any existing file removed;
#   * an existing file is deleted when its content is blank, or gets the
#     story's YAML prepended when it has no front matter;
#   * otherwise the page is fetched through heckyesmarkdown.com and saved
#     together with the story's YAML.
stories.each do |year, stories_by_year|
  stories_by_year.each do |story|
    story.delete '_highlightResult'
    id = story['objectID']
    url = story['url']
    fn = "_stories/#{year}/#{id}.md"

    next if url.nil?

    if is_hard_url(url)
      File.delete fn if File.exist? fn
      next
    end

    # File.exist? (not the removed File.exists? alias) keeps this working on
    # Ruby 3.2 and later.
    if File.exist? fn
      parsed = FrontMatterParser::Parser.parse_file(fn)

      if parsed.content.strip.empty?
        # File is empty: remove it rather than writing front matter to a
        # file that would be deleted right afterwards.
        File.delete fn
      elsif parsed.front_matter.nil?
        File.write(fn, "#{story.to_yaml}\n---\n#{parsed.content}")
      end
    else
      puts "[DL] #{url}"

      begin
        response = Curl.get('http://heckyesmarkdown.com/go/', { read: '1', u: url }) do |easy|
          easy.timeout = 3
        end
        body = response.body_str

        # Only bodies longer than 100 characters are saved.
        if body.size > 100
          File.write(fn, "#{story.to_yaml}\n---\n#{body}")
          puts "[info] Saved"
        end
      rescue StandardError => e
        # Best effort: report the failure and carry on with the next story.
        warn "[error] #{url}: #{e.message}"
      end
    end
  end
end