hn-classics/add-metadata.rb

67 lines
1.3 KiB
Ruby

require 'json'
require 'yaml'
require 'curb'
require 'front_matter_parser'
# Load the story list from stories.json in the current working directory.
# The loop further down iterates it as |year, stories-for-that-year| pairs.
stories = JSON.parse(File.read('stories.json'))
# Tells whether +url+ points at content this script does not try to convert.
#
# A URL counts as "hard" when it ends with the exact, case-sensitive suffix
# ".pdf" (we don't want PDFs for now) or when it contains the substring
# "fermatslibrary" anywhere.
#
# @param url [String] the story URL to inspect
# @return [Boolean] true for hard URLs, false otherwise
def is_hard_url(url)
  url.end_with?('.pdf') || url.include?('fermatslibrary')
end
# For every story (grouped by year), keep a Markdown file at
# _stories/<year>/<objectID>.md that starts with the story's metadata as YAML:
#
# * stories without a URL are skipped;
# * stories whose URL is "hard" (see is_hard_url) have any saved file removed;
# * existing files are deleted when their content is blank, or get the
#   metadata prepended when the parser reports no front matter;
# * missing files are downloaded through heckyesmarkdown.com.
stories.each do |year, year_stories|
  year_stories.each do |story|
    # Drop this key so it does not end up in the YAML written below.
    story.delete '_highlightResult'
    id = story['objectID']
    url = story['url']
    fn = "_stories/#{year}/#{id}.md"
    next if url.nil?

    if is_hard_url(url)
      File.delete fn if File.exist? fn
      next
    end

    # File.exist? (not the removed File.exists? alias) keeps this working on
    # Ruby 3.2 and later.
    if File.exist? fn
      parsed = FrontMatterParser::Parser.parse_file(fn)
      if parsed.content.strip.empty?
        # File is empty: nothing worth keeping, so do not bother rewriting it.
        File.delete fn
      elsif parsed.front_matter.nil?
        # NOTE(review): some front_matter_parser versions may return an empty
        # Hash rather than nil when a file has no front matter - confirm this
        # branch is reachable with the installed version.
        content = "#{story.to_yaml}\n---\n#{parsed.content}"
        File.open(fn, "w") { |file| file.write content }
      end
    else
      puts "[DL] #{url}"
      begin
        response = Curl.get("http://heckyesmarkdown.com/go/", {read: '1', u: url}) do |curl|
          # Presumably seconds (curb's Curl::Easy#timeout) - confirm.
          curl.timeout = 3
        end
        # Bodies of 100 characters or fewer are not saved.
        if response.body_str.size > 100
          content = "#{story.to_yaml}\n---\n#{response.body_str}"
          File.open(fn, "w") { |file| file.write content }
          puts "[info] Saved"
        end
      rescue StandardError => e
        # Best effort: report the failure and carry on with the next story
        # instead of silently dropping it.
        warn "[error] #{url}: #{e.class}: #{e.message}"
      end
    end
  end
end