# frozen_string_literal: true

# Synchronises the stories listed in stories.json with Markdown files stored
# at _stories/<year>/<objectID>.md:
#   * stories without a URL are skipped;
#   * stories whose URL is "hard" (see is_hard_url) have their file removed;
#   * existing files are cleaned up (empty ones deleted, missing front matter
#     added);
#   * missing files are downloaded through MARKDOWN_SERVICE_URL.

require 'json'
require 'yaml'
require 'fileutils'
require 'curb'
require 'front_matter_parser'

# Endpoint queried for each story URL.
# NOTE(review): presumably returns a Markdown rendering of the page -- confirm.
MARKDOWN_SERVICE_URL = 'http://heckyesmarkdown.com/go/'

# Per-request timeout handed to Curl, in seconds.
DOWNLOAD_TIMEOUT_SECONDS = 3

# Response bodies must be strictly larger than this many characters to be saved.
BODY_SIZE_THRESHOLD = 100

stories = JSON.parse(File.read('stories.json'))

# Tells whether a URL should not be mirrored at all.
#
# @param url [String] the story URL
# @return [Boolean] true for URLs ending in ".pdf" (we don't want PDFs for
#   now) or containing "fermatslibrary"; false otherwise
def is_hard_url(url)
  url.end_with?('.pdf') || url.include?('fermatslibrary')
end

# Writes a story file: the story metadata as YAML, a "---" separator line,
# then the body text.
#
# @param path [String] destination file path
# @param story [Hash] story metadata, serialised with #to_yaml
# @param body [String] text placed after the separator
# @return [void]
def write_story(path, story, body)
  # The per-year directory may not exist yet on a first run.
  FileUtils.mkdir_p(File.dirname(path))
  File.write(path, "#{story.to_yaml}\n---\n#{body}")
end

# Cleans up a story file that already exists on disk: deletes it when it has
# no content, otherwise adds the story metadata when front matter is missing.
#
# @param path [String] path of the existing file
# @param story [Hash] story metadata to use as front matter
# @return [void]
def refresh_existing_story(path, story)
  parsed = FrontMatterParser::Parser.parse_file(path)

  if parsed.content.strip.empty?
    # Nothing worth keeping; checked first so an empty file is never
    # rewritten just to be deleted right after.
    File.delete(path)
  elsif parsed.front_matter.nil?
    # NOTE(review): verify that the parser really reports missing front matter
    # as nil (rather than an empty hash) for the gem version in use.
    write_story(path, story, parsed.content)
  end
end

# Fetches a story through MARKDOWN_SERVICE_URL and saves it when the response
# body is larger than BODY_SIZE_THRESHOLD. Failures are reported on stderr and
# do not abort the run.
#
# @param path [String] destination file path
# @param story [Hash] story metadata to use as front matter
# @param url [String] the story URL
# @return [void]
def download_story(path, story, url)
  puts "[DL] #{url}"

  response = Curl.get(MARKDOWN_SERVICE_URL, { read: '1', u: url }) do |request|
    request.timeout = DOWNLOAD_TIMEOUT_SECONDS
  end

  # to_s guards against a missing body.
  body = response.body_str.to_s
  return unless body.size > BODY_SIZE_THRESHOLD

  write_story(path, story, body)
  puts '[info] Saved'
rescue StandardError => e
  # Best effort: log the failure and let the caller move on to the next story.
  warn "[error] #{url}: #{e.class}: #{e.message}"
end

stories.each do |year, stories_by_year|
  stories_by_year.each do |story|
    story.delete('_highlightResult')

    url = story['url']
    next if url.nil?

    path = "_stories/#{year}/#{story['objectID']}.md"

    if is_hard_url(url)
      File.delete(path) if File.exist?(path)
    elsif File.exist?(path)
      refresh_existing_story(path, story)
    else
      download_story(path, story, url)
    end
  end
end