require 'json'
require 'yaml'
require 'shellwords'
require 'curb'
require 'front_matter_parser'

# stories.json is iterated below as a mapping of year => list of story hashes.
stories = JSON.parse File.read 'stories.json'

# Tells whether a URL should be skipped instead of downloaded.
#
# @param url [String] the story URL
# @return [Boolean] true for URLs ending in ".pdf" or containing
#   "fermatslibrary", false otherwise
def is_hard_url(url)
  # We don't want PDFs for now, or fermatlibrary links
  url.end_with?('.pdf') || url.include?('fermatslibrary')
end

# Downloads a story, converts it to GitHub-flavoured markdown and saves it
# together with the story metadata as YAML front matter.
#
# Runs `python parse.py <url>` and pipes its output through pandoc
# (HTML -> GFM). Nothing is written when the converted markdown is
# 100 characters or shorter.
#
# @param url [String] the story URL handed to parse.py
# @param story [Hash] story metadata, serialised with #to_yaml
# @param fn [String] path of the markdown file to write
def process(url, story, fn)
  puts "[DL] #{url}"
  # Convert to GFM. The URL comes from stories.json, so it is shell-escaped
  # rather than interpolated raw: inside plain double quotes the shell would
  # still expand $(...), backticks and embedded quotes.
  markdown = `python parse.py #{Shellwords.escape(url)} | pandoc --from=html --to=gfm-raw_html-native_divs-native_spans-fenced_divs`
  return unless markdown.size > 100

  File.write(fn, "#{story.to_yaml}\n---\n#{markdown}")
  puts "[info] Saved"
end

# Writes a story file made of YAML front matter followed by its content.
#
# @param file_name [String] path of the file to (over)write
# @param front_matter [Hash] metadata, serialised with #to_yaml
# @param content [String] the body placed after the front matter
def write_story(file_name, front_matter, content)
  file_contents = "#{front_matter.to_yaml}\n---\n#{content}"
  puts "Writing to #{file_name}"
  File.write(file_name, file_contents)
end

stories.each do |year, stories_by_year|
  stories_by_year.each do |story|
    # Drop this key so it does not end up in the written front matter.
    story.delete '_highlightResult'
    id = story['objectID']
    url = story['url']
    fn = "_stories/#{year}/#{id}.md"

    next if url.nil?

    if is_hard_url(url)
      # Remove any previously saved copy of a story we no longer keep.
      File.delete fn if File.exist? fn
      next
    end

    # The year is the directory component of fn, i.e. the key being iterated.
    story['year'] = year.to_i

    # Stories without a file on disk are skipped: the original script hit an
    # unconditional `next` here before its download code. To re-enable
    # fetching, call `process url, story, fn` at this point and rescue
    # StandardError to move on to the next story.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    next unless File.exist? fn

    puts fn
    parsed = FrontMatterParser::Parser.parse_file(fn)

    # If no frontmatter or if it does not contain the year
    if parsed.front_matter.nil? || !parsed.front_matter.key?('year')
      write_story(fn, story, parsed.content)
    end

    # File is empty (no content, or a single line of it): fetch it again.
    body = parsed.content.strip
    if body.empty? || body.split("\n").size == 1
      File.delete fn
      process url, story, fn
    end
  end
end