# Saves the pages linked from stories.json as Markdown files with YAML front
# matter under _stories/<year>/.
require 'json'
require 'open3'
require 'yaml'

require 'curb'
require 'front_matter_parser'
|
|
|
|
# Parsed contents of stories.json; iterated further down as pairs of
# year => list of stories.
stories = JSON.parse(File.read('stories.json'))
|
|
|
|
# Reports whether a story URL points at content this script does not convert.
#
# Two kinds of links are rejected for now: URLs that end in ".pdf" and URLs
# that contain "fermatslibrary" anywhere.
#
# @param url [String] the story URL
# @return [Boolean] true when the URL should be skipped
def is_hard_url(url)
  # We don't want PDFs or fermatslibrary links for now.
  url.end_with?('.pdf') || url.include?('fermatslibrary')
end
|
|
|
|
|
|
# Converts the page behind a story URL to Markdown and saves it with YAML
# front matter.
#
# `python parse.py <url>` is run and its output is piped into pandoc, which
# reads it as HTML and emits GitHub-flavoured Markdown. Nothing is written
# when the result is 100 characters or shorter.
#
# @param url [String] address handed to parse.py
# @param story [Hash] story metadata, serialised as the front matter
# @param fn [String] path of the Markdown file to write
# @return [nil]
def process(url, story, fn)
  puts "[DL] #{url}"

  # Convert to GFM. Both commands are passed as argument arrays so that no
  # shell ever parses the URL: it comes from stories.json, and interpolating
  # it into a backtick command would let quotes, `$(...)` or backticks inside
  # it execute arbitrary commands.
  markdown = Open3.pipeline_r(
    ['python', 'parse.py', url],
    ['pandoc', '--from=html', '--to=gfm-raw_html-native_divs-native_spans-fenced_divs']
  ) { |pandoc_out, _wait_threads| pandoc_out.read }

  # Very short output is treated as a failed conversion and not saved.
  return unless markdown.size > 100

  File.write(fn, "#{story.to_yaml}\n---\n#{markdown}")
  puts "[info] Saved"
end
|
|
|
|
# Writes a story file: the front matter as YAML, a blank line, a "---"
# separator line, then the body.
#
# @param file_name [String] path of the file to create or overwrite
# @param front_matter [#to_yaml] metadata serialised ahead of the body
# @param content [String] text placed after the separator
# @return [Integer] number of bytes written
def write_story(file_name, front_matter, content)
  file_contents = "#{front_matter.to_yaml}\n---\n#{content}"
  puts "Writing to #{file_name}"
  File.write(file_name, file_contents)
end
|
|
|
|
# Walk every story, grouped by year, and bring its Markdown file under
# _stories/<year>/ up to date.
stories.each do |year, stories_by_year|
  stories_by_year.each do |story|
    story.delete '_highlightResult'
    id = story['objectID']
    url = story['url']

    fn = "_stories/#{year}/#{id}.md"

    next if url.nil?

    if is_hard_url(url)
      # Remove anything saved earlier for a URL that is now rejected.
      File.delete fn if File.exist? fn
      next
    end

    story['year'] = year.to_i

    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    if File.exist? fn
      puts fn
      parsed = FrontMatterParser::Parser.parse_file(fn)
      front_matter = parsed.front_matter

      # If no frontmatter or if it does not contain the year
      write_story(fn, story, parsed.content) if front_matter.nil? || !front_matter.key?('year')

      # File is empty (no body, or a single line of body): fetch it again.
      if parsed.content.strip.split("\n").size <= 1
        File.delete fn
        process url, story, fn
      end
    elsif ENV['FETCH_MISSING'] == '1'
      # Stories with no file yet are skipped unless FETCH_MISSING=1 is set.
      # Skipping is the default because an unconditional `next` used to sit in
      # front of this download, which made it unreachable.
      begin
        process url, story, fn
      rescue StandardError => e
        # Report the failure and carry on with the next story.
        warn "[error] #{url}: #{e.message}"
      end
    end
  end
end
|