diff --git a/_plugins/fetch_posts.rb b/_plugins/fetch_posts.rb index 8650719..bc16c48 100644 --- a/_plugins/fetch_posts.rb +++ b/_plugins/fetch_posts.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + require 'sanitize' require 'uri' require 'net/http' @@ -6,16 +7,16 @@ require 'set' require 'date' SANITIZE_CONFIG = { - :elements => ['a', 'span', 'p', 'i', 'br'], + elements: %w[a span p i br], - :attributes => { - 'a' => ['href', 'title'] + attributes: { + 'a' => %w[href title] }, - :protocols => { - 'a' => {'href' => ['http', 'https']} + protocols: { + 'a' => { 'href' => %w[http https] } } -} +}.freeze class PageWithoutAFile < Jekyll::Page def read_yaml(*) @@ -33,39 +34,43 @@ class BeatrootNews < Jekyll::Generator def fix_dates_for_dev(data) # Calculate number of days since 2023-11-30, the date of our fixture days_since = (DateTime.now - DateTime.new(2023, 11, 30)).floor - seconds_to_add = days_since * 86400 + seconds_to_add = days_since * 86_400 data.each do |article| - article['attributes']['modules']['updated_on'] = article['attributes']['modules']['updated_on'].to_i + seconds_to_add + article['attributes']['modules']['updated_on'] = + article['attributes']['modules']['updated_on'].to_i + seconds_to_add end data end - def make_request(url, retries=5) + def make_request(url, retries = 5) uri = URI.parse(url) http = Net::HTTP.new(uri.host, uri.port) http.use_ssl = true http.read_timeout = 10 http.open_timeout = 10 request = Net::HTTP::Get.new(uri.request_uri) - response = http.request(request) - if response.code == "200" - return response.body - elsif retries > 0 - Jekyll.logger.warn "News:", "Retrying #{url} (#{retries} retries left)" - return make_request(url, retries - 1) - else - Jekyll.logger.error "News:", "Failed to fetch #{url}" - raise "Failed to fetch news after 5 attempts" + begin + response = http.request(request) + return response.body if response.code == '200' + + raise StandardError, "Response code #{response.code} for #{url}" + rescue StandardError + if retries.positive? + Jekyll.logger.warn 'News:', "Retrying #{url} (#{retries} retries left)" + make_request(url, retries - 1) + else + raise StandardError, 'Failed to fetch news after 5 attempts' + end end end def get_content - unless Jekyll.env == 'production' - body = File.read '_development/fixture.json' - else - body = make_request(SOURCE_URL, 5) - end + body = if Jekyll.env == 'production' + make_request(SOURCE_URL, 5) + else + File.read '_development/fixture.json' + end data = JSON.parse(body)['data'] data = fix_dates_for_dev(data) unless Jekyll.env == 'production' data @@ -75,43 +80,43 @@ class BeatrootNews < Jekyll::Generator def generate(site) @site = site # Topic Counter - site.data['topics'] = site.data['topics'].to_h {|topic| [topic, 0]} + site.data['topics'] = site.data['topics'].to_h { |topic| [topic, 0] } get_content.each do |article| page = make_page(article['attributes']['modules']) - if page - site.pages << page - page['topics'].each do |topic| - unless site.data['topics'].include? topic - site.data['topics'][topic] = 0 - Jekyll.logger.warn "News:", "New Topic #{topic}" - end + next unless page + + site.pages << page + page['topics'].each do |topic| + unless site.data['topics'].include? topic + site.data['topics'][topic] = 0 + Jekyll.logger.warn 'News:', "New Topic #{topic}" end - site.data['topics'][page['topics'].first] += 1 end + site.data['topics'][page['topics'].first] += 1 end site.data['topics'].each do |topic, count| @site.pages << make_topic_page(topic, count) end - Jekyll.logger.info "News:", "Generated #{site.data['topics'].values.sum} article pages" + Jekyll.logger.info 'News:', "Generated #{site.data['topics'].values.sum} article pages" # These are fallback checks to make sure if we have a bug or get bad data, # we don't update the website with not enough news # better to fail the build than show an empty website. - raise "Not enough articles, not updating website" if site.data['topics'].values.sum < 10 - raise "Not enough topics, not updating website" if site.data['topics'].size < 5 + raise 'Not enough articles, not updating website' if site.data['topics'].values.sum < 10 + raise 'Not enough topics, not updating website' if site.data['topics'].size < 5 end private def make_topic_page(topic, count) - PageWithoutAFile.new(@site, __dir__, topic, "index.html").tap do |file| + PageWithoutAFile.new(@site, __dir__, topic, 'index.html').tap do |file| file.data.merge!( - 'title' => topic.capitalize, - 'layout' => 'topic', - 'topic' => topic, + 'title' => topic.capitalize, + 'layout' => 'topic', + 'topic' => topic, 'permalink' => "/#{topic}/", - 'article_count' => count + 'article_count' => count ) file.output end @@ -119,64 +124,63 @@ class BeatrootNews < Jekyll::Generator def timestamp(ts) d = Time.at(ts.to_i).to_datetime - d.new_offset("+0530") + d.new_offset('+0530') end def syndicated?(article) sources = article['sources'].map(&:downcase) - return !(sources & @site.config['syndication_sources']).empty? + !(sources & @site.config['syndication_sources']).empty? end def make_page(article) return nil if article['topic'].nil? return nil if article['body_json']['blocks'].empty? + n = DateTime.now - now = DateTime.new(n.year, n.month, n.day, 23, 59, 59, "+0530") + now = DateTime.new(n.year, n.month, n.day, 23, 59, 59, '+0530') date = timestamp(article['updated_on']) days_ago = (now - date).floor # We only return news for today(0) or yesterday(1) return nil if days_ago > 1 - PageWithoutAFile.new(@site, __dir__, article['id'], "index.html").tap do |file| - html = article['body_json']['blocks'].map{ |t| t['data']['text']}.join(" ") + PageWithoutAFile.new(@site, __dir__, article['id'], 'index.html').tap do |file| + html = article['body_json']['blocks'].map { |t| t['data']['text'] }.join(' ') html = Sanitize.fragment(html, SANITIZE_CONFIG) topics = article['topic'].map { |topic| topic.split('-').first } twt = nil if article['trigger_warning'] twt = article['trigger_warning_text'] || 'Trigger Warning' - unless twt.downcase.include? 'trigger' - twt = 'Trigger Warning: ' + twt - end + twt = "Trigger Warning: #{twt}" unless twt.downcase.include? 'trigger' html = "#{twt}
" + html end file.content = html - + file.data.merge!( - 'sources' => article['sources'].reject(&:empty?), - "date" => date, - "id" => article['id'], - "slug" => article['slug'], - "title" => article['title'], - "layout" => 'article', - "topics" => topics, - "days_ago" => days_ago, - "day" => days_ago == 0 ? "today" : "yesterday", + 'sources' => article['sources'].reject(&:empty?), + 'date' => date, + 'id' => article['id'], + 'slug' => article['slug'], + 'title' => article['title'], + 'layout' => 'article', + 'topics' => topics, + 'days_ago' => days_ago, + 'day' => days_ago.zero? ? 'today' : 'yesterday', # We use 300 characters here # and the SEO plugin strips down to 200 with ellepsis - "description" => Sanitize.fragment(html)[0...199] + "…", - "trigger_warning" => twt, - "syndicated" => syndicated?(article), - "seo" => { - "type" => "NewsArticle", - "links" => [ + 'description' => "#{Sanitize.fragment(html)[0...199]}…", + 'trigger_warning' => twt, + 'syndicated' => syndicated?(article), + 'seo' => { + 'type' => 'NewsArticle', + 'links' => [ "https://app.beatrootnews.com/#article-#{article['id']}", "https://beatrootnews.com/custom/share?type=article&slug=#{article['slug']}" ], - "date_modified" => date + 'date_modified' => date }, - "media_link" => article['media_link'] ? article['media_link'] : nil, + 'media_link' => article['media_link'] || nil # This is currently disabled because the page doesn't load in desktop # Or rather doesn't load at all for old links. # "canonical_url" => "https://app.beatrootnews.com/#article-#{article['id']}" @@ -184,5 +188,4 @@ class BeatrootNews < Jekyll::Generator file.output end end - -end \ No newline at end of file +end