catch errors as well for fetch retry
This commit is contained in:
parent
e72862c80d
commit
8a6a1d9412
|
@ -1,4 +1,5 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
require 'sanitize'
|
require 'sanitize'
|
||||||
require 'uri'
|
require 'uri'
|
||||||
require 'net/http'
|
require 'net/http'
|
||||||
|
@ -6,16 +7,16 @@ require 'set'
|
||||||
require 'date'
|
require 'date'
|
||||||
|
|
||||||
SANITIZE_CONFIG = {
|
SANITIZE_CONFIG = {
|
||||||
:elements => ['a', 'span', 'p', 'i', 'br'],
|
elements: %w[a span p i br],
|
||||||
|
|
||||||
:attributes => {
|
attributes: {
|
||||||
'a' => ['href', 'title']
|
'a' => %w[href title]
|
||||||
},
|
},
|
||||||
|
|
||||||
:protocols => {
|
protocols: {
|
||||||
'a' => {'href' => ['http', 'https']}
|
'a' => { 'href' => %w[http https] }
|
||||||
}
|
}
|
||||||
}
|
}.freeze
|
||||||
|
|
||||||
class PageWithoutAFile < Jekyll::Page
|
class PageWithoutAFile < Jekyll::Page
|
||||||
def read_yaml(*)
|
def read_yaml(*)
|
||||||
|
@ -33,39 +34,43 @@ class BeatrootNews < Jekyll::Generator
|
||||||
def fix_dates_for_dev(data)
|
def fix_dates_for_dev(data)
|
||||||
# Calculate number of days since 2023-11-30, the date of our fixture
|
# Calculate number of days since 2023-11-30, the date of our fixture
|
||||||
days_since = (DateTime.now - DateTime.new(2023, 11, 30)).floor
|
days_since = (DateTime.now - DateTime.new(2023, 11, 30)).floor
|
||||||
seconds_to_add = days_since * 86400
|
seconds_to_add = days_since * 86_400
|
||||||
data.each do |article|
|
data.each do |article|
|
||||||
article['attributes']['modules']['updated_on'] = article['attributes']['modules']['updated_on'].to_i + seconds_to_add
|
article['attributes']['modules']['updated_on'] =
|
||||||
|
article['attributes']['modules']['updated_on'].to_i + seconds_to_add
|
||||||
end
|
end
|
||||||
|
|
||||||
data
|
data
|
||||||
end
|
end
|
||||||
|
|
||||||
def make_request(url, retries=5)
|
def make_request(url, retries = 5)
|
||||||
uri = URI.parse(url)
|
uri = URI.parse(url)
|
||||||
http = Net::HTTP.new(uri.host, uri.port)
|
http = Net::HTTP.new(uri.host, uri.port)
|
||||||
http.use_ssl = true
|
http.use_ssl = true
|
||||||
http.read_timeout = 10
|
http.read_timeout = 10
|
||||||
http.open_timeout = 10
|
http.open_timeout = 10
|
||||||
request = Net::HTTP::Get.new(uri.request_uri)
|
request = Net::HTTP::Get.new(uri.request_uri)
|
||||||
response = http.request(request)
|
begin
|
||||||
if response.code == "200"
|
response = http.request(request)
|
||||||
return response.body
|
return response.body if response.code == '200'
|
||||||
elsif retries > 0
|
|
||||||
Jekyll.logger.warn "News:", "Retrying #{url} (#{retries} retries left)"
|
raise StandardError, "Response code #{response.code} for #{url}"
|
||||||
return make_request(url, retries - 1)
|
rescue StandardError
|
||||||
else
|
if retries.positive?
|
||||||
Jekyll.logger.error "News:", "Failed to fetch #{url}"
|
Jekyll.logger.warn 'News:', "Retrying #{url} (#{retries} retries left)"
|
||||||
raise "Failed to fetch news after 5 attempts"
|
make_request(url, retries - 1)
|
||||||
|
else
|
||||||
|
raise StandardError, 'Failed to fetch news after 5 attempts'
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_content
|
def get_content
|
||||||
unless Jekyll.env == 'production'
|
body = if Jekyll.env == 'production'
|
||||||
body = File.read '_development/fixture.json'
|
make_request(SOURCE_URL, 5)
|
||||||
else
|
else
|
||||||
body = make_request(SOURCE_URL, 5)
|
File.read '_development/fixture.json'
|
||||||
end
|
end
|
||||||
data = JSON.parse(body)['data']
|
data = JSON.parse(body)['data']
|
||||||
data = fix_dates_for_dev(data) unless Jekyll.env == 'production'
|
data = fix_dates_for_dev(data) unless Jekyll.env == 'production'
|
||||||
data
|
data
|
||||||
|
@ -75,43 +80,43 @@ class BeatrootNews < Jekyll::Generator
|
||||||
def generate(site)
|
def generate(site)
|
||||||
@site = site
|
@site = site
|
||||||
# Topic Counter
|
# Topic Counter
|
||||||
site.data['topics'] = site.data['topics'].to_h {|topic| [topic, 0]}
|
site.data['topics'] = site.data['topics'].to_h { |topic| [topic, 0] }
|
||||||
get_content.each do |article|
|
get_content.each do |article|
|
||||||
page = make_page(article['attributes']['modules'])
|
page = make_page(article['attributes']['modules'])
|
||||||
if page
|
next unless page
|
||||||
site.pages << page
|
|
||||||
page['topics'].each do |topic|
|
site.pages << page
|
||||||
unless site.data['topics'].include? topic
|
page['topics'].each do |topic|
|
||||||
site.data['topics'][topic] = 0
|
unless site.data['topics'].include? topic
|
||||||
Jekyll.logger.warn "News:", "New Topic #{topic}"
|
site.data['topics'][topic] = 0
|
||||||
end
|
Jekyll.logger.warn 'News:', "New Topic #{topic}"
|
||||||
end
|
end
|
||||||
site.data['topics'][page['topics'].first] += 1
|
|
||||||
end
|
end
|
||||||
|
site.data['topics'][page['topics'].first] += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
site.data['topics'].each do |topic, count|
|
site.data['topics'].each do |topic, count|
|
||||||
@site.pages << make_topic_page(topic, count)
|
@site.pages << make_topic_page(topic, count)
|
||||||
end
|
end
|
||||||
|
|
||||||
Jekyll.logger.info "News:", "Generated #{site.data['topics'].values.sum} article pages"
|
Jekyll.logger.info 'News:', "Generated #{site.data['topics'].values.sum} article pages"
|
||||||
# These are fallback checks to make sure if we have a bug or get bad data,
|
# These are fallback checks to make sure if we have a bug or get bad data,
|
||||||
# we don't update the website with not enough news
|
# we don't update the website with not enough news
|
||||||
# better to fail the build than show an empty website.
|
# better to fail the build than show an empty website.
|
||||||
raise "Not enough articles, not updating website" if site.data['topics'].values.sum < 10
|
raise 'Not enough articles, not updating website' if site.data['topics'].values.sum < 10
|
||||||
raise "Not enough topics, not updating website" if site.data['topics'].size < 5
|
raise 'Not enough topics, not updating website' if site.data['topics'].size < 5
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def make_topic_page(topic, count)
|
def make_topic_page(topic, count)
|
||||||
PageWithoutAFile.new(@site, __dir__, topic, "index.html").tap do |file|
|
PageWithoutAFile.new(@site, __dir__, topic, 'index.html').tap do |file|
|
||||||
file.data.merge!(
|
file.data.merge!(
|
||||||
'title' => topic.capitalize,
|
'title' => topic.capitalize,
|
||||||
'layout' => 'topic',
|
'layout' => 'topic',
|
||||||
'topic' => topic,
|
'topic' => topic,
|
||||||
'permalink' => "/#{topic}/",
|
'permalink' => "/#{topic}/",
|
||||||
'article_count' => count
|
'article_count' => count
|
||||||
)
|
)
|
||||||
file.output
|
file.output
|
||||||
end
|
end
|
||||||
|
@ -119,64 +124,63 @@ class BeatrootNews < Jekyll::Generator
|
||||||
|
|
||||||
def timestamp(ts)
|
def timestamp(ts)
|
||||||
d = Time.at(ts.to_i).to_datetime
|
d = Time.at(ts.to_i).to_datetime
|
||||||
d.new_offset("+0530")
|
d.new_offset('+0530')
|
||||||
end
|
end
|
||||||
|
|
||||||
def syndicated?(article)
|
def syndicated?(article)
|
||||||
sources = article['sources'].map(&:downcase)
|
sources = article['sources'].map(&:downcase)
|
||||||
return !(sources & @site.config['syndication_sources']).empty?
|
!(sources & @site.config['syndication_sources']).empty?
|
||||||
end
|
end
|
||||||
|
|
||||||
def make_page(article)
|
def make_page(article)
|
||||||
return nil if article['topic'].nil?
|
return nil if article['topic'].nil?
|
||||||
return nil if article['body_json']['blocks'].empty?
|
return nil if article['body_json']['blocks'].empty?
|
||||||
|
|
||||||
n = DateTime.now
|
n = DateTime.now
|
||||||
now = DateTime.new(n.year, n.month, n.day, 23, 59, 59, "+0530")
|
now = DateTime.new(n.year, n.month, n.day, 23, 59, 59, '+0530')
|
||||||
date = timestamp(article['updated_on'])
|
date = timestamp(article['updated_on'])
|
||||||
days_ago = (now - date).floor
|
days_ago = (now - date).floor
|
||||||
# We only return news for today(0) or yesterday(1)
|
# We only return news for today(0) or yesterday(1)
|
||||||
return nil if days_ago > 1
|
return nil if days_ago > 1
|
||||||
|
|
||||||
PageWithoutAFile.new(@site, __dir__, article['id'], "index.html").tap do |file|
|
PageWithoutAFile.new(@site, __dir__, article['id'], 'index.html').tap do |file|
|
||||||
html = article['body_json']['blocks'].map{ |t| t['data']['text']}.join(" ")
|
html = article['body_json']['blocks'].map { |t| t['data']['text'] }.join(' ')
|
||||||
html = Sanitize.fragment(html, SANITIZE_CONFIG)
|
html = Sanitize.fragment(html, SANITIZE_CONFIG)
|
||||||
topics = article['topic'].map { |topic| topic.split('-').first }
|
topics = article['topic'].map { |topic| topic.split('-').first }
|
||||||
twt = nil
|
twt = nil
|
||||||
|
|
||||||
if article['trigger_warning']
|
if article['trigger_warning']
|
||||||
twt = article['trigger_warning_text'] || 'Trigger Warning'
|
twt = article['trigger_warning_text'] || 'Trigger Warning'
|
||||||
unless twt.downcase.include? 'trigger'
|
twt = "Trigger Warning: #{twt}" unless twt.downcase.include? 'trigger'
|
||||||
twt = 'Trigger Warning: ' + twt
|
|
||||||
end
|
|
||||||
html = "<b>#{twt}</b><br>" + html
|
html = "<b>#{twt}</b><br>" + html
|
||||||
end
|
end
|
||||||
|
|
||||||
file.content = html
|
file.content = html
|
||||||
|
|
||||||
file.data.merge!(
|
file.data.merge!(
|
||||||
'sources' => article['sources'].reject(&:empty?),
|
'sources' => article['sources'].reject(&:empty?),
|
||||||
"date" => date,
|
'date' => date,
|
||||||
"id" => article['id'],
|
'id' => article['id'],
|
||||||
"slug" => article['slug'],
|
'slug' => article['slug'],
|
||||||
"title" => article['title'],
|
'title' => article['title'],
|
||||||
"layout" => 'article',
|
'layout' => 'article',
|
||||||
"topics" => topics,
|
'topics' => topics,
|
||||||
"days_ago" => days_ago,
|
'days_ago' => days_ago,
|
||||||
"day" => days_ago == 0 ? "today" : "yesterday",
|
'day' => days_ago.zero? ? 'today' : 'yesterday',
|
||||||
# We use 300 characters here
|
# We use 300 characters here
|
||||||
# and the SEO plugin strips down to 200 with ellipsis
|
# and the SEO plugin strips down to 200 with ellipsis
|
||||||
"description" => Sanitize.fragment(html)[0...199] + "…",
|
'description' => "#{Sanitize.fragment(html)[0...199]}…",
|
||||||
"trigger_warning" => twt,
|
'trigger_warning' => twt,
|
||||||
"syndicated" => syndicated?(article),
|
'syndicated' => syndicated?(article),
|
||||||
"seo" => {
|
'seo' => {
|
||||||
"type" => "NewsArticle",
|
'type' => 'NewsArticle',
|
||||||
"links" => [
|
'links' => [
|
||||||
"https://app.beatrootnews.com/#article-#{article['id']}",
|
"https://app.beatrootnews.com/#article-#{article['id']}",
|
||||||
"https://beatrootnews.com/custom/share?type=article&slug=#{article['slug']}"
|
"https://beatrootnews.com/custom/share?type=article&slug=#{article['slug']}"
|
||||||
],
|
],
|
||||||
"date_modified" => date
|
'date_modified' => date
|
||||||
},
|
},
|
||||||
"media_link" => article['media_link'] ? article['media_link'] : nil,
|
'media_link' => article['media_link'] || nil
|
||||||
# This is currently disabled because the page doesn't load in desktop
|
# This is currently disabled because the page doesn't load in desktop
|
||||||
# Or rather doesn't load at all for old links.
|
# Or rather doesn't load at all for old links.
|
||||||
# "canonical_url" => "https://app.beatrootnews.com/#article-#{article['id']}"
|
# "canonical_url" => "https://app.beatrootnews.com/#article-#{article['id']}"
|
||||||
|
@ -184,5 +188,4 @@ class BeatrootNews < Jekyll::Generator
|
||||||
file.output
|
file.output
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
|
||||||
|
|
Loading…
Reference in New Issue