From ca60d2fec9c1452eacd8342428dc961c55771df3 Mon Sep 17 00:00:00 2001 From: Nemo <commits@captnemo.in> Date: Tue, 12 Dec 2023 17:25:12 +0530 Subject: [PATCH] WIP: switch to curl impersonate --- Gemfile | 1 + Gemfile.lock | 75 +++++++++++++++++++++++++++++++++++++++++++++++---------------------------- build.rb | 13 +++++++++++++ generate.php | 5 ----- .github/workflows/pages.yml | 2 +- 5 files changed, 53 insertions(+), 43 deletions(-) diff --git a/Gemfile b/Gemfile index cea816d..9f4db7b 100644 --- a/Gemfile +++ a/Gemfile @@ -1,4 +1,5 @@ source 'https://rubygems.org' gem 'weneedfeed', "~> 0.20" gem "webrick", "~> 1.7" +gem "faraday-curb", github: 'captn3m0/faraday-curb', branch: 'master' diff --git a/Gemfile.lock b/Gemfile.lock index 60fa421..886c372 100644 --- a/Gemfile.lock +++ a/Gemfile.lock @@ -1,40 +1,39 @@ +GIT + remote: https://github.com/captn3m0/faraday-curb.git + revision: de7c4c52ef95615de81c0cab2b7a68b437b51f7b + branch: master + specs: + faraday-curb (0.0.5) + curb (~> 0) + faraday (~> 0) + GEM remote: https://rubygems.org/ specs: - activesupport (7.0.6) + activesupport (7.1.2) + base64 + bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) + connection_pool (>= 2.2.5) + drb i18n (>= 1.6, < 2) minitest (>= 5.1) + mutex_m tzinfo (~> 2.0) - addressable (2.8.4) + addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) + base64 (0.2.0) + bigdecimal (3.1.4) builder (3.2.4) concurrent-ruby (1.2.2) - faraday (1.10.3) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0) - faraday-multipart (~> 1.0) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.0) - faraday-patron (~> 1.0) - faraday-rack (~> 1.0) - faraday-retry (~> 1.0) - ruby2_keywords (>= 0.0.4) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-multipart (1.0.4) - multipart-post (~> 2) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron (1.0.0) - faraday-rack (1.0.0) - faraday-retry (1.0.3) - faraday_middleware (1.2.0) - faraday (~> 1.0) + connection_pool (2.4.1) + curb (0.9.11) + drb (2.2.0) + ruby2_keywords + faraday (0.17.6) + multipart-post (>= 1.2, < 3) + faraday_middleware (0.14.0) + faraday (>= 0.7.4, < 1.0) hanami-router (2.0.0.alpha3) mustermann (~> 1.0) mustermann-contrib (~> 1.0) @@ -47,26 +46,27 @@ i18n (1.14.1) concurrent-ruby (~> 1.0) marcel (1.0.2) - minitest (5.18.1) + minitest (5.20.0) multipart-post (2.3.0) mustermann (1.1.2) ruby2_keywords (~> 0.0.1) mustermann-contrib (1.1.2) hansi (~> 0.2.0) mustermann (= 1.1.2) - nokogiri (1.15.2-x86_64-linux) + mutex_m (0.2.0) + nokogiri (1.15.5-x86_64-linux) racc (~> 1.4) - psych (5.1.0) + psych (5.1.1.1) stringio - public_suffix (5.0.1) - racc (1.7.1) - rack (2.2.7) + public_suffix (5.0.4) + racc (1.7.3) + rack (2.2.8) rack-capture (0.4.0) rack ruby2_keywords (0.0.5) - stringio (3.0.7) - thor (1.2.2) - tilt (2.2.0) + stringio (3.1.0) + thor (1.3.0) + tilt (2.3.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) webrick (1.8.1) @@ -87,8 +87,9 @@ x86_64-linux DEPENDENCIES + faraday-curb! webrick (~> 1.7) weneedfeed (~> 0.20) BUNDLED WITH - 2.4.13 + 2.4.22 diff --git a/build.rb b/build.rb new file mode 100644 index 0000000..edd7fe5 100644 --- /dev/null +++ a/build.rb @@ -1,0 +1,13 @@ +require 'weneedfeed' +require 'faraday/curb' + +contents = File.read('weneedfeed.yml.tpl') +c = contents.gsub('__YEAR__', Time.now.strftime('%Y')) +File.write('weneedfeed.yml', c) + +Weneedfeed::Scraping.faraday_connection.adapter :curb + +Weneedfeed::Capture.call( + base_url: "https://captnemo.in/npci-rss-feeds", + schema_path: 'weneedfeed.yml' +)diff --git a/generate.php b/generate.php deleted file mode 100644 index 92df256..0000000 100644 --- a/generate.php +++ /dev/null @@ -1,5 +1,0 @@ -<?php - -$contents = file_get_contents('weneedfeed.yml.tpl'); -$c = str_replace('__YEAR__', date('Y'), $contents); -file_put_contents('weneedfeed.yml', $c);diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 63e6f40..61aa76b 100644 --- a/.github/workflows/pages.yml +++ a/.github/workflows/pages.yml @@ -41,7 +41,7 @@ ruby-version: "3.0" bundler-cache: true - name: Generate RSS Feeds - run: bundle exec weneedfeed build --base-url="https://captnemo.in/npci-rss-feeds" + run: bundle exec ruby build.rb - name: Upload artifact # Automatically uploads an artifact from the './_site' directory by default uses: actions/upload-pages-artifact@64bcae551a7b18bcb9a09042ddf1960979799187 # v1.0.8 -- rgit 0.1.5