Switch to a Makefile

This commit is contained in:
Nemo 2022-05-02 09:51:41 +05:30
parent 8b89bdca62
commit 0923f19558
3 changed files with 35 additions and 25 deletions

View File

@ -3,7 +3,7 @@ on:
schedule:
# 18:07 UTC every day
# 23:37 IST every day
- cron: '7 18 * * *'
- cron: '25 4 * * *'
jobs:
update:
name: Update data
@ -12,14 +12,9 @@ jobs:
- uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
- name: Get current date
uses: josStorer/get-current-time@v2
id: current-time
with:
# Versioning is current date
format: "YYYY.M.D"
- name: Update data
run: ./fetch.sh "${{ steps.current-time.outputs.formattedTime }}"
run: make all
id: update_data
# Only tag if we're running on the scheduled job
- uses: stefanzweifel/git-auto-commit-action@v4
with:
@ -27,6 +22,6 @@ jobs:
commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
file_pattern: "*.csv"
status_options: '--untracked-files=no'
tagging_message: "v${{ steps.current-time.outputs.formattedTime }}"
tagging_message: "v${{ steps.update_data.outputs.version }}"
- name: Create Release
run: gh release create "v${{ steps.current-time.outputs.formattedTime }}" --notes "v${{steps.current-time.outputs.formattedTime}}" *.csv && \
run: gh release create "v${{ steps.update_data.outputs.version }}" --notes "v${{steps.update_data.outputs.version}}" *.csv && \

15
Makefile Normal file
View File

@ -0,0 +1,15 @@
# Task runner for the ISIN data refresh.
#
#   make all     fetch every ISIN class, then stamp the release metadata
#   make INE     fetch a single class via ./fetch.sh (likewise INF, IN9)
#   make update  write today's version into CITATION.cff and datapackage.json
#                and stage both files

SHELL=/bin/bash

# Release version (YYYY.M.D, no zero padding). Evaluated once, when the
# Makefile is parsed, so that the value printed for the workflow, the one in
# CITATION.cff and the one in datapackage.json are guaranteed to be identical.
# (A backtick string here would be re-run by the shell on every recipe line.)
version := $(shell date +%Y.%-m.%-d)

# ISO calendar date written to the `date-released` field of CITATION.cff.
release_date := $(shell date --rfc-3339=date)

# None of these targets produce a file of the same name; declare them phony so
# a stray file called e.g. `update` or `INE` can never make them look up to date.
.PHONY: all INE INF IN9 update

all: INE INF IN9 update

# The target name doubles as the class argument handed to fetch.sh.
INE INF IN9:
	./fetch.sh $@

# The first line publishes the version as a step output named `version`.
# NOTE(review): `::set-output` is the legacy workflow-command syntax; GitHub
# now recommends appending `version=...` to the file named by $GITHUB_OUTPUT.
# Confirm before migrating, since the workflow reads this step output.
# The `created` timestamp is deliberately still taken when the recipe runs.
# jq cannot edit in place, hence the temporary d2.json followed by mv.
update:
	@echo "::set-output name=version::$(version)"
	sed -i "s/^version.*/version: $(version)/" CITATION.cff
	sed -i "s/^date-released.*/date-released: $(release_date)/" CITATION.cff
	jq ".version = \"$(version)\" | .created = \"`date --rfc-3339=seconds`\"" datapackage.json > d2.json
	mv d2.json datapackage.json
	git add CITATION.cff datapackage.json

View File

@ -22,7 +22,8 @@ function fetch_page() {
--connect-timeout 10 \
--retry-max-time 30 \
--data cnum=$1 \
--data "page_no=$2" | $PUP_BINARY '#nsdl-tables tr json{}' | \
--data "page_no=$2" | \
$PUP_BINARY '#nsdl-tables tr json{}' | \
# generate 6 lines (second column has a link, so parse that) with raw output
jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
# and create a CSV from every 5 lines
@ -42,27 +43,26 @@ export -f fetch_page
# Queue one fetch_page job for every result page of an ISIN class.
#   $1 - class identifier; also used to build the "$1.csv" file name that is
#        passed to fetch_page as its third argument
#   $2 - page count; pages 1..$2 are requested
# Jobs are started through `sem`, so the caller is expected to run
# `sem --wait` afterwards before working with the CSV.
# NOTE(review): $1 and $2 are expanded unquoted, so they are assumed to be
# single words without whitespace or glob characters - confirm against callers.
function fetch_class() {
for i in $(seq 1 $2); do
# Print the call that is about to be queued, for the job log
echo fetch_page $1 $i "$1.csv"
# -j 10: run at most ten jobs at a time.
# --timeout 500%: per GNU parallel, a percentage timeout is relative to the
# median job runtime (here: five times the median) - TODO confirm for sem.
sem -j 10 --timeout 500% fetch_page $1 $i "$1.csv"
done
}
for i in E F 9; do
total=$(fetch_total_pages "IN$i")
echo "::group::IN$i (Total=$total)"
rm "IN$i.csv"
fetch_class "IN$i" $total
echo "::endgroup::"
done
CLASS="$1"
total=$(fetch_total_pages "$CLASS")
echo "::group::$CLASS (Total=$total)"
rm "$CLASS.csv"
fetch_class "$CLASS" $total
echo "::endgroup::"
sem --wait
for i in E F 9; do
# Sort the file in place
sort -o "IN$i.csv" "IN$i.csv"
# Remove lines that don't start with the correct prefix
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
sed -i '/^IN$i/!d' "IN$i.csv"
done
# Sort the file in place
sort -o "$CLASS.csv" "$CLASS.csv"
# Remove lines that don't start with the correct prefix
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
sed -i "/^$CLASS/!d" "$CLASS.csv"
# Update CITATION
if [[ $(git diff --stat *.csv) != '' ]]; then