Switch to a Makefile
This commit is contained in:
parent
8b89bdca62
commit
0923f19558
|
@ -3,7 +3,7 @@ on:
|
|||
schedule:
|
||||
# 18:07 UTC every day
|
||||
# 23:37 IST every day
|
||||
- cron: '7 18 * * *'
|
||||
- cron: '25 4 * * *'
|
||||
jobs:
|
||||
update:
|
||||
name: Update data
|
||||
|
@ -12,14 +12,9 @@ jobs:
|
|||
- uses: actions/checkout@v3
|
||||
with:
|
||||
ref: ${{ github.head_ref }}
|
||||
- name: Get current date
|
||||
uses: josStorer/get-current-time@v2
|
||||
id: current-time
|
||||
with:
|
||||
# Versioning is current date
|
||||
format: "YYYY.M.D"
|
||||
- name: Update data
|
||||
run: ./fetch.sh "${{ steps.current-time.outputs.formattedTime }}"
|
||||
run: make all
|
||||
id: update_data
|
||||
# Only tag if we're running on the scheduled job
|
||||
- uses: stefanzweifel/git-auto-commit-action@v4
|
||||
with:
|
||||
|
@ -27,6 +22,6 @@ jobs:
|
|||
commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
|
||||
file_pattern: "*.csv"
|
||||
status_options: '--untracked-files=no'
|
||||
tagging_message: "v${{ steps.current-time.outputs.formattedTime }}"
|
||||
tagging_message: "v${{ steps.update_data.outputs.version }}"
|
||||
- name: Create Release
|
||||
run: gh release create "v${{ steps.current-time.outputs.formattedTime }}" --notes "v${{steps.current-time.outputs.formattedTime}}" *.csv && \
|
||||
run: gh release create "v${{ steps.update_data.outputs.version }}" --notes "v${{steps.update_data.outputs.version}}" *.csv && \
|
|
@ -0,0 +1,15 @@
|
|||
SHELL=/bin/bash
|
||||
version=`date +%Y.%-m.%-d`
|
||||
|
||||
all: INE INF IN9 update
|
||||
INE INF IN9:
|
||||
./fetch.sh $@
|
||||
|
||||
update:
|
||||
echo "::set-output name=version::$(version)"
|
||||
sed -i "s/^version.*/version: $(version)/" CITATION.cff
|
||||
sed -i "s/^date-released.*/date-released: `date --rfc-3339=date`/" CITATION.cff
|
||||
|
||||
jq ".version = \"$(version)\" | .created = \"`date --rfc-3339=seconds`\"" datapackage.json > d2.json
|
||||
mv d2.json datapackage.json
|
||||
git add CITATION.cff datapackage.json
|
30
fetch.sh
30
fetch.sh
|
@ -22,7 +22,8 @@ function fetch_page() {
|
|||
--connect-timeout 10 \
|
||||
--retry-max-time 30 \
|
||||
--data cnum=$1 \
|
||||
--data "page_no=$2" | $PUP_BINARY '#nsdl-tables tr json{}' | \
|
||||
--data "page_no=$2" | \
|
||||
$PUP_BINARY '#nsdl-tables tr json{}' | \
|
||||
# generate 6 lines (second column has a link, so parse that) with raw output
|
||||
jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
|
||||
# and create a CSV from every 5 lines
|
||||
|
@ -42,27 +43,26 @@ export -f fetch_page
|
|||
|
||||
function fetch_class() {
|
||||
for i in $(seq 1 $2); do
|
||||
echo fetch_page $1 $i "$1.csv"
|
||||
sem -j 10 --timeout 500% fetch_page $1 $i "$1.csv"
|
||||
done
|
||||
}
|
||||
|
||||
for i in E F 9; do
|
||||
total=$(fetch_total_pages "IN$i")
|
||||
echo "::group::IN$i (Total=$total)"
|
||||
rm "IN$i.csv"
|
||||
fetch_class "IN$i" $total
|
||||
echo "::endgroup::"
|
||||
done
|
||||
CLASS="$1"
|
||||
|
||||
total=$(fetch_total_pages "$CLASS")
|
||||
echo "::group::$CLASS (Total=$total)"
|
||||
rm "$CLASS.csv"
|
||||
fetch_class "$CLASS" $total
|
||||
echo "::endgroup::"
|
||||
|
||||
sem --wait
|
||||
|
||||
for i in E F 9; do
|
||||
# Sort the file in place
|
||||
sort -o "IN$i.csv" "IN$i.csv"
|
||||
# Remove lines that don't start with the correct prefix
|
||||
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
|
||||
sed -i '/^IN$i/!d' "IN$i.csv"
|
||||
done
|
||||
# Sort the file in place
|
||||
sort -o "$CLASS.csv" "$CLASS.csv"
|
||||
# Remove lines that don't start with the correct prefix
|
||||
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
|
||||
sed -i "/^$CLASS/!d" "$CLASS.csv"
|
||||
|
||||
# Update CITATION
|
||||
if [[ $(git diff --stat *.csv) != '' ]]; then
|
||||
|
|
Loading…
Reference in New Issue