diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml
index b4e3192..e591abd 100644
--- a/.github/workflows/update.yml
+++ b/.github/workflows/update.yml
@@ -3,7 +3,7 @@ on:
   schedule:
-    # 18:07 UTC every day
-    # 23:37 IST every day
-    - cron: '7 18 * * *'
+    # 04:25 UTC every day
+    # 09:55 IST every day
+    - cron: '25 4 * * *'
 jobs:
   update:
     name: Update data
@@ -12,14 +12,9 @@ jobs:
       - uses: actions/checkout@v3
         with:
           ref: ${{ github.head_ref }}
-      - name: Get current date
-        uses: josStorer/get-current-time@v2
-        id: current-time
-        with:
-          # Versioning is current date
-          format: "YYYY.M.D"
       - name: Update data
-        run: ./fetch.sh "${{ steps.current-time.outputs.formattedTime }}"
+        run: make all
+        id: update_data
       # Only tag if we're running on the scheduled job
       - uses: stefanzweifel/git-auto-commit-action@v4
         with:
@@ -27,6 +22,6 @@ jobs:
           commit_author: 'github-actions[bot] '
           file_pattern: "*.csv"
           status_options: '--untracked-files=no'
-          tagging_message: "v${{ steps.current-time.outputs.formattedTime }}"
+          tagging_message: "v${{ steps.update_data.outputs.version }}"
       - name: Create Release
-        run: gh release create "v${{ steps.current-time.outputs.formattedTime }}" --notes "v${{steps.current-time.outputs.formattedTime}}" *.csv && \
\ No newline at end of file
+        run: gh release create "v${{ steps.update_data.outputs.version }}" --notes "v${{steps.update_data.outputs.version}}" *.csv
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9a67afa
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+SHELL=/bin/bash
+# Release version (YYYY.M.D, no zero padding). Expanded once with := so every
+# recipe line sees the same value even if the run crosses midnight.
+version := $(shell date +%Y.%-m.%-d)
+
+# None of these targets produce a file of the same name.
+.PHONY: all INE INF IN9 update
+
+all: INE INF IN9 update
+INE INF IN9:
+	./fetch.sh $@
+
+# Publish the version as a step output and stamp it into the metadata files.
+# GITHUB_OUTPUT is only set on Actions runners; fall back to /dev/null locally.
+update:
+	echo "version=$(version)" >> "$${GITHUB_OUTPUT:-/dev/null}"
+	sed -i "s/^version.*/version: $(version)/" CITATION.cff
+	sed -i "s/^date-released.*/date-released: `date --rfc-3339=date`/" CITATION.cff
+
+	jq ".version = \"$(version)\" | .created = \"`date --rfc-3339=seconds`\"" datapackage.json > d2.json
+	mv d2.json datapackage.json
+	git add CITATION.cff datapackage.json
\ No newline at end of file
diff --git a/fetch.sh b/fetch.sh
index 0d5a3c0..4aefcd3 100755
--- a/fetch.sh
+++ b/fetch.sh
@@ -22,7 +22,8 @@ function fetch_page() {
         --connect-timeout 10 \
         --retry-max-time 30 \
         --data cnum=$1 \
-        --data "page_no=$2" | $PUP_BINARY '#nsdl-tables tr json{}' | \
+        --data "page_no=$2" | \
+        $PUP_BINARY '#nsdl-tables tr json{}' | \
         # generate 6 lines (second column has a link, so parse that) with raw output
         jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
         # and create a CSV from every 5 lines
@@ -42,27 +43,28 @@ export -f fetch_page
 
 function fetch_class() {
     for i in $(seq 1 $2); do
+        echo fetch_page $1 $i "$1.csv"
         sem -j 10 --timeout 500% fetch_page $1 $i "$1.csv"
     done
 }
 
-for i in E F 9; do
-    total=$(fetch_total_pages "IN$i")
-    echo "::group::IN$i (Total=$total)"
-    rm "IN$i.csv"
-    fetch_class "IN$i" $total
-    echo "::endgroup::"
-done
+# ISIN class prefix to fetch (INE, INF or IN9); the Makefile passes its target name
+CLASS="${1:?usage: $0 <INE|INF|IN9>}"
+
+total=$(fetch_total_pages "$CLASS")
+echo "::group::$CLASS (Total=$total)"
+# -f: do not fail when the CSV is absent (e.g. after an interrupted run)
+rm -f "$CLASS.csv"
+fetch_class "$CLASS" $total
+echo "::endgroup::"
 
 sem --wait
 
-for i in E F 9; do
-    # Sort the file in place
-    sort -o "IN$i.csv" "IN$i.csv"
-    # Remove lines that don't start with the correct prefix
-    # This is to avoid ISINs like INF955L01IN9 showing up under IN9
-    sed -i '/^IN$i/!d' "IN$i.csv"
-done
+# Sort the file in place
+sort -o "$CLASS.csv" "$CLASS.csv"
+# Remove lines that don't start with the correct prefix
+# This is to avoid ISINs like INF955L01IN9 showing up under IN9
+sed -i "/^$CLASS/!d" "$CLASS.csv"
 
 # Update CITATION
 if [[ $(git diff --stat *.csv) != '' ]]; then