Switch to a Makefile

2022-05-02 09:51:41 +05:30 · 2022-05-02 09:51:41 +05:30 · 0923f19558
parent 8b89bdca62
commit 0923f19558
3 changed files with 35 additions and 25 deletions
--- a/.github/workflows/update.yml
+++ b/.github/workflows/update.yml
@@ -3,7 +3,7 @@ on:
  schedule:
    # 04:25 UTC every day
    # 09:55 IST every day
-    - cron: '7 18 * * *'
+    - cron: '25 4 * * *'
 jobs:
  update:
    name: Update data
@@ -12,14 +12,9 @@ jobs:
    - uses: actions/checkout@v3
      with:
        ref: ${{ github.head_ref }}
    - name: Get current date
      uses: josStorer/get-current-time@v2
      id: current-time
      with:
        # Versioning is current date
        format: "YYYY.M.D"
    - name: Update data
-      run: ./fetch.sh "${{ steps.current-time.outputs.formattedTime }}"
+      run: make all
      id: update_data
    # Only tag if we're running on the scheduled job
    - uses: stefanzweifel/git-auto-commit-action@v4
      with:
@@ -27,6 +22,6 @@ jobs:
        commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
        file_pattern: "*.csv"
        status_options: '--untracked-files=no'
-        tagging_message: "v${{ steps.current-time.outputs.formattedTime }}"
+        tagging_message: "v${{ steps.update_data.outputs.version }}"
    - name: Create Release
-      run: gh release create "v${{ steps.current-time.outputs.formattedTime }}" --notes "v${{steps.current-time.outputs.formattedTime}}" *.csv && \
+      run: gh release create "v${{ steps.update_data.outputs.version }}" --notes "v${{steps.update_data.outputs.version}}" *.csv && \
--- a/15
+++ b/15
@@ -0,0 +1,15 @@
 # Fetch the per-class ISIN data and stamp the release metadata files.
 # `make all` runs ./fetch.sh once per ISIN class and then the `update` target.
 SHELL := /bin/bash
 # Evaluated once when the Makefile is parsed, so every recipe line below sees
 # the same values even if the run crosses midnight.
 # version:  date-based version string, e.g. 2022.5.2 (no zero padding).
 # released: ISO date written to CITATION.cff, e.g. 2022-05-02.
 version := $(shell date +%Y.%-m.%-d)
 released := $(shell date --rfc-3339=date)
 # None of these targets names a file that its recipe creates.
 .PHONY: all update INE INF IN9
 all: INE INF IN9 update
 # One target per ISIN class; the target name ($@) is the class given to fetch.sh.
 INE INF IN9:
 	./fetch.sh $@
 # Publish the version as a workflow step output, then write it into
 # CITATION.cff and datapackage.json and stage both files.
 # $GITHUB_OUTPUT is the current GitHub Actions mechanism; the legacy
 # ::set-output command is kept as a fallback for when that variable is unset.
 update:
 	if [ -n "$${GITHUB_OUTPUT:-}" ]; then \
 		echo "version=$(version)" >> "$$GITHUB_OUTPUT"; \
 	else \
 		echo "::set-output name=version::$(version)"; \
 	fi
 	sed -i "s/^version.*/version: $(version)/" CITATION.cff
 	sed -i "s/^date-released.*/date-released: $(released)/" CITATION.cff
 	jq --arg version "$(version)" --arg created "$$(date --rfc-3339=seconds)" \
 		'.version = $$version | .created = $$created' datapackage.json > d2.json
 	mv d2.json datapackage.json
 	git add CITATION.cff datapackage.json
--- a/fetch.sh
+++ b/fetch.sh
@@ -22,7 +22,8 @@ function fetch_page() {
    --connect-timeout 10 \
    --retry-max-time 30 \
    --data cnum=$1 \
-    --data "page_no=$2" | $PUP_BINARY '#nsdl-tables tr json{}' | \
+    --data "page_no=$2" | \
  $PUP_BINARY '#nsdl-tables tr json{}' | \
  # generate 6 lines (second column has a link, so parse that) with raw output
  jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
  # and create a CSV from every 5 lines
@@ -42,27 +43,26 @@ export -f fetch_page
 # Queue one fetch_page job for every page of an ISIN class.
 #   $1 - class name; "$1.csv" is passed to fetch_page as its third argument
 #   $2 - last page number; pages 1..$2 are requested
 # Each job is first announced on stdout, then handed to sem
 # (-j 10, --timeout 500%).
 function fetch_class() {
  local isin_class=$1
  local last_page=$2
  local page
  for page in $(seq 1 $last_page); do
    echo fetch_page $isin_class $page "$isin_class.csv"
    sem -j 10 --timeout 500% fetch_page $isin_class $page "$isin_class.csv"
  done
 }
-for i in E F 9; do
+CLASS="$1"
-  total=$(fetch_total_pages "IN$i")
+
-  echo "::group::IN$i (Total=$total)"
+total=$(fetch_total_pages "$CLASS")
-  rm "IN$i.csv"
+echo "::group::$CLASS (Total=$total)"
-  fetch_class "IN$i" $total
+rm "$CLASS.csv"
-  echo "::endgroup::"
+fetch_class "$CLASS" $total
-done
+echo "::endgroup::"
 sem --wait
-for i in E F 9; do
+# Sort the file in place
-  # Sort the file in place
+sort -o "$CLASS.csv" "$CLASS.csv"
-  sort -o "IN$i.csv" "IN$i.csv"
+# Remove lines that don't start with the correct prefix
-  # Remove lines that don't start with the correct prefix
+# This is to avoid ISINs like INF955L01IN9 showing up under IN9
-  # This is to avoid ISINs like INF955L01IN9 showing up under IN9
+sed -i "/^$CLASS/!d" "$CLASS.csv"
  sed -i '/^IN$i/!d' "IN$i.csv"
 done
 # Update CITATION
 if [[ $(git diff --stat *.csv) != '' ]]; then