# Patch summary (review commentary placed ahead of the first `diff --git` header).
#
# Files touched by this patch:
#   .github/workflows/update.yml  new file; workflow "Update Data", triggered on
#                                 push and on the cron schedule '7 8 * * 0'
#                                 (annotated in the workflow as 8:07 UTC /
#                                 13:37 IST every Sunday). Steps: checkout,
#                                 run ./fetch.sh, then commit via
#                                 stefanzweifel/git-auto-commit-action@v4 with
#                                 the message "Update ISIN Data".
#   .gitignore                    new file; ignores `pup` and `pup.zip`.
#   README.md                     adds one sentence about the weekly update.
#   fetch.sh                      mode 100644 -> 100755. If `pup` is not found
#                                 by `command -v`, downloads the pup v0.4.0
#                                 linux_amd64 zip, unpacks it and points
#                                 PUP_BINARY at ./pup; both `pup` invocations
#                                 become `$PUP_BINARY`. Both curl calls gain a
#                                 --user-agent; only the one in fetch_page
#                                 gains --retry / --connect-timeout /
#                                 --retry-max-time.
#
# NOTE(review): the line structure of this patch looks collapsed (hunk bodies
#   run together on one line, indentation after `+` is a single space) --
#   presumably an extraction artifact; restore real line breaks before applying.
# NOTE(review): commit_author is 'github-actions[bot] ' with a trailing space
#   and no `<email>` part -- looks like the address was stripped; confirm the
#   intended "Name <email>" value.
# NOTE(review): `ref: ${{ github.head_ref }}` is used although the visible
#   triggers are push and schedule -- confirm head_ref is populated for those
#   events, or drop the `ref` input.
# NOTE(review): the README context line reads "ISIN Search at ." -- a URL
#   appears to be missing; verify against the real README before applying.
# NOTE(review): `$PUP_BINARY` is expanded unquoted in both pipelines, and
#   PUP_BINARY is first exported from `which pup` before the `command -v`
#   check runs; consider quoting if the working directory may contain spaces.
# NOTE(review): the fetch_page comments say "generate 6 lines" and then
#   "CSV from every 5 lines"; the jq array shown lists five fields.
diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml new file mode 100644 index 0000000..4636415 --- /dev/null +++ b/.github/workflows/update.yml @@ -0,0 +1,22 @@ +name: Update Data +on: + push: + # Run this on Sundays because NSDL won't add new things then + schedule: + # 8:07 UTC every Sunday + # 13:37 IST every Sunday + - cron: '7 8 * * 0' +jobs: + update: + name: Update data + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} + - name: Update data + run: ./fetch.sh + - uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: Update ISIN Data + commit_author: 'github-actions[bot] ' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..17876e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +pup +pup.zip \ No newline at end of file diff --git a/README.md b/README.md index bf33547..212befe 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ ISIN Data from various public securities. Source: NSDL provides a ISIN Search at . +Automatically updated every Sunday using GitHub Actions. + Currently tracked: |File|Issuer| diff --git a/fetch.sh b/fetch.sh old mode 100644 new mode 100755 index 71d890d..976ee0a --- a/fetch.sh +++ b/fetch.sh @@ -1,13 +1,29 @@ #!/bin/bash + +export PUP_BINARY="$(which pup)" + +if ! 
command -v pup &> /dev/null +then + wget https://github.com/ericchiang/pup/releases/download/v0.4.0/pup_v0.4.0_linux_amd64.zip -O pup.zip + unzip pup.zip + rm pup.zip + chmod +x ./pup + export PUP_BINARY="$(pwd)/pup" +fi + # Call with INX Page_num file_name function fetch_page() { echo "[+] $1/$2" curl "https://nsdl.co.in/master_search_res.php" \ + --user-agent "Mozilla/Gecko/Firefox/58.0" \ --silent \ + --retry 3 \ + --connect-timeout 10 \ + --retry-max-time 30 \ --data cnum=$1 \ --data "page_no=$2" | # for each row - pup '#nsdl-tables tr json{}' | \ + $PUP_BINARY '#nsdl-tables tr json{}' | \ # generate 6 lines (second column has a link, so parse that) with raw output jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \ # and create a CSV from every 5 lines @@ -17,10 +33,11 @@ function fetch_page() { } function fetch_total_pages() { curl "https://nsdl.co.in/master_search_res.php" \ + --user-agent "Mozilla/Gecko/Firefox/58.0" \ --silent \ --data cnum=$1 \ --data "page_no=1" | - pup 'input[name=total_page] attr{value}' + $PUP_BINARY 'input[name=total_page] attr{value}' } export -f fetch_page