Auto Updates FTW

This commit is contained in:
Nemo 2022-04-24 14:04:50 +05:30
parent e726415cb3
commit decdbada40
4 changed files with 45 additions and 2 deletions

22
.github/workflows/update.yml vendored Normal file
View File

@ -0,0 +1,22 @@
# Workflow that re-runs the data fetch script and commits whatever it changed.
name: Update Data
on:
# Triggered on every push as well as on the weekly schedule below.
push:
# Run this on Sundays because NSDL won't add new things then
schedule:
# 8:07 UTC every Sunday
# 13:37 IST every Sunday
- cron: '7 8 * * 0'
jobs:
update:
name: Update data
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
# NOTE(review): github.head_ref is presumably only populated for
# pull_request events; confirm what checkout uses on push/schedule runs.
ref: ${{ github.head_ref }}
# Regenerate the data files by running the repository's fetch script.
- name: Update data
run: ./fetch.sh
# Presumably commits and pushes any files the previous step modified.
- uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: Update ISIN Data
commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
pup
pup.zip

View File

@ -4,6 +4,8 @@ ISIN Data from various public securities.
Source: NSDL provides an ISIN Search at <https://nsdl.co.in/master_search.php>.
Automatically updated every Sunday using GitHub Actions.
Currently tracked:
|File|Issuer|

21
fetch.sh Normal file → Executable file
View File

@ -1,13 +1,29 @@
#!/bin/bash
# Locate the `pup` HTML parser and export its path as PUP_BINARY so the
# exported fetch functions can invoke it.
#
# Resolution order:
#   1. a `pup` already reachable through PATH
#   2. a `./pup` left in the current directory by a previous run
#   3. a fresh download of the v0.4.0 linux/amd64 release into ./pup
#
# Assignment is kept separate from `export` so a failing lookup is not hidden
# behind export's own (always zero) exit status.
PUP_BINARY="$(command -v pup || true)"
if [[ -z "$PUP_BINARY" ]]; then
  # Reuse an earlier download instead of fetching again; this also avoids
  # unzip stopping to ask whether the existing ./pup may be overwritten.
  if [[ ! -x ./pup ]]; then
    wget https://github.com/ericchiang/pup/releases/download/v0.4.0/pup_v0.4.0_linux_amd64.zip -O pup.zip || {
      echo "[-] failed to download pup" >&2
      rm -f pup.zip
      exit 1
    }
    # -o: overwrite without prompting (a non-executable ./pup may be present).
    unzip -o pup.zip || {
      echo "[-] failed to unpack pup.zip" >&2
      rm -f pup.zip
      exit 1
    }
    rm -f pup.zip
    chmod +x ./pup
  fi
  PUP_BINARY="$(pwd)/pup"
fi
export PUP_BINARY
# Call with INX Page_num file_name
function fetch_page() {
echo "[+] $1/$2"
curl "https://nsdl.co.in/master_search_res.php" \
--user-agent "Mozilla/Gecko/Firefox/58.0" \
--silent \
--retry 3 \
--connect-timeout 10 \
--retry-max-time 30 \
--data cnum=$1 \
--data "page_no=$2" |
# for each row
pup '#nsdl-tables tr json{}' | \
$PUP_BINARY '#nsdl-tables tr json{}' | \
# generate 6 lines (second column has a link, so parse that) with raw output
jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
# and create a CSV from every 5 lines
@ -17,10 +33,11 @@ function fetch_page() {
}
# Print the value of the `total_page` form input from the first page of NSDL
# search results.
# Arguments: $1 - value submitted as the `cnum` form field
# Globals:   PUP_BINARY (read) - command used to run pup; falls back to a
#            `pup` found on PATH when unset or empty
# Outputs:   whatever pup extracts for 'input[name=total_page] attr{value}'
function fetch_total_pages() {
  curl "https://nsdl.co.in/master_search_res.php" \
    --user-agent "Mozilla/Gecko/Firefox/58.0" \
    --silent \
    --data "cnum=$1" \
    --data "page_no=1" |
    # Single parser stage: the response is piped only into the configured
    # binary (quoted so a path containing spaces still works).
    "${PUP_BINARY:-pup}" 'input[name=total_page] attr{value}'
}
export -f fetch_page