Switch to a Makefile
This commit is contained in:
parent
8b89bdca62
commit
0923f19558
|
@ -3,7 +3,7 @@ on:
|
||||||
schedule:
|
schedule:
|
||||||
# 18:07 UTC every day
|
# 18:07 UTC every day
|
||||||
# 23:37 IST every day
|
# 23:37 IST every day
|
||||||
- cron: '7 18 * * *'
|
- cron: '25 4 * * *'
|
||||||
jobs:
|
jobs:
|
||||||
update:
|
update:
|
||||||
name: Update data
|
name: Update data
|
||||||
|
@ -12,14 +12,9 @@ jobs:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.head_ref }}
|
ref: ${{ github.head_ref }}
|
||||||
- name: Get current date
|
|
||||||
uses: josStorer/get-current-time@v2
|
|
||||||
id: current-time
|
|
||||||
with:
|
|
||||||
# Versioning is current date
|
|
||||||
format: "YYYY.M.D"
|
|
||||||
- name: Update data
|
- name: Update data
|
||||||
run: ./fetch.sh "${{ steps.current-time.outputs.formattedTime }}"
|
run: make all
|
||||||
|
id: update_data
|
||||||
# Only tag if we're running on the scheduled job
|
# Only tag if we're running on the scheduled job
|
||||||
- uses: stefanzweifel/git-auto-commit-action@v4
|
- uses: stefanzweifel/git-auto-commit-action@v4
|
||||||
with:
|
with:
|
||||||
|
@ -27,6 +22,6 @@ jobs:
|
||||||
commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
|
commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
|
||||||
file_pattern: "*.csv"
|
file_pattern: "*.csv"
|
||||||
status_options: '--untracked-files=no'
|
status_options: '--untracked-files=no'
|
||||||
tagging_message: "v${{ steps.current-time.outputs.formattedTime }}"
|
tagging_message: "v${{ steps.update_data.outputs.version }}"
|
||||||
- name: Create Release
|
- name: Create Release
|
||||||
run: gh release create "v${{ steps.current-time.outputs.formattedTime }}" --notes "v${{steps.current-time.outputs.formattedTime}}" *.csv && \
|
run: gh release create "v${{ steps.update_data.outputs.version }}" --notes "v${{steps.update_data.outputs.version}}" *.csv && \
|
|
@ -0,0 +1,15 @@
|
||||||
|
SHELL=/bin/bash
|
||||||
|
version=`date +%Y.%-m.%-d`
|
||||||
|
|
||||||
|
all: INE INF IN9 update
|
||||||
|
INE INF IN9:
|
||||||
|
./fetch.sh $@
|
||||||
|
|
||||||
|
update:
|
||||||
|
echo "::set-output name=version::$(version)"
|
||||||
|
sed -i "s/^version.*/version: $(version)/" CITATION.cff
|
||||||
|
sed -i "s/^date-released.*/date-released: `date --rfc-3339=date`/" CITATION.cff
|
||||||
|
|
||||||
|
jq ".version = \"$(version)\" | .created = \"`date --rfc-3339=seconds`\"" datapackage.json > d2.json
|
||||||
|
mv d2.json datapackage.json
|
||||||
|
git add CITATION.cff datapackage.json
|
30
fetch.sh
30
fetch.sh
|
@ -22,7 +22,8 @@ function fetch_page() {
|
||||||
--connect-timeout 10 \
|
--connect-timeout 10 \
|
||||||
--retry-max-time 30 \
|
--retry-max-time 30 \
|
||||||
--data cnum=$1 \
|
--data cnum=$1 \
|
||||||
--data "page_no=$2" | $PUP_BINARY '#nsdl-tables tr json{}' | \
|
--data "page_no=$2" | \
|
||||||
|
$PUP_BINARY '#nsdl-tables tr json{}' | \
|
||||||
# generate 6 lines (second column has a link, so parse that) with raw output
|
# generate 6 lines (second column has a link, so parse that) with raw output
|
||||||
jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
|
jq --raw-output '.[] | [.children[1].children[0].text, .children[2].text, .children[3].text,.children[4].text,.children[5].text]|.[]' | \
|
||||||
# and create a CSV from every 5 lines
|
# and create a CSV from every 5 lines
|
||||||
|
@ -42,27 +43,26 @@ export -f fetch_page
|
||||||
|
|
||||||
function fetch_class() {
|
function fetch_class() {
|
||||||
for i in $(seq 1 $2); do
|
for i in $(seq 1 $2); do
|
||||||
|
echo fetch_page $1 $i "$1.csv"
|
||||||
sem -j 10 --timeout 500% fetch_page $1 $i "$1.csv"
|
sem -j 10 --timeout 500% fetch_page $1 $i "$1.csv"
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in E F 9; do
|
CLASS="$1"
|
||||||
total=$(fetch_total_pages "IN$i")
|
|
||||||
echo "::group::IN$i (Total=$total)"
|
total=$(fetch_total_pages "$CLASS")
|
||||||
rm "IN$i.csv"
|
echo "::group::$CLASS (Total=$total)"
|
||||||
fetch_class "IN$i" $total
|
rm "$CLASS.csv"
|
||||||
echo "::endgroup::"
|
fetch_class "$CLASS" $total
|
||||||
done
|
echo "::endgroup::"
|
||||||
|
|
||||||
sem --wait
|
sem --wait
|
||||||
|
|
||||||
for i in E F 9; do
|
# Sort the file in place
|
||||||
# Sort the file in place
|
sort -o "$CLASS.csv" "$CLASS.csv"
|
||||||
sort -o "IN$i.csv" "IN$i.csv"
|
# Remove lines that don't start with the correct prefix
|
||||||
# Remove lines that don't start with the correct prefix
|
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
|
||||||
# This is to avoid ISINs like INF955L01IN9 showing up under IN9
|
sed -i "/^$CLASS/!d" "$CLASS.csv"
|
||||||
sed -i '/^IN$i/!d' "IN$i.csv"
|
|
||||||
done
|
|
||||||
|
|
||||||
# Update CITATION
|
# Update CITATION
|
||||||
if [[ $(git diff --stat *.csv) != '' ]]; then
|
if [[ $(git diff --stat *.csv) != '' ]]; then
|
||||||
|
|
Loading…
Reference in New Issue