#!/bin/bash
#
# Builds domains.csv from two sources:
#   1. pages served by the GOI Directory (goidirectory.nic.in), and
#   2. certificate identities matching '%.gov.in' in the crt.sh database.
#
# Needs curl, grep, sem (GNU parallel), psql, sort and php on PATH, and
# parse.php in the current directory. Intermediate files (urls.raw.txt,
# crt.sh.domains.txt, combined.txt) are written to the current directory.

# Highest id requested from sitecounter.php.
readonly GOI_MAX_ID=14648
# crt.sh is queried in pages: CRT_PAGES queries of CRT_PAGE_SIZE rows each.
readonly CRT_PAGE_SIZE=1000
readonly CRT_PAGES=6

#######################################
# Fetches one GOI Directory entry and appends the matching response lines
# to urls.raw.txt.
# Globals:   GOI_LINK_PATTERN (read, optional) - extended regex for grep
# Arguments: $1 - id passed to sitecounter.php
# Outputs:   appends to urls.raw.txt
# Returns:   exit status of grep (non-zero when nothing matched)
#######################################
doit() {
  local id=$1
  # NOTE(review): the grep pattern in the previous revision of this file was
  # truncated (only an opening quote survived), so the original filter is
  # unknown. The default below keeps every line that contains a URL; confirm
  # it against what parse.php expects, or override via GOI_LINK_PATTERN.
  local pattern=${GOI_LINK_PATTERN:-https?://}
  # Append (>>): many doit jobs run concurrently and share this file.
  curl -s "http://goidirectory.nic.in/sitecounter.php?id=${id}" \
    | grep -E -- "$pattern" >> urls.raw.txt
}
# sem starts each job in a fresh shell, so the function has to be exported.
export -f doit

# Download from the goidirectory website
echo "Downloading from the GOI Directory"
for ((i = 1; i <= GOI_MAX_ID; i++)); do
  # At most 30 downloads in flight at once.
  sem -j30 doit "$i"
done
# Barrier: block until every queued job has finished.
sem --wait

# Get crts issued in gov.in
# TODO: This isn't as good as the censys.io export
# that pulse uses, switch to that
echo "Downloading from the crt.sh database"
for ((page = 0; page < CRT_PAGES; page++)); do
  offset=$((page * CRT_PAGE_SIZE))
  # One psql invocation per page; OFFSET 0 is the same as no OFFSET.
  echo "select name_value FROM certificate_identity WHERE name_value LIKE '%.gov.in' LIMIT ${CRT_PAGE_SIZE} OFFSET ${offset};" \
    | psql -h crt.sh -p 5432 -U guest certwatch
done > crt.sh.domains.txt

echo "Combining them together"
sort -u urls.raw.txt crt.sh.domains.txt > combined.txt
# NOTE(review): parse.php presumably reads combined.txt - confirm.
php parse.php | sort -u > domains.csv