Browse Source

Initial commit

master
Nemo 3 years ago
commit
791a59e305
5 changed files with 12836 additions and 0 deletions
  1. +2
    -0
      .gitignore
  2. +3
    -0
      README.md
  3. +16
    -0
      dl.sh
  4. +12807
    -0
      domains.csv
  5. +8
    -0
      parse.php

+ 2
- 0
.gitignore View File

@ -0,0 +1,2 @@
# Ignore the cache/ directory.
cache/
# Ignore every .txt file; dl.sh writes its intermediate files
# (urls.raw.txt, crt.sh.domains.txt, combined.txt) with this extension.
*.txt

+ 3
- 0
README.md View File

@ -0,0 +1,3 @@
# pulse-in
How many Indian government websites are using HTTPS? An attempt at tracking them using [18F/pulse](https://github.com/18F/pulse#setup).

+ 16
- 0
dl.sh View File

@ -0,0 +1,16 @@
#!/bin/bash
# Build domains.csv from two sources:
#   1. the goidirectory.nic.in site counter pages (ids 1..last_id)
#   2. certificate names ending in .gov.in queried from crt.sh
#
# Files written in the current directory:
#   urls.raw.txt        matching lines from goidirectory (appended to)
#   crt.sh.domains.txt  one certificate name per line
#   combined.txt        sorted, de-duplicated union of the two files above
#   domains.csv         sorted, de-duplicated output of parse.php
#
# NOTE: `set -e` / `pipefail` are intentionally not enabled: grep exits
# non-zero for every id whose page has no matching line, and that must
# not abort the run.

# Highest id requested from sitecounter.php.
last_id=14388

# Download from the goidirectory website
for ((i = 1; i <= last_id; i++)); do
  # Progress indicator.
  echo "$i"
  # Keep only the line carrying the meta refresh tag. The file is appended
  # to, so lines from earlier runs accumulate; duplicates are removed by
  # the `sort -u` below.
  curl -s "http://goidirectory.nic.in/sitecounter.php?id=${i}" \
    | grep '<meta http-equiv="refresh"' >> urls.raw.txt
done

# Get certs issued in gov.in
# TODO: This isn't as good as the censys.io export
# that pulse uses, switch to that
#
# -A (unaligned) and -t (tuples only) make psql print just the values, one
# per line. Without them the column header, the separator rule and the
# "(N rows)" footer end up in the file and are later treated as domains.
psql -h crt.sh -p 5432 -U guest -A -t certwatch \
  <<< "select name_value FROM certificate_identity WHERE name_value LIKE '%.gov.in';" \
  > crt.sh.domains.txt

# Merge both sources; sort reads the files directly, no cat needed.
sort -u urls.raw.txt crt.sh.domains.txt > combined.txt

# parse.php reads combined.txt and prints one host per line.
php parse.php | sort -u > domains.csv

+ 12807
- 0
domains.csv
File diff suppressed because it is too large
View File


+ 8
- 0
parse.php View File

@ -0,0 +1,8 @@
<?php
// Reads combined.txt (one entry per line) and prints the host part of each
// entry, one per line, to standard output.
foreach (file('combined.txt') as $row) {
    $row = trim($row);

    // A blank line would become "http:///", which parse_url() rejects.
    if ($row === '') {
        continue;
    }

    // Prefix a scheme so parse_url() treats the entry as an authority, and
    // ask for the host component only: the result is a string on success,
    // null when the URL has no host, and false when it cannot be parsed.
    $host = parse_url("http://$row/", PHP_URL_HOST);

    // Skip entries without a usable host instead of emitting an empty line
    // (and a PHP notice/warning) for them.
    if (!is_string($host) || $host === '') {
        continue;
    }

    echo $host . PHP_EOL;
}

Loading…
Cancel
Save