# Source: github.com/captn3m0/tld-squatting.git

from bs4 import BeautifulSoup
import re
import yaml
import csv
from math import floor,log10
import http.client

# Maps the second CSS class of a row's ".fa" icon to the TLD type reported
# for that row; any other class is reported as "unknown".
_ICON_TYPES = {
    "fa-users": "unrestricted",
    "fa-lock": "restricted",
    "fa-registered": "brand",
    "fa-exclamation": "semi-restricted",
}  # Add more mappings as needed

# (substring of the owner cell, short owner name to report). Checked in
# order; the first substring found in the owner text wins.
_OWNER_ALIASES = (
    ("Identity Digital Inc.", "Donuts"),
    ("Charleston Road Registry", "Google"),
    ("Amazon Registry", "Amazon"),
)

# Punycode (IDN) label as it appears inside the TLD link's href.
_PUNYCODE_RE = re.compile(r"xn--[\w-]+")


def _round_down_to_power_of_ten(count):
    """Return the largest power of ten that is <= count, or 0 if count < 1."""
    if count < 1:
        # log10() raises ValueError for 0, so a TLD with no registered
        # domains is reported as 0 instead of aborting the whole parse.
        return 0
    return 10 ** floor(log10(count))


def parse_row(soup):
    """Yield one summary dict per TLD row found in the ``<tbody>`` of *soup*.

    Rows with fewer than five ``<td>`` cells are skipped.

    Args:
        soup: Parsed HTML document (e.g. a ``BeautifulSoup`` object) that
            supports ``find``/``find_all``/``select_one`` lookups.

    Yields:
        dict with the keys:
            ``status``: text of the row's ``.label`` element.
            ``tld``: the ``xn--`` label from the link in the third cell if
                its href contains one, otherwise the link text without its
                first character.
            ``type``: TLD type derived from the row's icon class, or
                ``"unknown"`` for unmapped icons.
            ``domain_count``: the eighth cell's count rounded down to a
                power of ten (0 when the count is 0).
            ``owner``: stripped text of the fifth cell, replaced by a short
                name for a few known registries.
    """
    table = soup.find("tbody")

    for row in table.find_all("tr"):
        if len(row.find_all("td")) < 5:
            continue

        status = row.select_one(".label").text

        tld_element = row.select_one("td:nth-child(3) a")
        tld_match = _PUNYCODE_RE.search(tld_element["href"])
        # Prefer the punycode form from the href; otherwise fall back to the
        # link text minus its first character (presumably the leading dot).
        tld = tld_match.group() if tld_match else tld_element.text[1:]

        # The icon's second class (after the generic "fa") names the type.
        icon_class = row.select_one(".fa")["class"][1]
        tld_type = _ICON_TYPES.get(icon_class, "unknown")

        # The count is rendered with thousands separators, e.g. "1,234".
        domain_count = int(
            row.select_one("td.right:nth-child(8)").text.replace(",", "")
        )

        owner = row.select_one("td:nth-child(5)").text.strip()
        for needle, alias in _OWNER_ALIASES:
            if needle in owner:
                owner = alias
                break

        yield {
            "status": status,
            "tld": tld,
            "type": tld_type,
            "domain_count": _round_down_to_power_of_ten(domain_count),
            "owner": owner,
        }

if __name__ == "__main__":
    # Script entry point: read the saved HTML page, extract the TLD rows and
    # write the results to the two files under _data/.
    # UTF-8 is requested explicitly so the result does not depend on the
    # platform's default encoding (assumes ntld.html is saved as UTF-8).
    with open("ntld.html", "r", encoding="utf-8") as f:
        html = f.read()
    soup = BeautifulSoup(html, "html.parser")

    # Parse the response
    tlds = sorted(parse_row(soup), key=lambda x: x["tld"])
    # Write domain counts (1-10, 10-100, and so on. We note the lower count)
    with open("_data/domain_count.yml", "w", encoding="utf-8") as f:
        yaml.dump({row["tld"]: row["domain_count"] for row in tlds}, f)
    # Write other tld information
    # newline="" lets the csv module control line endings itself; without it
    # extra "\r" characters appear on platforms that translate newlines.
    with open("_data/ntld.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=["tld", "status", "type", "owner"], extrasaction="ignore"
        )
        writer.writeheader()
        # domain_count is not in fieldnames and is dropped by extrasaction.
        writer.writerows(tlds)