github.com/compromyse/enfold.git

author Raghuram Subramani <raghus2247@gmail.com> 2025-03-26 22:19:19 +05:30
committer Raghuram Subramani <raghus2247@gmail.com> 2025-03-26 22:19:19 +05:30
commit f1f43d3448bc879eed55f1e6865c06e646b7eb4a
tree 3b67eefca59ffc4bd46b418ec401a3c36b753542
parent ef63d21480f1f83a660902da3f9ad2d5606b37c2

implement scraping of establishments



Diff

 scrape_ecourtindia_v6/main.py    | 10 ++++++++++
 scrape_ecourtindia_v6/scraper.py | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
index 1cadad2..9d4c193 100644
--- a/scrape_ecourtindia_v6/main.py
+++ b/scrape_ecourtindia_v6/main.py
@@ -6,6 +6,8 @@
 
 db = TinyDB('db.json')
 
+SCRAPE_ESTABLISHMENTS = True
+
 class ThreadSafeCSVWriter:
     def __init__(self, filename):
         self.file = open(filename, 'w', newline='')
@@ -25,7 +27,11 @@
     try:
         for district in scraper.scrape_districts(state):
             for cmplx in scraper.scrape_complexes(state, district):
-                csv_writer.writerow([state, district, cmplx])
+                if SCRAPE_ESTABLISHMENTS:
+                    for establishment in scraper.scrape_establishments(state, district, cmplx):
+                        csv_writer.writerow([ state, district, cmplx, establishment ])
+                else:
+                    csv_writer.writerow([ state, district, cmplx ])
     except Exception as e:
         print(f"Error scraping {state}: {e}")
     finally:
@@ -43,7 +49,7 @@
     states = m.scrape_states()
     m.driver.close()
 
-    with ThreadPoolExecutor(max_workers=5) as executor:
+    with ThreadPoolExecutor(max_workers=10) as executor:
         futures = [
             executor.submit(scrape_state_thread, state, config, csv_writer)
             for state in states
diff --git a/scrape_ecourtindia_v6/scraper.py b/scrape_ecourtindia_v6/scraper.py
index cdab2fd..18b519a 100644
--- a/scrape_ecourtindia_v6/scraper.py
+++ b/scrape_ecourtindia_v6/scraper.py
@@ -82,6 +82,21 @@
 
         return complexes
 
+    def scrape_establishments(self, state, district, cmplx):
+        self.select('sess_state_code', state)
+        sleep(0.2)
+        self.select('sess_dist_code', district)
+        sleep(0.2)
+        self.select('court_complex_code', cmplx)
+        sleep(1)
+
+        element = self.driver.find_element(By.ID, 'court_est_code')
+        options = Select(element).options
+        establishments = [ option.text for option in options[1:] ]
+        print(f'ESTABLISHMENTS: {establishments}')
+
+        return establishments
+
     def select_court(self):
         sleep(2)
         while True:
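The new scrape_establishments() drives three cascading dropdowns and then reads the establishment <select> once it has been repopulated. A standalone sketch of the same pattern, assuming a configured Selenium WebDriver, the element ids shown in the diff, and that self.select() picks options by visible label (select_by_text below is a hypothetical stand-in for it):

from time import sleep

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

def select_by_text(driver, element_id, text):
    # Assumed equivalent of the scraper's self.select() helper: choose a
    # dropdown option by its visible label.
    Select(driver.find_element(By.ID, element_id)).select_by_visible_text(text)

def list_establishments(driver, state, district, cmplx):
    select_by_text(driver, 'sess_state_code', state)
    sleep(0.2)
    select_by_text(driver, 'sess_dist_code', district)
    sleep(0.2)
    select_by_text(driver, 'court_complex_code', cmplx)
    sleep(1)  # give the page time to populate the establishment dropdown

    options = Select(driver.find_element(By.ID, 'court_est_code')).options
    # options[0] is assumed to be a placeholder entry, hence options[1:]
    return [option.text for option in options[1:]]

The fixed sleep(1) is the fragile part: on a slow response the establishment list may still be empty when it is read. A WebDriverWait polling until the dropdown has more than one option would make the wait explicit instead of timed.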