From f1f43d3448bc879eed55f1e6865c06e646b7eb4a Mon Sep 17 00:00:00 2001
From: Raghuram Subramani <raghus2247@gmail.com>
Date: Wed, 26 Mar 2025 22:19:19 +0530
Subject: [PATCH] implement scraping of establishments

---
 scrape_ecourtindia_v6/main.py    | 10 ++++++++++
 scrape_ecourtindia_v6/scraper.py | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
index 1cadad2..9d4c193 100644
--- a/scrape_ecourtindia_v6/main.py
+++ a/scrape_ecourtindia_v6/main.py
@@ -6,6 +6,8 @@
 
 db = TinyDB('db.json')
 
+SCRAPE_ESTABLISHMENTS = True
+
 class ThreadSafeCSVWriter:
     def __init__(self, filename):
         self.file = open(filename, 'w', newline='')
@@ -25,7 +27,11 @@
     try:
         for district in scraper.scrape_districts(state):
             for cmplx in scraper.scrape_complexes(state, district):
-                csv_writer.writerow([state, district, cmplx])
+                if SCRAPE_ESTABLISHMENTS:
+                    for establishment in scraper.scrape_establishments(state, district, cmplx):
+                        csv_writer.writerow([ state, district, cmplx, establishment ])
+                else:
+                    csv_writer.writerow([ state, district, cmplx ])
     except Exception as e:
         print(f"Error scraping {state}: {e}")
     finally:
@@ -43,7 +49,7 @@
     states = m.scrape_states()
     m.driver.close()
 
-    with ThreadPoolExecutor(max_workers=5) as executor:
+    with ThreadPoolExecutor(max_workers=10) as executor:
         futures = [
             executor.submit(scrape_state_thread, state, config, csv_writer)
             for state in states
diff --git a/scrape_ecourtindia_v6/scraper.py b/scrape_ecourtindia_v6/scraper.py
index cdab2fd..18b519a 100644
--- a/scrape_ecourtindia_v6/scraper.py
+++ a/scrape_ecourtindia_v6/scraper.py
@@ -82,6 +82,21 @@
 
         return complexes
 
+    def scrape_establishments(self, state, district, cmplx):
+        self.select('sess_state_code', state)
+        sleep(0.2)
+        self.select('sess_dist_code', district)
+        sleep(0.2)
+        self.select('court_complex_code', cmplx)
+        sleep(1)
+
+        element = self.driver.find_element(By.ID, 'court_est_code')
+        options = Select(element).options
+        establishments = [ option.text for option in options[1:] ]
+        print(f'ESTABLISHMENTS: {establishments}')
+
+        return establishments
+
     def select_court(self):
         sleep(2)
         while True:
-- 
rgit 0.1.5