From f1f43d3448bc879eed55f1e6865c06e646b7eb4a Mon Sep 17 00:00:00 2001
From: Raghuram Subramani <raghus2247@gmail.com>
Date: Wed, 26 Mar 2025 22:19:19 +0530
Subject: [PATCH] implement scraping of establishments

---
 scrape_ecourtindia_v6/main.py    | 10 ++++++++--
 scrape_ecourtindia_v6/scraper.py | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
index 1cadad2..9d4c193 100644
--- a/scrape_ecourtindia_v6/main.py
+++ b/scrape_ecourtindia_v6/main.py
@@ -6,6 +6,8 @@
 
 db = TinyDB('db.json')
 
+SCRAPE_ESTABLISHMENTS = True  # when true, one CSV row is written per establishment; otherwise one row per court complex
+
 class ThreadSafeCSVWriter:
     def __init__(self, filename):
         self.file = open(filename, 'w', newline='')
@@ -25,7 +27,11 @@
     try:
         for district in scraper.scrape_districts(state):
             for cmplx in scraper.scrape_complexes(state, district):
-                csv_writer.writerow([state, district, cmplx])
+                if SCRAPE_ESTABLISHMENTS:
+                    for establishment in scraper.scrape_establishments(state, district, cmplx):
+                        csv_writer.writerow([ state, district, cmplx, establishment ])
+                else:
+                    csv_writer.writerow([ state, district, cmplx ])
     except Exception as e:
         print(f"Error scraping {state}: {e}")
     finally:
@@ -43,7 +49,7 @@
     states = m.scrape_states()
     m.driver.close()
 
-    with ThreadPoolExecutor(max_workers=5) as executor:
+    with ThreadPoolExecutor(max_workers=10) as executor:
         futures = [
             executor.submit(scrape_state_thread, state, config, csv_writer) 
             for state in states
diff --git a/scrape_ecourtindia_v6/scraper.py b/scrape_ecourtindia_v6/scraper.py
index cdab2fd..18b519a 100644
--- a/scrape_ecourtindia_v6/scraper.py
+++ b/scrape_ecourtindia_v6/scraper.py
@@ -82,6 +82,21 @@
 
         return complexes
 
+    def scrape_establishments(self, state, district, cmplx):  # returns the option texts of the 'court_est_code' dropdown for one state/district/complex
+        self.select('sess_state_code', state)
+        sleep(0.2)  # fixed pause between selections; presumably lets the dependent dropdown refresh -- TODO confirm
+        self.select('sess_dist_code', district)
+        sleep(0.2)
+        self.select('court_complex_code', cmplx)
+        sleep(1)  # longer fixed pause before reading 'court_est_code'; presumably waits for its options to load -- TODO confirm
+
+        element = self.driver.find_element(By.ID, 'court_est_code')
+        options = Select(element).options
+        establishments = [ option.text for option in options[1:] ]  # skips the first option (presumably a placeholder entry -- verify)
+        print(f'ESTABLISHMENTS: {establishments}')  # prints the scraped list to stdout
+
+        return establishments
+
     def select_court(self):
         sleep(2)
         while True:
--
rgit 0.1.5