Implement scraping of court establishments
 scrape_ecourtindia_v6/main.py    | 10 ++++++++--
 scrape_ecourtindia_v6/scraper.py | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
@@ -6,6 +6,8 @@
 db = TinyDB('db.json')
 
+SCRAPE_ESTABLISHMENTS = True
+
 class ThreadSafeCSVWriter:
     def __init__(self, filename):
         self.file = open(filename, 'w', newline='')
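Only the first lines of ThreadSafeCSVWriter appear in this hunk, but since a single instance is shared by every worker thread (the executor below submits all states against the same csv_writer), it presumably serializes writes with a lock. A minimal sketch under that assumption; the lock and the flush are guesses, not code from this diff:

    import csv
    import threading

    class ThreadSafeCSVWriter:
        def __init__(self, filename):
            self.file = open(filename, 'w', newline='')
            self.writer = csv.writer(self.file)
            self.lock = threading.Lock()  # assumed: guards concurrent writerow calls

        def writerow(self, row):
            # Only one thread writes (and flushes) at a time.
            with self.lock:
                self.writer.writerow(row)
                self.file.flush()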
@@ -25,7 +27,11 @@
     try:
         for district in scraper.scrape_districts(state):
             for cmplx in scraper.scrape_complexes(state, district):
-                csv_writer.writerow([state, district, cmplx])
+                if SCRAPE_ESTABLISHMENTS:
+                    for establishment in scraper.scrape_establishments(state, district, cmplx):
+                        csv_writer.writerow([state, district, cmplx, establishment])
+                else:
+                    csv_writer.writerow([state, district, cmplx])
     except Exception as e:
         print(f"Error scraping {state}: {e}")
     finally:
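With SCRAPE_ESTABLISHMENTS enabled rows carry four columns, with it disabled only three, so files written under different settings are ragged relative to each other. If a fixed width matters downstream, the else branch could pad instead (hypothetical variant, not part of this diff):

    else:
        # Keep every row at four columns regardless of the flag.
        csv_writer.writerow([state, district, cmplx, ''])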
@@ -43,7 +49,7 @@
     states = m.scrape_states()
     m.driver.close()
 
-    with ThreadPoolExecutor(max_workers=5) as executor:
+    with ThreadPoolExecutor(max_workers=10) as executor:
         futures = [
             executor.submit(scrape_state_thread, state, config, csv_writer)
             for state in states
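Doubling max_workers means up to ten states are scraped concurrently through the shared csv_writer, which is exactly what the lock sketched above has to absorb. Since scrape_state_thread catches its own exceptions, the futures mostly exist to block until completion; draining them explicitly would also surface anything a worker left uncaught (sketch, reusing the futures list from this hunk):

    from concurrent.futures import as_completed

    for future in as_completed(futures):
        future.result()  # re-raises any exception not handled inside the worker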
diff --git a/scrape_ecourtindia_v6/scraper.py b/scrape_ecourtindia_v6/scraper.py
@@ -82,6 +82,21 @@
         return complexes
 
+    def scrape_establishments(self, state, district, cmplx):
+        self.select('sess_state_code', state)
+        sleep(0.2)
+        self.select('sess_dist_code', district)
+        sleep(0.2)
+        self.select('court_complex_code', cmplx)
+        sleep(1)
+
+        element = self.driver.find_element(By.ID, 'court_est_code')
+        options = Select(element).options
+        establishments = [option.text for option in options[1:]]
+        print(f'ESTABLISHMENTS: {establishments}')
+
+        return establishments
+
     def select_court(self):
         sleep(2)
         while True:
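scrape_establishments relies on fixed sleeps between the cascading dropdowns (district repopulates after state, complex after district, establishment after complex), which can race on a slow connection. A more robust option is selenium's explicit wait; the helper below is a sketch with a hypothetical name, not code from this repository:

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import Select, WebDriverWait

    def wait_for_options(driver, element_id, timeout=10):
        # Block until the <select> holds more than its placeholder option.
        WebDriverWait(driver, timeout).until(
            lambda d: len(Select(d.find_element(By.ID, element_id)).options) > 1
        )

With that in place, sleep(1) before reading court_est_code becomes wait_for_options(self.driver, 'court_est_code').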