
author Raghuram Subramani <raghus2247@gmail.com> 2025-03-27 23:05:13.0 +05:30:00
committer Raghuram Subramani <raghus2247@gmail.com> 2025-03-27 23:05:13.0 +05:30:00
commit a02c8f4c8643b4b9a531e185813c5d82b6866ec0 [patch]
tree a3cdd49df8412e63ac711c148df6814efa0a05e7
parent 7195110a466b0ed14de1b8ee4fa8d7bb79626018

update



Diff

 scrape_ecourtindia_v6/orders_scrape_courts.py        | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 scrape_ecourtindia_v6/scrape_orders.py               | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
 scrape_ecourtindia_v6/modules/scraper_case_status.py |   2 +-
 scrape_ecourtindia_v6/modules/scraper_orders.py      |  84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 257 insertions(+), 74 deletions(-)

diff --git a/scrape_ecourtindia_v6/orders_scrape_courts.py b/scrape_ecourtindia_v6/orders_scrape_courts.py
new file mode 100644
index 0000000..597ce9f 100644
--- /dev/null
+++ b/scrape_ecourtindia_v6/orders_scrape_courts.py
@@ -0,0 +1,130 @@
+import csv
+from time import sleep
+from modules.scraper_orders import ScraperOrders
+from selenium.webdriver.common.by import By
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
+
+class ThreadSafeCSVWriter:
+    def __init__(self, filename):
+        self.file = open(filename, 'w', newline='')
+        self.writer = csv.writer(self.file)
+        self.lock = threading.Lock()
+
+    def writerow(self, row):
+        with self.lock:
+            self.writer.writerow(row)
+            print(f'Wrote: {row}')
+
+    def close(self):
+        self.file.close()
+
+def scrape_district(state, district, csv_writer):
+    try:
+        config = {}
+        scraper = ScraperOrders(config)
+        scraper.close_modal()
+        
+        scraper.select('sess_state_code', state)
+        scraper.select('sess_dist_code', district)
+
+        complexes = scraper.scrape_complexes()
+        scraper.select('court_complex_code', complexes[0])
+
+        sleep(2)
+        scraper.goto_courtnumber()
+
+        for cmplx in complexes:
+            while True:
+                sleep(0.5)
+                try:
+                    modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
+                    if modal_is_open:
+                        scraper.close_modal()
+                        continue
+                    break
+                except:
+                    break
+            
+            scraper.select('court_complex_code', cmplx)
+            sleep(0.5)
+
+            court_numbers = scraper.get_court_numbers()
+            for court_number in court_numbers:
+                row = [state, district, cmplx, court_number]
+                csv_writer.writerow(row)
+        
+        scraper.driver.quit()
+    
+    except Exception as e:
+        print(f"Error scraping district {district}: {e}")
+
+def scrape_courts():
+    state = 'Uttar Pradesh'
+    
+    config = {}
+    scraper = ScraperOrders(config)
+    scraper.close_modal()
+    scraper.select('sess_state_code', state)
+    
+    districts = scraper.scrape_districts()
+    scraper.driver.quit()
+    
+    csv_writer = ThreadSafeCSVWriter('csv/court_numbers.csv')
+    csv_writer.writerow(['State', 'District', 'Cmplx', 'Court number'])
+    
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        futures = [
+            executor.submit(scrape_district, state, district, csv_writer) 
+            for district in districts
+        ]
+        
+        for future in as_completed(futures):
+            try:
+                future.result()
+            except Exception as e:
+                print(f"A thread encountered an error: {e}")
+    
+    csv_writer.close()
+
+def scrape_orders(courts):
+    csvfile = open(courts, newline='')
+    reader = csv.reader(csvfile)
+
+    for row in reader:
+        print(row)
+        config = {}
+        scraper = ScraperOrders(config)
+        scraper.close_modal()
+
+        scraper.select('sess_state_code', row[0])
+        scraper.select('sess_dist_code', row[1])
+
+        while True:
+            sleep(0.5)
+            try:
+                modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
+                if modal_is_open:
+                    scraper.close_modal()
+                    continue
+                break
+            except:
+                break
+
+        scraper.select('court_complex_code', row[2])
+        sleep(1)
+        scraper.goto_courtnumber()
+
+        scraper.select('nnjudgecode1', row[3])
+        scraper.driver.find_element(By.ID, 'radBoth2').click()
+        scraper.submit_search()
+
+        scraper.parse_orders_table()
+        scraper.handle_orders()
+
+        break
+
+    csvfile.close()
+
+if __name__ == '__main__':
+    scrape_orders('csv/2023-24_pocso.csv')
diff --git a/scrape_ecourtindia_v6/scrape_orders.py b/scrape_ecourtindia_v6/scrape_orders.py
index 74cdd56..54a2d80 100644
--- a/scrape_ecourtindia_v6/scrape_orders.py
+++ b/scrape_ecourtindia_v6/scrape_orders.py
@@ -1,82 +1,71 @@
 import csv
 from time import sleep
+
+from tinydb import TinyDB
 from modules.scraper_orders import ScraperOrders
 from selenium.webdriver.common.by import By
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
 import threading
 
-class ThreadSafeCSVWriter:
-    def __init__(self, filename):
-        self.file = open(filename, 'w', newline='')
-        self.writer = csv.writer(self.file)
+class ThreadSafeDB:
+    def __init__(self):
+        self.db = TinyDB('orders.json')
         self.lock = threading.Lock()
-
-    def writerow(self, row):
+    
+    def insert(self, record):
         with self.lock:
-            self.writer.writerow(row)
-            print(f'Wrote: {row}')
+            self.db.insert(record)
+            print(f'INSERTED: {record}')
 
-    def close(self):
-        self.file.close()
+db = ThreadSafeDB()
 
-def scrape_district(state, district, csv_writer):
+def scrape_single_court(row):
     try:
         config = {}
-        scraper = ScraperOrders(config)
+        scraper = ScraperOrders(db, config)
         scraper.close_modal()
         
-        scraper.select('sess_state_code', state)
-        scraper.select('sess_dist_code', district)
-
-        complexes = scraper.scrape_complexes()
-        scraper.select('court_complex_code', complexes[0])
-
-        sleep(2)
-        scraper.goto_courtnumber()
-
-        for cmplx in complexes:
-            while True:
-                sleep(0.5)
-                try:
-                    modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
-                    if modal_is_open:
-                        scraper.close_modal()
-                        continue
-                    break
-                except:
-                    break
-            
-            scraper.select('court_complex_code', cmplx)
+        scraper.select('sess_state_code', row[0])
+        scraper.select('sess_dist_code', row[1])
+        
+        while True:
             sleep(0.5)
-
-            court_numbers = scraper.get_court_numbers()
-            for court_number in court_numbers:
-                row = [state, district, cmplx, court_number]
-                csv_writer.writerow(row)
+            try:
+                modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
+                if modal_is_open:
+                    scraper.close_modal()
+                    continue
+                break
+            except:
+                break
+        
+        scraper.select('court_complex_code', row[2])
+        sleep(1)
+        scraper.goto_courtnumber()
+        scraper.select('nnjudgecode1', row[3])
+        
+        scraper.driver.find_element(By.ID, 'radBoth2').click()
+        
+        scraper.submit_search()
+        scraper.parse_orders_table()
+        scraper.handle_orders()
         
         scraper.driver.quit()
     
     except Exception as e:
-        print(f"Error scraping district {district}: {e}")
-
-def scrape_courts():
-    state = 'Uttar Pradesh'
-    
-    config = {}
-    scraper = ScraperOrders(config)
-    scraper.close_modal()
-    scraper.select('sess_state_code', state)
+        print(f"Error processing court {row}: {e}")
+
+def scrape_orders(courts_csv):
+    with open(courts_csv, newline='') as csvfile:
+        reader = csv.reader(csvfile)
+        next(reader, None)
+        courts = list(reader)
     
-    districts = scraper.scrape_districts()
-    scraper.driver.quit()
-    
-    csv_writer = ThreadSafeCSVWriter('csv/court_numbers.csv')
-    csv_writer.writerow(['State', 'District', 'Cmplx', 'Court number'])
-    
     with ThreadPoolExecutor(max_workers=5) as executor:
         futures = [
-            executor.submit(scrape_district, state, district, csv_writer) 
-            for district in districts
+            executor.submit(scrape_single_court, court) 
+            for court in courts
         ]
         
         for future in as_completed(futures):
@@ -85,16 +74,6 @@
             except Exception as e:
                 print(f"A thread encountered an error: {e}")
     
-    csv_writer.close()
-
-def scrape_orders(courts):
-    csvfile = open(courts, newline='')
-    reader = csv.reader(csvfile)
-
-    for row in reader:
-        print(row)
-
-    csvfile.close()
-
 if __name__ == '__main__':
-    scrape_orders('csv/2023-24_pocso.csv')
+    input_file = 'csv/2023-24_pocso.csv'
+    scrape_orders(input_file)
diff --git a/scrape_ecourtindia_v6/modules/scraper_case_status.py b/scrape_ecourtindia_v6/modules/scraper_case_status.py
index d9b925d..684d9d7 100644
--- a/scrape_ecourtindia_v6/modules/scraper_case_status.py
+++ b/scrape_ecourtindia_v6/modules/scraper_case_status.py
@@ -134,7 +134,7 @@
             script = order.find_all('a')[0].get_attribute_list('onclick')[0]
             self.driver.execute_script(script)
 
-            sleep(2)
+            sleep(0.7)
             obj = self.driver.find_element(By.TAG_NAME, 'object')
             pdf_url = str(obj.get_attribute('data'))
 
diff --git a/scrape_ecourtindia_v6/modules/scraper_orders.py b/scrape_ecourtindia_v6/modules/scraper_orders.py
index 78594e8..a9fe7be 100644
--- a/scrape_ecourtindia_v6/modules/scraper_orders.py
+++ b/scrape_ecourtindia_v6/modules/scraper_orders.py
@@ -1,17 +1,25 @@
 from time import sleep
+import tempfile
+import uuid
+import os
+
+from urllib import request
+
 from bs4 import BeautifulSoup
+
 import cv2
 import pytesseract
+
 from selenium.webdriver.common.by import By
+
 from selenium.webdriver.support.select import Select
-from tinydb import TinyDB
-
 from .scraper import Scraper
+
 class ScraperOrders(Scraper):
-    def __init__(self, config):
-        Scraper.__init__(self, 'https://services.ecourts.gov.in/ecourtindia_v6/?p=courtorder/index')
+    def __init__(self, db, config):
+        Scraper.__init__(self, 'https://services.ecourts.gov.in/ecourtindia_v6/?p=courtorder/index', headless=True)
 
-        self.db = TinyDB('db.json')
+        self.db = db
         self.config = config
 
     def goto_courtnumber(self):
@@ -27,3 +35,69 @@
         print(f'COURT NUMBERS: {court_numbers}')
 
         return court_numbers
+
+    def submit_search(self):
+        captcha_incomplete = True
+        while captcha_incomplete:
+            img = self.driver.find_element(By.ID, 'captcha_image')
+            temp = tempfile.NamedTemporaryFile(suffix='.png')
+            img.screenshot(temp.name)
+
+            img = cv2.imread(temp.name)
+            text = pytesseract.image_to_string(img).strip()
+
+            element = self.driver.find_element(By.ID, 'order_no_captcha_code')
+            element.send_keys(text)
+
+            self.driver.execute_script('submitCourtNumber()')
+            sleep(3)
+
+            if self.driver.find_element(By.CLASS_NAME, 'alert-danger-cust').is_displayed():
+                self.close_modal()
+                element.clear()
+            else:
+                captcha_incomplete = False
+
+    def parse_orders_table(self):
+        try:
+            table_innerhtml = self.driver.find_element(By.ID, 'dispTable').get_attribute('innerHTML')
+        except:
+            return
+
+        rows = BeautifulSoup(str(table_innerhtml), 'html.parser').find_all('td')
+        self.rows = []
+        i = 6
+        while i < len(rows):
+            self.rows.append([ rows[i], rows[i-1].text, rows[i-2].text, rows[i-3].text ])
+            i += 5
+
+    def handle_orders(self):
+        for row in self.rows:
+            order = row[0]
+
+            script = order.find_all('a')[0].get_attribute_list('onclick')[0]
+            self.driver.execute_script(script)
+
+            sleep(0.7)
+            obj = self.driver.find_elements(By.TAG_NAME, 'object')[-1]
+            pdf_url = str(obj.get_attribute('data'))
+
+            while True:
+                filename = f"pdf/{uuid.uuid4().hex}.pdf"
+                if not os.path.exists(filename):
+                    break
+
+            cookies = "; ".join([f"{c['name']}={c['value']}" for c in self.driver.get_cookies()])
+            r = request.Request(pdf_url)
+            r.add_header("Cookie", cookies)
+
+            try:
+                with request.urlopen(r) as response, open(filename, "wb") as file:
+                    file.write(response.read())
+            except:
+                print(f'UNABLE TO FETCH PDF: {pdf_url}')
+
+            record = { 'case_info': row[3], 'petitioner_respondent': row[2], 'date': row[1], 'filename': filename }
+            self.db.insert(record)
+
+            self.driver.find_element(By.ID, 'modalOders').find_element(By.CLASS_NAME, 'btn-close').click()