From 250d8c18777c7e0ba87ec4334d3b316d4e6bd0dd Mon Sep 17 00:00:00 2001
From: Raghuram Subramani <raghus2247@gmail.com>
Date: Mon, 12 May 2025 11:00:36 +0530
Subject: [PATCH] Refactor scrape_cases into helpers and list completed jobs on the home page

---
 web/app/.gitignore           |   1 +
 web/app/job_manager.py       |   5 +----
 web/app/main.py              |  14 +++++++-------
 web/app/jobs/scrape_cases.py | 242 ++++++++++++++++++++++++++++++++++++++++++--------------------------------------
 web/app/templates/home.html  |   7 +++++++
 5 files changed, 130 insertions(+), 139 deletions(-)

diff --git a/web/app/.gitignore b/web/app/.gitignore
new file mode 100644
index 0000000..17aa483
--- /dev/null
+++ b/web/app/.gitignore
@@ -0,0 +1 @@
+outputs/
diff --git a/web/app/job_manager.py b/web/app/job_manager.py
index bdbe994..3d588a6 100644
--- a/web/app/job_manager.py
+++ b/web/app/job_manager.py
@@ -25,7 +25,4 @@
         started_job_ids = self.q.started_job_registry.get_job_ids()
         started_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in started_job_ids]
 
-        finished_job_ids = self.q.finished_job_registry.get_job_ids()
-        finished_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in finished_job_ids]
-
-        return queued_jobs + started_jobs + finished_jobs
+        return queued_jobs + started_jobs
diff --git a/web/app/main.py b/web/app/main.py
index cc141b8..e834c0c 100644
--- a/web/app/main.py
+++ b/web/app/main.py
@@ -1,6 +1,7 @@
 from flask import request, flash, send_from_directory
 from flask import Blueprint, render_template, redirect, url_for
 from flask_login import login_required, logout_user, current_user
+from tinydb import TinyDB
 from .models import User
 
 import json
@@ -9,8 +10,6 @@
 
 from .modules.interface import Interface
 from .job_manager import JobManager
-
-from tinydb import TinyDB
 
 states = Interface().get_states()
 act_list = json.loads(open('app/acts.json').read())
@@ -22,7 +21,8 @@
 @login_required
 def home():
     jobs = job_manager.get_jobs()
-    return render_template('home.html', user=current_user, states=states, acts=act_list, jobs=jobs)
+    completed_jobs = TinyDB('app/jobs.json').all()
+    return render_template('home.html', user=current_user, states=states, acts=act_list, completed_jobs=completed_jobs, jobs=jobs)
 
 @main.route('/logout')
 @login_required
@@ -58,14 +58,14 @@
 @login_required
 def enqueue_job():
     acts = request.form.getlist('act')
-    sections = request.form.get('section').split(',')
+    sections = request.form.get('section', '').split(',')
     state_code = request.form.get('state_code')
     name = request.form.get('name')
 
-    if not section:
-        section = ''
+    if not any(sections):
+        sections = ['']
 
-    job = job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, sections, state_code)
+    job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, sections, state_code)
 
     flash('Job created.', 'info')
     return redirect(url_for('main.home'))
diff --git a/web/app/jobs/scrape_cases.py b/web/app/jobs/scrape_cases.py
index 7a944f1..237acbc 100644
--- a/web/app/jobs/scrape_cases.py
+++ b/web/app/jobs/scrape_cases.py
@@ -1,160 +1,146 @@
 from app.modules.interface import Interface
-from tinydb import TinyDB
 from bs4 import BeautifulSoup
-import time
 import csv
 
-def scrape_cases(name, acts, sections, state_code):
-    acts = set(acts)
-    db = TinyDB(f'app/outputs/{name}.json')
-    interface = Interface()
+from tinydb import TinyDB
+
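+# Registry of completed jobs; the home view reads this file to list finished scrapes.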
+db = TinyDB('app/jobs.json')
 
+def get_districts(interface, state_code):
     try:
-        districts = interface.get_districts(state_code)
+        return interface.get_districts(state_code)
     except Exception as e:
         print(f"[ERROR] Failed to scrape districts: {e}")
-        districts = []
+        return []
+
+def get_complexes(interface, state_code, dist_code, dist_name):
+    try:
+        return interface.get_complexes(state_code, dist_code)
+    except Exception as e:
+        print(f"[ERROR] Failed to scrape complexes for {dist_name}: {e}")
+        return []
+
+def fetch_cases(interface, state_code, dist_code, court_establishment, act, section, complex_name):
+    try:
+        return interface.search_by_act(state_code, dist_code, court_establishment, act, section)
+    except Exception as e:
+        print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
+        return []
+
+def fetch_case_history(interface, state_code, dist_code, court_establishment, case_no):
+    try:
+        return interface.case_history(state_code, dist_code, court_establishment, case_no)
+    except Exception as e:
+        print(f"[ERROR] Failed to get history for case {case_no}: {e}")
+        return None
+
+def parse_orders(order_html):
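+    # Pull (date, link) pairs out of the order table embedded in the case-history HTML.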
+    soup = BeautifulSoup(order_html or '', features="html.parser")
+    orders = []
+    for row in soup.select('table.tbl-result tbody tr'):
+        cells = row.find_all('td')
+        if len(cells) >= 2:
+            order_date = cells[1].get_text(strip=True)
+            link_tag = cells[2].find('a', href=True) if len(cells) > 2 else None
+            if link_tag:
+                orders.append({'date': order_date, 'link': link_tag['href']})
+    return orders
+
+def parse_acts(entry, all_acts):
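+    # Flatten the acts/sections table into "Act: Section" lines, collecting distinct act names in all_acts.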
+    soup = BeautifulSoup(entry.get('act') or '', 'html.parser')
+    acts = []
+    for row in soup.select('tbody tr'):
+        cells = row.find_all('td')
+        if len(cells) == 2:
+            act = cells[0].get_text(strip=True)
+            section = cells[1].get_text(strip=True)
+            if act not in all_acts:
+                all_acts.append(act)
+            acts.append(f"{act}: {section}")
+    return '\n'.join(acts)
+
+def write_to_csv(entries, key_mapping, name):
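+    # One row per case; order columns are padded to the widest entry and written as spreadsheet HYPERLINK formulas.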
+    max_final = max((len(entry.get('final_orders', [])) for entry in entries), default=0)
+    max_interim = max((len(entry.get('interim_orders', [])) for entry in entries), default=0)
+
+    with open(f'app/outputs/{name}.csv', 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.writer(csvfile)
+        headers = list(key_mapping.values()) + \
+                  [f'Final Order {i+1}' for i in range(max_final)] + \
+                  [f'Interim Order {i+1}' for i in range(max_interim)]
+        writer.writerow(headers)
+
+        for entry in entries:
+            row = [entry.get(key, '') for key in key_mapping]
+
+            for order in entry.get('final_orders', []):
+                row.append(f'=HYPERLINK("{order["link"]}", "{order["date"]}")')
+            row += [''] * (max_final - len(entry.get('final_orders', [])))
+
+            for order in entry.get('interim_orders', []):
+                row.append(f'=HYPERLINK("{order["link"]}", "{order["date"]}")')
+            row += [''] * (max_interim - len(entry.get('interim_orders', [])))
 
+            writer.writerow(row)
+
+def scrape_cases(name, acts, sections, state_code):
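+    # Walk district -> court complex -> establishment, searching every act/section pair and collecting case histories.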
+    acts = set(acts)
+    entries = []
+    interface = Interface()
+
+    districts = get_districts(interface, state_code)
     for dist_code, dist_name in districts:
         print(f'DISTRICT: {dist_name}')
+        complexes = get_complexes(interface, state_code, dist_code, dist_name)
 
-        try:
-            complexes = interface.get_complexes(state_code, dist_code)
-        except Exception as e:
-            print(f"[ERROR] Failed to scrape complexes for {dist_name}: {e}")
-            continue
-
         for complex_code, complex_name in complexes:
             print(f'COMPLEX: {complex_name}')
-
             court_establishments = str(complex_code).split(',')
+
             for i, court_establishment in enumerate(court_establishments, 1):
                 print(f'ESTABLISHMENT: {i}/{len(court_establishments)}')
 
                 for act in acts:
                     for section in sections:
-                        try:
-                            cases = interface.search_by_act(state_code, dist_code, court_establishment, act, section)
-                        except Exception as e:
-                            print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
-                            continue
+                        cases = fetch_cases(interface, state_code, dist_code, court_establishment, act, section, complex_name)
 
                         for j, case in enumerate(cases, 1):
                             print(f'CASE: {j}/{len(cases)}')
-
-                            try:
-                                case_no = case['case_no']
-                                case_history = interface.case_history(state_code, dist_code, court_establishment, case_no)
-                            except Exception as e:
-                                print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}")
+                            case_no = case.get('case_no')
+                            if not case_no:
                                 continue
 
-                            try:
-                                case_history['case_no'] = case_no
-                                case_history['complex_name'] = complex_name
-                                db.insert(case_history)
+                            case_history = fetch_case_history(interface, state_code, dist_code, court_establishment, case_no)
+                            if not case_history:
+                                continue
 
-                            except Exception as e:
-                                print(f"[ERROR] Failed to parse orders for case {case_no}: {e}")
-    
-    entries = db.all()
+                            case_history['case_no'] = case_no
+                            case_history['complex_name'] = complex_name
+                            entries.append(case_history)
 
     key_mapping = {
-        'case_no': 'Case Number',
-        'cino': 'CNR Number',
-        'type_name': 'Case Type',
-
-        'reg_no': 'Registration Number',
-        'reg_year': 'Registration Year',
-
-        'district_name': 'District',
-        'complex_name': 'Complex Name',
-        'court_name': 'Court Name',
-
-        'dt_regis': 'Registration Date',
-        'date_of_filing': 'Date of Filing',
-        'date_of_decision': 'Date of Decision',
-        'disp_name': 'Disposition',
-
-        'acts': 'Acts',
-
-        'pet_name': 'Petitioner',
-        'pet_adv': 'Petitioner Advocate',
-        'petparty_name': 'Petitioner Party Name',
-
-        'res_name': 'Respondent',
-        'res_adv': 'Respondent Advocate',
-        'resparty_name': 'Respondent Party Name'
+        'case_no': 'Case Number', 'cino': 'CNR Number', 'type_name': 'Case Type',
+        'reg_no': 'Registration Number', 'reg_year': 'Registration Year',
+        'district_name': 'District', 'complex_name': 'Complex Name', 'court_name': 'Court Name',
+        'dt_regis': 'Registration Date', 'date_of_filing': 'Date of Filing', 'date_of_decision': 'Date of Decision',
+        'disp_name': 'Disposition', 'acts': 'Acts',
+        'pet_name': 'Petitioner', 'pet_adv': 'Petitioner Advocate', 'petparty_name': 'Petitioner Party Name',
+        'res_name': 'Respondent', 'res_adv': 'Respondent Advocate', 'resparty_name': 'Respondent Party Name'
     }
 
     all_acts = []
-
     for entry in entries:
-        soup = BeautifulSoup(entry.get('finalOrder') or '', features="html.parser")
-        final_orders = []
-        for row in soup.select('table.tbl-result tbody tr'):
-            cells = row.find_all('td')
-            if len(cells) >= 2:
-                order_date = cells[1].get_text(strip=True)
-                link_tag = cells[2].find('a', href=True) if len(cells) > 2 else None
-                if link_tag:
-                    final_orders.append({'date': order_date, 'link': link_tag['href']})
-
-        soup = BeautifulSoup(entry.get('interimOrder') or '', features="html.parser")
-        interim_orders = []
-        for row in soup.select('table.tbl-result tbody tr'):
-            cells = row.find_all('td')
-            if len(cells) >= 2:
-                order_date = cells[1].get_text(strip=True)
-                link_tag = cells[2].find('a', href=True) if len(cells) > 2 else None
-                if link_tag:
-                    interim_orders.append({'date': order_date, 'link': link_tag['href']})
-
-        act_html = entry.get('act', '')
-        soup = BeautifulSoup(act_html, 'html.parser')
-
-        acts = []
-        for row in soup.select('tbody tr'):
-            cells = row.find_all('td')
-            if len(cells) == 2:
-                act = cells[0].get_text(strip=True)
-                section = cells[1].get_text(strip=True)
-                if act not in all_acts:
-                    all_acts.append(act)
-
-                acts.append(f"{act}: {section}")
-
-        entry['acts'] = '\n'.join(acts)
-        entry['final_orders'] = final_orders
-        entry['interim_orders'] = interim_orders
-
-    max_final = max(len(entry.get('final_orders', [])) for entry in entries)
-    max_interim = max(len(entry.get('interim_orders', [])) for entry in entries)
-
-    with open(f'app/outputs/{name}.csv', 'w', newline='', encoding='utf-8') as csvfile:
-        writer = csv.writer(csvfile)
-
-        headers = list(key_mapping.values())
-
-        headers += [f'Final Order {i+1}' for i in range(max_final)]
-        headers += [f'Interim Order {i+1}' for i in range(max_interim)]
-        writer.writerow(headers)
-
-        for entry in entries:
-            row = []
-            for key in key_mapping:
-                row.append(entry.get(key, ''))
-
-            final_orders = entry.get('final_orders', [])
-            for order in final_orders:
-                hyperlink = f'=HYPERLINK("{order["link"]}", "{order["date"]}")'
-                row.append(hyperlink)
-            row += [''] * (max_final - len(final_orders))
-
-            interim_orders = entry.get('interim_orders', [])
-            for order in interim_orders:
-                hyperlink = f'=HYPERLINK("{order["link"]}", "{order["date"]}")'
-                row.append(hyperlink)
-            row += [''] * (max_interim - len(interim_orders))
-
-            writer.writerow(row)
+        entry['final_orders'] = parse_orders(entry.get('finalOrder'))
+        entry['interim_orders'] = parse_orders(entry.get('interimOrder'))
+        entry['acts'] = parse_acts(entry, all_acts)
+
+    write_to_csv(entries, key_mapping, name)
+
+    db.insert({
+        "name": name
+    })
diff --git a/web/app/templates/home.html b/web/app/templates/home.html
index 797c66d..7caff64 100644
--- a/web/app/templates/home.html
+++ b/web/app/templates/home.html
@@ -62,6 +62,13 @@
     </tr>
   </thead>
   <tbody>
+    {% for job in completed_jobs %}
+    <tr>
+      <td>{{ job['name'] }}</td>
+      <td>COMPLETED</td>
+      <td><a href="{{ url_for('main.download_output', filename=job['name']) }}">Download</a></td>
+    </tr>
+    {% endfor %}
     {% for job in jobs %}
     <tr>
       <td>{{ job.args[0] }}</td>
--
rgit 0.1.5