🏡 index : github.com/compromyse/enfold.git

author Raghuram Subramani <raghus2247@gmail.com> 2025-05-11 19:20:58.0 +05:30:00
committer Raghuram Subramani <raghus2247@gmail.com> 2025-05-11 19:20:58.0 +05:30:00
commit
a44d6029f2aed0389750ce0cd694a369450fef57 [patch]
tree
b98a254b308d7fea6ddcb0ffc347d1ebeadce935
parent
af8665ad224f923bdffd26fa75e7d4a8714b976c
download
a44d6029f2aed0389750ce0cd694a369450fef57.tar.gz

update



Diff

 web/app/job_manager.py       | 14 ++++++++++----
 web/app/main.py              |  8 ++++----
 web/app/jobs/scrape_cases.py | 42 ++++++++++++++++++++++++++++++------------
 web/app/templates/home.html  |  5 ++---
 4 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/web/app/job_manager.py b/web/app/job_manager.py
index abec11d..fa486f3 100644
--- a/web/app/job_manager.py
+++ b/web/app/job_manager.py
@@ -8,18 +8,22 @@
        self.redis = Redis()
        self.q = Queue(connection=self.redis)

    def enqueue_scrape(self, name, acts, section, state_code):
    def enqueue_scrape(self, name, acts, sections, state_code):
        # 4 hour timeout
        return self.q.enqueue(
            scrape_cases,
            name,
            acts,
            section,
            sections,
            state_code,
            job_timeout=14400
        )

    def get_started_jobs(self):
    def get_jobs(self):
        started_job_ids = self.q.started_job_registry.get_job_ids()
        jobs = [Job.fetch(job_id, connection=self.redis) for job_id in started_job_ids]
        return jobs
        started_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in started_job_ids]

        finished_job_ids = self.q.finished_job_registry.get_job_ids()
        finished_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in finished_job_ids]

        return started_jobs + finished_jobs
diff --git a/web/app/main.py b/web/app/main.py
index cc3f995..1266cb8 100644
--- a/web/app/main.py
+++ b/web/app/main.py
@@ -21,7 +21,7 @@
@main.route('/')
@login_required
def home():
    jobs = job_manager.get_started_jobs()
    jobs = job_manager.get_jobs()
    return render_template('home.html', user=current_user, states=states, acts=act_list, jobs=jobs)

@main.route('/logout')
@@ -57,14 +57,14 @@
@login_required
def enqueue_job():
    acts = request.form.getlist('act')
    section = request.form.get('section')
    sections = request.form.get('section').split(',')
    state_code = request.form.get('state_code')
    name = request.form.get('name')

    if not section:
        section = ''

    job = job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, section, state_code)
    job = job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, sections, state_code)

    flash('Job created.', 'info')
    return redirect(url_for('main.home'))
@@ -72,5 +72,5 @@
@main.route('/download/<filename>')
@login_required
def download_output(filename):
    output_dir = os.path.join(os.getcwd(), 'outputs')
    output_dir = os.path.join(os.getcwd(), 'app/outputs')
    return send_from_directory(output_dir, f'{filename}.csv', as_attachment=True)
diff --git a/web/app/jobs/scrape_cases.py b/web/app/jobs/scrape_cases.py
index 9cd4930..7a944f1 100644
--- a/web/app/jobs/scrape_cases.py
+++ b/web/app/jobs/scrape_cases.py
@@ -1,10 +1,11 @@
from app.modules.interface import Interface
from tinydb import TinyDB
from bs4 import BeautifulSoup
import time
import csv

def scrape_cases(name, acts, section, state_code):
def scrape_cases(name, acts, sections, state_code):
    acts = set(acts)
    db = TinyDB(f'app/outputs/{name}.json')
    interface = Interface()

@@ -31,29 +32,30 @@
                print(f'ESTABLISHMENT: {i}/{len(court_establishments)}')

                for act in acts:
                    try:
                        cases = interface.search_by_act(state_code, dist_code, court_establishment, act, section)
                    except Exception as e:
                        print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
                        continue

                    for j, case in enumerate(cases, 1):
                        print(f'CASE: {j}/{len(cases)}')

                    for section in sections:
                        try:
                            case_no = case['case_no']
                            case_history = interface.case_history(state_code, dist_code, court_establishment, case_no)
                            cases = interface.search_by_act(state_code, dist_code, court_establishment, act, section)
                        except Exception as e:
                            print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}")
                            print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
                            continue

                        try:
                            case_history['case_no'] = case_no
                            case_history['complex_name'] = complex_name
                            db.insert(case_history)

                        except Exception as e:
                            print(f"[ERROR] Failed to parse orders for case {case_no}: {e}")
                        for j, case in enumerate(cases, 1):
                            print(f'CASE: {j}/{len(cases)}')

                            try:
                                case_no = case['case_no']
                                case_history = interface.case_history(state_code, dist_code, court_establishment, case_no)
                            except Exception as e:
                                print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}")
                                continue

                            try:
                                case_history['case_no'] = case_no
                                case_history['complex_name'] = complex_name
                                db.insert(case_history)

                            except Exception as e:
                                print(f"[ERROR] Failed to parse orders for case {case_no}: {e}")
    
    entries = db.all()

diff --git a/web/app/templates/home.html b/web/app/templates/home.html
index 809269d..d35767a 100644
--- a/web/app/templates/home.html
+++ b/web/app/templates/home.html
@@ -30,7 +30,8 @@
      {% endfor %}
    </select>

    <input type="text" name="section" placeholder="Section">
    <label for="act">Comma Separated Section List</label>
    <input type="text" name="section" placeholder="Sections">

    <select name="state_code">
      {% for code, name in states %}
@@ -56,7 +57,6 @@
      <th scope="col">Job Name</th>
      <th scope="col">Job Status</th>
      <th scope="col">Output</th>
      <th scope="col">Log</th>
    </tr>
  </thead>
  <tbody>
@@ -65,7 +65,6 @@
      <td>{{ job.args[0] }}</td>
      <td>{{ job._status }}</td>
      <td><a href="{{ url_for('main.download_output', filename=job.args[0]) }}">Download</a></td>
      <td>Running</td>
    </tr>
    {% endfor %}
  </tbody>