From 959c5e6eaf0b5879c8277fe58685d28ec94f98d0 Mon Sep 17 00:00:00 2001
From: Raghuram Subramani <raghus2247@gmail.com>
Date: Thu, 27 Mar 2025 23:20:24 +0530
Subject: [PATCH] clean: replace FastAPI viewer with Flask PDF server, drop unused files

---
 scrape_ecourtindia_v6/.gitignore           |  1 +
 scrape_ecourtindia_v6/clean.sh             |  2 +-
 scrape_ecourtindia_v6/requirements.txt     |  3 ---
 scrape_ecourtindia_v6/run.sh               |  3 ---
 scrape_ecourtindia_v6/scrape_orders.py     |  4 ++--
 scrape_ecourtindia_v6/web.py               | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 scrape_ecourtindia_v6/templates/index.html | 40 ----------------------------------------
 7 files changed, 71 insertions(+), 64 deletions(-)

diff --git a/scrape_ecourtindia_v6/.gitignore b/scrape_ecourtindia_v6/.gitignore
index 62236f3..f390c7e 100644
--- a/scrape_ecourtindia_v6/.gitignore
+++ a/scrape_ecourtindia_v6/.gitignore
@@ -1,2 +1,3 @@
 courts.csv
 csv/*
+orders.json
diff --git a/scrape_ecourtindia_v6/clean.sh b/scrape_ecourtindia_v6/clean.sh
index 8c8a0ab..a38f202 100755
--- a/scrape_ecourtindia_v6/clean.sh
+++ a/scrape_ecourtindia_v6/clean.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
-rm -r html/* pdf/* db.json
+rm -rf html/* pdf/* *.json  # NOTE(review): *.json deletes every JSON file in the working directory - confirm this is intended
 
 mkdir -p html pdf
diff --git a/scrape_ecourtindia_v6/requirements.txt b/scrape_ecourtindia_v6/requirements.txt
deleted file mode 100644
index 78bea83..0000000 100644
--- a/scrape_ecourtindia_v6/requirements.txt
+++ /dev/null
@@ -1,3 +1,0 @@
-selenium
-opencv-python
-pytesseract
diff --git a/scrape_ecourtindia_v6/run.sh b/scrape_ecourtindia_v6/run.sh
deleted file mode 100644
index de47eaf..0000000 100644
--- a/scrape_ecourtindia_v6/run.sh
+++ /dev/null
@@ -1,3 +1,0 @@
-#!/usr/bin/env bash
-
-uvicorn web:app --reload
diff --git a/scrape_ecourtindia_v6/scrape_orders.py b/scrape_ecourtindia_v6/scrape_orders.py
index b0dc311..146119e 100644
--- a/scrape_ecourtindia_v6/scrape_orders.py
+++ a/scrape_ecourtindia_v6/scrape_orders.py
@@ -43,8 +43,9 @@
         scraper.select('court_complex_code', row[2])
         sleep(1)
         scraper.goto_courtnumber()
-        sleep(0.6)
+        sleep(1)
         scraper.select('nnjudgecode1', row[3])
+        sleep(1)
         
         scraper.driver.find_element(By.ID, 'radBoth2').click()
         
@@ -60,7 +61,6 @@
 def scrape_orders(courts_csv):
     with open(courts_csv, newline='') as csvfile:
         reader = csv.reader(csvfile)
-        next(reader, None)
         courts = list(reader)
     
     with ThreadPoolExecutor(max_workers=5) as executor:
diff --git a/scrape_ecourtindia_v6/web.py b/scrape_ecourtindia_v6/web.py
index 195b81f..a0bf0b0 100644
--- a/scrape_ecourtindia_v6/web.py
+++ a/scrape_ecourtindia_v6/web.py
@@ -1,20 +1,72 @@
-from tinydb import TinyDB
+import os
+from flask import Flask, send_from_directory, abort
 
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
+app = Flask(__name__)
 
-db = TinyDB('db.json')
-app = FastAPI()
+# Directory where PDFs are stored
+PDF_DIRECTORY = './pdf'
 
-app.mount("/html", StaticFiles(directory="html"), name="html")
-app.mount("/pdf", StaticFiles(directory="pdf"), name="pdf")
+@app.route('/pdf/<filename>')
+def view_pdf(filename):
+    """
+    Route to view a PDF file from the specified directory.
+
+    Args:
+        filename (str): Name of the PDF file to display
+
+    Returns:
+        The PDF file, sent inline rather than as a download.
+
+    Raises:
+        400 (via abort) when the name does not end in '.pdf'.
+        404 (from send_from_directory) when the file is missing.
+    """
+    # Do not wrap this body in `except Exception`: abort() works by
+    # raising an HTTPException, so a broad handler would catch the
+    # intended 400/404 responses and turn every one of them into a 500
+    # that also echoes the internal error text to the client.
+    if not filename.endswith('.pdf'):
+        abort(400, description="Invalid file type. Only PDF files are allowed.")
+
+    # send_from_directory raises 404 itself when the file is missing or
+    # the name would escape the directory, so a separate
+    # os.path.exists() pre-check would only add a check-then-use race.
+    return send_from_directory(PDF_DIRECTORY, filename, as_attachment=False)
 
-templates = Jinja2Templates(directory="templates")
+@app.route('/pdf')
+def list_pdfs():
+    """
+    Route to list all available PDF files in the directory.
+
+    Returns:
+        HTML page with list of PDFs, or a 500 error if listing fails
+    """
+    from html import escape
+    from urllib.parse import quote
+
+    try:
+        # os.listdir order is arbitrary; sort so the page is stable.
+        pdf_files = sorted(f for f in os.listdir(PDF_DIRECTORY) if f.endswith('.pdf'))
+    except OSError as e:
+        abort(500, description=f"Error listing PDFs: {str(e)}")
+
+    # File names are data: percent-encode for the href, HTML-escape for text.
+    pdf_links = "\n".join(
+        f'<li><a href="/pdf/{quote(name)}">{escape(name)}</a></li>' for name in pdf_files
+    )
+    return f"""
+        <html>
+            <head><title>PDF Viewer</title></head>
+            <body>
+                <h1>Available PDFs</h1>
+                <ul>{pdf_links}</ul>
+            </body>
+        </html>
+        """
 
-@app.get("/", response_class=HTMLResponse)
-async def index(request: Request):
-    return templates.TemplateResponse(
-            request=request, name="index.html", context={ 'views': db.all() }
-    )
+if __name__ == '__main__':
+    # Ensure PDF directory exists
+    os.makedirs(PDF_DIRECTORY, exist_ok=True)
+    # The Werkzeug debugger allows arbitrary code execution, so keep it off
+    # by default while listening on all interfaces; opt in via FLASK_DEBUG=1.
+    app.run(host='0.0.0.0', port=8000, debug=os.environ.get('FLASK_DEBUG') == '1')
diff --git a/scrape_ecourtindia_v6/templates/index.html b/scrape_ecourtindia_v6/templates/index.html
deleted file mode 100644
index 0b01b77..0000000 100644
--- a/scrape_ecourtindia_v6/templates/index.html
+++ /dev/null
@@ -1,40 +1,0 @@
-<html>
-<head>
-  <title>Index</title>
-  <link
-    rel="stylesheet"
-    href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
-  >
-  <meta charset="utf-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1">
-  <meta name="color-scheme" content="light dark">
-</head>
-<body>
-  <main class="container">
-    <table>
-      <thead>
-        <tr>
-          <th scope="col">Case Info</th>
-          <th scope="col">Petitioner/Respondent</th>
-          <th scope="col">HTML File</th>
-          <th scope="col">Orders</th>
-        </tr>
-      </thead>
-      <tbody>
-        {% for view in views %}
-          <tr>
-            <th scope="row">{{ view.case_info }}</th>
-            <td>{{ view.petitioner_respondent }}</td>
-            <td><a href='{{ view.htmlfile }}'>Open</a></td>
-            <td>
-              {% for pdf in view.pdfs %}
-                <a href='{{ pdf }}'>Open</a>
-              {% endfor %}
-            </td>
-          </tr>
-        {% endfor %}
-      </tbody>
-    </table>
-  </main>
-</body>
-</html>
--
rgit 0.1.5