clean
Diff
scrape_ecourtindia_v6/.gitignore | 1 +
scrape_ecourtindia_v6/clean.sh | 2 +-
scrape_ecourtindia_v6/requirements.txt | 3 ---
scrape_ecourtindia_v6/run.sh | 3 ---
scrape_ecourtindia_v6/scrape_orders.py | 4 ++--
scrape_ecourtindia_v6/web.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
scrape_ecourtindia_v6/templates/index.html | 40 ----------------------------------------
7 files changed, 71 insertions(+), 64 deletions(-)
@@ -1,2 +1,3 @@
courts.csv
csv/*
orders.json
@@ -1,4 +1,4 @@
rm -r html/* pdf/* db.json
rm -rf html/* pdf/* *.json
mkdir -p html pdf
@@ -1,3 +1,0 @@
selenium
opencv-python
pytesseract
@@ -1,3 +1,0 @@
uvicorn web:app --reload
@@ -43,8 +43,9 @@
scraper.select('court_complex_code', row[2])
sleep(1)
scraper.goto_courtnumber()
sleep(0.6)
sleep(1)
scraper.select('nnjudgecode1', row[3])
sleep(1)
scraper.driver.find_element(By.ID, 'radBoth2').click()
@@ -60,7 +61,6 @@
def scrape_orders(courts_csv):
with open(courts_csv, newline='') as csvfile:
reader = csv.reader(csvfile)
next(reader, None)
courts = list(reader)
with ThreadPoolExecutor(max_workers=5) as executor:
@@ -1,20 +1,72 @@
from tinydb import TinyDB
import os
from flask import Flask, send_from_directory, abort
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
app = Flask(__name__)
db = TinyDB('db.json')
app = FastAPI()
PDF_DIRECTORY = './pdf'
app.mount("/html", StaticFiles(directory="html"), name="html")
app.mount("/pdf", StaticFiles(directory="pdf"), name="pdf")
@app.route('/pdf/<filename>')
def view_pdf(filename):
    """
    Serve a single PDF from PDF_DIRECTORY for inline viewing.

    Args:
        filename (str): Name of the PDF file to display.

    Returns:
        The file response built by ``send_from_directory``.

    Aborts with 400 when the name does not end in ``.pdf`` and with 404
    when no such file exists in PDF_DIRECTORY.
    """
    # Deliberately no blanket ``except Exception`` around this body: abort()
    # signals its status by raising an HTTPException, so a catch-all would
    # intercept the 400/404 below and re-report them as 500 while echoing the
    # internal error text to the client. Unexpected failures still end up as
    # a 500 through Flask's own error handling.
    if not filename.endswith('.pdf'):
        abort(400, description="Invalid file type. Only PDF files are allowed.")

    filepath = os.path.join(PDF_DIRECTORY, filename)
    # isfile (not exists) so a directory that happens to be named "*.pdf"
    # is reported as missing rather than handed to send_from_directory.
    if not os.path.isfile(filepath):
        abort(404, description="PDF file not found")

    return send_from_directory(PDF_DIRECTORY, filename, as_attachment=False)
templates = Jinja2Templates(directory="templates")
@app.route('/pdf')
def list_pdfs():
    """
    List every PDF found in PDF_DIRECTORY as an HTML page of links.

    Returns:
        str: HTML document with one ``/pdf/<name>`` link per file, sorted
        by file name.

    Aborts with 500 when the directory cannot be read.
    """
    # Local imports keep this block self-contained; both are stdlib.
    from html import escape
    from urllib.parse import quote

    try:
        # Sorted so the page order is stable; os.listdir order is arbitrary.
        pdf_files = sorted(
            f for f in os.listdir(PDF_DIRECTORY) if f.endswith('.pdf')
        )
    except OSError as e:
        abort(500, description=f"Error listing PDFs: {str(e)}")

    # File names come from disk and may contain characters that are special
    # in URLs or HTML, so percent-encode the href and escape the link text.
    pdf_links = "\n".join(
        f'<li><a href="/pdf/{quote(file)}">{escape(file)}</a></li>'
        for file in pdf_files
    )
    return f"""
    <html>
    <head><title>PDF Viewer</title></head>
    <body>
    <h1>Available PDFs</h1>
    <ul>{pdf_links}</ul>
    </body>
    </html>
    """
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render ``index.html``, passing every record from ``db`` as ``views``."""
    records = db.all()
    page_context = {'views': records}
    return templates.TemplateResponse(
        request=request,
        name="index.html",
        context=page_context,
    )
# Script entry point: only runs when this module is executed directly.
if __name__ == '__main__':
    # Create the PDF directory up front; exist_ok makes this a no-op when it
    # is already there.
    os.makedirs(PDF_DIRECTORY, exist_ok=True)
    # NOTE(review): debug=True combined with host='0.0.0.0' looks like it
    # exposes the interactive debugger on every network interface — confirm
    # this is only ever run on a trusted development machine.
    app.run(host='0.0.0.0', port=8000, debug=True)
@@ -1,40 +1,0 @@
<!DOCTYPE html>
{# Table of scraped cases: one row per entry in `views`, with a link to the
   saved HTML page and one link per order PDF. #}
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="color-scheme" content="light dark">
    <title>Index</title>
    <link
      rel="stylesheet"
      href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
    >
  </head>
  <body>
    <main class="container">
      <table>
        <thead>
          <tr>
            <th scope="col">Case Info</th>
            <th scope="col">Petitioner/Respondent</th>
            <th scope="col">HTML File</th>
            <th scope="col">Orders</th>
          </tr>
        </thead>
        <tbody>
          {% for view in views %}
          <tr>
            <th scope="row">{{ view.case_info }}</th>
            <td>{{ view.petitioner_respondent }}</td>
            <td><a href='{{ view.htmlfile }}'>Open</a></td>
            <td>
              {% for pdf in view.pdfs %}
              <a href='{{ pdf }}'>Open</a>
              {% endfor %}
            </td>
          </tr>
          {% endfor %}
        </tbody>
      </table>
    </main>
  </body>
</html>