From 9800d197b70219b18528e3bba127164b981bfa32 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Wed, 6 Dec 2023 11:41:48 +0530
Subject: [PATCH] Slight improvement in regex

---
 convert.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 9d26b28..b33cd27 100644
--- a/convert.py
+++ b/convert.py
@@ -20,11 +20,12 @@ def convert_to_markdown(file):
     date=parsedate_to_datetime(em.get('Date'))
     last_month = date - dateutil.relativedelta.relativedelta(months=1)
 
+    # See https://regex101.com/r/QRQ2Va/1 for regex sample text
     re_clicks = r"(.*)\s+Clicks \(web\)"
     re_impressions = r"(.*)\s+Impressions \(web\)"
     re_url_stats = r"(?P^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P(?:\+?[\w\.]+))"
-    re_growing_queries = r"(?P.*)\n+(?P\+?[\d\.]+) clicks \((?P\w+)\)"
-    re_top_queries = r"(?P.{6,})\n{2}(?P\d+$)"
+    re_growing_queries = r"(?P.*)\n+(?P\+?[\d\.]+\w?) clicks \((?P\w+)\)"
+    re_top_queries = r"(?P.{6,})\n{2}(?P\d\.\w+$)"
     re_devices = r"Desktop Mobile Tablet\s+^(?P(?:\d|\.)+[A-Z]?) (?P(?:\d|\.)+[A-Z]?) (?P(?:\d|\.)+[A-Z]?)$"
 
     # Skip the first two matches