🏡 index : github.com/captn3m0/gsur-eml-to-markdown.git

author Nemo <commits@captnemo.in> 2023-12-06 11:41:48.0 +05:30:00
committer Nemo <commits@captnemo.in> 2023-12-06 11:41:48.0 +05:30:00
commit
9800d197b70219b18528e3bba127164b981bfa32 [patch]
tree
ba367793e4801a0a03ca242efaafcab249581a8a
parent
5a4dad578f8bc75b166cd06a9eaadf2a2a73e9d8
download
9800d197b70219b18528e3bba127164b981bfa32.tar.gz

Slight improvement in regex



Diff

 convert.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 9d26b28..b33cd27 100644
--- a/convert.py
+++ b/convert.py
@@ -20,11 +20,12 @@
		date=parsedate_to_datetime(em.get('Date'))
		last_month = date - dateutil.relativedelta.relativedelta(months=1)

		# See https://regex101.com/r/QRQ2Va/1 for regex sample text
		re_clicks = r"(.*)\s+Clicks \(web\)"
		re_impressions = r"(.*)\s+Impressions \(web\)"
		re_url_stats = r"(?P<url>^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P<num>(?:\+?[\w\.]+))"
-		re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+) clicks \((?P<device>\w+)\)"
-		re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d+$)"
+		re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+\w?) clicks \((?P<device>\w+)\)"
+		re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d\.\w+$)"
		re_devices = r"Desktop Mobile Tablet\s+^(?P<desktop>(?:\d|\.)+[A-Z]?) (?P<mobile>(?:\d|\.)+[A-Z]?) (?P<tablet>(?:\d|\.)+[A-Z]?)$"

		# Skip the first two matches