Slight improvement in regex

This commit is contained in:
Nemo 2023-12-06 11:41:48 +05:30
parent 5a4dad578f
commit 9800d197b7
1 changed file with 3 additions and 2 deletions

View File

@@ -20,11 +20,12 @@ def convert_to_markdown(file):
date=parsedate_to_datetime(em.get('Date'))
last_month = date - dateutil.relativedelta.relativedelta(months=1)
# See https://regex101.com/r/QRQ2Va/1 for regex sample text
re_clicks = r"(.*)\s+Clicks \(web\)"
re_impressions = r"(.*)\s+Impressions \(web\)"
re_url_stats = r"(?P<url>^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P<num>(?:\+?[\w\.]+))"
re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+) clicks \((?P<device>\w+)\)"
re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d+$)"
re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+\w?) clicks \((?P<device>\w+)\)"
re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d\.\w+$)"
re_devices = r"Desktop Mobile Tablet\s+^(?P<desktop>(?:\d|\.)+[A-Z]?) (?P<mobile>(?:\d|\.)+[A-Z]?) (?P<tablet>(?:\d|\.)+[A-Z]?)$"
# Skip the first two matches