Slight improvement in regex
This commit is contained in:
parent
5a4dad578f
commit
9800d197b7
|
@@ -20,11 +20,12 @@ def convert_to_markdown(file):
|
|||
date=parsedate_to_datetime(em.get('Date'))
|
||||
last_month = date - dateutil.relativedelta.relativedelta(months=1)
|
||||
|
||||
# See https://regex101.com/r/QRQ2Va/1 for regex sample text
|
||||
re_clicks = r"(.*)\s+Clicks \(web\)"
|
||||
re_impressions = r"(.*)\s+Impressions \(web\)"
|
||||
re_url_stats = r"(?P<url>^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P<num>(?:\+?[\w\.]+))"
|
||||
re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+) clicks \((?P<device>\w+)\)"
|
||||
re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d+$)"
|
||||
re_growing_queries = r"(?P<query>.*)\n+(?P<num>\+?[\d\.]+\w?) clicks \((?P<device>\w+)\)"
|
||||
re_top_queries = r"(?P<query>.{6,})\n{2}(?P<num>\d\.\w+$)"
|
||||
re_devices = r"Desktop Mobile Tablet\s+^(?P<desktop>(?:\d|\.)+[A-Z]?) (?P<mobile>(?:\d|\.)+[A-Z]?) (?P<tablet>(?:\d|\.)+[A-Z]?)$"
|
||||
|
||||
# Skip the first two matches
|
||||
|
|
Loading…
Reference in New Issue