diff --git a/convert.py b/convert.py index 9d26b28..b33cd27 100644 --- a/convert.py +++ b/convert.py @@ -20,11 +20,12 @@ def convert_to_markdown(file): date=parsedate_to_datetime(em.get('Date')) last_month = date - dateutil.relativedelta.relativedelta(months=1) + # See https://regex101.com/r/QRQ2Va/1 for regex sample text re_clicks = r"(.*)\s+Clicks \(web\)" re_impressions = r"(.*)\s+Impressions \(web\)" re_url_stats = r"(?P^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P(?:\+?[\w\.]+))" - re_growing_queries = r"(?P.*)\n+(?P\+?[\d\.]+) clicks \((?P\w+)\)" - re_top_queries = r"(?P.{6,})\n{2}(?P\d+$)" + re_growing_queries = r"(?P.*)\n+(?P\+?[\d\.]+\w?) clicks \((?P\w+)\)" + re_top_queries = r"(?P.{6,})\n{2}(?P\d\.\w+$)" re_devices = r"Desktop Mobile Tablet\s+^(?P(?:\d|\.)+[A-Z]?) (?P(?:\d|\.)+[A-Z]?) (?P(?:\d|\.)+[A-Z]?)$" # Skip the first two matches