From 5a4dad578f8bc75b166cd06a9eaadf2a2a73e9d8 Mon Sep 17 00:00:00 2001 From: Nemo Date: Tue, 7 Jun 2022 16:03:10 +0530 Subject: [PATCH] init --- LICENSE | 7 ++ README.md | 19 ++++ convert.py | 56 ++++++++++++ requirements.txt | 1 + template.mustache | 56 ++++++++++++ test.eml | 226 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 365 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 convert.py create mode 100644 requirements.txt create mode 100644 template.mustache create mode 100644 test.eml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e964cd7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2022 Abhay Rana + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6587541
--- /dev/null
+++ b/README.md
@@ -0,0 +1,19 @@
+# GSUR Email to Markdown
+
+Converts Google Search Usability Reports to Markdown
+
+## dependencies
+
+Install with `pip install -r requirements.txt`
+
+`pystache`, `python-dateutil`
+
+## running
+
+First, save your GSU report as a `.eml` file, then run:
+
+`python convert.py filename.eml`
+
+## license
+
+Licensed under the [MIT License](https://nemo.mit-license.org/). See LICENSE file for details.
\ No newline at end of file
diff --git a/convert.py b/convert.py
new file mode 100644
index 0000000..9d26b28
--- /dev/null
+++ b/convert.py
@@ -0,0 +1,98 @@
+"""Convert a Google Search Console performance e-mail (.eml) to Markdown."""
+
+import os
+import re
+import sys
+from email import policy
+from email.parser import Parser
+from email.utils import parsedate_to_datetime
+
+import dateutil.relativedelta
+import pystache
+
+# Look for the template next to this script rather than in the current
+# working directory, so the script can be run from anywhere.
+TEMPLATE_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), 'template.mustache')
+
+
+def parse_res(r, keys):
+    """Attach field names to regex results.
+
+    r is either a list of match tuples (what re.findall returns) or a single
+    match tuple, and keys names the tuple positions in order. A list yields
+    a list of dicts; anything else yields one dict.
+    """
+    def as_dict(row):
+        return {k: row[i] for i, k in enumerate(keys)}
+
+    if isinstance(r, list):
+        return [as_dict(row) for row in r]
+    return as_dict(r)
+
+
+def convert_to_markdown(file):
+    """Print the report contained in the .eml file at path file as Markdown.
+
+    The values are scraped from the text/plain part with regular expressions
+    and rendered through template.mustache. The report is dated one month
+    before the message's Date header.
+
+    Raises ValueError if the message has no text/plain part, and IndexError
+    if the clicks, impressions or device totals are not found in it.
+    """
+    with open(file, encoding='utf-8', errors='replace') as f:
+        em = Parser(policy=policy.default).parse(f)
+
+    # preferencelist has to be a sequence of subtypes; ('plain') without the
+    # trailing comma is a plain string.
+    body = em.get_body(preferencelist=('plain',))
+    if body is None:
+        raise ValueError("no text/plain part found in {}".format(file))
+    # NOTE(review): as_string() also returns the part's own headers and does
+    # not appear to undo quoted-printable/base64 - confirm with such a mail.
+    text = body.as_string()
+
+    date = parsedate_to_datetime(em.get('Date'))
+    last_month = date - dateutil.relativedelta.relativedelta(months=1)
+
+    re_clicks = r"(.*)\s+Clicks \(web\)"
+    re_impressions = r"(.*)\s+Impressions \(web\)"
+    re_url_stats = r"(?P<url>^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$)\n+(?P<stat>(?:\+?[\w\.]+))"
+    re_growing_queries = r"(?P<query>.*)\n+(?P<growth>\+?[\d\.]+) clicks \((?P<device>\w+)\)"
+    re_top_queries = r"(?P<query>.{6,})\n{2}(?P<hits>\d+$)"
+    re_devices = r"Desktop Mobile Tablet\s+^(?P<web>(?:\d|\.)+[A-Z]?) (?P<mobile>(?:\d|\.)+[A-Z]?) (?P<tablet>(?:\d|\.)+[A-Z]?)$"
+
+    # Skip the first two matches
+    re_visit_breakdown = r"(?P<label>[A-Z].*)\n+(?P<hits>(?:\d|\.|K|M)+$)\s+"
+
+    # Each of these is sliced twice below, so scan the text only once.
+    url_stats = re.findall(re_url_stats, text, re.M)
+    visits = re.findall(re_visit_breakdown, text, re.M)
+
+    data = {
+        "year": last_month.year,
+        "month": last_month.month,
+        'date_formatted': last_month.strftime('%B %Y'),
+        'datetime': last_month,
+        "clicks": re.findall(re_clicks, text)[0],
+        "impressions": re.findall(re_impressions, text)[0],
+        "growing_pages": parse_res(url_stats[1:4], ['url', 'growth']),
+        "performing_pages": parse_res(url_stats[4:7], ['url', 'hits']),
+        "growing_queries": parse_res(re.findall(re_growing_queries, text), ['query', 'growth', 'device']),
+        "top_queries": parse_res(re.findall(re_top_queries, text, re.M), ['query', 'hits']),
+        "device_breakdown": parse_res(re.findall(re_devices, text, re.M)[0], ['web', 'mobile', 'tablet']),
+        "country_breakdown": parse_res(visits[2:5], ['country', 'hits']),
+        "type_breakdown": parse_res(visits[-3:], ['type', 'hits']),
+    }
+
+    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as tpl:
+        print(pystache.render(tpl.read(), data))
+
+
+if __name__ == '__main__':
+    if len(sys.argv) >= 2:
+        convert_to_markdown(sys.argv[1])
+    else:
+        # A usage error is reported on stderr with a non-zero exit status.
+        sys.exit("Please run as python convert.py file.eml")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..dcb2187
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pystache==0.6.0
+python-dateutil==2.8.2
diff --git a/template.mustache b/template.mustache
new file mode 100644
index 0000000..d985851
--- /dev/null
+++ b/template.mustache
@@ -0,0 +1,56 @@
+# {{date_formatted}}
+
+Clicks (web)|Impressions (web)
+---|---
+{{clicks}}|{{impressions}}
+
+## Top growing pages
+
+Page | Clicks (web)
+---|---
+{{#growing_pages}}
+{{url}}|{{growth}}
+{{/growing_pages}}
+
+## Top performing pages
+
+Page | Clicks (web)
+---|---
+{{#performing_pages}}
+{{url}}|{{hits}}
+{{/performing_pages}}
+
+## Top growing queries
+Query|Growth
+---|--- +{{#growing_queries}} +{{query}}|{{growth}} ({{device}}) +{{/growing_queries}} + +## Top performing queries +Query | Clicks (web) +---|--- +{{#top_queries}} +{{query}}|{{hits}} +{{/top_queries}} + +## Devices By clicks + +Desktop|Mobile|Tablet +---|---|--- +{{device_breakdown.web}}|{{device_breakdown.mobile}}|{{device_breakdown.tablet}} + +## Top countries +Country|Clicks +---|--- +{{#country_breakdown}} +{{country}}|{{hits}} +{{/country_breakdown}} + +## Google Search Type + +Type|Hits +---|--- +{{#type_breakdown}} +{{type}}|{{hits}} +{{/type_breakdown}} diff --git a/test.eml b/test.eml new file mode 100644 index 0000000..c794de6 --- /dev/null +++ b/test.eml @@ -0,0 +1,226 @@ +Date: Tue, 03 May 2022 20:42:12 -0700 +Reply-To: Google Search Console Team +Message-ID: +Subject: Your April + Search performance for https://endoflife.date/ +From: Google Search Console Team +To: gsur@captnemo.in +Content-Type: multipart/alternative; boundary="0000000000001b407105de2769ff" + +--0000000000001b407105de2769ff +Content-Type: text/plain; charset="UTF-8"; format=flowed; delsp=yes + +New: Click to try + +Search Console Insights + + +Your April performance +on Google Search + + +https://endoflife.date/ + + +70.3K + +Clicks (web) + + +2.22M + +Impressions (web) + + +4 + +Pages with +first impressions +(estimated) + + +Your content achievements + +Top growing pages + +Compared to previous month + + +Page + +Clicks (web) + + +https://endoflife.date/java + + ++1.64K + + +https://endoflife.date/python + + ++598 + + +https://endoflife.date/spring-framework + + ++468 + + +Top performing pages + + +Page + +Clicks (web) + + +https://endoflife.date/iphone + +10.5K + + +https://endoflife.date/java + +6.97K + + +https://endoflife.date/python + +6.32K + +Full report + + +How do people find you? 
+ +Top growing queries + +Compared to previous month + + +java 8 eol + ++128 clicks (web) + + +python eol + ++77 clicks (web) + + +python 3.6 eol + ++60 clicks (web) + + +Top performing queries + + +Query + +Clicks (web) + + +python eol + +513 + + +python 3.6 end of life + +510 + + +java 8 eol + +472 + +Full report + + +Learn about your audience + +Devices + +By clicks (web) + + +Desktop Mobile Tablet + +57.9K 11.8K 606 + +Full report + + +Top countries + +By clicks (web) + + +United States + +19.2K + + +United Kingdom + +6.45K + + +Germany + +4.41K + + +Full report + + +Google search type + +By clicks + + +Web + +70.3K + + +Image + +50 + + +Video + +0 + + +https://endoflife.date/ + +Keep exploring your data to identify new opportunities + +Performance report + +Learn more about this email + + +Was this email helpful? + + +You've received this transactional email because your site is listed in +Google Search Console. You may unsubscribe from this type of message or add +partners who should receive messages for this Search Console account + +Google LLC, 1600 Amphitheatre Parkway Mountain View, CA 94043 + + + +--0000000000001b407105de2769ff +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: quoted-printable + +removed +--0000000000001b407105de2769ff-- \ No newline at end of file