mirror of
https://github.com/captn3m0/Scripts.git
synced 2024-09-27 22:22:53 +00:00
114 lines
3.0 KiB
Plaintext
114 lines
3.0 KiB
Plaintext
|
#!/usr/bin/env python
|
||
|
# coding:utf-8
|
||
|
# Created by tom_th_lin on 2015/9/18.
|
||
|
import sys
|
||
|
import io
|
||
|
import requests
|
||
|
from flask import abort, json
|
||
|
# from md2html import _read_file_or_404
|
||
|
# from md2html import render_content
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
###
|
||
|
# define function
|
||
|
###
|
||
|
|
||
|
|
||
|
def _read_file_or_404(filename, read_as_text=True):
|
||
|
"""
|
||
|
Reads the contents of the specified file, or raise 404.
|
||
|
"""
|
||
|
mode = 'rt' if read_as_text else 'rb'
|
||
|
encoding = 'utf-8' if read_as_text else None
|
||
|
try:
|
||
|
with io.open(filename, mode, encoding=encoding) as f:
|
||
|
return f.read()
|
||
|
except IOError as ex:
|
||
|
if ex.errno != errno.ENOENT:
|
||
|
raise
|
||
|
abort(404)
|
||
|
|
||
|
|
||
|
def render_content(text, api_url, gfm=False, context=None,
|
||
|
username=None, password=None):
|
||
|
"""
|
||
|
Renders the specified markup using the GitHub API.
|
||
|
"""
|
||
|
if gfm:
|
||
|
url = '{}/markdown'.format(api_url)
|
||
|
data = {'text': text, 'mode': 'gfm'}
|
||
|
if context:
|
||
|
data['context'] = context
|
||
|
data = json.dumps(data, ensure_ascii=False).encode('utf-8')
|
||
|
headers = {'content-type': 'application/json; charset=UTF-8'}
|
||
|
else:
|
||
|
url = '{}/markdown/raw'.format(api_url)
|
||
|
data = text.encode('utf-8')
|
||
|
headers = {'content-type': 'text/x-markdown; charset=UTF-8'}
|
||
|
auth = (username, password) if username or password else None
|
||
|
r = requests.post(url, headers=headers, data=data, auth=auth)
|
||
|
# Relay HTTP errors
|
||
|
if r.status_code != 200:
|
||
|
try:
|
||
|
message = r.json()['message']
|
||
|
except Exception:
|
||
|
message = r.text
|
||
|
abort(r.status_code, message)
|
||
|
return r.text
|
||
|
|
||
|
|
||
|
###
|
||
|
#
|
||
|
# Start program.
|
||
|
#
|
||
|
###
|
||
|
if len(sys.argv) >= 2:
|
||
|
filename = sys.argv[1]
|
||
|
else:
|
||
|
print("please put sys.argv[1] as filename !!")
|
||
|
sys.exit()
|
||
|
if not (filename[-2:] == 'md'):
|
||
|
print("the file format should be *.md")
|
||
|
sys.exit()
|
||
|
render_text = _read_file_or_404(filename)
|
||
|
# print(render_text)
|
||
|
api_url = 'https://api.github.com'
|
||
|
html_table = render_content(render_text, api_url, True, None, None, None)
|
||
|
|
||
|
soup = BeautifulSoup(html_table, "html.parser") # parse html
|
||
|
table = soup.table # get <table>...</table>
|
||
|
# print(table)
|
||
|
|
||
|
f = open(filename[0:filename.find('.')] + '.csv', 'w') # open file.csv
|
||
|
|
||
|
rows = table.find_all('tr') # get all content <tr></tr>
|
||
|
|
||
|
ths = rows[0].find_all('th')
|
||
|
|
||
|
write_th_to_file = ''
|
||
|
for th in ths:
|
||
|
if th.string is not None:
|
||
|
write_th_to_file += (th.string + ',')
|
||
|
else:
|
||
|
write_th_to_file += (' ' + ',')
|
||
|
write_th_to_file = write_th_to_file[:-1]
|
||
|
|
||
|
f.write(write_th_to_file)
|
||
|
f.write('\n')
|
||
|
|
||
|
for row in rows[1:]:
|
||
|
write_td_to_file = ''
|
||
|
tds = row.find_all('td')
|
||
|
for td in tds:
|
||
|
if td.string is not None:
|
||
|
write_td_to_file += (td.string+',')
|
||
|
else:
|
||
|
write_td_to_file += (' '+',')
|
||
|
write_td_to_file = write_td_to_file[:-1]
|
||
|
# print(write_td_to_file)
|
||
|
f.write(write_td_to_file)
|
||
|
f.write('\n')
|
||
|
|
||
|
f.close()
|
||
|
print("convertion successfully done")
|