diff --git a/bootstrap.sh b/bootstrap.sh index fe7bc25..e57184c 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -1,14 +1,19 @@ #!/bin/bash - -mkdir -p html -cd html -wget --mirror https://landing.google.com/sre/book/ - -mv landing.google.com/sre/book/* . -rm -rf landing.google.com -cd .. - +TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html" +# Make sure that links are relative \ +# # Remove the /sre/sre-book/ directories +# Save stuff in html/ directory +# Do not create a landing.google.com directory +# Enable recursion, timestamping +# We need to go up a level from /toc/ where we start +wget \ + --convert-links \ + --cut-dirs=2 \ + --directory-prefix=html \ + --no-host-directories \ + --mirror \ + --include-directories=/sre/sre-book/ "$TOC_URL" # Note: This does not yet create a virtual environment # and only runs on Python 2 pip install -r requirements.txt -python2 generate.py +# python2 generate.py diff --git a/build/.gitignore b/build/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/build/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/generate.py b/generate.py index 51cc437..353b9c3 100644 --- a/generate.py +++ b/generate.py @@ -3,31 +3,31 @@ import os import pypub epub = pypub.Epub('Site Reliability Engineering') +root = os.getcwd() def setup_toc(): - soup = BeautifulSoup(open('./html/index.html'), 'html.parser') - links = soup.select('.content a ') + os.chdir('html/toc') + soup = BeautifulSoup(open('index.html'), 'html.parser') + links = soup.select('.content a') for link in links: - print(link['href']) - add_chapter_file(link['href'], link.get_text()) + if link.has_attr('class') and 'menu-buttons' not in list(link['class']): + add_chapter_file(link['href'], link.get_text()) - epub.create_epub(os.path.abspath('./build')) + epub.create_epub('build') def add_chapter_file(href, title): - file_path = href.replace('/sre/book/', 'html/') - - with open(file_path, 'r') as f: + with open(href, 'r') as f: contents = f.read() + # print(len(contents)) chapter_soup = BeautifulSoup(contents, 'html.parser') chapter_soup = chapter_soup.select_one('.content') - links = chapter_soup.select_all('a') - for link in links: - link.href = link.href.replace('/sre/book/chapters/', '') chapter = pypub.create_chapter_from_string( - chapter_html, url=None, title=title) + str(chapter_soup), url=None, title=title) epub.add_chapter(chapter) + setup_toc() -epub.create_epub('./build') +os.chdir(root) +epub.create_epub('build')