Fix for new directory structure

- Fixes #14
This commit is contained in:
Nemo 2018-12-02 12:36:05 +05:30
parent 8461a02365
commit e158e9e827
3 changed files with 28 additions and 25 deletions

View File

@ -1,14 +1,19 @@
#!/bin/bash #!/bin/bash
TOC_URL="https://landing.google.com/sre/sre-book/toc/index.html"
mkdir -p html # Make sure that links are relative \
cd html # # Remove the /sre/sre-book/ directories
wget --mirror https://landing.google.com/sre/book/ # Save stuff in html/ directory
# Do not create a landing.google.com directory
mv landing.google.com/sre/book/* . # Enable recursion, timestamping
rm -rf landing.google.com # We need to go up a level from /toc/ where we start
cd .. wget \
--convert-links \
--cut-dirs=2 \
--directory-prefix=html \
--no-host-directories \
--mirror \
--include-directories=/sre/sre-book/ "$TOC_URL"
# Note: This does not yet create a virtual environment # Note: This does not yet create a virtual environment
# and only runs on Python 2 # and only runs on Python 2
pip install -r requirements.txt pip install -r requirements.txt
python2 generate.py # python2 generate.py

2
build/.gitignore vendored
View File

@ -1,2 +0,0 @@
*
!.gitignore

View File

@ -3,31 +3,31 @@ import os
import pypub import pypub
epub = pypub.Epub('Site Reliability Engineering') epub = pypub.Epub('Site Reliability Engineering')
root = os.getcwd()
def setup_toc(): def setup_toc():
soup = BeautifulSoup(open('./html/index.html'), 'html.parser') os.chdir('html/toc')
links = soup.select('.content a ') soup = BeautifulSoup(open('index.html'), 'html.parser')
links = soup.select('.content a')
for link in links: for link in links:
print(link['href']) if link.has_attr('class') and 'menu-buttons' not in list(link['class']):
add_chapter_file(link['href'], link.get_text()) add_chapter_file(link['href'], link.get_text())
epub.create_epub(os.path.abspath('./build')) epub.create_epub('build')
def add_chapter_file(href, title): def add_chapter_file(href, title):
file_path = href.replace('/sre/book/', 'html/') with open(href, 'r') as f:
with open(file_path, 'r') as f:
contents = f.read() contents = f.read()
# print(len(contents))
chapter_soup = BeautifulSoup(contents, 'html.parser') chapter_soup = BeautifulSoup(contents, 'html.parser')
chapter_soup = chapter_soup.select_one('.content') chapter_soup = chapter_soup.select_one('.content')
links = chapter_soup.select_all('a')
for link in links:
link.href = link.href.replace('/sre/book/chapters/', '')
chapter = pypub.create_chapter_from_string( chapter = pypub.create_chapter_from_string(
chapter_html, url=None, title=title) str(chapter_soup), url=None, title=title)
epub.add_chapter(chapter) epub.add_chapter(chapter)
setup_toc() setup_toc()
epub.create_epub('./build') os.chdir(root)
epub.create_epub('build')