From 82002257809fe917e08c5931dc5d098c528eaac9 Mon Sep 17 00:00:00 2001
From: Felix Martin <mail@felixm.de>
Date: Sat, 22 Oct 2022 21:37:01 -0400
Subject: [PATCH] Update readme and format scripts

---
 README.md      |  7 +++++--
 src/epub.py    | 49 ++++++++++++++++++++++++++++-----------------
 src/main.py    | 14 +++++++------
 src/pokemon.py | 54 +++++++++++++++++++++++++++++++-------------------
 4 files changed, 78 insertions(+), 46 deletions(-)
diff --git a/README.md b/README.md
index 77cac8c..2fdcee5 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,12 @@
 # poos-xray
 
-Script that annotates Pokemon: the Origin of the Species epub with links to
-descriptions of the Pokemon.
+Script that annotates the Pokemon: The Origin of Species e-book with links to
+descriptions and pictures of the Pokemon within the e-book itself.
+
+It works with the epub that you can download from [Daystar Eld's Patreon](https://www.patreon.com/daystareld/).
 
 ```shell
 pipenv install
 pipenv shell
+python poos-xray "DaystarEld - Pokemon The Origin of Species.epub"
 ```
diff --git a/src/epub.py b/src/epub.py
index afa9d35..f69139a 100644
--- a/src/epub.py
+++ b/src/epub.py
@@ -9,26 +9,29 @@ from typing import List, Dict
 
 POKEMON_ID_PREFIX = "pokemon-id-"
 
+
 def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
     POKEDEX_TITLE = "Pokedex"
     POKEDEX_FILE = "content/np_pokedex.xhtml"
     POKEDEX_UID = "np_pokedex"
-    chapter = epub.EpubHtml(title=POKEDEX_TITLE, file_name=POKEDEX_FILE, uid=POKEDEX_UID)
-    content = ['<h1>Pokedex</h1>']
+    chapter = epub.EpubHtml(
+        title=POKEDEX_TITLE, file_name=POKEDEX_FILE, uid=POKEDEX_UID
+    )
+    content = ["<h1>Pokedex</h1>"]
 
     for p in pokemon:
         content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p.name.lower()}">{p.name}</h2>')
-        content.append(f'  <p><img alt="[Pokemon {p.name}]" src="../{p.img_filepath}"/><br/></p>')
+        content.append(
+            f'  <p><img alt="[Pokemon {p.name}]" src="../{p.img_filepath}"/><br/></p>'
+        )
         for paragraph in p.description.split("\n"):
-            content.append(f'  <p>{paragraph}</p>')
-        content.append('')
+            content.append(f"  <p>{paragraph}</p>")
+        content.append("")
 
     chapter.content = "\n".join(content)
     return chapter
 
 
-
-
 def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
     r = re.compile("([:,.!?“”‘’…])")
     soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
@@ -41,8 +44,8 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
         return tag
 
     def patch_string(section: NavigableString) -> List:
-        """ Replace Pokemon with link to Pokemon; requires splitting up the
-            NavigableString into a list of NavigableStrings and Tags. """
+        """Replace Pokemon with link to Pokemon; requires splitting up the
+        NavigableString into a list of NavigableStrings and Tags."""
         result = [[]]
         for word in str(section).split(" "):
             word_stripped = r.sub("", word)
@@ -55,7 +58,9 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
                 else:
                     # add other chars before pokemon if there are any
                     result[-1].append("".join(word_split[:i]))
-                pokemon_link = pokemon_name_to_link(word_stripped.lower(), word_stripped)
+                pokemon_link = pokemon_name_to_link(
+                    word_stripped.lower(), word_stripped
+                )
                 result.append(pokemon_link)
                 result.append([])
                 if i + 1 == len(word_split):
@@ -63,7 +68,7 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
                     result[-1].append(" ")
                 else:
                     # add other chars after pokemon if there are any
-                    result[-1].append("".join(word_split[i + 1:]))
+                    result[-1].append("".join(word_split[i + 1 :]))
             else:
                 result[-1].append(word)
 
@@ -96,20 +101,28 @@ def patch(epub_filepath: str, pokemon: List[Pokemon]):
     book.add_item(chapter)
     link = epub.Link(chapter.file_name, chapter.title, chapter.id)
     book.toc.append(link)
-    book.spine.append((chapter.id, 'yes'))
+    book.spine.append((chapter.id, "yes"))
 
     for p in pokemon:
-        image_content = open(p.img_filepath, 'rb').read()
-        img = epub.EpubItem(uid=p.name, file_name=p.img_filepath, media_type='image/png', content=image_content)
+        image_content = open(p.img_filepath, "rb").read()
+        img = epub.EpubItem(
+            uid=p.name,
+            file_name=p.img_filepath,
+            media_type="image/png",
+            content=image_content,
+        )
         book.add_item(img)
 
     pokemon_lookup = {p.name.lower(): p for p in pokemon}
-    chapters = [b for b in book.get_items()
-                if isinstance(b, epub.EpubHtml)
-                if b.id.startswith("np_")]
+    chapters = [
+        b
+        for b in book.get_items()
+        if isinstance(b, epub.EpubHtml)
+        if b.id.startswith("np_")
+    ]
     for c in chapters:
         patch_chapter(c, pokemon_lookup)
 
     epub_out = epub_filepath.replace(".", "-with-links.")
     epub.write_epub(epub_out, book, {})
-    logging.info(f"{epub_out} written.")
+    logging.info(f"Write '{epub_out}'.")
diff --git a/src/main.py b/src/main.py
index 3dbeb84..b7db292 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,13 +1,15 @@
+import sys
 import logging
 import src.pokemon
 import src.epub
 
 
-def init_logging():
-    logging.basicConfig(level=logging.INFO)
-
-
 def main():
-    init_logging()
+    logging.basicConfig(format="%(message)s", level=logging.INFO)
+    try:
+        ptoos_epub = sys.argv[1]
+    except IndexError:
+        ptoos_epub = "poos.epub"
+    logging.info(f"Patching '{ptoos_epub}'.")
     pokemon = src.pokemon.get_pokemon()
-    src.epub.patch("poos.epub", pokemon)
+    src.epub.patch(ptoos_epub, pokemon)
diff --git a/src/pokemon.py b/src/pokemon.py
index b8b6fa7..926df22 100644
--- a/src/pokemon.py
+++ b/src/pokemon.py
@@ -9,7 +9,9 @@ from typing import List
 
 POKEMON_CACHE_DIRECTORY = "pokemon"
 BULBAPEDIA_BASE_URL = "https://bulbapedia.bulbagarden.net"
-NATIONAL_INDEX_URL = BULBAPEDIA_BASE_URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
+NATIONAL_INDEX_URL = (
+    BULBAPEDIA_BASE_URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
+)
 
 
 class Pokemon(BaseModel):
@@ -24,13 +26,13 @@ class Pokemon(BaseModel):
 
 
 def download_to_file(url: str, filepath: str, override=False):
-    """ Downloads url into filepath. """
+    """Downloads url into filepath."""
     if os.path.isfile(filepath) and override is False:
         logging.debug(f"'{filepath}' exists.")
         return
 
     headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
     }
     r = requests.get(url, headers=headers)
     if r.status_code != 200:
@@ -45,12 +47,14 @@ def download_to_file(url: str, filepath: str, override=False):
 
 
 def get_pokemon() -> List[Pokemon]:
-    """ Scrape Pokemon from the Bulbapedia national dex """
+    """Scrape Pokemon from the Bulbapedia national dex"""
     NATIONAL_INDEX_FILEPATH = os.path.join(POKEMON_CACHE_DIRECTORY, "pokedex.html")
     download_to_file(NATIONAL_INDEX_URL, NATIONAL_INDEX_FILEPATH)
     with open(NATIONAL_INDEX_FILEPATH, "r") as r:
         soup = BeautifulSoup(r, "html.parser")
-    pokemon_list_soup: BeautifulSoup = soup.find(id="List_of_Pokémon_by_National_Pokédex_number").parent
+    pokemon_list_soup: BeautifulSoup = soup.find(
+        id="List_of_Pokémon_by_National_Pokédex_number"
+    ).parent
     generation_soups: BeautifulSoup = pokemon_list_soup.find_next_siblings("h3")
 
     table_row_soups = []
@@ -77,48 +81,58 @@ def get_pokemon() -> List[Pokemon]:
             continue
 
         index = table_row_soup.find_next("td").next_sibling.next_sibling.text.strip()
-        html_url = BULBAPEDIA_BASE_URL + table_row_soup.find_next("th").next_element.attrs["href"]
+        html_url = (
+            BULBAPEDIA_BASE_URL
+            + table_row_soup.find_next("th").next_element.attrs["href"]
+        )
         img_url = table_row_soup.find("img").attrs["src"]
         html_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".html")
         img_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
-        p = Pokemon(name=name,
-                    index=index,
-                    html_url=html_url,
-                    img_url=img_url,
-                    html_filepath=html_filepath,
-                    img_filepath=img_filepath,
-                    json_filepath=json_filepath)
+        p = Pokemon(
+            name=name,
+            index=index,
+            html_url=html_url,
+            img_url=img_url,
+            html_filepath=html_filepath,
+            img_filepath=img_filepath,
+            json_filepath=json_filepath,
+        )
         pokemon.append(p)
         extend_pokemon(p)
-        with open(p.json_filepath, 'w') as f:
+        with open(p.json_filepath, "w") as f:
             f.write(p.json())
             logging.info(f"Saved {p.json_filepath}.")
 
     # Filter out speculative Pokemon
-    pokemon = [p for p in pokemon if not p.description.startswith("This article's contents will change")]
+    pokemon = [
+        p
+        for p in pokemon
+        if not p.description.startswith("This article's contents will change")
+    ]
 
     logging.info("Pokemon loaded.")
     return pokemon
 
 
 def extend_pokemon(p: Pokemon):
-    """ Add description and download Pokemon image """
+    """Add description and download Pokemon image"""
     download_to_file(p.html_url, p.html_filepath)
     with open(p.html_filepath, "r") as r:
         soup = BeautifulSoup(r, "html.parser")
-    content_soup: BeautifulSoup = soup.find(id='mw-content-text').contents[0]
+    content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0]
 
     # description
     p_soup = content_soup.find("p")
     description = []
-    while p_soup.name == 'p':
+    while p_soup.name == "p":
         description.append(p_soup.get_text())
         p_soup = p_soup.next_sibling
     p.description = "".join(description)
 
     # image
-    img_url = content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
+    img_url = (
+        content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
+    )
     img_url = img_url.replace("//", "https://")
     p.img_url = img_url
     download_to_file(img_url, p.img_filepath)
-