Implement replacing Pokemon with links

2022-10-22 21:23:30 -04:00
parent 951649bd9e
commit 1248d9d750
2 changed files with 79 additions and 7 deletions
--- a/src/epub.py
+++ b/src/epub.py
@@ -1,8 +1,11 @@
 import ebooklib
 import logging
+import re
+from bs4 import BeautifulSoup, Tag
+from bs4.element import NavigableString
 from ebooklib import epub
 from src.pokemon import Pokemon
-from typing import List
+from typing import List, Dict

 POKEMON_ID_PREFIX = "pokemon-id-"

@@ -16,12 +19,76 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
    for p in pokemon:
        content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p.name.lower()}">{p.name}</h2>')
        content.append(f'  <p><img alt="[Pokemon {p.name}]" src="../{p.img_filepath}"/><br/></p>')
-        content.append(f'  <p>{p.description}</p>')
+        for paragraph in p.description.split("\n"):
+            content.append(f'  <p>{paragraph}</p>')
        content.append('')

    chapter.content = "\n".join(content)
    return chapter

+
+
+
+def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
+    """Turn Pokemon names inside the chapter's ``<p>`` elements into links.
+
+    The chapter HTML is parsed with BeautifulSoup. Every plain text node
+    below a ``<p>`` tag is scanned word by word (split on single spaces);
+    a word whose punctuation-stripped, lower-cased form is a key of
+    ``pokemon_lookup`` is wrapped in an ``<a>`` that points at the matching
+    anchor in ``np_pokedex.xhtml``. The serialized result is written back
+    to ``chapter.content``.
+
+    Args:
+        chapter: Chapter whose ``content`` is read and then overwritten.
+        pokemon_lookup: Mapping whose keys are compared against the
+            lower-cased, punctuation-stripped words of the text.
+    """
+    r = re.compile("([:,.!?“”‘’…])")
+    soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
+
+    def pokemon_name_to_link(key: str, word: str) -> Tag:
+        """Build the ``<a>`` element that shows ``word`` and targets ``key``."""
+        tag = soup.new_tag("a")
+        tag.string = word
+        tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{key}"
+        tag.attrs["style"] = "color:black;text-decoration:none"
+        return tag
+
+    def patch_string(section: NavigableString) -> List:
+        """Split a text node into plain strings and Pokemon links.
+
+        The text of the returned pieces, concatenated in order, equals the
+        original text, so whitespace and punctuation are kept exactly as
+        they were. Empty pieces are dropped.
+        """
+        result = []
+        pending = []  # plain text collected since the last emitted link
+
+        def flush():
+            text = "".join(pending)
+            if text:
+                result.append(NavigableString(text))
+            pending.clear()
+
+        for n, word in enumerate(str(section).split(" ")):
+            if n:
+                # put back the single space that split(" ") consumed
+                pending.append(" ")
+            word_stripped = r.sub("", word)
+            word_split = r.split(word)
+            # The second test skips words with punctuation *inside* the name
+            # (e.g. "Pika.chu"): the stripped word is then not one of the
+            # split parts, so index() below would raise ValueError.
+            is_pokemon = (word_stripped.lower() in pokemon_lookup
+                          and word_stripped in word_split)
+            if is_pokemon:
+                i = word_split.index(word_stripped)
+                # punctuation before the name stays in the preceding text
+                pending.append("".join(word_split[:i]))
+                flush()
+                result.append(pokemon_name_to_link(word_stripped.lower(), word_stripped))
+                # punctuation after the name starts the following text
+                pending.append("".join(word_split[i + 1:]))
+            else:
+                pending.append(word)
+        flush()
+        return result
+
+    def patch_paragraph(paragraph: Tag):
+        """Rebuild the children of ``paragraph`` with Pokemon names linked."""
+        contents = []
+        for section in paragraph.contents:
+            # Exact type check on purpose: Comment, CData, ... subclass
+            # NavigableString and must not be rewritten as visible text.
+            if type(section) is NavigableString:
+                contents += patch_string(section)
+            else:
+                # Do not descend into existing links: that would nest <a>
+                # elements inside each other.
+                if isinstance(section, Tag) and section.name != "a":
+                    patch_paragraph(section)
+                contents.append(section)
+        # Go through the tree API instead of assigning ``.contents``, so the
+        # parent/sibling/next_element links of every node stay consistent.
+        paragraph.clear()
+        for piece in contents:
+            paragraph.append(piece)
+
+    for p_soup in soup.find_all("p"):
+        patch_paragraph(p_soup)
+    chapter.content = str(soup)
+
+
 def patch(epub_filepath: str, pokemon: List[Pokemon]):
    book = epub.read_epub(epub_filepath)

@@ -36,8 +103,13 @@ def patch(epub_filepath: str, pokemon: List[Pokemon]):
        img = epub.EpubItem(uid=p.name, file_name=p.img_filepath, media_type='image/png', content=image_content)
        book.add_item(img)

-    # Link to Pokemon looks like this:
-    # <a href="np_pokedex.xhtml#pokemon-id-bulbasaur">Bulbasaur!</a>
+    pokemon_lookup = {p.name.lower(): p for p in pokemon}
+    chapters = [b for b in book.get_items()
+                if isinstance(b, epub.EpubHtml)
+                if b.id.startswith("np_")]
+    for c in chapters:
+        patch_chapter(c, pokemon_lookup)

-    epub.write_epub('tmp/test.epub', book, {})
-    logging.info("Written")
+    epub_out = epub_filepath.replace(".", "-with-links.")
+    epub.write_epub(epub_out, book, {})
+    logging.info(f"{epub_out} written.")
--- a/src/pokemon.py
+++ b/src/pokemon.py
@@ -97,7 +97,7 @@ def get_pokemon() -> List[Pokemon]:
    # Filter out speculative Pokemon
    pokemon = [p for p in pokemon if not p.description.startswith("This article's contents will change")]

-    logging.info("Loaded Pokemon.")
+    logging.info("Pokemon loaded.")
    return pokemon