Add epub unit tests and fix Mr. Mime again to resolve #3

2022-12-17 20:26:21 -05:00
parent ea57b80d50
commit 8b83c2d2ea
4 changed files with 181 additions and 12216 deletions
--- a/conftest.py
+++ b/conftest.py
--- a/src/epub.py
+++ b/src/epub.py
@@ -7,11 +7,12 @@ from bs4 import BeautifulSoup, Tag
 from bs4.element import NavigableString
 from ebooklib import epub
 from src.pokemon import Pokemon
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Set
 from rich.progress import track

 POKEMON_ID_PREFIX = "pokemon-id-"
 POKEDEX_UID = "np_pokedex"
+SPECIAL_CHARS_REGEX = re.compile("([:,.!?“”‘’… ]+)")


@dataclass
@@ -21,8 +22,15 @@ class AnnoyingPokemon:
    name_in_pokedex: str


+@dataclass
+class ChapterContext:
+    pokemon_lookup: Dict[str, Pokemon]
+    pokemon_added: Set[str]  # Set to only link Pokemon for first occurrence in chapter
+    chapter_soup: BeautifulSoup
+
+
 ANNOYING_POKEMON = [
-    AnnoyingPokemon(["Mr", ".", "Mime"], 3, "mr. mime"),
+    AnnoyingPokemon(["mr", ". ", "mime"], 3, "mr. mime"),
    AnnoyingPokemon(["farfetch", "’", "d"], 3, "farfetch'd"),
    AnnoyingPokemon(["sirfetch", "’", "d"], 3, "sirfetch'd"),
 ]
@@ -49,79 +57,80 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
    return chapter


-def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
-    special_chars_regex = re.compile("([:,.!?“”‘’… ]+)")
-    soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
+def pokemon_to_link(p: Pokemon, name_as_in_book: str, ctx: ChapterContext) -> Tag:
+    tag = ctx.chapter_soup.new_tag("a")
+    tag.string = name_as_in_book
+    tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
+    return tag

-    # Set to remember which Pokemon have already gotten a link for that
-    # chapter.
-    pokemon_added_for_chapter = set()

-    def pokemon_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
-        tag = soup.new_tag("a")
-        tag.string = name_as_in_book
-        tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
-        # tag.attrs["style"] = "color:black;text-decoration:none"
-        return tag
+def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
+    for p in ANNOYING_POKEMON:
+        if p.name_chunks == list(
+            map(lambda s: s.lower(), chunks[index:index + p.length_chunks])
+        ):
+            return p
+    return None

-    def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
-        for p in ANNOYING_POKEMON:
-            if p.name_chunks == list(
-                map(lambda s: s.lower(), chunks[index : index + p.length_chunks])
-            ):
-                return p
-        return None

-    def patch_string(section: NavigableString) -> List:
-        """Replace Pokemon with link to Pokemon; requires splitting up the
-        NavigableString into a list of NavigableStrings and Tags."""
-        result: List[List] = [[]]
-        index, chunks = 0, special_chars_regex.split(str(section))
-        while index < len(chunks):
-            word = chunks[index]
-            pokemon: Optional[Pokemon] = None
-            increment: int = 1
+def patch_string(section: NavigableString, ctx: ChapterContext) -> List:
+    """Replace Pokemon with link to Pokemon; requires splitting up the
+    NavigableString into a list of NavigableStrings and Tags."""
+    result: List[List] = [[]]
+    index, chunks = 0, SPECIAL_CHARS_REGEX.split(str(section))
+    while index < len(chunks):
+        word = chunks[index]
+        pokemon: Optional[Pokemon] = None
+        increment: int = 1

-            if word.lower() in pokemon_lookup:
-                pokemon = pokemon_lookup[word.lower()]
-            elif annoying_pokemon := is_annoying_pokemon(index, chunks):
-                pokemon = pokemon_lookup[annoying_pokemon.name_in_pokedex]
-                increment = annoying_pokemon.length_chunks
+        if word.lower() in ctx.pokemon_lookup:
+            pokemon = ctx.pokemon_lookup[word.lower()]
+        elif annoying_pokemon := is_annoying_pokemon(index, chunks):
+            pokemon = ctx.pokemon_lookup[annoying_pokemon.name_in_pokedex]
+            increment = annoying_pokemon.length_chunks

-            if pokemon is not None and pokemon.name in pokemon_added_for_chapter:
-                pokemon = None
+        if pokemon is not None and pokemon.name in ctx.pokemon_added:
+            pokemon = None

-            if pokemon is not None:
-                pokemon_added_for_chapter.add(pokemon.name)
-                pokemon.appears_in_book = True
-                name = "".join(chunks[index : index + increment])
-                link = pokemon_to_link(pokemon, name)
-                result.append(link)
-                result.append([])
-                index += increment
-            else:
-                result[-1].append(word)
-                index += 1
+        if pokemon is not None:
+            ctx.pokemon_added.add(pokemon.name)
+            pokemon.appears_in_book = True
+            name = "".join(chunks[index:index + increment])
+            link = pokemon_to_link(pokemon, name, ctx)
+            result.append(link)
+            result.append([])
+            index += increment
+        else:
+            result[-1].append(word)
+            index += 1

-        # convert words back into strings
-        for i in range(len(result)):
-            if isinstance(result[i], list):
-                result[i] = NavigableString("".join(result[i]))
-        return result
+    # convert words back into strings
+    for i in range(len(result)):
+        if isinstance(result[i], list):
+            result[i] = NavigableString("".join(result[i]))
+    return result

-    def patch_paragraph(paragraph: Tag):
-        contents = []
-        for section in paragraph.contents:
-            if isinstance(section, NavigableString):
-                contents += patch_string(section)
-            else:
-                patch_paragraph(section)
-                contents.append(section)
-        paragraph.contents = contents

-    for p_soup in soup.find_all("p"):
-        patch_paragraph(p_soup)
-    chapter.content = str(soup)
+def patch_paragraph(paragraph: Tag, ctx: ChapterContext):
+    contents = []
+    for section in paragraph.contents:
+        if isinstance(section, NavigableString):
+            contents += patch_string(section, ctx)
+        else:
+            patch_paragraph(section, ctx)
+            contents.append(section)
+    paragraph.contents = contents
+
+
+def patch_chapter(chapter_soup: BeautifulSoup, pokemon_lookup: Dict[str, Pokemon]) -> str:
+    ctx = ChapterContext(
+        pokemon_lookup=pokemon_lookup,
+        pokemon_added=set(),
+        chapter_soup=chapter_soup,
+    )
+    for p_soup in chapter_soup.find_all("p"):
+        patch_paragraph(p_soup, ctx)
+    return str(chapter_soup)


 def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
@@ -150,8 +159,9 @@ def get_epub_with_pokedex(epub_filename: Path, pokemon: List[Pokemon]) -> epub.E
        logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
        sys.exit(1)

-    for c in track(chapters, description="Add Pokemon links to chapters"):
-        patch_chapter(c, pokemon_lookup)
+    for chapter in track(chapters, description="Add Pokemon links to chapters"):
+        chapter_soup = BeautifulSoup(chapter.content, "html.parser")
+        chapter.content = patch_chapter(chapter_soup, pokemon_lookup)

    # only add Pokemon to Pokedex chapter that appear (in the book)
    pokemon = [p for p in pokemon if p.appears_in_book]
--- a/test/test_epub.py
+++ b/test/test_epub.py
@@ -0,0 +1,104 @@
+import epub
+import src.pokemon as pokemon
+from typing import List
+from bs4 import BeautifulSoup
+
+
+def test_patch_chapter_tauros():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a tauros yeah</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> yeah</p>')
+    assert o == e
+
+
+def test_patch_chapter_double_tauros():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's two tauros tauros</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s two <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> tauros</p>')
+    assert o == e
+
+
+def test_patch_chapter_tauros_italic():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's two <i>tauros</i> tauros</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s two <i><a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a></i> tauros</p>')
+    assert o == e
+
+
+def test_patch_chapter_nidoran_with_s():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a Nidoran‘s goldfish</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>‘s goldfish</p>')
+    assert o == e
+
+
+def test_patch_chapter_nidoran_with_double_quotes():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a “Nidoran” duh</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>” duh</p>')
+    assert o == e
+
+
+def test_patch_chapter_mr_mime():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a “Mr. Mime” duh</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">Mr. Mime</a>” duh</p>')
+    assert o == e
+
+
+def test_patch_chapter_barrierd():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a “barrierd” duh</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">barrierd</a>” duh</p>')
+    assert o == e
+
+
+def test_patch_chapter_farfetched():
+    pokemon_lookup = epub.get_pokemon_lookup(get_pokemon())
+    chapter_soup = BeautifulSoup("<p>it's a farfetch’d yo</p>", "lxml")
+    o = epub.patch_chapter(chapter_soup, pokemon_lookup)
+    e = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-farfetchd">farfetch’d</a> yo</p>')
+    assert o == e
+
+
+def s(s: str) -> str:
+    return "<html><body>" + s + "</body></html>"
+
+
+def get_pokemon() -> List[pokemon.Pokemon]:
+    return [
+        pokemon.Pokemon(
+            name="Tauros",
+            link_id="tauros",
+            img_filename="pokemon/tauros.png",
+            description="Tauros (",
+            appears_in_book=False),
+        pokemon.Pokemon(
+            name="Nidoran♂",
+            link_id="nidoran",
+            img_filename="pokemon.png",
+            description="Nidoran",
+            appears_in_book=False
+        ),
+        pokemon.Pokemon(
+            name="Mr. Mime",
+            link_id="mrmime",
+            img_filename="pokemon/mr. mime.png",
+            description="Mr. Mime",
+            appears_in_book=False
+        ),
+        pokemon.Pokemon(
+            name="Farfetch'd",
+            link_id="farfetchd",
+            img_filename="pokemon/farfetch'd.png",
+            description="Farfetch",
+            appears_in_book=False
+        )
+    ]
--- a/test/test_pokedex.html
+++ b/test/test_pokedex.html