Add epub unit tests and fix Mr. Mime again to resolve #3

2022-12-17 20:26:21 -05:00
parent ea57b80d50
commit 8b83c2d2ea
4 changed files with 181 additions and 12216 deletions
--- a/conftest.py
+++ b/conftest.py
--- a/src/epub.py
+++ b/src/epub.py
@@ -7,11 +7,12 @@ from bs4 import BeautifulSoup, Tag
 from bs4.element import NavigableString
 from ebooklib import epub
 from src.pokemon import Pokemon
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Set
 from rich.progress import track
 POKEMON_ID_PREFIX = "pokemon-id-"
 POKEDEX_UID = "np_pokedex"
 SPECIAL_CHARS_REGEX = re.compile("([:,.!?“”‘’… ]+)")
@dataclass
@@ -21,8 +22,15 @@ class AnnoyingPokemon:
    name_in_pokedex: str
@dataclass
 class ChapterContext:
    """State shared by the helpers that patch a single chapter."""
    # Pokemon keyed by the name they are matched with; lookups use the
    # lowercased word taken from the chapter text.
    pokemon_lookup: Dict[str, Pokemon]
    pokemon_added: Set[str]  # Set to only link Pokemon for first occurrence in chapter
    # Soup of the chapter being patched; also used to create new <a> tags.
    chapter_soup: BeautifulSoup
 # Pokemon whose names span several chunks. is_annoying_pokemon compares
 # name_chunks against the lowercased chunks produced by
 # SPECIAL_CHARS_REGEX.split, where separator runs (e.g. ". " or "’") are
 # chunks of their own.
 ANNOYING_POKEMON = [
-    AnnoyingPokemon(["Mr", ".", "Mime"], 3, "mr. mime"),
+    AnnoyingPokemon(["mr", ". ", "mime"], 3, "mr. mime"),
    AnnoyingPokemon(["farfetch", "’", "d"], 3, "farfetch'd"),
    AnnoyingPokemon(["sirfetch", "’", "d"], 3, "sirfetch'd"),
 ]
@@ -49,53 +57,46 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
    return chapter
-def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
+def pokemon_to_link(p: Pokemon, name_as_in_book: str, ctx: ChapterContext) -> Tag:
-    special_chars_regex = re.compile("([:,.!?“”‘’… ]+)")
+    tag = ctx.chapter_soup.new_tag("a")
    soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
    # Set to remember which Pokemon have already gotten a link for that
    # chapter.
    pokemon_added_for_chapter = set()
    def pokemon_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
        tag = soup.new_tag("a")
    tag.string = name_as_in_book
    tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
        # tag.attrs["style"] = "color:black;text-decoration:none"
    return tag
-    def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
+
 def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
    """Return the ANNOYING_POKEMON entry whose ``name_chunks`` equal the
    lowercased slice of ``chunks`` starting at ``index`` (``length_chunks``
    items long), or ``None`` when no entry matches."""
    for p in ANNOYING_POKEMON:
        if p.name_chunks == list(
-                map(lambda s: s.lower(), chunks[index : index + p.length_chunks])
+            map(lambda s: s.lower(), chunks[index:index + p.length_chunks])
        ):
            return p
    return None
-    def patch_string(section: NavigableString) -> List:
+
 def patch_string(section: NavigableString, ctx: ChapterContext) -> List:
    """Replace Pokemon with link to Pokemon; requires splitting up the
    NavigableString into a list of NavigableStrings and Tags."""
    result: List[List] = [[]]
-        index, chunks = 0, special_chars_regex.split(str(section))
+    index, chunks = 0, SPECIAL_CHARS_REGEX.split(str(section))
    while index < len(chunks):
        word = chunks[index]
        pokemon: Optional[Pokemon] = None
        increment: int = 1
-            if word.lower() in pokemon_lookup:
+        if word.lower() in ctx.pokemon_lookup:
-                pokemon = pokemon_lookup[word.lower()]
+            pokemon = ctx.pokemon_lookup[word.lower()]
        elif annoying_pokemon := is_annoying_pokemon(index, chunks):
-                pokemon = pokemon_lookup[annoying_pokemon.name_in_pokedex]
+            pokemon = ctx.pokemon_lookup[annoying_pokemon.name_in_pokedex]
            increment = annoying_pokemon.length_chunks
-            if pokemon is not None and pokemon.name in pokemon_added_for_chapter:
+        if pokemon is not None and pokemon.name in ctx.pokemon_added:
            pokemon = None
        if pokemon is not None:
-                pokemon_added_for_chapter.add(pokemon.name)
+            ctx.pokemon_added.add(pokemon.name)
            pokemon.appears_in_book = True
-                name = "".join(chunks[index : index + increment])
+            name = "".join(chunks[index:index + increment])
-                link = pokemon_to_link(pokemon, name)
+            link = pokemon_to_link(pokemon, name, ctx)
            result.append(link)
            result.append([])
            index += increment
@@ -109,19 +110,27 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
            result[i] = NavigableString("".join(result[i]))
    return result
-    def patch_paragraph(paragraph: Tag):
+
 def patch_paragraph(paragraph: Tag, ctx: ChapterContext):
    """Rebuild ``paragraph.contents`` in place.

    NavigableString children are replaced by the list that patch_string
    returns for them; every other child is patched recursively and then kept
    in its original position."""
    contents = []
    for section in paragraph.contents:
        if isinstance(section, NavigableString):
-                contents += patch_string(section)
+            contents += patch_string(section, ctx)
        else:
-                patch_paragraph(section)
+            patch_paragraph(section, ctx)
            contents.append(section)
    paragraph.contents = contents
-    for p_soup in soup.find_all("p"):
+
-        patch_paragraph(p_soup)
+def patch_chapter(chapter_soup: BeautifulSoup, pokemon_lookup: Dict[str, Pokemon]) -> str:
-    chapter.content = str(soup)
+    ctx = ChapterContext(
        pokemon_lookup=pokemon_lookup,
        pokemon_added=set(),
        chapter_soup=chapter_soup,
    )
    for p_soup in chapter_soup.find_all("p"):
        patch_paragraph(p_soup, ctx)
    return str(chapter_soup)
 def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
@@ -150,8 +159,9 @@ def get_epub_with_pokedex(epub_filename: Path, pokemon: List[Pokemon]) -> epub.E
        logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
        sys.exit(1)
-    for c in track(chapters, description="Add Pokemon links to chapters"):
+    for chapter in track(chapters, description="Add Pokemon links to chapters"):
-        patch_chapter(c, pokemon_lookup)
+        chapter_soup = BeautifulSoup(chapter.content, "html.parser")
        chapter.content = patch_chapter(chapter_soup, pokemon_lookup)
    # only add Pokemon to Pokedex chapter that appear (in the book)
    pokemon = [p for p in pokemon if p.appears_in_book]
--- a/test/test_epub.py
+++ b/test/test_epub.py
@@ -0,0 +1,104 @@
 import epub
 import src.pokemon as pokemon
 from typing import List
 from bs4 import BeautifulSoup
 def test_patch_chapter_tauros():
    """A single lowercase Pokemon name is wrapped in a Pokedex link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a tauros yeah</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> yeah</p>')
    assert actual == expected
 def test_patch_chapter_double_tauros():
    """Only the first occurrence of a Pokemon in a chapter gets a link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's two tauros tauros</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s two <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> tauros</p>')
    assert actual == expected
 def test_patch_chapter_tauros_italic():
    """A name nested inside an <i> tag is linked inside that tag; the later
    plain occurrence stays unlinked."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's two <i>tauros</i> tauros</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s two <i><a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a></i> tauros</p>')
    assert actual == expected
 def test_patch_chapter_nidoran_with_s():
    """A name followed by a quote mark and "s" is linked without the suffix."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a Nidoran‘s goldfish</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>‘s goldfish</p>')
    assert actual == expected
 def test_patch_chapter_nidoran_with_double_quotes():
    """A name enclosed in curly double quotes is linked; the quotes stay
    outside the link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “Nidoran” duh</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>” duh</p>')
    assert actual == expected
 def test_patch_chapter_mr_mime():
    """The multi-chunk name "Mr. Mime" is linked as one unit."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “Mr. Mime” duh</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">Mr. Mime</a>” duh</p>')
    assert actual == expected
 def test_patch_chapter_barrierd():
    """The word "barrierd" is expected to link to the Mr. Mime entry."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “barrierd” duh</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">barrierd</a>” duh</p>')
    assert actual == expected
 def test_patch_chapter_farfetched():
    """The multi-chunk name "farfetch’d" (curly apostrophe) is linked as one
    unit."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a farfetch’d yo</p>", "lxml")
    actual = epub.patch_chapter(soup, lookup)
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-farfetchd">farfetch’d</a> yo</p>')
    assert actual == expected
 def s(s: str) -> str:
    """Wrap an HTML fragment in the ``<html><body>`` shell used by the
    expected strings of the tests in this module."""
    pieces = ("<html><body>", s, "</body></html>")
    return "".join(pieces)
 def get_pokemon() -> List[pokemon.Pokemon]:
    """Build the small Pokedex fixture shared by the tests in this module.

    Every entry is created with ``appears_in_book=False``.
    """
    # (name, link_id, img_filename, description)
    rows = (
        ("Tauros", "tauros", "pokemon/tauros.png", "Tauros ("),
        ("Nidoran♂", "nidoran", "pokemon.png", "Nidoran"),
        ("Mr. Mime", "mrmime", "pokemon/mr. mime.png", "Mr. Mime"),
        ("Farfetch'd", "farfetchd", "pokemon/farfetch'd.png", "Farfetch"),
    )
    return [
        pokemon.Pokemon(
            name=name,
            link_id=link_id,
            img_filename=img_filename,
            description=description,
            appears_in_book=False,
        )
        for name, link_id, img_filename, description in rows
    ]
--- a/test/test_pokedex.html
+++ b/test/test_pokedex.html