Add epub unit tests and fix Mr. Mime again to resolve #3

This commit is contained in:
2022-12-17 20:26:21 -05:00
parent ea57b80d50
commit 8b83c2d2ea
4 changed files with 181 additions and 12216 deletions

0
conftest.py Normal file
View File

View File

@@ -7,11 +7,12 @@ from bs4 import BeautifulSoup, Tag
from bs4.element import NavigableString from bs4.element import NavigableString
from ebooklib import epub from ebooklib import epub
from src.pokemon import Pokemon from src.pokemon import Pokemon
from typing import List, Dict, Optional from typing import List, Dict, Optional, Set
from rich.progress import track from rich.progress import track
POKEMON_ID_PREFIX = "pokemon-id-" POKEMON_ID_PREFIX = "pokemon-id-"
POKEDEX_UID = "np_pokedex" POKEDEX_UID = "np_pokedex"
SPECIAL_CHARS_REGEX = re.compile("([:,.!?“”‘’… ]+)")
@dataclass @dataclass
@@ -21,8 +22,15 @@ class AnnoyingPokemon:
name_in_pokedex: str name_in_pokedex: str
@dataclass
class ChapterContext:
    """State shared by the helper functions while one chapter is being patched."""

    # Lookup table that is queried with lower-cased words from the chapter text.
    pokemon_lookup: Dict[str, Pokemon]
    pokemon_added: Set[str]  # Set to only link Pokemon for first occurrence in chapter
    # Parsed chapter document; also used to create the new <a> tags.
    chapter_soup: BeautifulSoup
ANNOYING_POKEMON = [ ANNOYING_POKEMON = [
AnnoyingPokemon(["Mr", ".", "Mime"], 3, "mr. mime"), AnnoyingPokemon(["mr", ". ", "mime"], 3, "mr. mime"),
AnnoyingPokemon(["farfetch", "", "d"], 3, "farfetch'd"), AnnoyingPokemon(["farfetch", "", "d"], 3, "farfetch'd"),
AnnoyingPokemon(["sirfetch", "", "d"], 3, "sirfetch'd"), AnnoyingPokemon(["sirfetch", "", "d"], 3, "sirfetch'd"),
] ]
@@ -49,53 +57,46 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
return chapter return chapter
def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): def pokemon_to_link(p: Pokemon, name_as_in_book: str, ctx: ChapterContext) -> Tag:
special_chars_regex = re.compile("([:,.!?“”‘’… ]+)") tag = ctx.chapter_soup.new_tag("a")
soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
# Set to remember which Pokemon have already gotten a link for that
# chapter.
pokemon_added_for_chapter = set()
def pokemon_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
tag = soup.new_tag("a")
tag.string = name_as_in_book tag.string = name_as_in_book
tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}" tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
# tag.attrs["style"] = "color:black;text-decoration:none"
return tag return tag
def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
for p in ANNOYING_POKEMON: for p in ANNOYING_POKEMON:
if p.name_chunks == list( if p.name_chunks == list(
map(lambda s: s.lower(), chunks[index : index + p.length_chunks]) map(lambda s: s.lower(), chunks[index:index + p.length_chunks])
): ):
return p return p
return None return None
def patch_string(section: NavigableString) -> List:
def patch_string(section: NavigableString, ctx: ChapterContext) -> List:
"""Replace Pokemon with link to Pokemon; requires splitting up the """Replace Pokemon with link to Pokemon; requires splitting up the
NavigableString into a list of NavigableStrings and Tags.""" NavigableString into a list of NavigableStrings and Tags."""
result: List[List] = [[]] result: List[List] = [[]]
index, chunks = 0, special_chars_regex.split(str(section)) index, chunks = 0, SPECIAL_CHARS_REGEX.split(str(section))
while index < len(chunks): while index < len(chunks):
word = chunks[index] word = chunks[index]
pokemon: Optional[Pokemon] = None pokemon: Optional[Pokemon] = None
increment: int = 1 increment: int = 1
if word.lower() in pokemon_lookup: if word.lower() in ctx.pokemon_lookup:
pokemon = pokemon_lookup[word.lower()] pokemon = ctx.pokemon_lookup[word.lower()]
elif annoying_pokemon := is_annoying_pokemon(index, chunks): elif annoying_pokemon := is_annoying_pokemon(index, chunks):
pokemon = pokemon_lookup[annoying_pokemon.name_in_pokedex] pokemon = ctx.pokemon_lookup[annoying_pokemon.name_in_pokedex]
increment = annoying_pokemon.length_chunks increment = annoying_pokemon.length_chunks
if pokemon is not None and pokemon.name in pokemon_added_for_chapter: if pokemon is not None and pokemon.name in ctx.pokemon_added:
pokemon = None pokemon = None
if pokemon is not None: if pokemon is not None:
pokemon_added_for_chapter.add(pokemon.name) ctx.pokemon_added.add(pokemon.name)
pokemon.appears_in_book = True pokemon.appears_in_book = True
name = "".join(chunks[index : index + increment]) name = "".join(chunks[index:index + increment])
link = pokemon_to_link(pokemon, name) link = pokemon_to_link(pokemon, name, ctx)
result.append(link) result.append(link)
result.append([]) result.append([])
index += increment index += increment
@@ -109,19 +110,27 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
result[i] = NavigableString("".join(result[i])) result[i] = NavigableString("".join(result[i]))
return result return result
def patch_paragraph(paragraph: Tag):
def patch_paragraph(paragraph: Tag, ctx: ChapterContext):
contents = [] contents = []
for section in paragraph.contents: for section in paragraph.contents:
if isinstance(section, NavigableString): if isinstance(section, NavigableString):
contents += patch_string(section) contents += patch_string(section, ctx)
else: else:
patch_paragraph(section) patch_paragraph(section, ctx)
contents.append(section) contents.append(section)
paragraph.contents = contents paragraph.contents = contents
for p_soup in soup.find_all("p"):
patch_paragraph(p_soup) def patch_chapter(chapter_soup: BeautifulSoup, pokemon_lookup: Dict[str, Pokemon]) -> str:
chapter.content = str(soup) ctx = ChapterContext(
pokemon_lookup=pokemon_lookup,
pokemon_added=set(),
chapter_soup=chapter_soup,
)
for p_soup in chapter_soup.find_all("p"):
patch_paragraph(p_soup, ctx)
return str(chapter_soup)
def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]: def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
@@ -150,8 +159,9 @@ def get_epub_with_pokedex(epub_filename: Path, pokemon: List[Pokemon]) -> epub.E
logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.") logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
sys.exit(1) sys.exit(1)
for c in track(chapters, description="Add Pokemon links to chapters"): for chapter in track(chapters, description="Add Pokemon links to chapters"):
patch_chapter(c, pokemon_lookup) chapter_soup = BeautifulSoup(chapter.content, "html.parser")
chapter.content = patch_chapter(chapter_soup, pokemon_lookup)
# only add Pokemon to Pokedex chapter that appear (in the book) # only add Pokemon to Pokedex chapter that appear (in the book)
pokemon = [p for p in pokemon if p.appears_in_book] pokemon = [p for p in pokemon if p.appears_in_book]

View File

@@ -0,0 +1,104 @@
import epub
import src.pokemon as pokemon
from typing import List
from bs4 import BeautifulSoup
def test_patch_chapter_tauros():
    """A lower-case Pokemon name in a paragraph is wrapped in a Pokedex link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a tauros yeah</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> yeah</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_double_tauros():
    """Only the first of two occurrences in the same chapter becomes a link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's two tauros tauros</p>", "lxml")
    expected = s('<p>it\'s two <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> tauros</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_tauros_italic():
    """A name nested inside <i> is linked there; the later plain one is not."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's two <i>tauros</i> tauros</p>", "lxml")
    expected = s('<p>it\'s two <i><a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a></i> tauros</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_nidoran_with_s():
    """For "Nidorans" the link covers "Nidoran" and the trailing "s" stays outside."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a Nidorans goldfish</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>s goldfish</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_nidoran_with_double_quotes():
    """Curly double quotes around a name remain outside the generated link."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “Nidoran” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>” duh</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_mr_mime():
    """The multi-part name "Mr. Mime" is linked as one unit inside curly quotes."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “Mr. Mime” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">Mr. Mime</a>” duh</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_barrierd():
    """The word "barrierd" is expected to link to the Mr. Mime entry.

    NOTE(review): presumably get_pokemon_lookup registers "barrierd" as an
    alternate name for Mr. Mime -- confirm against that function.
    """
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a “barrierd” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">barrierd</a>” duh</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def test_patch_chapter_farfetched():
    """"farfetchd" written without an apostrophe links to the Farfetch'd entry."""
    lookup = epub.get_pokemon_lookup(get_pokemon())
    soup = BeautifulSoup("<p>it's a farfetchd yo</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-farfetchd">farfetchd</a> yo</p>')
    actual = epub.patch_chapter(soup, lookup)
    assert actual == expected
def s(s: str) -> str:
    """Wrap an HTML fragment in the <html><body> shell the tests compare against."""
    opening, closing = "<html><body>", "</body></html>"
    return "".join((opening, s, closing))
def get_pokemon() -> List[pokemon.Pokemon]:
    """Return newly constructed Pokemon fixtures for the tests in this file.

    Every call builds new objects, each with appears_in_book set to False.
    """
    # One row per fixture: (name, link_id, img_filename, description).
    fixture_rows = (
        ("Tauros", "tauros", "pokemon/tauros.png", "Tauros ("),
        ("Nidoran♂", "nidoran", "pokemon.png", "Nidoran"),
        ("Mr. Mime", "mrmime", "pokemon/mr. mime.png", "Mr. Mime"),
        ("Farfetch'd", "farfetchd", "pokemon/farfetch'd.png", "Farfetch"),
    )
    return [
        pokemon.Pokemon(
            name=name,
            link_id=link_id,
            img_filename=img_filename,
            description=description,
            appears_in_book=False,
        )
        for name, link_id, img_filename, description in fixture_rows
    ]

File diff suppressed because one or more lines are too long