Add epub unit tests and fix Mr. Mime again to resolve #3
This commit is contained in:
0
conftest.py
Normal file
0
conftest.py
Normal file
144
src/epub.py
144
src/epub.py
@@ -7,11 +7,12 @@ from bs4 import BeautifulSoup, Tag
|
|||||||
from bs4.element import NavigableString
|
from bs4.element import NavigableString
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
from src.pokemon import Pokemon
|
from src.pokemon import Pokemon
|
||||||
from typing import List, Dict, Optional
|
from typing import List, Dict, Optional, Set
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
|
|
||||||
POKEMON_ID_PREFIX = "pokemon-id-"
|
POKEMON_ID_PREFIX = "pokemon-id-"
|
||||||
POKEDEX_UID = "np_pokedex"
|
POKEDEX_UID = "np_pokedex"
|
||||||
|
SPECIAL_CHARS_REGEX = re.compile("([:,.!?“”‘’… ]+)")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -21,8 +22,15 @@ class AnnoyingPokemon:
|
|||||||
name_in_pokedex: str
|
name_in_pokedex: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ChapterContext:
    """State shared by the helpers that patch one chapter."""

    # Maps a lower-cased name as it appears in the text to its Pokemon.
    pokemon_lookup: Dict[str, Pokemon]
    # Names of Pokemon that already got a link, so that only the first
    # occurrence within the chapter is linked.
    pokemon_added: Set[str]
    # Soup of the chapter being patched; new <a> tags are created from it.
    chapter_soup: BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
# Pokemon whose names contain separator characters and therefore span several
# chunks once the text has been split on SPECIAL_CHARS_REGEX. The name chunks
# are written in lower case because is_annoying_pokemon lower-cases the text
# chunks before comparing them.
ANNOYING_POKEMON = [
    # ". " (dot plus space) comes out of the split as one separator chunk.
    AnnoyingPokemon(["mr", ". ", "mime"], 3, "mr. mime"),
    AnnoyingPokemon(["farfetch", "’", "d"], 3, "farfetch'd"),
    AnnoyingPokemon(["sirfetch", "’", "d"], 3, "sirfetch'd"),
]
|
||||||
@@ -49,79 +57,80 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
|
|||||||
return chapter
|
return chapter
|
||||||
|
|
||||||
|
|
||||||
def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
|
def pokemon_to_link(p: Pokemon, name_as_in_book: str, ctx: ChapterContext) -> Tag:
    """Build an ``<a>`` tag linking a name in the text to its Pokedex entry.

    The tag is created through ``ctx.chapter_soup``, shows ``name_as_in_book``
    as its text and points at the anchor ``POKEMON_ID_PREFIX + p.link_id``
    inside ``np_pokedex.xhtml``.
    """
    href = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
    link = ctx.chapter_soup.new_tag("a")
    link.attrs["href"] = href
    link.string = name_as_in_book
    return link
|
||||||
|
|
||||||
# Set to remember which Pokemon have already gotten a link for that
|
|
||||||
# chapter.
|
|
||||||
pokemon_added_for_chapter = set()
|
|
||||||
|
|
||||||
def pokemon_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
|
def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
    """Return the ANNOYING_POKEMON entry whose name starts at ``chunks[index]``.

    The chunks are lower-cased before being compared with the entry's
    ``name_chunks``. The first matching entry wins; None is returned when no
    entry matches.
    """
    for candidate in ANNOYING_POKEMON:
        window = chunks[index:index + candidate.length_chunks]
        if [chunk.lower() for chunk in window] == candidate.name_chunks:
            return candidate
    return None
|
||||||
|
|
||||||
def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
|
|
||||||
for p in ANNOYING_POKEMON:
|
|
||||||
if p.name_chunks == list(
|
|
||||||
map(lambda s: s.lower(), chunks[index : index + p.length_chunks])
|
|
||||||
):
|
|
||||||
return p
|
|
||||||
return None
|
|
||||||
|
|
||||||
def patch_string(section: NavigableString) -> List:
|
def patch_string(section: NavigableString, ctx: ChapterContext) -> List:
    """Replace Pokemon names in a text node with links to their Pokedex entry.

    The text is split on SPECIAL_CHARS_REGEX (the separators are kept as
    chunks). Each chunk is looked up lower-cased in ``ctx.pokemon_lookup``;
    names spanning several chunks are detected with ``is_annoying_pokemon``.
    Only the first occurrence of a Pokemon per chapter is linked (tracked in
    ``ctx.pokemon_added``), and every linked Pokemon gets
    ``appears_in_book = True``.

    Returns:
        A list of NavigableStrings (plain text, possibly empty) and link
        Tags that together reproduce the original text.
    """
    # Plain-text chunks are collected in lists and joined at the end; link
    # Tags are stored directly between those lists.
    pieces: List = [[]]
    chunks = SPECIAL_CHARS_REGEX.split(str(section))
    index = 0
    while index < len(chunks):
        word = chunks[index]
        increment: int = 1

        pokemon: Optional[Pokemon] = ctx.pokemon_lookup.get(word.lower())
        if pokemon is None:
            annoying_pokemon = is_annoying_pokemon(index, chunks)
            if annoying_pokemon is not None:
                # .get() instead of indexing: a multi-chunk name that is
                # missing from the lookup stays plain text instead of
                # aborting the whole run with a KeyError.
                pokemon = ctx.pokemon_lookup.get(annoying_pokemon.name_in_pokedex)
                increment = annoying_pokemon.length_chunks

        # Only the first occurrence in a chapter becomes a link.
        if pokemon is not None and pokemon.name in ctx.pokemon_added:
            pokemon = None

        if pokemon is None:
            pieces[-1].append(word)
            index += 1
            continue

        ctx.pokemon_added.add(pokemon.name)
        pokemon.appears_in_book = True
        name = "".join(chunks[index:index + increment])
        pieces.append(pokemon_to_link(pokemon, name, ctx))
        pieces.append([])
        index += increment

    # Convert the collected word lists back into strings.
    return [
        NavigableString("".join(piece)) if isinstance(piece, list) else piece
        for piece in pieces
    ]
|
||||||
|
|
||||||
def patch_paragraph(paragraph: Tag):
|
|
||||||
contents = []
|
|
||||||
for section in paragraph.contents:
|
|
||||||
if isinstance(section, NavigableString):
|
|
||||||
contents += patch_string(section)
|
|
||||||
else:
|
|
||||||
patch_paragraph(section)
|
|
||||||
contents.append(section)
|
|
||||||
paragraph.contents = contents
|
|
||||||
|
|
||||||
for p_soup in soup.find_all("p"):
|
def patch_paragraph(paragraph: Tag, ctx: ChapterContext):
    """Replace Pokemon names inside ``paragraph`` with Pokedex links, in place.

    Text nodes are rewritten with ``patch_string``; nested tags are patched
    recursively. Comments, CDATA sections and similar special strings are
    kept untouched.
    """
    # Local import: PreformattedString is the common base class of Comment,
    # CData, ProcessingInstruction, Declaration and Doctype.
    from bs4.element import PreformattedString

    contents = []
    # Iterate over a snapshot because the children are detached further down.
    for section in list(paragraph.contents):
        if not isinstance(section, NavigableString):
            patch_paragraph(section, ctx)
            contents.append(section)
        elif isinstance(section, PreformattedString):
            # Rebuilding e.g. a Comment as a plain NavigableString would turn
            # it into visible text, so special strings are passed through.
            contents.append(section)
        else:
            contents += patch_string(section, ctx)

    # Rebuild the children through the tree API instead of assigning
    # ``paragraph.contents`` directly, so the new strings and link tags get
    # proper parent/sibling links.
    paragraph.clear()
    for element in contents:
        paragraph.append(element)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_chapter(chapter_soup: BeautifulSoup, pokemon_lookup: Dict[str, Pokemon]) -> str:
    """Link Pokemon names in every ``<p>`` of the chapter and return the markup.

    ``chapter_soup`` is modified in place. A fresh ChapterContext is created
    per call, so "already linked" tracking starts empty for each chapter.
    """
    ctx = ChapterContext(
        chapter_soup=chapter_soup,
        pokemon_added=set(),
        pokemon_lookup=pokemon_lookup,
    )
    paragraphs = chapter_soup.find_all("p")
    for paragraph in paragraphs:
        patch_paragraph(paragraph, ctx)
    return str(chapter_soup)
|
||||||
|
|
||||||
|
|
||||||
def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
|
def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
|
||||||
@@ -150,8 +159,9 @@ def get_epub_with_pokedex(epub_filename: Path, pokemon: List[Pokemon]) -> epub.E
|
|||||||
logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
|
logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
for c in track(chapters, description="Add Pokemon links to chapters"):
|
for chapter in track(chapters, description="Add Pokemon links to chapters"):
|
||||||
patch_chapter(c, pokemon_lookup)
|
chapter_soup = BeautifulSoup(chapter.content, "html.parser")
|
||||||
|
chapter.content = patch_chapter(chapter_soup, pokemon_lookup)
|
||||||
|
|
||||||
# only add Pokemon to Pokedex chapter that appear (in the book)
|
# only add Pokemon to Pokedex chapter that appear (in the book)
|
||||||
pokemon = [p for p in pokemon if p.appears_in_book]
|
pokemon = [p for p in pokemon if p.appears_in_book]
|
||||||
|
|||||||
@@ -0,0 +1,104 @@
|
|||||||
|
import epub
|
||||||
|
import src.pokemon as pokemon
|
||||||
|
from typing import List
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_tauros():
    """A plain Pokemon name inside a paragraph is wrapped in a Pokedex link."""
    chapter_soup = BeautifulSoup("<p>it's a tauros yeah</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> yeah</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_double_tauros():
    """Only the first of two occurrences of the same Pokemon is linked."""
    chapter_soup = BeautifulSoup("<p>it's two tauros tauros</p>", "lxml")
    expected = s('<p>it\'s two <a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a> tauros</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_tauros_italic():
    """A Pokemon name nested in an ``<i>`` tag is linked inside that tag."""
    chapter_soup = BeautifulSoup("<p>it's two <i>tauros</i> tauros</p>", "lxml")
    expected = s('<p>it\'s two <i><a href="np_pokedex.xhtml#pokemon-id-tauros">tauros</a></i> tauros</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_nidoran_with_s():
    """A name followed by a typographic quote and "s" is linked without the suffix."""
    chapter_soup = BeautifulSoup("<p>it's a Nidoran‘s goldfish</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>‘s goldfish</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_nidoran_with_double_quotes():
    """A name enclosed in typographic double quotes is linked; the quotes stay outside."""
    chapter_soup = BeautifulSoup("<p>it's a “Nidoran” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-nidoran">Nidoran</a>” duh</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_mr_mime():
    """The multi-chunk name "Mr. Mime" is linked as a single unit."""
    chapter_soup = BeautifulSoup("<p>it's a “Mr. Mime” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">Mr. Mime</a>” duh</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_barrierd():
    """The name "barrierd" is linked to the Mr. Mime Pokedex entry.

    NOTE(review): presumably an alternative name that get_pokemon_lookup
    registers for Mr. Mime -- confirm against src/epub.py.
    """
    chapter_soup = BeautifulSoup("<p>it's a “barrierd” duh</p>", "lxml")
    expected = s('<p>it\'s a “<a href="np_pokedex.xhtml#pokemon-id-mrmime">barrierd</a>” duh</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_patch_chapter_farfetched():
    """A name containing a typographic apostrophe ("farfetch’d") is linked as one unit."""
    chapter_soup = BeautifulSoup("<p>it's a farfetch’d yo</p>", "lxml")
    expected = s('<p>it\'s a <a href="np_pokedex.xhtml#pokemon-id-farfetchd">farfetch’d</a> yo</p>')
    actual = epub.patch_chapter(chapter_soup, epub.get_pokemon_lookup(get_pokemon()))
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def s(s: str) -> str:
    """Wrap an HTML fragment in the ``<html><body>`` shell the tests expect in the output."""
    return f"<html><body>{s}</body></html>"
|
||||||
|
|
||||||
|
|
||||||
|
def get_pokemon() -> List[pokemon.Pokemon]:
    """Return freshly built Pokemon fixtures for the tests.

    Each call creates new objects, all with ``appears_in_book=False``.
    """
    tauros = pokemon.Pokemon(
        name="Tauros",
        link_id="tauros",
        img_filename="pokemon/tauros.png",
        description="Tauros (",
        appears_in_book=False,
    )
    nidoran = pokemon.Pokemon(
        name="Nidoran♂",
        link_id="nidoran",
        img_filename="pokemon.png",
        description="Nidoran",
        appears_in_book=False,
    )
    mr_mime = pokemon.Pokemon(
        name="Mr. Mime",
        link_id="mrmime",
        img_filename="pokemon/mr. mime.png",
        description="Mr. Mime",
        appears_in_book=False,
    )
    farfetchd = pokemon.Pokemon(
        name="Farfetch'd",
        link_id="farfetchd",
        img_filename="pokemon/farfetch'd.png",
        description="Farfetch",
        appears_in_book=False,
    )
    return [tauros, nidoran, mr_mime, farfetchd]
|
||||||
|
|||||||
12149
test/test_pokedex.html
12149
test/test_pokedex.html
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user