ptoos-xray/src/epub.py

182 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import ebooklib
import logging
import re
import sys
from dataclasses import dataclass
from bs4 import BeautifulSoup, Tag
from bs4.element import NavigableString
from ebooklib import epub
from src.pokemon import Pokemon
from typing import List, Dict, Optional
from rich.progress import track
from rich.console import Console
# Prefix of the HTML ``id`` given to every Pokemon heading in the Pokedex
# chapter; links inserted into the book chapters target
# ``#<prefix><link_id>``.
POKEMON_ID_PREFIX = "pokemon-id-"
# uid of the generated Pokedex chapter. ``patch`` also uses it to detect an
# epub that already contains a Pokedex.
POKEDEX_UID = "np_pokedex"
@dataclass
class AnnoyingPokemon:
    """A Pokemon whose name is broken into several chunks by the tokenizer.

    ``patch_chapter`` splits text on runs of punctuation and spaces, so a
    name that contains such characters never shows up as a single word.
    """

    # The name as the splitter yields it: words and separator runs, in order.
    # Stored lower-case because the text chunks are lower-cased before they
    # are compared with this list.
    name_chunks: List[str]
    # Number of consecutive chunks the name occupies (len(name_chunks)).
    length_chunks: int
    # Key under which the Pokemon is stored in the lookup dictionary.
    name_in_pokedex: str


# Each ``name_chunks`` must equal what the splitter regex
# "([:,.!?“”‘’… ]+)" really produces for the name, lower-cased:
#   "Mr. Mime"   -> ["mr", ". ", "mime"]   (dot and space form ONE separator)
#   "Farfetch’d" -> ["farfetch", "’", "d"] (typographic apostrophe, U+2019)
# A name written with the ASCII apostrophe is not split at all and is found
# through the regular lookup, so it needs no entry here.
ANNOYING_POKEMON = [
    AnnoyingPokemon(["mr", ". ", "mime"], 3, "mr. mime"),
    AnnoyingPokemon(["farfetch", "’", "d"], 3, "farfetch'd"),
    AnnoyingPokemon(["sirfetch", "’", "d"], 3, "sirfetch'd"),
]
def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
    """Build the Pokedex chapter listing every Pokemon in ``pokemon``.

    Each entry consists of an ``<h2>`` heading carrying the anchor id that
    chapter links point to, the Pokemon's image, and one ``<p>`` per line of
    its description.

    Args:
        pokemon: Pokemon to list, in output order.

    Returns:
        The new chapter with its ``content`` already set.
    """
    title = "Pokedex"
    file_name = "content/np_pokedex.xhtml"
    chapter = epub.EpubHtml(title=title, file_name=file_name, uid=POKEDEX_UID)

    lines = ["<h1>Pokedex</h1>"]
    for entry in pokemon:
        heading = f'<h2 id="{POKEMON_ID_PREFIX}{entry.link_id}">{entry.name}</h2>'
        image = (
            f' <p><img alt="[Pokemon {entry.name}]" '
            f'src="../{entry.img_filename}"/><br/></p>'
        )
        lines += [heading, image]
        lines.extend(f" <p>{text}</p>" for text in entry.description.split("\n"))
        # Empty line separates consecutive entries in the generated markup.
        lines.append("")

    chapter.content = "\n".join(lines)
    return chapter
def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
    """Turn the first mention of each known Pokemon in ``chapter`` into a link.

    The chapter's HTML is parsed and the text inside every ``<p>`` element is
    split into words and separator runs. The first occurrence (per chapter)
    of every name found in ``pokemon_lookup`` is wrapped in an ``<a>`` that
    points at the Pokemon's anchor in ``np_pokedex.xhtml``. Every linked
    Pokemon gets ``appears_in_book = True`` and the patched markup is written
    back to ``chapter.content``.

    Args:
        chapter: Chapter to patch in place.
        pokemon_lookup: Maps a lower-cased name (or alias) to its Pokemon.
    """
    special_chars_regex = re.compile("([:,.!?“”‘’… ]+)")
    soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")

    # Names of the Pokemon that already got a link in this chapter.
    pokemon_added_for_chapter = set()

    def pokemon_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
        """Create an ``<a>`` to ``p``'s Pokedex entry, keeping the book's spelling."""
        tag = soup.new_tag("a")
        tag.string = name_as_in_book
        tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
        return tag

    def normalize_chunk(chunk: str) -> str:
        """Fold case and surrounding whitespace so chunks compare reliably."""
        return chunk.strip().lower()

    def is_annoying_pokemon(index: int, chunks: List[str]) -> Optional[AnnoyingPokemon]:
        """Return the multi-chunk Pokemon whose name starts at ``chunks[index]``.

        Both sides are normalized: the table may hold capitalized chunks, and
        a separator taken from the text can carry spaces (". " for "Mr. Mime")
        because the splitter treats the space as a separator character too.
        """
        for candidate in ANNOYING_POKEMON:
            window = chunks[index : index + candidate.length_chunks]
            expected = [normalize_chunk(c) for c in candidate.name_chunks]
            # A window cut short by the end of the text has a different
            # length and therefore never compares equal.
            if expected == [normalize_chunk(c) for c in window]:
                return candidate
        return None

    def patch_string(section: NavigableString) -> List:
        """Split ``section`` into alternating text nodes and Pokemon links.

        The result always starts and ends with a (possibly empty)
        NavigableString; a result with a single element means that no link
        was created.
        """
        pieces = []
        pending = []  # chunks of plain text collected since the last link
        index, chunks = 0, special_chars_regex.split(str(section))
        while index < len(chunks):
            word = chunks[index]
            increment = 1
            pokemon: Optional[Pokemon] = pokemon_lookup.get(word.lower())
            if pokemon is None:
                annoying_pokemon = is_annoying_pokemon(index, chunks)
                if annoying_pokemon is not None:
                    # .get(): a special-cased name that is missing from the
                    # lookup is treated as ordinary text instead of crashing.
                    pokemon = pokemon_lookup.get(annoying_pokemon.name_in_pokedex)
                    increment = annoying_pokemon.length_chunks

            # Unknown word, or a Pokemon that was already linked earlier in
            # this chapter: keep the chunk as plain text.
            if pokemon is None or pokemon.name in pokemon_added_for_chapter:
                pending.append(word)
                index += 1
                continue

            pokemon_added_for_chapter.add(pokemon.name)
            pokemon.appears_in_book = True
            name = "".join(chunks[index : index + increment])
            pieces.append(NavigableString("".join(pending)))
            pending = []
            pieces.append(pokemon_to_link(pokemon, name))
            index += increment

        pieces.append(NavigableString("".join(pending)))
        return pieces

    def patch_paragraph(paragraph: Tag):
        """Recursively replace the text nodes below ``paragraph`` that gain links."""
        # Iterate over a snapshot because children are replaced while walking.
        for section in list(paragraph.contents):
            # Exact type check: Comment, CData and similar nodes are
            # NavigableString subclasses and must not be rewritten into
            # plain, visible text.
            if type(section) is NavigableString:
                pieces = patch_string(section)
                if len(pieces) == 1:
                    continue  # no link created, leave the node untouched
                # Use the tree API rather than assigning ``.contents`` so
                # Beautiful Soup keeps its parent/sibling links consistent.
                for piece in pieces:
                    section.insert_before(piece)
                section.extract()
            elif isinstance(section, Tag) and section.name != "a":
                # Existing links are skipped: a link inside a link is not
                # valid markup.
                patch_paragraph(section)

    for p_soup in soup.find_all("p"):
        patch_paragraph(p_soup)
    chapter.content = str(soup)
def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:
    """Build the dictionary used to recognise Pokemon names in the book text.

    Keys are the lower-cased ``name`` of every Pokemon. Two extra aliases are
    registered when their target is present: ``"nidoran"`` for ``"nidoran♂"``
    and ``"barrierd"`` for ``"mr. mime"`` (presumably a spelling used in the
    book; confirm against the text).

    Args:
        pokemon: All Pokemon that may be linked.

    Returns:
        Mapping from lower-cased name or alias to the Pokemon object.
    """
    pokemon_lookup = {p.name.lower(): p for p in pokemon}

    # alias -> canonical lower-cased name
    aliases = {"nidoran": "nidoran♂", "barrierd": "mr. mime"}
    for alias, canonical in aliases.items():
        # An empty or partial Pokemon list must not fail with KeyError just
        # because an alias target is missing.
        if canonical in pokemon_lookup:
            pokemon_lookup[alias] = pokemon_lookup[canonical]
    return pokemon_lookup
def _with_links_filename(epub_filename: str) -> str:
    """Return ``epub_filename`` with ``-with-links`` inserted before its extension."""
    import os.path  # local import keeps this helper self-contained

    # Only the extension is touched: dots in directory names ("./book.epub")
    # or in the stem ("my.book.epub") stay as they are, and a name without
    # an extension still gets a distinct output path.
    root, extension = os.path.splitext(epub_filename)
    return f"{root}-with-links{extension}"


def patch(epub_filename: str, pokemon: List[Pokemon]):
    """Write a copy of the epub with Pokemon links and a Pokedex chapter.

    Every ``EpubHtml`` item whose id starts with ``np_`` is patched with
    links. A Pokedex chapter covering only the Pokemon that were actually
    linked is then added to the book, its table of contents and its spine,
    together with one image per Pokemon. The result is written next to the
    input as ``<name>-with-links<ext>``.

    The process exits with status 1 if the epub cannot be read or if it
    already contains a chapter with the Pokedex uid.

    Args:
        epub_filename: Path of the epub to read.
        pokemon: All known Pokemon.
    """
    try:
        book = epub.read_epub(epub_filename)
    except Exception:
        logging.exception("Failed to open epub.")
        sys.exit(1)

    pokemon_lookup = get_pokemon_lookup(pokemon)
    chapters = [
        item
        for item in book.get_items()
        if isinstance(item, epub.EpubHtml) and item.id.startswith("np_")
    ]

    if any(c.id == POKEDEX_UID for c in chapters):
        logging.warning(f"It looks like '{epub_filename}' already has a Pokedex.")
        sys.exit(1)

    for c in track(chapters, description="Add Pokemon links to chapters"):
        patch_chapter(c, pokemon_lookup)

    # Only Pokemon that appear in the book go into the Pokedex chapter.
    pokemon = [p for p in pokemon if p.appears_in_book]

    chapter = create_pokedex_chapter(pokemon)
    book.add_item(chapter)
    link = epub.Link(chapter.file_name, chapter.title, chapter.id)
    book.toc.append(link)
    book.spine.append((chapter.id, "yes"))

    for p in pokemon:
        # Context manager so the image file is closed right after reading.
        with open(p.img_filename, "rb") as image_file:
            image_content = image_file.read()
        img = epub.EpubItem(
            uid=p.name,
            file_name=p.img_filename,
            media_type="image/png",
            content=image_content,
        )
        book.add_item(img)

    console = Console()
    epub_out = _with_links_filename(epub_filename)
    with console.status(f"Writing {epub_out}"):
        epub.write_epub(epub_out, book, {})
    console.print(f"[green]✓[/green] [orange1]{epub_out}[/orange1] written")