Improve the patch-string algorithm and add progress bars
This commit is contained in:
65
src/epub.py
65
src/epub.py
@@ -6,6 +6,8 @@ from bs4.element import NavigableString
|
||||
from ebooklib import epub
|
||||
from src.pokemon import Pokemon
|
||||
from typing import List, Dict
|
||||
from rich.progress import track
|
||||
from rich.console import Console
|
||||
|
||||
POKEMON_ID_PREFIX = "pokemon-id-"
|
||||
|
||||
@@ -33,49 +35,33 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
|
||||
|
||||
|
||||
def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
|
||||
r = re.compile("([:,.!?“”‘’…])")
|
||||
r = re.compile("([:,.!?“”‘’… ]+)")
|
||||
soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
|
||||
|
||||
def pokemon_name_to_link(key: str, word: str) -> Tag:
|
||||
tag = soup.new_tag("a")
|
||||
tag.string = word
|
||||
tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{key}"
|
||||
tag.attrs["style"] = "color:black;text-decoration:none"
|
||||
# tag.attrs["style"] = "color:black;text-decoration:none"
|
||||
return tag
|
||||
|
||||
def patch_string(section: NavigableString) -> List:
|
||||
"""Replace Pokemon with link to Pokemon; requires splitting up the
|
||||
NavigableString into a list of NavigableStrings and Tags."""
|
||||
result = [[]]
|
||||
for word in str(section).split(" "):
|
||||
word_stripped = r.sub("", word)
|
||||
if word_stripped.lower() in pokemon_lookup:
|
||||
word_split = r.split(word)
|
||||
i = word_split.index(word_stripped)
|
||||
if i == 0:
|
||||
# add space if there are no other chars before pokemon
|
||||
result[-1].append(" ")
|
||||
else:
|
||||
# add other chars before pokemon if there are any
|
||||
result[-1].append("".join(word_split[:i]))
|
||||
pokemon_link = pokemon_name_to_link(
|
||||
word_stripped.lower(), word_stripped
|
||||
)
|
||||
result.append(pokemon_link)
|
||||
for word in r.split(str(section)):
|
||||
if word.lower() in pokemon_lookup:
|
||||
pokemon_lookup[word.lower()].appears_in_book = True
|
||||
link = pokemon_name_to_link(word.lower(), word)
|
||||
result.append(link)
|
||||
result.append([])
|
||||
if i + 1 == len(word_split):
|
||||
# add space after pokemon if there are no other chars
|
||||
result[-1].append(" ")
|
||||
else:
|
||||
# add other chars after pokemon if there are any
|
||||
result[-1].append("".join(word_split[i + 1 :]))
|
||||
else:
|
||||
result[-1].append(word)
|
||||
|
||||
# convert words back into strings.
|
||||
# convert words back into strings
|
||||
for i in range(len(result)):
|
||||
if isinstance(result[i], list):
|
||||
result[i] = NavigableString(" ".join(result[i]))
|
||||
result[i] = NavigableString("".join(result[i]))
|
||||
return result
|
||||
|
||||
def patch_paragraph(paragraph: Tag):
|
||||
@@ -97,6 +83,19 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
|
||||
def patch(epub_filepath: str, pokemon: List[Pokemon]):
|
||||
book = epub.read_epub(epub_filepath)
|
||||
|
||||
pokemon_lookup = {p.name.lower(): p for p in pokemon}
|
||||
chapters = [
|
||||
b
|
||||
for b in book.get_items()
|
||||
if isinstance(b, epub.EpubHtml)
|
||||
if b.id.startswith("np_")
|
||||
]
|
||||
for c in track(chapters, description="Add Pokemon links to chapters"):
|
||||
patch_chapter(c, pokemon_lookup)
|
||||
|
||||
# only add Pokemon that appear in the book to the Pokedex chapter
|
||||
pokemon = [p for p in pokemon if p.appears_in_book]
|
||||
|
||||
chapter = create_pokedex_chapter(pokemon)
|
||||
book.add_item(chapter)
|
||||
link = epub.Link(chapter.file_name, chapter.title, chapter.id)
|
||||
@@ -113,16 +112,8 @@ def patch(epub_filepath: str, pokemon: List[Pokemon]):
|
||||
)
|
||||
book.add_item(img)
|
||||
|
||||
pokemon_lookup = {p.name.lower(): p for p in pokemon}
|
||||
chapters = [
|
||||
b
|
||||
for b in book.get_items()
|
||||
if isinstance(b, epub.EpubHtml)
|
||||
if b.id.startswith("np_")
|
||||
]
|
||||
for c in chapters:
|
||||
patch_chapter(c, pokemon_lookup)
|
||||
|
||||
console = Console()
|
||||
epub_out = epub_filepath.replace(".", "-with-links.")
|
||||
epub.write_epub(epub_out, book, {})
|
||||
logging.info(f"Write '{epub_out}'.")
|
||||
with console.status(f"Writing {epub_out}"):
|
||||
epub.write_epub(epub_out, book, {})
|
||||
console.print(f"[green]✓[/green] [orange1]{epub_out}[/orange1] written")
|
||||
|
||||
Reference in New Issue
Block a user