diff --git a/Pipfile b/Pipfile index fd59265..ef567aa 100644 --- a/Pipfile +++ b/Pipfile @@ -9,9 +9,9 @@ python_version = "3.10" [packages] bs4 = "*" ebooklib = "*" -lxml = "*" pydantic = "*" requests = "*" +rich = "*" [dev-packages] black = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 1dcd4c1..22d793a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "5e5d63b2697bac028104473e63e0cfee2967b7aa93c011800ea85523c22c3f99" + "sha256": "8de9c46e0028fc5384e51e2622ff20004653dca7138c702a57f12769c35240bf" }, "pipfile-spec": 6, "requires": { @@ -47,6 +47,13 @@ "markers": "python_full_version >= '3.6.0'", "version": "==2.1.1" }, + "commonmark": { + "hashes": [ + "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", + "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9" + ], + "version": "==0.9.1" + }, "ebooklib": { "hashes": [ "sha256:fe23e22c28050196c68db3e7b13b257bf39426d927cb395c6f2cc13ac11327f1" @@ -135,7 +142,7 @@ "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8", "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f" ], - "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==4.9.1" }, "pydantic": { @@ -180,6 +187,14 @@ "index": "pypi", "version": "==1.10.2" }, + "pygments": { + "hashes": [ + "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1", + "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42" + ], + "markers": "python_version >= '3.6'", + "version": "==2.13.0" + }, "requests": { "hashes": [ "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", @@ -188,6 +203,14 @@ "index": "pypi", "version": "==2.28.1" }, + "rich": { + "hashes": [ + "sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e", + "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0" + ], + "index": "pypi", + "version": "==12.6.0" + }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", diff --git a/src/epub.py b/src/epub.py index f69139a..709a711 100644 --- a/src/epub.py +++ b/src/epub.py @@ -6,6 +6,8 @@ from bs4.element import NavigableString from ebooklib import epub from src.pokemon import Pokemon from typing import List, Dict +from rich.progress import track +from rich.console import Console POKEMON_ID_PREFIX = "pokemon-id-" @@ -33,49 +35,33 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml: def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): - r = re.compile("([:,.!?“”‘’…])") + r = re.compile("([:,.!?“”‘’… ]+)") soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser") def pokemon_name_to_link(key: str, word: str) -> Tag: tag = soup.new_tag("a") tag.string = word tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{key}" - tag.attrs["style"] = "color:black;text-decoration:none" + # tag.attrs["style"] = "color:black;text-decoration:none" return tag def patch_string(section: NavigableString) -> List: """Replace Pokemon with link to Pokemon; requires splitting up the NavigableString into a list of NavigableStrings and Tags.""" result = [[]] - for word in str(section).split(" "): - word_stripped = r.sub("", word) - if word_stripped.lower() in pokemon_lookup: - word_split = r.split(word) - i = word_split.index(word_stripped) - if i == 0: - # add space if there are no other chars before pokemon - result[-1].append(" ") - else: - # add other chars before pokemon if there are any - result[-1].append("".join(word_split[:i])) - pokemon_link = pokemon_name_to_link( - word_stripped.lower(), word_stripped - ) - result.append(pokemon_link) + for word in r.split(str(section)): + if word.lower() in pokemon_lookup: + pokemon_lookup[word.lower()].appears_in_book = True + link = pokemon_name_to_link(word.lower(), word) + result.append(link) result.append([]) - if i + 1 == len(word_split): - # add space after pokemon if there are no other chars - result[-1].append(" ") - else: - # add other chars after pokemon if there are any - result[-1].append("".join(word_split[i + 1 :])) else: result[-1].append(word) - # convert words back into strings. + # convert words back into strings for i in range(len(result)): if isinstance(result[i], list): - result[i] = NavigableString(" ".join(result[i])) + result[i] = NavigableString("".join(result[i])) return result def patch_paragraph(paragraph: Tag): @@ -97,6 +83,19 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): def patch(epub_filepath: str, pokemon: List[Pokemon]): book = epub.read_epub(epub_filepath) + pokemon_lookup = {p.name.lower(): p for p in pokemon} + chapters = [ + b + for b in book.get_items() + if isinstance(b, epub.EpubHtml) + if b.id.startswith("np_") + ] + for c in track(chapters, description="Add Pokemon links to chapters"): + patch_chapter(c, pokemon_lookup) + + # only add Pokemon to Pokedex chapter that appear (in the book) + pokemon = [p for p in pokemon if p.appears_in_book] + chapter = create_pokedex_chapter(pokemon) book.add_item(chapter) link = epub.Link(chapter.file_name, chapter.title, chapter.id) @@ -113,16 +112,8 @@ def patch(epub_filepath: str, pokemon: List[Pokemon]): ) book.add_item(img) - pokemon_lookup = {p.name.lower(): p for p in pokemon} - chapters = [ - b - for b in book.get_items() - if isinstance(b, epub.EpubHtml) - if b.id.startswith("np_") - ] - for c in chapters: - patch_chapter(c, pokemon_lookup) - + console = Console() epub_out = epub_filepath.replace(".", "-with-links.") - epub.write_epub(epub_out, book, {}) - logging.info(f"Write '{epub_out}'.") + with console.status(f"Writing {epub_out}"): + epub.write_epub(epub_out, book, {}) + console.print(f"[green]✓[/green] [orange1]{epub_out}[/orange1] written") diff --git a/src/main.py b/src/main.py index b7db292..f563542 100644 --- a/src/main.py +++ b/src/main.py @@ -3,13 +3,19 @@ import logging import src.pokemon import src.epub +from rich.logging import RichHandler + def main(): - logging.basicConfig(format="%(message)s", level=logging.INFO) + logging.basicConfig( + level=logging.INFO, + format="%(message)s", + datefmt="[%X]", + handlers=[RichHandler()], + ) try: ptoos_epub = sys.argv[1] except IndexError: ptoos_epub = "poos.epub" - logging.info(f"Patching '{ptoos_epub}'.") pokemon = src.pokemon.get_pokemon() src.epub.patch(ptoos_epub, pokemon) diff --git a/src/pokemon.py b/src/pokemon.py index 926df22..e877179 100644 --- a/src/pokemon.py +++ b/src/pokemon.py @@ -2,6 +2,7 @@ import requests import sys import os import logging +from rich.progress import track from pydantic import BaseModel from bs4 import BeautifulSoup from typing import List @@ -23,6 +24,7 @@ class Pokemon(BaseModel): img_filepath: str json_filepath: str description: str = "" + appears_in_book: bool = False def download_to_file(url: str, filepath: str, override=False): @@ -65,7 +67,7 @@ def get_pokemon() -> List[Pokemon]: table_row_soups += tbody_soup.find_all("tr", recursive=False)[1:] pokemon = [] - for table_row_soup in table_row_soups: + for table_row_soup in track(table_row_soups, description="Download Pokemon"): name = table_row_soup.find_next("th").next_element.attrs["title"] # ignore Galarian and Alolan Pokemon so @@ -101,7 +103,7 @@ def get_pokemon() -> List[Pokemon]: extend_pokemon(p) with open(p.json_filepath, "w") as f: f.write(p.json()) - logging.info(f"Saved {p.json_filepath}.") + logging.debug(f"Saved {p.json_filepath}.") # Filter out speculative Pokemon pokemon = [ @@ -110,7 +112,6 @@ def get_pokemon() -> List[Pokemon]: if not p.description.startswith("This article's contents will change") ] - logging.info("Pokemon loaded.") return pokemon