From 82002257809fe917e08c5931dc5d098c528eaac9 Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Sat, 22 Oct 2022 21:37:01 -0400 Subject: [PATCH] Update readme and format scripts --- README.md | 7 +++++-- src/epub.py | 49 ++++++++++++++++++++++++++++----------------- src/main.py | 14 +++++++------ src/pokemon.py | 54 +++++++++++++++++++++++++++++++------------------- 4 files changed, 78 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 77cac8c..2fdcee5 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,12 @@ # poos-xray -Script that annotates Pokemon: the Origin of the Species epub with links to -descriptions of the Pokemon. +Script that annotates the Pokemon: the Origin of the Species e-book with links +to descriptions and pictures of the Pokemon within the e-book itself. + +It works with the epub that you can download from [Daystar Eld's Patreon](https://www.patreon.com/daystareld/). ```shell pipenv install pipenv shell +python poos-xray "DaystarEld - Pokemon The Origin of Species.epub" ``` diff --git a/src/epub.py b/src/epub.py index afa9d35..f69139a 100644 --- a/src/epub.py +++ b/src/epub.py @@ -9,26 +9,29 @@ from typing import List, Dict POKEMON_ID_PREFIX = "pokemon-id-" + def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml: POKEDEX_TITLE = "Pokedex" POKEDEX_FILE = "content/np_pokedex.xhtml" POKEDEX_UID = "np_pokedex" - chapter = epub.EpubHtml(title=POKEDEX_TITLE, file_name=POKEDEX_FILE, uid=POKEDEX_UID) - content = ['

Pokedex

'] + chapter = epub.EpubHtml( + title=POKEDEX_TITLE, file_name=POKEDEX_FILE, uid=POKEDEX_UID + ) + content = ["

Pokedex

"] for p in pokemon: content.append(f'

{p.name}

') - content.append(f'

[Pokemon {p.name}]

') + content.append( + f'

[Pokemon {p.name}]

' + ) for paragraph in p.description.split("\n"): - content.append(f'

{paragraph}

') - content.append('') + content.append(f"

{paragraph}

") + content.append("") chapter.content = "\n".join(content) return chapter - - def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): r = re.compile("([:,.!?“”‘’…])") soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser") @@ -41,8 +44,8 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): return tag def patch_string(section: NavigableString) -> List: - """ Replace Pokemon with link to Pokemon; requires splitting up the - NavigableString into a list of NavigableStrings and Tags. """ + """Replace Pokemon with link to Pokemon; requires splitting up the + NavigableString into a list of NavigableStrings and Tags.""" result = [[]] for word in str(section).split(" "): word_stripped = r.sub("", word) @@ -55,7 +58,9 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): else: # add other chars before pokemon if there are any result[-1].append("".join(word_split[:i])) - pokemon_link = pokemon_name_to_link(word_stripped.lower(), word_stripped) + pokemon_link = pokemon_name_to_link( + word_stripped.lower(), word_stripped + ) result.append(pokemon_link) result.append([]) if i + 1 == len(word_split): @@ -63,7 +68,7 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]): result[-1].append(" ") else: # add other chars after pokemon if there are any - result[-1].append("".join(word_split[i + 1:])) + result[-1].append("".join(word_split[i + 1 :])) else: result[-1].append(word) @@ -96,20 +101,28 @@ def patch(epub_filepath: str, pokemon: List[Pokemon]): book.add_item(chapter) link = epub.Link(chapter.file_name, chapter.title, chapter.id) book.toc.append(link) - book.spine.append((chapter.id, 'yes')) + book.spine.append((chapter.id, "yes")) for p in pokemon: - image_content = open(p.img_filepath, 'rb').read() - img = epub.EpubItem(uid=p.name, file_name=p.img_filepath, media_type='image/png', content=image_content) + image_content = open(p.img_filepath, "rb").read() + img = epub.EpubItem( + uid=p.name, + file_name=p.img_filepath, + media_type="image/png", + content=image_content, + ) book.add_item(img) pokemon_lookup = {p.name.lower(): p for p in pokemon} - chapters = [b for b in book.get_items() - if isinstance(b, epub.EpubHtml) - if b.id.startswith("np_")] + chapters = [ + b + for b in book.get_items() + if isinstance(b, epub.EpubHtml) + if b.id.startswith("np_") + ] for c in chapters: patch_chapter(c, pokemon_lookup) epub_out = epub_filepath.replace(".", "-with-links.") epub.write_epub(epub_out, book, {}) - logging.info(f"{epub_out} written.") + logging.info(f"Write '{epub_out}'.") diff --git a/src/main.py b/src/main.py index 3dbeb84..b7db292 100644 --- a/src/main.py +++ b/src/main.py @@ -1,13 +1,15 @@ +import sys import logging import src.pokemon import src.epub -def init_logging(): - logging.basicConfig(level=logging.INFO) - - def main(): - init_logging() + logging.basicConfig(format="%(message)s", level=logging.INFO) + try: + ptoos_epub = sys.argv[1] + except IndexError: + ptoos_epub = "poos.epub" + logging.info(f"Patching '{ptoos_epub}'.") pokemon = src.pokemon.get_pokemon() - src.epub.patch("poos.epub", pokemon) + src.epub.patch(ptoos_epub, pokemon) diff --git a/src/pokemon.py b/src/pokemon.py index b8b6fa7..926df22 100644 --- a/src/pokemon.py +++ b/src/pokemon.py @@ -9,7 +9,9 @@ from typing import List POKEMON_CACHE_DIRECTORY = "pokemon" BULBAPEDIA_BASE_URL = "https://bulbapedia.bulbagarden.net" -NATIONAL_INDEX_URL = BULBAPEDIA_BASE_URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number" +NATIONAL_INDEX_URL = ( + BULBAPEDIA_BASE_URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number" +) class Pokemon(BaseModel): @@ -24,13 +26,13 @@ class Pokemon(BaseModel): def download_to_file(url: str, filepath: str, override=False): - """ Downloads url into filepath. """ + """Downloads url into filepath.""" if os.path.isfile(filepath) and override is False: logging.debug(f"'{filepath}' exists.") return headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0" } r = requests.get(url, headers=headers) if r.status_code != 200: @@ -45,12 +47,14 @@ def download_to_file(url: str, filepath: str, override=False): def get_pokemon() -> List[Pokemon]: - """ Scrape Pokemon from the Bulbapedia national dex """ + """Scrape Pokemon from the Bulbapedia national dex""" NATIONAL_INDEX_FILEPATH = os.path.join(POKEMON_CACHE_DIRECTORY, "pokedex.html") download_to_file(NATIONAL_INDEX_URL, NATIONAL_INDEX_FILEPATH) with open(NATIONAL_INDEX_FILEPATH, "r") as r: soup = BeautifulSoup(r, "html.parser") - pokemon_list_soup: BeautifulSoup = soup.find(id="List_of_Pokémon_by_National_Pokédex_number").parent + pokemon_list_soup: BeautifulSoup = soup.find( + id="List_of_Pokémon_by_National_Pokédex_number" + ).parent generation_soups: BeautifulSoup = pokemon_list_soup.find_next_siblings("h3") table_row_soups = [] @@ -77,48 +81,58 @@ def get_pokemon() -> List[Pokemon]: continue index = table_row_soup.find_next("td").next_sibling.next_sibling.text.strip() - html_url = BULBAPEDIA_BASE_URL + table_row_soup.find_next("th").next_element.attrs["href"] + html_url = ( + BULBAPEDIA_BASE_URL + + table_row_soup.find_next("th").next_element.attrs["href"] + ) img_url = table_row_soup.find("img").attrs["src"] html_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".html") img_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png") - p = Pokemon(name=name, - index=index, - html_url=html_url, - img_url=img_url, - html_filepath=html_filepath, - img_filepath=img_filepath, - json_filepath=json_filepath) + p = Pokemon( + name=name, + index=index, + html_url=html_url, + img_url=img_url, + html_filepath=html_filepath, + img_filepath=img_filepath, + json_filepath=json_filepath, + ) pokemon.append(p) extend_pokemon(p) - with open(p.json_filepath, 'w') as f: + with open(p.json_filepath, "w") as f: f.write(p.json()) logging.info(f"Saved {p.json_filepath}.") # Filter out speculative Pokemon - pokemon = [p for p in pokemon if not p.description.startswith("This article's contents will change")] + pokemon = [ + p + for p in pokemon + if not p.description.startswith("This article's contents will change") + ] logging.info("Pokemon loaded.") return pokemon def extend_pokemon(p: Pokemon): - """ Add description and download Pokemon image """ + """Add description and download Pokemon image""" download_to_file(p.html_url, p.html_filepath) with open(p.html_filepath, "r") as r: soup = BeautifulSoup(r, "html.parser") - content_soup: BeautifulSoup = soup.find(id='mw-content-text').contents[0] + content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0] # description p_soup = content_soup.find("p") description = [] - while p_soup.name == 'p': + while p_soup.name == "p": description.append(p_soup.get_text()) p_soup = p_soup.next_sibling p.description = "".join(description) # image - img_url = content_soup.find("table").find_next_sibling("table").find("img").attrs["src"] + img_url = ( + content_soup.find("table").find_next_sibling("table").find("img").attrs["src"] + ) img_url = img_url.replace("//", "https://") p.img_url = img_url download_to_file(img_url, p.img_filepath) -