From 7d9209d52e5f8cc28120d55c59c39df56d1ec38b Mon Sep 17 00:00:00 2001
From: felixm <mail@felixm.de>
Date: Fri, 28 Oct 2022 13:44:21 -0400
Subject: [PATCH] Handle Mr. Mime, Nidoran, farfetch'd, and sirfetch'd to fix
 #1

---
 src/epub.py          | 60 +++++++++++++++++++++++++++++++++++++-------
 src/pokemon.py       | 32 ++++++++++++-----------
 test/test_pokemon.py |  1 +
 3 files changed, 70 insertions(+), 23 deletions(-)
diff --git a/src/epub.py b/src/epub.py
index a02f986..5161641 100644
--- a/src/epub.py
+++ b/src/epub.py
@@ -23,7 +23,8 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
     content = ["<h1>Pokedex</h1>"]
 
     for p in pokemon:
-        content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p.name.lower()}">{p.name}</h2>')
+        p_id = p.link_id  # anchor id; must equal the fragment used for the href in patch_chapter
+        content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p_id}">{p.name}</h2>')
         content.append(
             f'  <p><img alt="[Pokemon {p.name}]" src="../{p.img_filename}"/><br/></p>'
         )
@@ -39,25 +40,59 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
     r = re.compile("([:,.!?“”‘’… ]+)")
     soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
 
-    def pokemon_name_to_link(key: str, word: str) -> Tag:
+    def pokemon_name_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
         tag = soup.new_tag("a")
-        tag.string = word
-        tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{key}"
-        tag.attrs["style"] = "color:black;text-decoration:none"
+        tag.string = name_as_in_book
+        tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
+        # tag.attrs["style"] = "color:black;text-decoration:none"
         return tag
 
     def patch_string(section: NavigableString) -> List:
         """Replace Pokemon with link to Pokemon; requires splitting up the
         NavigableString into a list of NavigableStrings and Tags."""
         result = [[]]
-        for word in r.split(str(section)):
+        index, chunks = 0, r.split(str(section))
+        while index < len(chunks):
+            word = chunks[index]
             if word.lower() in pokemon_lookup:
-                pokemon_lookup[word.lower()].appears_in_book = True
-                link = pokemon_name_to_link(word.lower(), word)
+                p = pokemon_lookup[word.lower()]
+                p.appears_in_book = True
+                link = pokemon_name_to_link(p, word)
+                result.append(link)
+                result.append([])
+            elif word == "Mr" and index + 2 < len(chunks) and \
+                 chunks[index + 1] == ". " and chunks[index + 2] == "Mime":
+                # Handle "Mr. Mime" which is split into ["Mr", ". ", "Mime"]
+                p = pokemon_lookup["mr. mime"]
+                p.appears_in_book = True
+                name = "".join(chunks[index:index + 3])
+                link = pokemon_name_to_link(p, name)
+                index += 2
+                result.append(link)
+                result.append([])
+            elif word.lower() == "farfetch" and index + 2 < len(chunks) and \
+                 chunks[index + 1] == "’" and chunks[index + 2] == "d":
+                # Handle "farfetch’d" which is split into ["farfetch", "’", "d"]
+                p = pokemon_lookup["farfetch'd"]
+                p.appears_in_book = True
+                name = "".join(chunks[index:index + 3])
+                link = pokemon_name_to_link(p, name)
+                index += 2
+                result.append(link)
+                result.append([])
+            elif word.lower() == "sirfetch" and index + 2 < len(chunks) and \
+                 chunks[index + 1] == "’" and chunks[index + 2] == "d":
+                # Handle "sirfetch’d" which is split into ["sirfetch", "’", "d"]
+                p = pokemon_lookup["sirfetch'd"]
+                p.appears_in_book = True
+                name = "".join(chunks[index:index + 3])
+                link = pokemon_name_to_link(p, name)
+                index += 2
                 result.append(link)
                 result.append([])
             else:
                 result[-1].append(word)
+            index += 1
 
         # convert words back into strings
         for i in range(len(result)):
@@ -81,6 +116,13 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
     chapter.content = str(soup)
 
 
+def get_pokemon_lookup(pokemon: List[Pokemon]) -> Dict[str, Pokemon]:  # lower-cased name -> Pokemon, plus aliases
+    pokemon_lookup = {p.name.lower(): p for p in pokemon}
+    pokemon_lookup["nidoran"] = pokemon_lookup["nidoran♂"]  # bare "nidoran" resolves to the ♂ entry; KeyError if it is absent
+    pokemon_lookup["barrierd"] = pokemon_lookup["mr. mime"]  # alias; presumably Mr. Mime's romanized Japanese name - TODO confirm
+    return pokemon_lookup
+
+
 def patch(epub_filename: str, pokemon: List[Pokemon]):
     try:
         book = epub.read_epub(epub_filename)
@@ -88,7 +130,7 @@ def patch(epub_filename: str, pokemon: List[Pokemon]):
         logging.exception("Failed to open epub.")
         sys.exit(1)
 
-    pokemon_lookup = {p.name.lower(): p for p in pokemon}
+    pokemon_lookup = get_pokemon_lookup(pokemon)
     chapters = [
         b
         for b in book.get_items()
diff --git a/src/pokemon.py b/src/pokemon.py
index 8ff4dac..acbf5a6 100644
--- a/src/pokemon.py
+++ b/src/pokemon.py
@@ -2,6 +2,7 @@ import requests
 import sys
 import os
 import logging
+import re
 from rich.progress import track
 from pydantic import BaseModel
 from bs4 import BeautifulSoup
@@ -17,6 +18,7 @@ NATIONAL_INDEX_URL = (
 
 class Pokemon(BaseModel):
     name: str
+    link_id: str
     index: str
     html_url: str
     img_url: str
@@ -68,6 +70,7 @@ def get_pokemon_table_row_soups(national_index_filename: str) -> List[BeautifulS
 
 def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
     name = table_row_soup.find_next("th").next_element.attrs["title"]
+    link_id = re.sub("[^a-z0-9]", "", name.lower().replace("♀", "f").replace("♂", "m"))  # keep digits / sex marks so ids stay unique (Porygon2, Nidoran♀/♂)
 
     # load Pokemon from JSON if it already exists
     json_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".json")
@@ -86,6 +89,7 @@ def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
     img_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
     return Pokemon(
         name=name,
+        link_id=link_id,
         index=index,
         html_url=html_url,
         img_url=img_url,
@@ -138,18 +142,18 @@ def extend_pokemon(p: Pokemon):
         soup = BeautifulSoup(r, "html.parser")
     content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0]
 
-    # description
-    p_soup = content_soup.find("p")
-    description = []
-    while p_soup.name == "p":
-        description.append(p_soup.get_text())
-        p_soup = p_soup.next_sibling
-    p.description = "".join(description)
+    if not p.description:
+        p_soup = content_soup.find("p")
+        description = []
+        while p_soup.name == "p":
+            description.append(p_soup.get_text())
+            p_soup = p_soup.next_sibling
+        p.description = "".join(description)
 
-    # image
-    img_url = (
-        content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
-    )
-    img_url = img_url.replace("//", "https://")
-    p.img_url = img_url
-    download_to_file(img_url, p.img_filename)
+    if not os.path.isfile(p.img_filename):
+        img_url = (
+            content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
+        )
+        img_url = img_url.replace("//", "https://")
+        p.img_url = img_url
+        download_to_file(img_url, p.img_filename)
diff --git a/test/test_pokemon.py b/test/test_pokemon.py
index 601cfd0..fd32736 100644
--- a/test/test_pokemon.py
+++ b/test/test_pokemon.py
@@ -19,6 +19,7 @@ def test_extract_pokemon_from_table_row(tmp_path):
     row_soups = pokemon.get_pokemon_table_row_soups(national_index) 
     p = pokemon.extract_pokemon_from_table_row(row_soups[42])
     assert p.name == 'Vulpix'
+    assert p.link_id == 'vulpix'
     assert p.index == '#037'
     assert p.html_url == 'https://bulbapedia.bulbagarden.net/wiki/Vulpix_(Pok%C3%A9mon)'
     assert p.img_url == '//archives.bulbagarden.net/media/upload/thumb/3/35/037Vulpix-Alola.png/70px-037Vulpix-Alola.png'