Handle Mr. Mime, Nidoran, farfetch'd, and sirfetch'd to fix #1
This commit is contained in:
60
src/epub.py
60
src/epub.py
@@ -23,7 +23,8 @@ def create_pokedex_chapter(pokemon: List[Pokemon]) -> epub.EpubHtml:
|
|||||||
content = ["<h1>Pokedex</h1>"]
|
content = ["<h1>Pokedex</h1>"]
|
||||||
|
|
||||||
for p in pokemon:
|
for p in pokemon:
|
||||||
content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p.name.lower()}">{p.name}</h2>')
|
p_id = p.name.lower().replace(". ", "")
|
||||||
|
content.append(f'<h2 id="{POKEMON_ID_PREFIX}{p.link_id}">{p.name}</h2>')
|
||||||
content.append(
|
content.append(
|
||||||
f' <p><img alt="[Pokemon {p.name}]" src="../{p.img_filename}"/><br/></p>'
|
f' <p><img alt="[Pokemon {p.name}]" src="../{p.img_filename}"/><br/></p>'
|
||||||
)
|
)
|
||||||
@@ -39,25 +40,59 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
|
|||||||
r = re.compile("([:,.!?“”‘’… ]+)")
|
r = re.compile("([:,.!?“”‘’… ]+)")
|
||||||
soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
|
soup: BeautifulSoup = BeautifulSoup(chapter.content, "html.parser")
|
||||||
|
|
||||||
def pokemon_name_to_link(key: str, word: str) -> Tag:
|
def pokemon_name_to_link(p: Pokemon, name_as_in_book: str) -> Tag:
|
||||||
tag = soup.new_tag("a")
|
tag = soup.new_tag("a")
|
||||||
tag.string = word
|
tag.string = name_as_in_book
|
||||||
tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{key}"
|
tag.attrs["href"] = f"np_pokedex.xhtml#{POKEMON_ID_PREFIX}{p.link_id}"
|
||||||
tag.attrs["style"] = "color:black;text-decoration:none"
|
# tag.attrs["style"] = "color:black;text-decoration:none"
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
def patch_string(section: NavigableString) -> List:
|
def patch_string(section: NavigableString) -> List:
|
||||||
"""Replace Pokemon with link to Pokemon; requires splitting up the
|
"""Replace Pokemon with link to Pokemon; requires splitting up the
|
||||||
NavigableString into a list of NavigableStrings and Tags."""
|
NavigableString into a list of NavigableStrings and Tags."""
|
||||||
result = [[]]
|
result = [[]]
|
||||||
for word in r.split(str(section)):
|
index, chunks = 0, r.split(str(section))
|
||||||
|
while index < len(chunks):
|
||||||
|
word = chunks[index]
|
||||||
if word.lower() in pokemon_lookup:
|
if word.lower() in pokemon_lookup:
|
||||||
pokemon_lookup[word.lower()].appears_in_book = True
|
p = pokemon_lookup[word.lower()]
|
||||||
link = pokemon_name_to_link(word.lower(), word)
|
p.appears_in_book = True
|
||||||
|
link = pokemon_name_to_link(p, word)
|
||||||
|
result.append(link)
|
||||||
|
result.append([])
|
||||||
|
elif word == "Mr" and index + 2 < len(chunks) and \
|
||||||
|
chunks[index + 1] == ". " and chunks[index + 2] == "Mime":
|
||||||
|
# Handle "Mr. Mime" which is split into ["Mr", ". ", "Mime"]
|
||||||
|
p = pokemon_lookup["mr. mime"]
|
||||||
|
p.appears_in_book = True
|
||||||
|
name = "".join(chunks[index:index + 3])
|
||||||
|
link = pokemon_name_to_link(p, name)
|
||||||
|
index += 2
|
||||||
|
result.append(link)
|
||||||
|
result.append([])
|
||||||
|
elif word.lower() == "farfetch" and index + 2 < len(chunks) and \
|
||||||
|
chunks[index + 1] == "’" and chunks[index + 2] == "d":
|
||||||
|
# Handle "Farfetch’d" which is split into ["farfetch", "’", "d"]
|
||||||
|
p = pokemon_lookup["farfetch'd"]
|
||||||
|
p.appears_in_book = True
|
||||||
|
name = "".join(chunks[index:index + 3])
|
||||||
|
link = pokemon_name_to_link(p, name)
|
||||||
|
index += 2
|
||||||
|
result.append(link)
|
||||||
|
result.append([])
|
||||||
|
elif word.lower() == "sirfetch" and index + 2 < len(chunks) and \
|
||||||
|
chunks[index + 1] == "’" and chunks[index + 2] == "d":
|
||||||
|
# Handle "Sirfetch’d" which is split into ["sirfetch", "’", "d"]
|
||||||
|
p = pokemon_lookup["sirfetch'd"]
|
||||||
|
p.appears_in_book = True
|
||||||
|
name = "".join(chunks[index:index + 3])
|
||||||
|
link = pokemon_name_to_link(p, name)
|
||||||
|
index += 2
|
||||||
result.append(link)
|
result.append(link)
|
||||||
result.append([])
|
result.append([])
|
||||||
else:
|
else:
|
||||||
result[-1].append(word)
|
result[-1].append(word)
|
||||||
|
index += 1
|
||||||
|
|
||||||
# convert words back into strings
|
# convert words back into strings
|
||||||
for i in range(len(result)):
|
for i in range(len(result)):
|
||||||
@@ -81,6 +116,13 @@ def patch_chapter(chapter: epub.EpubHtml, pokemon_lookup: Dict[str, Pokemon]):
|
|||||||
chapter.content = str(soup)
|
chapter.content = str(soup)
|
||||||
|
|
||||||
|
|
||||||
|
def get_pokemon_lookup(pokemon: List["Pokemon"]) -> Dict[str, "Pokemon"]:
    """Build the lower-cased name -> Pokemon mapping used to match names in text.

    Every Pokemon is registered under ``p.name.lower()``. Two extra aliases
    are then added on top of that:

    * ``"nidoran"`` resolves to the ``"nidoran♂"`` entry.
    * ``"barrierd"`` resolves to the ``"mr. mime"`` entry
      (NOTE(review): presumably an alternative name used in the book text;
      confirm).

    An alias is only registered when its target is present in ``pokemon``,
    so an empty or partial list yields a lookup without that alias instead
    of raising ``KeyError``.

    :param pokemon: the Pokemon to index; only their ``name`` is read here.
    :return: dict mapping lower-cased names (and aliases) to Pokemon objects.
    """
    pokemon_lookup = {p.name.lower(): p for p in pokemon}

    # alias -> key of the entry it should point to
    aliases = {
        "nidoran": "nidoran♂",
        "barrierd": "mr. mime",
    }
    for alias, target in aliases.items():
        # Skip aliases whose target is missing rather than failing the lookup build.
        if target in pokemon_lookup:
            pokemon_lookup[alias] = pokemon_lookup[target]

    return pokemon_lookup
|
||||||
|
|
||||||
|
|
||||||
def patch(epub_filename: str, pokemon: List[Pokemon]):
|
def patch(epub_filename: str, pokemon: List[Pokemon]):
|
||||||
try:
|
try:
|
||||||
book = epub.read_epub(epub_filename)
|
book = epub.read_epub(epub_filename)
|
||||||
@@ -88,7 +130,7 @@ def patch(epub_filename: str, pokemon: List[Pokemon]):
|
|||||||
logging.exception("Failed to open epub.")
|
logging.exception("Failed to open epub.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
pokemon_lookup = {p.name.lower(): p for p in pokemon}
|
pokemon_lookup = get_pokemon_lookup(pokemon)
|
||||||
chapters = [
|
chapters = [
|
||||||
b
|
b
|
||||||
for b in book.get_items()
|
for b in book.get_items()
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import requests
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -17,6 +18,7 @@ NATIONAL_INDEX_URL = (
|
|||||||
|
|
||||||
class Pokemon(BaseModel):
|
class Pokemon(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
|
link_id: str
|
||||||
index: str
|
index: str
|
||||||
html_url: str
|
html_url: str
|
||||||
img_url: str
|
img_url: str
|
||||||
@@ -68,6 +70,7 @@ def get_pokemon_table_row_soups(national_index_filename: str) -> List[BeautifulS
|
|||||||
|
|
||||||
def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
|
def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
|
||||||
name = table_row_soup.find_next("th").next_element.attrs["title"]
|
name = table_row_soup.find_next("th").next_element.attrs["title"]
|
||||||
|
link_id = re.sub("[^a-z]", "", name.lower())
|
||||||
|
|
||||||
# load Pokemon from JSON if it already exists
|
# load Pokemon from JSON if it already exists
|
||||||
json_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".json")
|
json_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".json")
|
||||||
@@ -86,6 +89,7 @@ def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
|
|||||||
img_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
|
img_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
|
||||||
return Pokemon(
|
return Pokemon(
|
||||||
name=name,
|
name=name,
|
||||||
|
link_id=link_id,
|
||||||
index=index,
|
index=index,
|
||||||
html_url=html_url,
|
html_url=html_url,
|
||||||
img_url=img_url,
|
img_url=img_url,
|
||||||
@@ -138,18 +142,18 @@ def extend_pokemon(p: Pokemon):
|
|||||||
soup = BeautifulSoup(r, "html.parser")
|
soup = BeautifulSoup(r, "html.parser")
|
||||||
content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0]
|
content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0]
|
||||||
|
|
||||||
# description
|
if not p.description:
|
||||||
p_soup = content_soup.find("p")
|
p_soup = content_soup.find("p")
|
||||||
description = []
|
description = []
|
||||||
while p_soup.name == "p":
|
while p_soup.name == "p":
|
||||||
description.append(p_soup.get_text())
|
description.append(p_soup.get_text())
|
||||||
p_soup = p_soup.next_sibling
|
p_soup = p_soup.next_sibling
|
||||||
p.description = "".join(description)
|
p.description = "".join(description)
|
||||||
|
|
||||||
# image
|
if not os.path.isfile(p.img_filename):
|
||||||
img_url = (
|
img_url = (
|
||||||
content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
|
content_soup.find("table").find_next_sibling("table").find("img").attrs["src"]
|
||||||
)
|
)
|
||||||
img_url = img_url.replace("//", "https://")
|
img_url = img_url.replace("//", "https://")
|
||||||
p.img_url = img_url
|
p.img_url = img_url
|
||||||
download_to_file(img_url, p.img_filename)
|
download_to_file(img_url, p.img_filename)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ def test_extract_pokemon_from_table_row(tmp_path):
|
|||||||
row_soups = pokemon.get_pokemon_table_row_soups(national_index)
|
row_soups = pokemon.get_pokemon_table_row_soups(national_index)
|
||||||
p = pokemon.extract_pokemon_from_table_row(row_soups[42])
|
p = pokemon.extract_pokemon_from_table_row(row_soups[42])
|
||||||
assert p.name == 'Vulpix'
|
assert p.name == 'Vulpix'
|
||||||
|
assert p.link_id == 'vulpix'
|
||||||
assert p.index == '#037'
|
assert p.index == '#037'
|
||||||
assert p.html_url == 'https://bulbapedia.bulbagarden.net/wiki/Vulpix_(Pok%C3%A9mon)'
|
assert p.html_url == 'https://bulbapedia.bulbagarden.net/wiki/Vulpix_(Pok%C3%A9mon)'
|
||||||
assert p.img_url == '//archives.bulbagarden.net/media/upload/thumb/3/35/037Vulpix-Alola.png/70px-037Vulpix-Alola.png'
|
assert p.img_url == '//archives.bulbagarden.net/media/upload/thumb/3/35/037Vulpix-Alola.png/70px-037Vulpix-Alola.png'
|
||||||
|
|||||||
Reference in New Issue
Block a user