Begin to scrape Pokemon
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,6 +3,8 @@
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
.vscode
|
||||||
|
pokemon
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|||||||
2
Pipfile
2
Pipfile
@@ -9,6 +9,8 @@ python_version = "3.10"
|
|||||||
[packages]
|
[packages]
|
||||||
bs4 = "*"
|
bs4 = "*"
|
||||||
ebooklib = "*"
|
ebooklib = "*"
|
||||||
|
lxml = "*"
|
||||||
|
requests = "*"
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
black = "*"
|
black = "*"
|
||||||
|
|||||||
44
Pipfile.lock
generated
44
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "cda2eded9d5e4bc251bfebd3c4ca1abb72405bf9d35187ca794466dff1b9bff8"
|
"sha256": "1efb2d47545e7351ff8f5cd6187b3f583170ff7f5a8df26c4232fcf2cab59837"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {
|
"requires": {
|
||||||
@@ -31,6 +31,22 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.0.1"
|
"version": "==0.0.1"
|
||||||
},
|
},
|
||||||
|
"certifi": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14",
|
||||||
|
"sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.6'",
|
||||||
|
"version": "==2022.9.24"
|
||||||
|
},
|
||||||
|
"charset-normalizer": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845",
|
||||||
|
"sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"
|
||||||
|
],
|
||||||
|
"markers": "python_full_version >= '3.6.0'",
|
||||||
|
"version": "==2.1.1"
|
||||||
|
},
|
||||||
"ebooklib": {
|
"ebooklib": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:fe23e22c28050196c68db3e7b13b257bf39426d927cb395c6f2cc13ac11327f1"
|
"sha256:fe23e22c28050196c68db3e7b13b257bf39426d927cb395c6f2cc13ac11327f1"
|
||||||
@@ -38,6 +54,14 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.17.1"
|
"version": "==0.17.1"
|
||||||
},
|
},
|
||||||
|
"idna": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
|
||||||
|
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.5'",
|
||||||
|
"version": "==3.4"
|
||||||
|
},
|
||||||
"lxml": {
|
"lxml": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318",
|
"sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318",
|
||||||
@@ -114,12 +138,20 @@
|
|||||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||||
"version": "==4.9.1"
|
"version": "==4.9.1"
|
||||||
},
|
},
|
||||||
|
"requests": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
|
||||||
|
"sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==2.28.1"
|
||||||
|
},
|
||||||
"six": {
|
"six": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
|
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
|
||||||
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
|
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
|
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||||
"version": "==1.16.0"
|
"version": "==1.16.0"
|
||||||
},
|
},
|
||||||
"soupsieve": {
|
"soupsieve": {
|
||||||
@@ -129,6 +161,14 @@
|
|||||||
],
|
],
|
||||||
"markers": "python_version >= '3.6'",
|
"markers": "python_version >= '3.6'",
|
||||||
"version": "==2.3.2.post1"
|
"version": "==2.3.2.post1"
|
||||||
|
},
|
||||||
|
"urllib3": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e",
|
||||||
|
"sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
|
||||||
|
"version": "==1.26.12"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"develop": {
|
"develop": {
|
||||||
|
|||||||
32
src/main.py
32
src/main.py
@@ -1,4 +1,34 @@
|
|||||||
|
import requests
|
||||||
|
import sys
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
POKEMON_FILE = "pokemon/pokedex.html"
|
||||||
|
POKEMON_URL = "https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
|
||||||
|
|
||||||
|
|
||||||
|
def get_pokedex():
    """Download the National Pokédex list page and cache it at POKEMON_FILE.

    The page at POKEMON_URL is fetched with a browser-like User-Agent and its
    decoded text is written to POKEMON_FILE as UTF-8, replacing any previous
    copy. The directory that holds POKEMON_FILE is created when missing.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the cache file or its directory cannot be written.
    """
    import os

    # NOTE(review): presumably the site rejects the default python-requests
    # User-Agent, hence the browser string -- confirm before changing it.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'
    }
    # Without a timeout requests can block forever on a stalled connection.
    r = requests.get(POKEMON_URL, headers=headers, timeout=30)
    # Never cache an error page as if it were the Pokédex.
    r.raise_for_status()

    # The cache directory is not guaranteed to exist (e.g. on a fresh
    # checkout), so create it before opening the file.
    os.makedirs(os.path.dirname(POKEMON_FILE) or ".", exist_ok=True)

    # The page contains non-ASCII text; an explicit encoding keeps the write
    # independent of the platform's locale.
    with open(POKEMON_FILE, "w", encoding="utf-8") as f:
        f.write(r.text)
|
||||||
|
|
||||||
|
def pokemon_for_generation_soup(generation_soup: BeautifulSoup) -> List[str]:
    """Collect the entry titles from the table that follows a generation heading.

    Only the first ``<table>`` sibling after ``generation_soup`` is examined,
    so rows belonging to later tables are never mixed in. For every row the
    first ``<th>`` inside that row is located, and the ``title`` attribute of
    the first element inside that cell is collected.

    Args:
        generation_soup: The heading element that precedes one generation's
            table.

    Returns:
        The collected ``title`` strings in document order. Rows that have no
        ``<th>``, or whose ``<th>`` does not start with a titled element, are
        skipped. An empty list is returned when no table follows the heading.
    """
    table_soup = generation_soup.find_next_sibling("table")
    if table_soup is None:
        return []

    titles: List[str] = []
    # find_all() stays inside this table; find_all_next() would keep walking
    # through every later table in the document.
    for table_row_soup in table_soup.find_all("tr"):
        # Search within the row only -- find_next() would fall through to a
        # later row when this one has no <th>.
        header_cell = table_row_soup.find("th")
        if header_cell is None:
            continue

        # First element in the cell, skipping any leading text/whitespace
        # nodes (which carry no attributes).
        first_tag = header_cell.find(True)
        if first_tag is None:
            continue

        # NOTE(review): a header row whose first <th> starts with a titled
        # link would also be collected here -- confirm against the real page.
        title = first_tag.get("title")
        if title:
            titles.append(title)

    return titles
|
||||||
|
|
||||||
def main():
    """Parse the cached Pokédex page and print the result for each generation.

    If POKEMON_FILE does not exist yet it is downloaded first via
    ``get_pokedex``. The page is then parsed, the section anchor for the
    National Pokédex list is located, and the ``<h3>`` headings that follow
    its parent are handed to ``pokemon_for_generation_soup``. The collected
    results are printed as a list.

    Raises:
        ValueError: if the cached page does not contain the expected anchor.
    """
    import os

    # Fetch the page on first run instead of failing on a missing cache file.
    if not os.path.exists(POKEMON_FILE):
        get_pokedex()

    # The page (and the id searched for below) contains non-ASCII text, so do
    # not rely on the platform's default encoding.
    with open(POKEMON_FILE, "r", encoding="utf-8") as r:
        soup = BeautifulSoup(r, "html.parser")

    anchor = soup.find(id="List_of_Pokémon_by_National_Pokédex_number")
    if anchor is None:
        raise ValueError(
            f"{POKEMON_FILE} does not contain the National Pokédex list anchor"
        )
    pokemon_list_soup = anchor.parent

    # NOTE(review): the [0:1] slice limits processing to the first heading;
    # it looks like a work-in-progress limit -- confirm before removing it.
    generation_soups = pokemon_list_soup.find_next_siblings("h3")[0:1]

    pokemon = [
        pokemon_for_generation_soup(generation_soup)
        for generation_soup in generation_soups
    ]
    print(pokemon)
|
||||||
|
|||||||
Reference in New Issue
Block a user