Add readmes, explanation and link for chromedriver, clear notebook outputs
parent
dfefa0151c
commit
328066a03f
|
@ -0,0 +1,9 @@
|
|||
# The purpose of this directory is to perform scraping of [Archives of Nethys](https://2e.aonprd.com)
|
||||
|
||||
## Requirements
|
||||
1. Python 3.6.8
|
||||
2. pandas==0.24.2
|
||||
3. splinter==0.11.0
|
||||
4. beautifulsoup4==4.8.0
|
||||
5. selenium==3.141.0
|
||||
6. Download the [Chrome WebDriver](https://splinter.readthedocs.io/en/latest/drivers/chrome.html) and place it in this directory. Selenium and splinter need it to drive the browser; they are used because scraping with Requests was prone to failure.
|
|
@ -0,0 +1,4 @@
|
|||
pandas==0.24.2
|
||||
splinter==0.11.0
|
||||
beautifulsoup4==4.8.0
|
||||
selenium==3.141.0
|
|
@ -0,0 +1,7 @@
|
|||
# This directory scrapes the weapons from the [Archives of Nethys](https://2e.aonprd.com/Weapons.aspx)
|
||||
|
||||
## Steps to scrape the weapons
|
||||
1. Install the requirements from [the previous readme](../README.md)
|
||||
2. Generate the `melee.csv` and `ranged.csv` files by copying and pasting the tables from [here](https://2e.aonprd.com/Weapons.aspx), and save them in this directory
|
||||
3. Set the `number_of_weapons` variable to the number of weapons in the database (currently 83)
|
||||
4. Run the [python file](scrape.py) or [Jupyter Notebook](scrape.ipynb)
|
|
@ -9,7 +9,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -20,7 +20,7 @@
|
|||
"from splinter import Browser\n",
|
||||
"\n",
|
||||
"# Setting up Selenium\n",
|
||||
"chrome_driver = os.path.join('..', 'resources', 'chromedriver.exe')\n",
|
||||
"chrome_driver = os.path.join('..', 'chromedriver.exe')\n",
|
||||
"executable_path = {'executable_path': chrome_driver}\n",
|
||||
"browser = Browser('chrome', **executable_path, headless=False)\n",
|
||||
"\n",
|
||||
|
@ -30,103 +30,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Beginning Data Retrieval\n",
|
||||
"------------------------\n",
|
||||
"Processing Weapon 1 of 83 | Fist\n",
|
||||
"Processing Weapon 2 of 83 | Club\n",
|
||||
"Processing Weapon 3 of 83 | Dagger\n",
|
||||
"Processing Weapon 4 of 83 | Gauntlet\n",
|
||||
"Processing Weapon 5 of 83 | Light Mace\n",
|
||||
"Processing Weapon 6 of 83 | Longspear\n",
|
||||
"Processing Weapon 7 of 83 | Mace\n",
|
||||
"Processing Weapon 8 of 83 | Morningstar\n",
|
||||
"Processing Weapon 9 of 83 | Sickle\n",
|
||||
"Processing Weapon 10 of 83 | Spear\n",
|
||||
"Processing Weapon 11 of 83 | Spiked Gauntlet\n",
|
||||
"Processing Weapon 12 of 83 | Staff\n",
|
||||
"Processing Weapon 13 of 83 | Clan Dagger\n",
|
||||
"Processing Weapon 14 of 83 | Katar\n",
|
||||
"Processing Weapon 15 of 83 | Bastard Sword\n",
|
||||
"Processing Weapon 16 of 83 | Battle Axe\n",
|
||||
"Processing Weapon 17 of 83 | Bo Staff\n",
|
||||
"Processing Weapon 18 of 83 | Falchion\n",
|
||||
"Processing Weapon 19 of 83 | Flail\n",
|
||||
"Processing Weapon 20 of 83 | Glaive\n",
|
||||
"Processing Weapon 21 of 83 | Greataxe\n",
|
||||
"Processing Weapon 22 of 83 | Greatclub\n",
|
||||
"Processing Weapon 23 of 83 | Greatpick\n",
|
||||
"Processing Weapon 24 of 83 | Greatsword\n",
|
||||
"Processing Weapon 25 of 83 | Guisarme\n",
|
||||
"Processing Weapon 26 of 83 | Halberd\n",
|
||||
"Processing Weapon 27 of 83 | Hatchet\n",
|
||||
"Processing Weapon 28 of 83 | Lance\n",
|
||||
"Processing Weapon 29 of 83 | Light Hammer\n",
|
||||
"Processing Weapon 30 of 83 | Light Pick\n",
|
||||
"Processing Weapon 31 of 83 | Longsword\n",
|
||||
"Processing Weapon 32 of 83 | Main-gauche\n",
|
||||
"Processing Weapon 33 of 83 | Maul\n",
|
||||
"Processing Weapon 34 of 83 | Pick\n",
|
||||
"Processing Weapon 35 of 83 | Ranseur\n",
|
||||
"Processing Weapon 36 of 83 | Rapier\n",
|
||||
"Processing Weapon 37 of 83 | Sap\n",
|
||||
"Processing Weapon 38 of 83 | Scimitar\n",
|
||||
"Processing Weapon 39 of 83 | Scythe\n",
|
||||
"Processing Weapon 40 of 83 | Shield Bash\n",
|
||||
"Processing Weapon 41 of 83 | Shield Boss\n",
|
||||
"Processing Weapon 42 of 83 | Shield Spikes\n",
|
||||
"Processing Weapon 43 of 83 | Shortsword\n",
|
||||
"Processing Weapon 44 of 83 | Starknife\n",
|
||||
"Processing Weapon 45 of 83 | Trident\n",
|
||||
"Processing Weapon 46 of 83 | War Flail\n",
|
||||
"Processing Weapon 47 of 83 | Warhammer\n",
|
||||
"Processing Weapon 48 of 83 | Whip\n",
|
||||
"Processing Weapon 49 of 83 | Dogslicer\n",
|
||||
"Processing Weapon 50 of 83 | Elven Curve Blade\n",
|
||||
"Processing Weapon 51 of 83 | Filcher's Fork\n",
|
||||
"Processing Weapon 52 of 83 | Gnome Hooked Hammer\n",
|
||||
"Processing Weapon 53 of 83 | Horsechopper\n",
|
||||
"Processing Weapon 54 of 83 | Kama\n",
|
||||
"Processing Weapon 55 of 83 | Katana\n",
|
||||
"Processing Weapon 56 of 83 | Kukri\n",
|
||||
"Processing Weapon 57 of 83 | Nunchaku\n",
|
||||
"Processing Weapon 58 of 83 | Orc Knuckle Dagger\n",
|
||||
"Processing Weapon 59 of 83 | Sai\n",
|
||||
"Processing Weapon 60 of 83 | Spiked Chain\n",
|
||||
"Processing Weapon 61 of 83 | Temple Sword\n",
|
||||
"Processing Weapon 62 of 83 | Dwarven War Axe\n",
|
||||
"Processing Weapon 63 of 83 | Gnome Flickmace\n",
|
||||
"Processing Weapon 64 of 83 | Orc Necksplitter\n",
|
||||
"Processing Weapon 65 of 83 | Sawtooth Saber\n",
|
||||
"Processing Weapon 66 of 83 | Blowgun\n",
|
||||
"Processing Weapon 67 of 83 | Crossbow\n",
|
||||
"Processing Weapon 68 of 83 | Dart\n",
|
||||
"Processing Weapon 69 of 83 | Hand Crossbow\n",
|
||||
"Processing Weapon 70 of 83 | Heavy Crossbow\n",
|
||||
"Processing Weapon 71 of 83 | Javelin\n",
|
||||
"Processing Weapon 72 of 83 | Sling\n",
|
||||
"Processing Weapon 73 of 83 | Alchemical Bomb\n",
|
||||
"Processing Weapon 74 of 83 | Composite Longbow\n",
|
||||
"Processing Weapon 75 of 83 | Composite Shortbow\n",
|
||||
"Processing Weapon 76 of 83 | Longbow\n",
|
||||
"Processing Weapon 77 of 83 | Shortbow\n",
|
||||
"Processing Weapon 78 of 83 | Halfling Sling Staff\n",
|
||||
"Processing Weapon 79 of 83 | Shuriken\n",
|
||||
"Processing Weapon 80 of 83 | Blowgun Darts\n",
|
||||
"Processing Weapon 81 of 83 | Bolts\n",
|
||||
"Processing Weapon 82 of 83 | Sling Bullets\n",
|
||||
"Processing Weapon 83 of 83 | Arrows\n",
|
||||
"------------------------\n",
|
||||
"Data Retrieval Complete\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# url that contains all the links\n",
|
||||
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",
|
||||
|
@ -177,7 +83,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -188,7 +94,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -198,7 +104,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -207,7 +113,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -216,7 +122,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# # Scrape data from aon2e and generate csvs to import into sqlite
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Dependencies
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import os
|
||||
from splinter import Browser
|
||||
|
||||
# Setting up Selenium: the chromedriver binary is looked up one directory
# above the current working directory.
chrome_driver = os.path.join('..', 'chromedriver.exe')
executable_path = dict(executable_path=chrome_driver)
# headless=False, so the Chrome window is visible while pages are visited.
browser = Browser('chrome', headless=False, **executable_path)

# Pandas config: do not cap the number of columns shown when displaying frames
pd.set_option('display.max_columns', None)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Base url of a weapon detail page; the numeric weapon ID is appended to it
url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='

# Number of weapons (IDs 1..number_of_weapons are visited)
number_of_weapons = 83

# Lists filled in lockstep: one name and one description per visited ID
name_list = []
description_list = []


def _extract_name(content, weapon):
    """Return the weapon name: the stripped text of the first link in *content*.

    Falls back to the placeholder ``'weapon: <ID>'`` when the detail section
    or its first link is missing, so the row can still be identified.
    """
    link = content.find('a') if content is not None else None
    if link is None:
        return f'weapon: {weapon}'
    return link.text.strip()


def _extract_description(content):
    """Return the stripped text of the node right after the first ``<hr>``.

    Returns an empty string when the detail section, the ``<hr>`` or the
    following node is missing, instead of aborting the whole scrape.
    """
    rule = content.find('hr') if content is not None else None
    node = rule.next_element if rule is not None else None
    if node is None:
        return ''
    # Bare text nodes (NavigableString) are str subclasses and can be
    # stripped directly; tags need their text extracted first.
    if isinstance(node, str):
        return node.strip()
    return node.get_text().strip()


print('Beginning Data Retrieval')
print('------------------------')

try:
    # Loop from 1 to the value in number_of_weapons (inclusive)
    for weapon in range(1, number_of_weapons + 1):
        url = url_weapon + str(weapon)
        browser.visit(url)
        soup = bs(browser.html, 'html.parser')

        # Select only the content section (None when the page lacks it)
        content = soup.find(id='ctl00_MainContent_DetailedOutput')

        name = _extract_name(content, weapon)
        description = _extract_description(content)

        print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')

        # Append values to our lists
        name_list.append(name)
        description_list.append(description)
finally:
    # Always close the browser window and driver, even if a visit fails
    browser.quit()

print('------------------------')
print('Data Retrieval Complete')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Weapon tables copied from https://2e.aonprd.com/Weapons.aspx, read from the
# current working directory
melee = pd.read_csv('melee.csv')
ranged = pd.read_csv('ranged.csv')

# One row per scraped weapon: its name and its description
data = {'Name': name_list, 'description': description_list}
scrape = pd.DataFrame(data)

# Left merges keep every row of the copied tables and attach the scraped
# description wherever the 'Name' values match
melee = melee.merge(scrape, how='left', on='Name')
ranged = ranged.merge(scrape, how='left', on='Name')

# Overwrite the input files with the enriched tables. index=False keeps the
# DataFrame's positional index out of the files; otherwise it is written as
# an unnamed first column and comes back as 'Unnamed: 0' when these same
# files are read again.
# NOTE(review): confirm nothing downstream relied on that unnamed index column.
melee.to_csv('melee.csv', index=False)
ranged.to_csv('ranged.csv', index=False)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue