Merge branch 'master' into feat-paragraphs
commit
aae25acdcb
|
@ -146,9 +146,7 @@ The following text is the property of Wizards of the Coast, Inc. and is Copyrigh
|
||||||
- System Reference Document. Copyright 2000. Wizards of the Coast, Inc;
|
- System Reference Document. Copyright 2000. Wizards of the Coast, Inc;
|
||||||
Authors: Jonathan Tweet, Monte Cook, Skip Williams, based on material by E.
|
Authors: Jonathan Tweet, Monte Cook, Skip Williams, based on material by E.
|
||||||
Gary Gygax and Dave Arneson.
|
Gary Gygax and Dave Arneson.
|
||||||
<!--
|
|
||||||
- The Archives of Nethys. Copyright 2010, Blake Davis.
|
- The Archives of Nethys. Copyright 2010, Blake Davis.
|
||||||
-->
|
|
||||||
- Age of Ashes Player's Guide
|
- Age of Ashes Player's Guide
|
||||||
- Age of Ashes Player’s Guide © 2019, Paizo Inc.; Authors: James Jacobs, with Amanda Hamon.
|
- Age of Ashes Player’s Guide © 2019, Paizo Inc.; Authors: James Jacobs, with Amanda Hamon.
|
||||||
- Bestiary
|
- Bestiary
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
# The purpose of this directory is to perform scraping of [Archives of Nethys](https://2e.aonprd.com)
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
1. Python 3.6.8
|
||||||
|
2. pandas==0.24.2
|
||||||
|
3. splinter==0.11.0
|
||||||
|
4. beautifulsoup4==4.8.0
|
||||||
|
5. selenium==3.141.0
|
||||||
|
6. Download the [Chrome WebDriver](https://splinter.readthedocs.io/en/latest/drivers/chrome.html) and place it in this directory. Selenium and splinter need it to drive the browser; a real browser is used because scraping with Requests was prone to failure.
|
|
@ -0,0 +1,4 @@
|
||||||
|
pandas==0.24.2
|
||||||
|
splinter==0.11.0
|
||||||
|
beautifulsoup4==4.8.0
|
||||||
|
selenium==3.141.0
|
|
@ -0,0 +1,7 @@
|
||||||
|
# This directory scrapes the weapons from the [Archives of Nethys](https://2e.aonprd.com/Weapons.aspx)
|
||||||
|
|
||||||
|
## Steps to scrape the weapons
|
||||||
|
1. Install the requirements from [the previous readme](../README.md)
|
||||||
|
2. Generate .csv files by copying and pasting the tables from [here](https://2e.aonprd.com/Weapons.aspx) and save them in this directory as `melee.csv` and `ranged.csv`
|
||||||
|
3. Set the `number_of_weapons` variable to the number of weapons in the database (currently 83)
|
||||||
|
4. Run the [python file](scrape.py) or [Jupyter Notebook](scrape.ipynb)
|
|
@ -9,7 +9,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -20,7 +20,7 @@
|
||||||
"from splinter import Browser\n",
|
"from splinter import Browser\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Setting up Selenium\n",
|
"# Setting up Selenium\n",
|
||||||
"chrome_driver = os.path.join('..', 'resources', 'chromedriver.exe')\n",
|
"chrome_driver = os.path.join('..', 'chromedriver.exe')\n",
|
||||||
"executable_path = {'executable_path': chrome_driver}\n",
|
"executable_path = {'executable_path': chrome_driver}\n",
|
||||||
"browser = Browser('chrome', **executable_path, headless=False)\n",
|
"browser = Browser('chrome', **executable_path, headless=False)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -30,103 +30,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Beginning Data Retrieval\n",
|
|
||||||
"------------------------\n",
|
|
||||||
"Processing Weapon 1 of 83 | Fist\n",
|
|
||||||
"Processing Weapon 2 of 83 | Club\n",
|
|
||||||
"Processing Weapon 3 of 83 | Dagger\n",
|
|
||||||
"Processing Weapon 4 of 83 | Gauntlet\n",
|
|
||||||
"Processing Weapon 5 of 83 | Light Mace\n",
|
|
||||||
"Processing Weapon 6 of 83 | Longspear\n",
|
|
||||||
"Processing Weapon 7 of 83 | Mace\n",
|
|
||||||
"Processing Weapon 8 of 83 | Morningstar\n",
|
|
||||||
"Processing Weapon 9 of 83 | Sickle\n",
|
|
||||||
"Processing Weapon 10 of 83 | Spear\n",
|
|
||||||
"Processing Weapon 11 of 83 | Spiked Gauntlet\n",
|
|
||||||
"Processing Weapon 12 of 83 | Staff\n",
|
|
||||||
"Processing Weapon 13 of 83 | Clan Dagger\n",
|
|
||||||
"Processing Weapon 14 of 83 | Katar\n",
|
|
||||||
"Processing Weapon 15 of 83 | Bastard Sword\n",
|
|
||||||
"Processing Weapon 16 of 83 | Battle Axe\n",
|
|
||||||
"Processing Weapon 17 of 83 | Bo Staff\n",
|
|
||||||
"Processing Weapon 18 of 83 | Falchion\n",
|
|
||||||
"Processing Weapon 19 of 83 | Flail\n",
|
|
||||||
"Processing Weapon 20 of 83 | Glaive\n",
|
|
||||||
"Processing Weapon 21 of 83 | Greataxe\n",
|
|
||||||
"Processing Weapon 22 of 83 | Greatclub\n",
|
|
||||||
"Processing Weapon 23 of 83 | Greatpick\n",
|
|
||||||
"Processing Weapon 24 of 83 | Greatsword\n",
|
|
||||||
"Processing Weapon 25 of 83 | Guisarme\n",
|
|
||||||
"Processing Weapon 26 of 83 | Halberd\n",
|
|
||||||
"Processing Weapon 27 of 83 | Hatchet\n",
|
|
||||||
"Processing Weapon 28 of 83 | Lance\n",
|
|
||||||
"Processing Weapon 29 of 83 | Light Hammer\n",
|
|
||||||
"Processing Weapon 30 of 83 | Light Pick\n",
|
|
||||||
"Processing Weapon 31 of 83 | Longsword\n",
|
|
||||||
"Processing Weapon 32 of 83 | Main-gauche\n",
|
|
||||||
"Processing Weapon 33 of 83 | Maul\n",
|
|
||||||
"Processing Weapon 34 of 83 | Pick\n",
|
|
||||||
"Processing Weapon 35 of 83 | Ranseur\n",
|
|
||||||
"Processing Weapon 36 of 83 | Rapier\n",
|
|
||||||
"Processing Weapon 37 of 83 | Sap\n",
|
|
||||||
"Processing Weapon 38 of 83 | Scimitar\n",
|
|
||||||
"Processing Weapon 39 of 83 | Scythe\n",
|
|
||||||
"Processing Weapon 40 of 83 | Shield Bash\n",
|
|
||||||
"Processing Weapon 41 of 83 | Shield Boss\n",
|
|
||||||
"Processing Weapon 42 of 83 | Shield Spikes\n",
|
|
||||||
"Processing Weapon 43 of 83 | Shortsword\n",
|
|
||||||
"Processing Weapon 44 of 83 | Starknife\n",
|
|
||||||
"Processing Weapon 45 of 83 | Trident\n",
|
|
||||||
"Processing Weapon 46 of 83 | War Flail\n",
|
|
||||||
"Processing Weapon 47 of 83 | Warhammer\n",
|
|
||||||
"Processing Weapon 48 of 83 | Whip\n",
|
|
||||||
"Processing Weapon 49 of 83 | Dogslicer\n",
|
|
||||||
"Processing Weapon 50 of 83 | Elven Curve Blade\n",
|
|
||||||
"Processing Weapon 51 of 83 | Filcher's Fork\n",
|
|
||||||
"Processing Weapon 52 of 83 | Gnome Hooked Hammer\n",
|
|
||||||
"Processing Weapon 53 of 83 | Horsechopper\n",
|
|
||||||
"Processing Weapon 54 of 83 | Kama\n",
|
|
||||||
"Processing Weapon 55 of 83 | Katana\n",
|
|
||||||
"Processing Weapon 56 of 83 | Kukri\n",
|
|
||||||
"Processing Weapon 57 of 83 | Nunchaku\n",
|
|
||||||
"Processing Weapon 58 of 83 | Orc Knuckle Dagger\n",
|
|
||||||
"Processing Weapon 59 of 83 | Sai\n",
|
|
||||||
"Processing Weapon 60 of 83 | Spiked Chain\n",
|
|
||||||
"Processing Weapon 61 of 83 | Temple Sword\n",
|
|
||||||
"Processing Weapon 62 of 83 | Dwarven War Axe\n",
|
|
||||||
"Processing Weapon 63 of 83 | Gnome Flickmace\n",
|
|
||||||
"Processing Weapon 64 of 83 | Orc Necksplitter\n",
|
|
||||||
"Processing Weapon 65 of 83 | Sawtooth Saber\n",
|
|
||||||
"Processing Weapon 66 of 83 | Blowgun\n",
|
|
||||||
"Processing Weapon 67 of 83 | Crossbow\n",
|
|
||||||
"Processing Weapon 68 of 83 | Dart\n",
|
|
||||||
"Processing Weapon 69 of 83 | Hand Crossbow\n",
|
|
||||||
"Processing Weapon 70 of 83 | Heavy Crossbow\n",
|
|
||||||
"Processing Weapon 71 of 83 | Javelin\n",
|
|
||||||
"Processing Weapon 72 of 83 | Sling\n",
|
|
||||||
"Processing Weapon 73 of 83 | Alchemical Bomb\n",
|
|
||||||
"Processing Weapon 74 of 83 | Composite Longbow\n",
|
|
||||||
"Processing Weapon 75 of 83 | Composite Shortbow\n",
|
|
||||||
"Processing Weapon 76 of 83 | Longbow\n",
|
|
||||||
"Processing Weapon 77 of 83 | Shortbow\n",
|
|
||||||
"Processing Weapon 78 of 83 | Halfling Sling Staff\n",
|
|
||||||
"Processing Weapon 79 of 83 | Shuriken\n",
|
|
||||||
"Processing Weapon 80 of 83 | Blowgun Darts\n",
|
|
||||||
"Processing Weapon 81 of 83 | Bolts\n",
|
|
||||||
"Processing Weapon 82 of 83 | Sling Bullets\n",
|
|
||||||
"Processing Weapon 83 of 83 | Arrows\n",
|
|
||||||
"------------------------\n",
|
|
||||||
"Data Retrieval Complete\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# url that contains all the links\n",
|
"# url that contains all the links\n",
|
||||||
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",
|
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",
|
||||||
|
@ -177,7 +83,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -188,7 +94,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -198,7 +104,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -207,7 +113,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -216,7 +122,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# # Scrape data from aon2e and generate csvs to import into sqlite
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Dependencies
|
||||||
|
import pandas as pd
|
||||||
|
from bs4 import BeautifulSoup as bs
|
||||||
|
import os
|
||||||
|
from splinter import Browser
|
||||||
|
|
||||||
|
# Selenium setup: the chromedriver binary is looked up one directory above
# the current working directory and handed to splinter as a keyword argument.
chrome_driver = os.path.join(os.pardir, 'chromedriver.exe')
executable_path = dict(executable_path=chrome_driver)
browser = Browser('chrome', headless=False, **executable_path)

# Pandas config: never truncate the column list when a DataFrame is displayed
pd.set_option('display.max_columns', None)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Base url of a single weapon page; the numeric weapon ID is appended to it
url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='

# Number of weapons (IDs 1..number_of_weapons are visited)
number_of_weapons = 83

# Empty lists to store the scraped values; both get exactly one entry per ID
# so they stay aligned when turned into DataFrame columns later
name_list = []
description_list = []

print('Beginning Data Retrieval')
print('------------------------')

# Loop from 1 to the value in number_of_weapons (inclusive)
for weapon in range(1, number_of_weapons + 1):

    url = url_weapon + str(weapon)
    browser.visit(url)
    html = browser.html
    soup = bs(html, 'html.parser')

    # Select only the content section (None when the page has no such element)
    content = soup.find(id='ctl00_MainContent_DetailedOutput')

    # The name is the text of the first link inside the content section.
    # If the section or the link is missing, fall back to a placeholder that
    # records which ID failed instead of aborting the whole run.
    link = content.find('a') if content is not None else None
    if link is not None:
        name = link.text.strip()
    else:
        name = f'weapon: {weapon}'

    # The description is whatever node directly follows the first <hr>.
    # That node is either a tag (has .text) or a bare text node; on older
    # Beautiful Soup releases a text node has no .text attribute, so the
    # AttributeError branch converts it with str() instead.
    rule = content.find('hr') if content is not None else None
    node = rule.next_element if rule is not None else None
    if node is None:
        # No content section / no <hr> / nothing after it: keep the lists
        # aligned with an empty description rather than crashing.
        description = ''
    else:
        try:
            description = node.text.strip()
        except AttributeError:
            description = str(node).strip()

    print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')

    # Append values to our lists
    name_list.append(name)
    description_list.append(description)

print('------------------------')
print('Data Retrieval Complete')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Weapon tables taken from https://2e.aonprd.com/Weapons.aspx, read from the
# current working directory
melee, ranged = (pd.read_csv(csv_name) for csv_name in ('melee.csv', 'ranged.csv'))


# In[ ]:


# Two-column frame holding the scraped name and description of every weapon
data = dict(Name=name_list, description=description_list)
scrape = pd.DataFrame(data)


# In[ ]:


# Left join on 'Name': every row of the melee table is kept and gains the
# scraped description where the names match
melee = pd.merge(melee, scrape, how='left', on='Name')


# In[ ]:


# Same left join for the ranged table
ranged = pd.merge(ranged, scrape, how='left', on='Name')


# In[ ]:


# Write the enriched tables back over the csv files they were read from
melee.to_csv('melee.csv')
ranged.to_csv('ranged.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue