Redo file structure
parent
22fb3bf949
commit
a15cb9adf1
|
@ -1,31 +0,0 @@
|
|||
# Directory for scraping [aon2e](https://2e.aonprd.com/Sources.aspx?ID=1)
|
||||
|
||||
## Current languages
|
||||
* Python
|
||||
|
||||
### Python requirements
|
||||
1. Python 3.6.8
|
||||
2. pip
|
||||
3. pandas 0.24.2
|
||||
4. requests 2.21.0
|
||||
5. beautifulsoup4 4.8.0
|
||||
6. time (Python standard library, no install needed)
|
||||
7. re (Python standard library, no install needed)
|
||||
|
||||
#### Spells
|
||||
| Column | Data type | Comments |
|
||||
| ------ | ------ | ------ |
|
||||
| name | string | |
|
||||
| level | integer | 1 to 9 |
|
||||
| traits | list of strings | None may be possible |
|
||||
| source | string | Follows format 'Core Rulebook pg. ?' |
|
||||
| traditions | list of strings | should not contain any empty lists |
|
||||
| actions | list of integers or string | Contains either a list of integers representing the number of actions, or a string representing the time to cast. Error handling isn't complete. |
|
||||
| components | list of strings | currently only contains material, somatic or verbal |
|
||||
| spell_range | string | |
|
||||
| target | string | |
|
||||
| description | string | TODO separators for blank lines and |
|
||||
| save | string | |
|
||||
| duration | string | |
|
||||
| success | ? | list of dictionaries maybe? |
|
||||
| heighten | ? | list of dictionaries maybe? |
|
|
@ -1,3 +0,0 @@
|
|||
pandas==0.24.2
|
||||
requests==2.21.0
|
||||
beautifulsoup4==4.8.0
|
|
@ -1,448 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Scrape data from aon2e and generate csvs to import in to sqlite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 173,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Dependencies\n",
|
||||
"import pandas as pd\n",
|
||||
"from bs4 import BeautifulSoup as bs\n",
|
||||
"import requests\n",
|
||||
"import time\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"# Pandas config\n",
|
||||
"pd.set_option('display.max_columns', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Ancestries TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# url that contains all the links\n",
|
||||
"url_ancestry = 'https://2e.aonprd.com/Ancestries.aspx?ID='\n",
|
||||
"\n",
|
||||
"# Empty list to store the ancestry data\n",
|
||||
"ancestry = []\n",
|
||||
"\n",
|
||||
"# Make the request to the aon2e\n",
|
||||
"response_ancestry = requests.get(f'{url_ancestry}1')\n",
|
||||
"\n",
|
||||
"# Use BS4 html parser to generate soup\n",
|
||||
"soup_ancestry = bs(response_ancestry.text, 'html.parser')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Store the data needed from the soup\n",
|
||||
"name = soup_ancestry.find(id='ctl00_MainContent_DetailedOutput').a.text\n",
|
||||
"traits = [trait.a.text for trait in soup_ancestry.find_all(class_='trait')]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Raw description\n",
|
||||
"description = soup_ancestry.find(id='ctl00_MainContent_DetailedOutput').text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Animal Companions TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Animals (Rentals/Sales) TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Arcane Schools TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Arcane Thesis TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Archetypes TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Armor TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Backgrounds TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Bloodlines TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Champion Causes TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Champion Tenets TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Classes TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Class Kits TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Class Sample Builds TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Conditions TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deities TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Doctrines TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Domains TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Druidic Orders TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Equipment TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Familiar Abilities TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Feats TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hazards TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hunter's Edges TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instincts TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Languages TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Muses TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Rackets TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Research Fields TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Rituals TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Rules TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Shields TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Skills TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Skills (General) TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Spells TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Traits TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weapons TODO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 306,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# url that contains all the links\n",
|
||||
"url_spells = 'https://2e.aonprd.com/Spells.aspx?ID='\n",
|
||||
"\n",
|
||||
"# Number of spells taken from https://2e.aonprd.com/Sources.aspx?ID=1\n",
|
||||
"spell_number = 343\n",
|
||||
"\n",
|
||||
"# Make the request to the aon2e\n",
|
||||
"response_spells = requests.get(f'{url_spells}{spell_number}')\n",
|
||||
"\n",
|
||||
"# Use BS4 html parser to generate soup\n",
|
||||
"soup_spells = bs(response_spells.text, 'html.parser')\n",
|
||||
"\n",
|
||||
"# Select only the content\n",
|
||||
"content = soup_spells.find(id='ctl00_MainContent_DetailedOutput')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 323,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['1 minute']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"name, level = content.h1.text.replace(' ', '').split('Spell')\n",
|
||||
"traits = [trait.a.text.strip() for trait in content.find_all(class_='trait')]\n",
|
||||
"source = content.find(class_='external-link').text.strip()\n",
|
||||
"traditions = [tradition.text.strip() for tradition\n",
|
||||
" in content.find_all('a', href=re.compile(\"Tradition\"))]\n",
|
||||
"\n",
|
||||
"## Actions sections\n",
|
||||
"actions = []\n",
|
||||
"# Start at cast and then iterate over the next elements on the line\n",
|
||||
"for e in content.find('b', text='Cast').next_siblings:\n",
|
||||
" if e.name == 'br':\n",
|
||||
" # If the end of the line is reached break the loop\n",
|
||||
" break\n",
|
||||
" elif e.name == 'hr':\n",
|
||||
" # If a horizontal line is reached\n",
|
||||
" break\n",
|
||||
" try:\n",
|
||||
" if e['alt']=='Single Action' and 'actiondark' in e['class']:\n",
|
||||
" # If it's the single action icon\n",
|
||||
" actions.append(1)\n",
|
||||
" elif e['alt']=='Three Actions' and 'actiondark' in e['class']:\n",
|
||||
" # If it is the three action icon\n",
|
||||
" actions.append(3)\n",
|
||||
" except TypeError:\n",
|
||||
" # If there is no icon handle it as a string\n",
|
||||
" if 'to' in e:\n",
|
||||
" # If to exists it must be 1 to 3 actions\n",
|
||||
" actions.append(2)\n",
|
||||
" else:\n",
|
||||
" actions.append(e.split(' (')[0].strip())\n",
|
||||
" continue\n",
|
||||
" except KeyError:\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
"components = content.find('b', text='Cast').next.next \\\n",
|
||||
" .replace('(', '').replace(')', ''). replace(',', '').split(' ')\n",
|
||||
"components = [x for x in components if x\n",
|
||||
" in ['material', 'somatic', 'verbal']]\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # If the spell has a range\n",
|
||||
" spell_range = content.find('b', text='Range').next.next.replace(';', '')\n",
|
||||
"except AttributeError:\n",
|
||||
" # If the spell doesn't have a range\n",
|
||||
" spell_range = None\n",
|
||||
"\n",
|
||||
"target = content.find('hr').previous.strip()\n",
|
||||
"\n",
|
||||
"# save TODO\n",
|
||||
"\n",
|
||||
"# duration TODO\n",
|
||||
"\n",
|
||||
"## Description section\n",
|
||||
"description = content.find('hr').next\n",
|
||||
"print(actions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Binary file not shown.
|
@ -30,103 +30,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Beginning Data Retrieval\n",
|
||||
"------------------------\n",
|
||||
"Processing Weapon 1 of 83 | Fist\n",
|
||||
"Processing Weapon 2 of 83 | Club\n",
|
||||
"Processing Weapon 3 of 83 | Dagger\n",
|
||||
"Processing Weapon 4 of 83 | Gauntlet\n",
|
||||
"Processing Weapon 5 of 83 | Light Mace\n",
|
||||
"Processing Weapon 6 of 83 | Longspear\n",
|
||||
"Processing Weapon 7 of 83 | Mace\n",
|
||||
"Processing Weapon 8 of 83 | Morningstar\n",
|
||||
"Processing Weapon 9 of 83 | Sickle\n",
|
||||
"Processing Weapon 10 of 83 | Spear\n",
|
||||
"Processing Weapon 11 of 83 | Spiked Gauntlet\n",
|
||||
"Processing Weapon 12 of 83 | Staff\n",
|
||||
"Processing Weapon 13 of 83 | Clan Dagger\n",
|
||||
"Processing Weapon 14 of 83 | Katar\n",
|
||||
"Processing Weapon 15 of 83 | Bastard Sword\n",
|
||||
"Processing Weapon 16 of 83 | Battle Axe\n",
|
||||
"Processing Weapon 17 of 83 | Bo Staff\n",
|
||||
"Processing Weapon 18 of 83 | Falchion\n",
|
||||
"Processing Weapon 19 of 83 | Flail\n",
|
||||
"Processing Weapon 20 of 83 | Glaive\n",
|
||||
"Processing Weapon 21 of 83 | Greataxe\n",
|
||||
"Processing Weapon 22 of 83 | Greatclub\n",
|
||||
"Processing Weapon 23 of 83 | Greatpick\n",
|
||||
"Processing Weapon 24 of 83 | Greatsword\n",
|
||||
"Processing Weapon 25 of 83 | Guisarme\n",
|
||||
"Processing Weapon 26 of 83 | Halberd\n",
|
||||
"Processing Weapon 27 of 83 | Hatchet\n",
|
||||
"Processing Weapon 28 of 83 | Lance\n",
|
||||
"Processing Weapon 29 of 83 | Light Hammer\n",
|
||||
"Processing Weapon 30 of 83 | Light Pick\n",
|
||||
"Processing Weapon 31 of 83 | Longsword\n",
|
||||
"Processing Weapon 32 of 83 | Main-gauche\n",
|
||||
"Processing Weapon 33 of 83 | Maul\n",
|
||||
"Processing Weapon 34 of 83 | Pick\n",
|
||||
"Processing Weapon 35 of 83 | Ranseur\n",
|
||||
"Processing Weapon 36 of 83 | Rapier\n",
|
||||
"Processing Weapon 37 of 83 | Sap\n",
|
||||
"Processing Weapon 38 of 83 | Scimitar\n",
|
||||
"Processing Weapon 39 of 83 | Scythe\n",
|
||||
"Processing Weapon 40 of 83 | Shield Bash\n",
|
||||
"Processing Weapon 41 of 83 | Shield Boss\n",
|
||||
"Processing Weapon 42 of 83 | Shield Spikes\n",
|
||||
"Processing Weapon 43 of 83 | Shortsword\n",
|
||||
"Processing Weapon 44 of 83 | Starknife\n",
|
||||
"Processing Weapon 45 of 83 | Trident\n",
|
||||
"Processing Weapon 46 of 83 | War Flail\n",
|
||||
"Processing Weapon 47 of 83 | Warhammer\n",
|
||||
"Processing Weapon 48 of 83 | Whip\n",
|
||||
"Processing Weapon 49 of 83 | Dogslicer\n",
|
||||
"Processing Weapon 50 of 83 | Elven Curve Blade\n",
|
||||
"Processing Weapon 51 of 83 | Filcher's Fork\n",
|
||||
"Processing Weapon 52 of 83 | Gnome Hooked Hammer\n",
|
||||
"Processing Weapon 53 of 83 | Horsechopper\n",
|
||||
"Processing Weapon 54 of 83 | Kama\n",
|
||||
"Processing Weapon 55 of 83 | Katana\n",
|
||||
"Processing Weapon 56 of 83 | Kukri\n",
|
||||
"Processing Weapon 57 of 83 | Nunchaku\n",
|
||||
"Processing Weapon 58 of 83 | Orc Knuckle Dagger\n",
|
||||
"Processing Weapon 59 of 83 | Sai\n",
|
||||
"Processing Weapon 60 of 83 | Spiked Chain\n",
|
||||
"Processing Weapon 61 of 83 | Temple Sword\n",
|
||||
"Processing Weapon 62 of 83 | Dwarven War Axe\n",
|
||||
"Processing Weapon 63 of 83 | Gnome Flickmace\n",
|
||||
"Processing Weapon 64 of 83 | Orc Necksplitter\n",
|
||||
"Processing Weapon 65 of 83 | Sawtooth Saber\n",
|
||||
"Processing Weapon 66 of 83 | Blowgun\n",
|
||||
"Processing Weapon 67 of 83 | Crossbow\n",
|
||||
"Processing Weapon 68 of 83 | Dart\n",
|
||||
"Processing Weapon 69 of 83 | Hand Crossbow\n",
|
||||
"Processing Weapon 70 of 83 | Heavy Crossbow\n",
|
||||
"Processing Weapon 71 of 83 | Javelin\n",
|
||||
"Processing Weapon 72 of 83 | Sling\n",
|
||||
"Processing Weapon 73 of 83 | Alchemical Bomb\n",
|
||||
"Processing Weapon 74 of 83 | Composite Longbow\n",
|
||||
"Processing Weapon 75 of 83 | Composite Shortbow\n",
|
||||
"Processing Weapon 76 of 83 | Longbow\n",
|
||||
"Processing Weapon 77 of 83 | Shortbow\n",
|
||||
"Processing Weapon 78 of 83 | Halfling Sling Staff\n",
|
||||
"Processing Weapon 79 of 83 | Shuriken\n",
|
||||
"Processing Weapon 80 of 83 | Blowgun Darts\n",
|
||||
"Processing Weapon 81 of 83 | Bolts\n",
|
||||
"Processing Weapon 82 of 83 | Sling Bullets\n",
|
||||
"Processing Weapon 83 of 83 | Arrows\n",
|
||||
"------------------------\n",
|
||||
"Data Retrieval Complete\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# url that contains all the links\n",
|
||||
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",
|
||||
|
|
Loading…
Reference in New Issue