Redo file structure

merge-requests/23/head
Brian Haley 2019-08-08 22:23:38 -04:00
parent 22fb3bf949
commit a15cb9adf1
5 changed files with 2 additions and 578 deletions

View File

@ -1,31 +0,0 @@
# Directory for scraping [aon2e](https://2e.aonprd.com/Sources.aspx?ID=1)
## Current languages
* Python
### Python requirements
1. Python 3.6.8
2. pip
3. pandas 0.24.2
4. requests 2.21.0
5. beautifulsoup4 4.8.0
6. time (Python standard library, nothing to install)
7. re (Python standard library, nothing to install)
#### Spells
| Column | Data type | Comments |
| ------ | ------ | ------ |
| name | string | |
| level | integer | 1 to 9 |
| traits | list of strings | None may be possible |
| source | string | Follows format 'Core Rulebook pg. ?' |
| traditions | list of strings | should not contain any empty lists |
| actions | list of integers or string | Contains either a list of integers representing the number of actions, or a string representing the time to cast. Error handling isn't complete. |
| components | list of strings | currently only contains material, somatic or verbal |
| spell_range | string | |
| target | string | |
| description | string | TODO: add separators for blank lines and `<hr>` elements |
| save | string | |
| duration | string | |
| success | ? | list of dictionaries maybe? |
| heighten | ? | list of dictionaries maybe? |

View File

@ -1,3 +0,0 @@
pandas==0.24.2
requests==2.21.0
beautifulsoup4==4.8.0

View File

@ -1,448 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrape data from aon2e and generate CSVs to import into SQLite"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [],
"source": [
"# Dependencies\n",
"import pandas as pd\n",
"from bs4 import BeautifulSoup as bs\n",
"import requests\n",
"import time\n",
"import re\n",
"\n",
"# Pandas config\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ancestries TODO"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Base URL of an ancestry detail page; the numeric ID is appended per request\n",
"url_ancestry = 'https://2e.aonprd.com/Ancestries.aspx?ID='\n",
"\n",
"# Empty list to store the ancestry data (nothing in this cell fills it yet)\n",
"ancestry = []\n",
"\n",
"# Fetch the page for ancestry ID 1. The timeout keeps the cell from hanging\n",
"# forever on a stalled connection, and raise_for_status() stops an HTTP\n",
"# error page from being parsed as if it were ancestry data.\n",
"response_ancestry = requests.get(f'{url_ancestry}1', timeout=30)\n",
"response_ancestry.raise_for_status()\n",
"\n",
"# Use BS4 html parser to generate soup\n",
"soup_ancestry = bs(response_ancestry.text, 'html.parser')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ancestry name: text of the first link inside the detail container\n",
"name = (\n",
"    soup_ancestry\n",
"    .find(id='ctl00_MainContent_DetailedOutput')\n",
"    .find('a')\n",
"    .get_text()\n",
")\n",
"# Trait labels: link text of every element carrying the 'trait' class\n",
"traits = [tag.find('a').get_text()\n",
"          for tag in soup_ancestry.find_all(class_='trait')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Unprocessed description: all text inside the detail container\n",
"description = (\n",
"    soup_ancestry\n",
"    .find(id='ctl00_MainContent_DetailedOutput')\n",
"    .get_text()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Animal Companions TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Animals (Rentals/Sales) TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Arcane Schools TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Arcane Thesis TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Archetypes TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Armor TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Backgrounds TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Bloodlines TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Champion Causes TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Champion Tenets TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classes TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Class Kits TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Class Sample Builds TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conditions TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deities TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Doctrines TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Domains TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Druidic Orders TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Equipment TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Familiar Abilities TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feats TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hazards TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hunter's Edges TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instincts TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Languages TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Muses TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Rackets TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Research Fields TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Rituals TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Rules TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Shields TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Skills TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Skills (General) TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Spells TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Traits TODO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Weapons TODO"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Base URL of a spell detail page; the numeric ID is appended per request\n",
"url_spells = 'https://2e.aonprd.com/Spells.aspx?ID='\n",
"\n",
"# 343 is the number of spells listed at\n",
"# https://2e.aonprd.com/Sources.aspx?ID=1. Below it is used as the ID of the\n",
"# single page to fetch, not as a loop bound (presumably the last spell --\n",
"# TODO confirm).\n",
"spell_number = 343\n",
"\n",
"# Fetch the page. The timeout keeps the cell from hanging forever on a\n",
"# stalled connection, and raise_for_status() stops an HTTP error page from\n",
"# being parsed as if it were spell data.\n",
"response_spells = requests.get(f'{url_spells}{spell_number}', timeout=30)\n",
"response_spells.raise_for_status()\n",
"\n",
"# Use BS4 html parser to generate soup\n",
"soup_spells = bs(response_spells.text, 'html.parser')\n",
"\n",
"# Select only the content\n",
"content = soup_spells.find(id='ctl00_MainContent_DetailedOutput')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse one spell page (`content`, selected in the previous cell) into the\n",
"# fields described in the README's Spells table.\n",
"\n",
"# The heading text is '<name>Spell <level>'. Split on the LAST 'Spell' so\n",
"# multi-word names keep their spaces and names that contain the word\n",
"# 'Spell' still unpack into exactly two parts. `level` stays a string.\n",
"name, level = (part.strip() for part in content.h1.text.rsplit('Spell', 1))\n",
"traits = [trait.a.text.strip() for trait in content.find_all(class_='trait')]\n",
"source = content.find(class_='external-link').text.strip()\n",
"traditions = [tradition.text.strip() for tradition\n",
"              in content.find_all('a', href=re.compile('Tradition'))]\n",
"\n",
"## Actions and components section\n",
"# Action icons are <img> tags whose alt text names the cost.\n",
"# NOTE(review): 'Two Actions' is assumed by analogy with the other two alt\n",
"# texts -- confirm against a two-action spell page.\n",
"ACTION_ICONS = {'Single Action': 1, 'Two Actions': 2, 'Three Actions': 3}\n",
"COMPONENT_NAMES = ('material', 'somatic', 'verbal')\n",
"\n",
"actions = []\n",
"cast_text = []  # plain-text pieces of the Cast line, reused for components\n",
"cast_label = content.find('b', text='Cast')\n",
"\n",
"# Walk the elements that follow the 'Cast' label on the same line. A page\n",
"# without that label leaves actions and components empty.\n",
"for node in (cast_label.next_siblings if cast_label is not None else []):\n",
"    if node.name in ('br', 'hr'):\n",
"        # End of the line (or a horizontal rule) reached\n",
"        break\n",
"    if isinstance(node, str):\n",
"        # Text node: either the 'to' between two icons or a casting time\n",
"        cast_text.append(node)\n",
"        if re.search(r'\\bto\\b', node):\n",
"            # Icon 'to' icon means 1 to 3 actions, so record the middle value\n",
"            actions.append(2)\n",
"            continue\n",
"        # Whatever precedes the parenthesised component list\n",
"        cast_time = node.split(' (')[0].strip()\n",
"        words = re.findall(r'\\w+', cast_time)\n",
"        # Skip empty text and text that only lists components; neither is\n",
"        # a casting time\n",
"        if cast_time and not all(word in COMPONENT_NAMES for word in words):\n",
"            actions.append(cast_time)\n",
"    elif 'actiondark' in node.get('class', []):\n",
"        # Tag node: only the dark action icons count\n",
"        cost = ACTION_ICONS.get(node.get('alt'))\n",
"        if cost is not None:\n",
"            actions.append(cost)\n",
"\n",
"# Components come from the text of the Cast line, so this also works when\n",
"# the first element after the label is an icon rather than text.\n",
"components = [word for word in re.findall(r'\\w+', ' '.join(cast_text))\n",
"              if word in COMPONENT_NAMES]\n",
"\n",
"try:\n",
"    # If the spell has a range\n",
"    spell_range = content.find('b', text='Range').next.next \\\n",
"        .replace(';', '').strip()\n",
"except AttributeError:\n",
"    # If the spell doesn't have a range\n",
"    spell_range = None\n",
"\n",
"# Text immediately before the first horizontal rule.\n",
"# NOTE(review): presumably the Targets entry -- confirm it is always last.\n",
"target = content.find('hr').previous.strip()\n",
"\n",
"# save TODO\n",
"\n",
"# duration TODO\n",
"\n",
"## Description section\n",
"# First element after the first horizontal rule\n",
"description = content.find('hr').next\n",
"print(actions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

Binary file not shown.

View File

@ -30,103 +30,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Beginning Data Retrieval\n",
"------------------------\n",
"Processing Weapon 1 of 83 | Fist\n",
"Processing Weapon 2 of 83 | Club\n",
"Processing Weapon 3 of 83 | Dagger\n",
"Processing Weapon 4 of 83 | Gauntlet\n",
"Processing Weapon 5 of 83 | Light Mace\n",
"Processing Weapon 6 of 83 | Longspear\n",
"Processing Weapon 7 of 83 | Mace\n",
"Processing Weapon 8 of 83 | Morningstar\n",
"Processing Weapon 9 of 83 | Sickle\n",
"Processing Weapon 10 of 83 | Spear\n",
"Processing Weapon 11 of 83 | Spiked Gauntlet\n",
"Processing Weapon 12 of 83 | Staff\n",
"Processing Weapon 13 of 83 | Clan Dagger\n",
"Processing Weapon 14 of 83 | Katar\n",
"Processing Weapon 15 of 83 | Bastard Sword\n",
"Processing Weapon 16 of 83 | Battle Axe\n",
"Processing Weapon 17 of 83 | Bo Staff\n",
"Processing Weapon 18 of 83 | Falchion\n",
"Processing Weapon 19 of 83 | Flail\n",
"Processing Weapon 20 of 83 | Glaive\n",
"Processing Weapon 21 of 83 | Greataxe\n",
"Processing Weapon 22 of 83 | Greatclub\n",
"Processing Weapon 23 of 83 | Greatpick\n",
"Processing Weapon 24 of 83 | Greatsword\n",
"Processing Weapon 25 of 83 | Guisarme\n",
"Processing Weapon 26 of 83 | Halberd\n",
"Processing Weapon 27 of 83 | Hatchet\n",
"Processing Weapon 28 of 83 | Lance\n",
"Processing Weapon 29 of 83 | Light Hammer\n",
"Processing Weapon 30 of 83 | Light Pick\n",
"Processing Weapon 31 of 83 | Longsword\n",
"Processing Weapon 32 of 83 | Main-gauche\n",
"Processing Weapon 33 of 83 | Maul\n",
"Processing Weapon 34 of 83 | Pick\n",
"Processing Weapon 35 of 83 | Ranseur\n",
"Processing Weapon 36 of 83 | Rapier\n",
"Processing Weapon 37 of 83 | Sap\n",
"Processing Weapon 38 of 83 | Scimitar\n",
"Processing Weapon 39 of 83 | Scythe\n",
"Processing Weapon 40 of 83 | Shield Bash\n",
"Processing Weapon 41 of 83 | Shield Boss\n",
"Processing Weapon 42 of 83 | Shield Spikes\n",
"Processing Weapon 43 of 83 | Shortsword\n",
"Processing Weapon 44 of 83 | Starknife\n",
"Processing Weapon 45 of 83 | Trident\n",
"Processing Weapon 46 of 83 | War Flail\n",
"Processing Weapon 47 of 83 | Warhammer\n",
"Processing Weapon 48 of 83 | Whip\n",
"Processing Weapon 49 of 83 | Dogslicer\n",
"Processing Weapon 50 of 83 | Elven Curve Blade\n",
"Processing Weapon 51 of 83 | Filcher's Fork\n",
"Processing Weapon 52 of 83 | Gnome Hooked Hammer\n",
"Processing Weapon 53 of 83 | Horsechopper\n",
"Processing Weapon 54 of 83 | Kama\n",
"Processing Weapon 55 of 83 | Katana\n",
"Processing Weapon 56 of 83 | Kukri\n",
"Processing Weapon 57 of 83 | Nunchaku\n",
"Processing Weapon 58 of 83 | Orc Knuckle Dagger\n",
"Processing Weapon 59 of 83 | Sai\n",
"Processing Weapon 60 of 83 | Spiked Chain\n",
"Processing Weapon 61 of 83 | Temple Sword\n",
"Processing Weapon 62 of 83 | Dwarven War Axe\n",
"Processing Weapon 63 of 83 | Gnome Flickmace\n",
"Processing Weapon 64 of 83 | Orc Necksplitter\n",
"Processing Weapon 65 of 83 | Sawtooth Saber\n",
"Processing Weapon 66 of 83 | Blowgun\n",
"Processing Weapon 67 of 83 | Crossbow\n",
"Processing Weapon 68 of 83 | Dart\n",
"Processing Weapon 69 of 83 | Hand Crossbow\n",
"Processing Weapon 70 of 83 | Heavy Crossbow\n",
"Processing Weapon 71 of 83 | Javelin\n",
"Processing Weapon 72 of 83 | Sling\n",
"Processing Weapon 73 of 83 | Alchemical Bomb\n",
"Processing Weapon 74 of 83 | Composite Longbow\n",
"Processing Weapon 75 of 83 | Composite Shortbow\n",
"Processing Weapon 76 of 83 | Longbow\n",
"Processing Weapon 77 of 83 | Shortbow\n",
"Processing Weapon 78 of 83 | Halfling Sling Staff\n",
"Processing Weapon 79 of 83 | Shuriken\n",
"Processing Weapon 80 of 83 | Blowgun Darts\n",
"Processing Weapon 81 of 83 | Bolts\n",
"Processing Weapon 82 of 83 | Sling Bullets\n",
"Processing Weapon 83 of 83 | Arrows\n",
"------------------------\n",
"Data Retrieval Complete\n"
]
}
],
"source": [ "source": [
"# url that contains all the links\n", "# url that contains all the links\n",
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n", "url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",