diff --git a/scrape/README.md b/scrape/README.md deleted file mode 100644 index e36fcaa..0000000 --- a/scrape/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Directory for scraping [aon2e](https://2e.aonprd.com/Sources.aspx?ID=1) - -## Current languages -* Python - -### Python requirements -1. Python 3.6.8 -2. pip -3. pandas 0.24.2 -4. requests 2.21.0 -5. beautifulsoup4==4.8.0 -6. time -7. re - -#### Spells -| Column | Data type | Comments | -| ------ | ------ | ------ | -| name | string | | -| level | integer | 1 to 9 | -| traits | list of strings | None may be possible | -| source | string | Follows format 'Core Rulebook pg. ?' | -| traditions | list of strings | should not contain any empty lists | -| actions | list of integers or string | contains either a list of integers representing the number of actions or a string representing the time to cast error handling isn't complete | -| components | list of strings | currently only contains material, somatic or verbal | -| spell_range | string | | -| target | string | | -| description | string | TODO separators for blank lines and | -| save | string | | -| duration | string | | -| success | ? | list of dictionaries maybe? | -| heighten | ? | list of dictionaries maybe? | \ No newline at end of file diff --git a/scrape/requirements.txt b/scrape/requirements.txt deleted file mode 100644 index 7959273..0000000 --- a/scrape/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pandas==0.24.2 -requests==2.21.0 -beautifulsoup4==4.8.0 diff --git a/scrape/scrape.ipynb b/scrape/scrape.ipynb deleted file mode 100644 index 88cecf8..0000000 --- a/scrape/scrape.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Scrape data from aon2e and generate csvs to import in to sqlite" - ] - }, - { - "cell_type": "code", - "execution_count": 173, - "metadata": {}, - "outputs": [], - "source": [ - "# Dependencies\n", - "import pandas as pd\n", - "from bs4 import BeautifulSoup as bs\n", - "import requests\n", - "import time\n", - "import re\n", - "\n", - "# Pandas config\n", - "pd.set_option('display.max_columns', None)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ancestries TODO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# url that contains all the links\n", - "url_ancestry = 'https://2e.aonprd.com/Ancestries.aspx?ID='\n", - "\n", - "# Empty list to store the ancestry data\n", - "ancestry = []\n", - "\n", - "# Make the request to the aon2e\n", - "response_ancestry = requests.get(f'{url_ancestry}1')\n", - "\n", - "# Use BS4 html parser to generate soup\n", - "soup_ancestry = bs(response_ancestry.text, 'html.parser')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Store the data needed from the soup\n", - "name = soup_ancestry.find(id='ctl00_MainContent_DetailedOutput').a.text\n", - "traits = [trait.a.text for trait in soup_ancestry.find_all(class_='trait')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Raw description\n", - "description = soup_ancestry.find(id='ctl00_MainContent_DetailedOutput').text" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Animal Companions TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Animals (Rentals/Sales) TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Arcane Schools TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Arcane Thesis TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Archetypes TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Armor TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Backgrounds TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bloodlines TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Champion Causes TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Champion Tenets TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Classes TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Class Kits TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Class Sample Builds TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conditions TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deities TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Doctrines TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Domains TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Druidic Orders TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Equipment TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Familiar Abilities TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Feats TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hazards TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hunter's Edges TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Instincts TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Languages TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Muses TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rackets TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Research Fields TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rituals TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rules TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Shields TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Skills TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Skills (General) TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Spells TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Traits TODO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Weapons TODO" - ] - }, - { - "cell_type": "code", - "execution_count": 306, - "metadata": {}, - "outputs": [], - "source": [ - "# url that contains all the links\n", - "url_spells = 'https://2e.aonprd.com/Spells.aspx?ID='\n", - "\n", - "# Number of spells taken from https://2e.aonprd.com/Sources.aspx?ID=1\n", - "spell_number = 343\n", - "\n", - "# Make the request to the aon2e\n", - "response_spells = requests.get(f'{url_spells}{spell_number}')\n", - "\n", - "# Use BS4 html parser to generate soup\n", - "soup_spells = bs(response_spells.text, 'html.parser')\n", - "\n", - "# Select only the content\n", - "content = soup_spells.find(id='ctl00_MainContent_DetailedOutput')" - ] - }, - { - "cell_type": "code", - "execution_count": 323, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['1 minute']\n" - ] - } - ], - "source": [ - "name, level = content.h1.text.replace(' ', '').split('Spell')\n", - "traits = [trait.a.text.strip() for trait in content.find_all(class_='trait')]\n", - "source = content.find(class_='external-link').text.strip()\n", - "traditions = [tradition.text.strip() for tradition\n", - " in content.find_all('a', href=re.compile(\"Tradition\"))]\n", - "\n", - "## Actions sections\n", - "actions = []\n", - "# Start at cast and then iterate over the next elements on the line\n", - "for e in content.find('b', text='Cast').next_siblings:\n", - " if e.name == 'br':\n", - " # If the end of the line is reached break the loop\n", - " break\n", - " elif e.name == 'hr':\n", - " # If a horizontal line is reached\n", - " break\n", - " try:\n", - " if e['alt']=='Single Action' and 'actiondark' in e['class']:\n", - " # If it's the single action icon\n", - " actions.append(1)\n", - " elif e['alt']=='Three Actions' and 'actiondark' in e['class']:\n", - " # If it is the three action icon\n", - " actions.append(3)\n", - " except TypeError:\n", - " # If there is no icon handle it as a string\n", - " if 'to' in e:\n", - " # If to exists it must be 1 to 3 actions\n", - " actions.append(2)\n", - " else:\n", - " actions.append(e.split(' (')[0].strip())\n", - " continue\n", - " except KeyError:\n", - " continue\n", - " \n", - "components = content.find('b', text='Cast').next.next \\\n", - " .replace('(', '').replace(')', ''). replace(',', '').split(' ')\n", - "components = [x for x in components if x\n", - " in ['material', 'somatic', 'verbal']]\n", - "\n", - "try:\n", - " # If the spell has a range\n", - " spell_range = content.find('b', text='Range').next.next.replace(';', '')\n", - "except AttributeError:\n", - " # If the spell doesn't have a range\n", - " spell_range = None\n", - "\n", - "target = content.find('hr').previous.strip()\n", - "\n", - "# save TODO\n", - "\n", - "# duration TODO\n", - "\n", - "## Description section\n", - "description = content.find('hr').next\n", - "print(actions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/src/resources/chromedriver.exe b/src/resources/chromedriver.exe new file mode 100644 index 0000000..5268e3a Binary files /dev/null and b/src/resources/chromedriver.exe differ diff --git a/src/weapons/scrape.ipynb b/src/weapons/scrape.ipynb index 1de5ad3..e01b582 100644 --- a/src/weapons/scrape.ipynb +++ b/src/weapons/scrape.ipynb @@ -30,103 +30,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Beginning Data Retrieval\n", - "------------------------\n", - "Processing Weapon 1 of 83 | Fist\n", - "Processing Weapon 2 of 83 | Club\n", - "Processing Weapon 3 of 83 | Dagger\n", - "Processing Weapon 4 of 83 | Gauntlet\n", - "Processing Weapon 5 of 83 | Light Mace\n", - "Processing Weapon 6 of 83 | Longspear\n", - "Processing Weapon 7 of 83 | Mace\n", - "Processing Weapon 8 of 83 | Morningstar\n", - "Processing Weapon 9 of 83 | Sickle\n", - "Processing Weapon 10 of 83 | Spear\n", - "Processing Weapon 11 of 83 | Spiked Gauntlet\n", - "Processing Weapon 12 of 83 | Staff\n", - "Processing Weapon 13 of 83 | Clan Dagger\n", - "Processing Weapon 14 of 83 | Katar\n", - "Processing Weapon 15 of 83 | Bastard Sword\n", - "Processing Weapon 16 of 83 | Battle Axe\n", - "Processing Weapon 17 of 83 | Bo Staff\n", - "Processing Weapon 18 of 83 | Falchion\n", - "Processing Weapon 19 of 83 | Flail\n", - "Processing Weapon 20 of 83 | Glaive\n", - "Processing Weapon 21 of 83 | Greataxe\n", - "Processing Weapon 22 of 83 | Greatclub\n", - "Processing Weapon 23 of 83 | Greatpick\n", - "Processing Weapon 24 of 83 | Greatsword\n", - "Processing Weapon 25 of 83 | Guisarme\n", - "Processing Weapon 26 of 83 | Halberd\n", - "Processing Weapon 27 of 83 | Hatchet\n", - "Processing Weapon 28 of 83 | Lance\n", - "Processing Weapon 29 of 83 | Light Hammer\n", - "Processing Weapon 30 of 83 | Light Pick\n", - "Processing Weapon 31 of 83 | Longsword\n", - "Processing Weapon 32 of 83 | Main-gauche\n", - "Processing Weapon 33 of 83 | Maul\n", - "Processing Weapon 34 of 83 | Pick\n", - "Processing Weapon 35 of 83 | Ranseur\n", - "Processing Weapon 36 of 83 | Rapier\n", - "Processing Weapon 37 of 83 | Sap\n", - "Processing Weapon 38 of 83 | Scimitar\n", - "Processing Weapon 39 of 83 | Scythe\n", - "Processing Weapon 40 of 83 | Shield Bash\n", - "Processing Weapon 41 of 83 | Shield Boss\n", - "Processing Weapon 42 of 83 | Shield Spikes\n", - "Processing Weapon 43 of 83 | Shortsword\n", - "Processing Weapon 44 of 83 | Starknife\n", - "Processing Weapon 45 of 83 | Trident\n", - "Processing Weapon 46 of 83 | War Flail\n", - "Processing Weapon 47 of 83 | Warhammer\n", - "Processing Weapon 48 of 83 | Whip\n", - "Processing Weapon 49 of 83 | Dogslicer\n", - "Processing Weapon 50 of 83 | Elven Curve Blade\n", - "Processing Weapon 51 of 83 | Filcher's Fork\n", - "Processing Weapon 52 of 83 | Gnome Hooked Hammer\n", - "Processing Weapon 53 of 83 | Horsechopper\n", - "Processing Weapon 54 of 83 | Kama\n", - "Processing Weapon 55 of 83 | Katana\n", - "Processing Weapon 56 of 83 | Kukri\n", - "Processing Weapon 57 of 83 | Nunchaku\n", - "Processing Weapon 58 of 83 | Orc Knuckle Dagger\n", - "Processing Weapon 59 of 83 | Sai\n", - "Processing Weapon 60 of 83 | Spiked Chain\n", - "Processing Weapon 61 of 83 | Temple Sword\n", - "Processing Weapon 62 of 83 | Dwarven War Axe\n", - "Processing Weapon 63 of 83 | Gnome Flickmace\n", - "Processing Weapon 64 of 83 | Orc Necksplitter\n", - "Processing Weapon 65 of 83 | Sawtooth Saber\n", - "Processing Weapon 66 of 83 | Blowgun\n", - "Processing Weapon 67 of 83 | Crossbow\n", - "Processing Weapon 68 of 83 | Dart\n", - "Processing Weapon 69 of 83 | Hand Crossbow\n", - "Processing Weapon 70 of 83 | Heavy Crossbow\n", - "Processing Weapon 71 of 83 | Javelin\n", - "Processing Weapon 72 of 83 | Sling\n", - "Processing Weapon 73 of 83 | Alchemical Bomb\n", - "Processing Weapon 74 of 83 | Composite Longbow\n", - "Processing Weapon 75 of 83 | Composite Shortbow\n", - "Processing Weapon 76 of 83 | Longbow\n", - "Processing Weapon 77 of 83 | Shortbow\n", - "Processing Weapon 78 of 83 | Halfling Sling Staff\n", - "Processing Weapon 79 of 83 | Shuriken\n", - "Processing Weapon 80 of 83 | Blowgun Darts\n", - "Processing Weapon 81 of 83 | Bolts\n", - "Processing Weapon 82 of 83 | Sling Bullets\n", - "Processing Weapon 83 of 83 | Arrows\n", - "------------------------\n", - "Data Retrieval Complete\n" - ] - } - ], + "outputs": [], "source": [ "# url that contains all the links\n", "url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",