{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scrape data from aon2e and generate CSVs to import into SQLite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dependencies\n",
    "import pandas as pd\n",
    "from bs4 import BeautifulSoup as bs\n",
    "import requests\n",
    "import time\n",
    "import re\n",
    "\n",
    "# Pandas config\n",
    "pd.set_option('display.max_columns', None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ancestries TODO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base URL of an ancestry page; the numeric page ID is appended to it\n",
    "url_ancestry = 'https://2e.aonprd.com/Ancestries.aspx?ID='\n",
    "\n",
    "# Empty list to store the ancestry data\n",
    "ancestry = []\n",
    "\n",
    "# Make the request to the aon2e (only the page with ID 1 for now).\n",
    "# The timeout keeps the cell from hanging forever on a stalled connection\n",
    "response_ancestry = requests.get(f'{url_ancestry}1', timeout=30)\n",
    "\n",
    "# Fail loudly on an HTTP error instead of parsing an error page\n",
    "response_ancestry.raise_for_status()\n",
    "\n",
    "# Use BS4 html parser to generate soup\n",
    "soup_ancestry = bs(response_ancestry.text, 'html.parser')\n",
    "\n",
    "# Select only the content; looked up once and reused by the cells below\n",
    "content_ancestry = soup_ancestry.find(id='ctl00_MainContent_DetailedOutput')\n",
    "if content_ancestry is None:\n",
    "    raise ValueError('Main content element not found on the ancestry page')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the data needed from the soup\n",
    "# The name is the text of the first link inside the content element\n",
    "name = content_ancestry.a.text\n",
    "# Trait labels are the link text of every element with the 'trait' class,\n",
    "# stripped of surrounding whitespace\n",
    "traits = [trait.a.text.strip()\n",
    "          for trait in soup_ancestry.find_all(class_='trait')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw description: all of the text inside the content element\n",
    "description = content_ancestry.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Animal Companions TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Animals (Rentals/Sales) TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Arcane Schools TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Arcane Thesis TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Archetypes TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Armor 
TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Backgrounds TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bloodlines TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Champion Causes TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Champion Tenets TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classes TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Class Kits TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Class Sample Builds TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conditions TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Deities TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Doctrines TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Domains TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Druidic Orders TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Equipment TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Familiar Abilities TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feats TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hazards TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hunter's Edges TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Instincts TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Languages TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Muses TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Rackets TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Research Fields TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Rituals TODO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Rules TODO" ] }, { "cell_type": "markdown", "metadata": 
{},
   "source": [
    "## Shields TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Skills TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Skills (General) TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spells TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Traits TODO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Weapons TODO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base URL of a spell page; the numeric page ID is appended to it\n",
    "url_spells = 'https://2e.aonprd.com/Spells.aspx?ID='\n",
    "\n",
    "# Number of spells taken from https://2e.aonprd.com/Sources.aspx?ID=1\n",
    "# (used below as the ID of the single page that is fetched)\n",
    "spell_number = 343\n",
    "\n",
    "# Make the request to the aon2e.\n",
    "# The timeout keeps the cell from hanging forever on a stalled connection\n",
    "response_spells = requests.get(f'{url_spells}{spell_number}', timeout=30)\n",
    "\n",
    "# Fail loudly on an HTTP error instead of parsing an error page\n",
    "response_spells.raise_for_status()\n",
    "\n",
    "# Use BS4 html parser to generate soup\n",
    "soup_spells = bs(response_spells.text, 'html.parser')\n",
    "\n",
    "# Select only the content\n",
    "content = soup_spells.find(id='ctl00_MainContent_DetailedOutput')\n",
    "if content is None:\n",
    "    raise ValueError('Main content element not found on the spell page')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['1 minute']\n"
     ]
    }
   ],
   "source": [
    "## Name and level\n",
    "# The heading text is the spell name followed by 'Spell <level>'.\n",
    "# Anchoring on the trailing '<kind> <level>' keeps the spaces inside\n",
    "# multi-word names and copes with names that contain the word 'Spell'.\n",
    "# NOTE(review): 'Cantrip' and 'Focus' headings are assumed to share this\n",
    "# layout - confirm against the site\n",
    "heading = content.h1.text.strip()\n",
    "heading_match = re.match(r'^(.*?)\\s*(?:Spell|Cantrip|Focus)\\s*(\\d+)$', heading)\n",
    "if heading_match is None:\n",
    "    raise ValueError(f'Unrecognised spell heading: {heading!r}')\n",
    "# level stays a string of digits, exactly as before\n",
    "name, level = heading_match.groups()\n",
    "\n",
    "traits = [trait.a.text.strip() for trait in content.find_all(class_='trait')]\n",
    "source = content.find(class_='external-link').text.strip()\n",
    "traditions = [tradition.text.strip() for tradition\n",
    "              in content.find_all('a', href=re.compile('Tradition'))]\n",
    "\n",
    "## Actions sections\n",
    "# Icon alt text -> number of actions. Only icons carrying the 'actiondark'\n",
    "# class are counted (presumably each icon is present in a dark and a light\n",
    "# variant - confirm)\n",
    "action_icon_costs = {'Single Action': 1, 'Two Actions': 2, 'Three Actions': 3}\n",
    "# NOTE(review): 'focus' is not treated as a component - confirm whether it\n",
    "# should be added\n",
    "component_pattern = re.compile(r'\\b(?:material|somatic|verbal)\\b')\n",
    "\n",
    "actions = []\n",
    "# Every piece of text found on the Cast line, reused for the components\n",
    "cast_text = []\n",
    "cast_label = content.find('b', string='Cast')\n",
    "# Start at cast and then iterate over the next elements on the line\n",
    "for e in (cast_label.next_siblings if cast_label is not None else []):\n",
    "    if isinstance(e, str):\n",
    "        # Plain text node (bs4 strings subclass str), not an icon\n",
    "        cast_text.append(e)\n",
    "        # Drop the parenthesised part and any component words so that only\n",
    "        # a casting time or the word 'to' can remain\n",
    "        label = component_pattern.sub('', e.split(' (')[0])\n",
    "        label = label.strip().strip(',;').strip()\n",
    "        if label == 'to':\n",
    "            # If to exists it must be 1 to 3 actions\n",
    "            actions.append(2)\n",
    "        elif label:\n",
    "            # Literal casting time such as '1 minute'\n",
    "            actions.append(label)\n",
    "        continue\n",
    "    if e.name in ('br', 'hr'):\n",
    "        # End of the line or a horizontal rule reached: stop\n",
    "        break\n",
    "    if 'actiondark' in (e.get('class') or []):\n",
    "        cost = action_icon_costs.get(e.get('alt'))\n",
    "        if cost is not None:\n",
    "            actions.append(cost)\n",
    "    # Keep the text of any other tag so components wrapped in markup count\n",
    "    cast_text.append(e.get_text())\n",
    "\n",
    "## Components\n",
    "# Searched across the whole Cast line, so this also works when the line\n",
    "# starts with an action icon instead of text\n",
    "components = component_pattern.findall(' '.join(cast_text))\n",
    "\n",
    "## Range\n",
    "range_label = content.find('b', string='Range')\n",
    "range_text = range_label.next_sibling if range_label is not None else None\n",
    "if isinstance(range_text, str):\n",
    "    # If the spell has a range\n",
    "    spell_range = range_text.replace(';', '').strip()\n",
    "else:\n",
    "    # If the spell doesn't have a range\n",
    "    spell_range = None\n",
    "\n",
    "## Target\n",
    "# NOTE(review): this is whatever text node sits directly before the first\n",
    "# <hr>, so it is only the target when that is the last entry before the\n",
    "# rule - confirm\n",
    "divider = content.find('hr')\n",
    "before_divider = divider.previous_element if divider is not None else None\n",
    "target = before_divider.strip() if isinstance(before_divider, str) else None\n",
    "\n",
    "# save TODO\n",
    "\n",
    "# duration TODO\n",
    "\n",
    "## Description section\n",
    "# Only the first node after the <hr> is captured here\n",
    "description = divider.next_element if divider is not None else None\n",
    "print(actions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}