Copy weapons files
parent
7a08cd30af
commit
2481dcd413
|
@ -0,0 +1,7 @@
|
|||
# This directory contains the scripts that scrape weapon data from the [Archives of Nethys](https://2e.aonprd.com/Weapons.aspx)
|
||||
|
||||
## Steps to scrape the weapons
|
||||
1. Install the requirements from [the previous readme](../README.md)
|
||||
2. Generate the `melee.csv` and `ranged.csv` files by copying and pasting the tables from [the Archives of Nethys weapons page](https://2e.aonprd.com/Weapons.aspx), and save them in this directory
|
||||
3. Set the `number_of_weapons` variable to the number of weapons in the database (currently 83)
|
||||
4. Run the [Python file](scrape.py) or the [Jupyter Notebook](scrape.ipynb)
|
|
@ -0,0 +1,162 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Scrape data from aon2e and generate CSV files to import into SQLite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Dependencies\n",
|
||||
"import pandas as pd\n",
|
||||
"from bs4 import BeautifulSoup as bs\n",
|
||||
"import os\n",
|
||||
"from splinter import Browser\n",
|
||||
"\n",
|
||||
"# Setting up Selenium\n",
|
||||
"chrome_driver = os.path.join('..', 'chromedriver.exe')\n",
|
||||
"executable_path = {'executable_path': chrome_driver}\n",
|
||||
"browser = Browser('chrome', **executable_path, headless=False)\n",
|
||||
"\n",
|
||||
"# Pandas config\n",
|
||||
"pd.set_option('display.max_columns', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Base URL of a weapon page; the weapon ID is appended to it\n",
|
||||
"url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='\n",
|
||||
"\n",
|
||||
"# Number of weapons\n",
|
||||
"number_of_weapons = 83\n",
|
||||
"\n",
|
||||
"# Empty lists to store the scraped values\n",
|
||||
"name_list = []\n",
|
||||
"description_list = []\n",
|
||||
"\n",
|
||||
"print(f'Beginning Data Retrieval')\n",
|
||||
"print(f'------------------------')\n",
|
||||
"\n",
|
||||
"# Loop from 1 to the value in number_of_weapons\n",
|
||||
"for weapon in range(1, number_of_weapons+1):\n",
|
||||
" \n",
|
||||
" url = url_weapon + str(weapon)\n",
|
||||
" browser.visit(url)\n",
|
||||
" html = browser.html\n",
|
||||
" soup = bs(html, 'html.parser')\n",
|
||||
"\n",
|
||||
" # Select only the content section\n",
|
||||
" content = soup.find(id='ctl00_MainContent_DetailedOutput')\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Store the name (the description is read in the next step)\n",
|
||||
" name = content.find('a').text.strip()\n",
|
||||
" \n",
|
||||
" except:\n",
|
||||
" name = f'weapon: {weapon}'\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" description = content.find('hr').next.text.strip()\n",
|
||||
" \n",
|
||||
" except:\n",
|
||||
" description = content.find('hr').next.strip()\n",
|
||||
" \n",
|
||||
" print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')\n",
|
||||
"\n",
|
||||
" # Append values to our empty lists\n",
|
||||
" name_list.append(name) \n",
|
||||
" description_list.append(description)\n",
|
||||
"\n",
|
||||
"print(f'------------------------')\n",
|
||||
"print(f'Data Retrieval Complete')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the csv files whose tables were copied from https://2e.aonprd.com/Weapons.aspx\n",
|
||||
"melee = pd.read_csv('melee.csv')\n",
|
||||
"ranged = pd.read_csv('ranged.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = {'Name': name_list, 'description': description_list}\n",
|
||||
"scrape = pd.DataFrame(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"melee = melee.merge(scrape, how='left', on='Name')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ranged = ranged.merge(scrape, how='left', on='Name')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"melee.to_csv('melee.csv')\n",
|
||||
"ranged.to_csv('ranged.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Loading…
Reference in New Issue