Create scrape.py
parent
974b43df62
commit
cda0ad991d
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# # Scrape data from aon2e and generate csvs to import in to sqlite
|
||||
|
||||
# In[1]:
|
||||
|
||||
|
||||
# Dependencies
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import os
|
||||
from splinter import Browser
|
||||
|
||||
# Setting up Selenium
|
||||
chrome_driver = os.path.join('..', 'chromedriver.exe')
|
||||
executable_path = {'executable_path': chrome_driver}
|
||||
browser = Browser('chrome', **executable_path, headless=False)
|
||||
|
||||
# Pandas config
|
||||
pd.set_option('display.max_columns', None)
|
||||
|
||||
|
||||
# In[2]:
|
||||
|
||||
|
||||
# url that contains all the links
|
||||
url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='
|
||||
|
||||
# Number of weapons
|
||||
number_of_weapons = 83
|
||||
|
||||
# Empty lists to store the scraped values
|
||||
name_list = []
|
||||
description_list = []
|
||||
|
||||
print(f'Beginning Data Retrieval')
|
||||
print(f'------------------------')
|
||||
|
||||
# Loop from 1 to the value in weapon_number
|
||||
for weapon in range(1, number_of_weapons+1):
|
||||
|
||||
url = url_weapon + str(weapon)
|
||||
browser.visit(url)
|
||||
html = browser.html
|
||||
soup = bs(html, 'html.parser')
|
||||
|
||||
# Select only the content section
|
||||
content = soup.find(id='ctl00_MainContent_DetailedOutput')
|
||||
|
||||
try:
|
||||
# Store the name and description
|
||||
name = content.find('a').text.strip()
|
||||
|
||||
except:
|
||||
name = f'weapon: {weapon}'
|
||||
|
||||
try:
|
||||
description = content.find('hr').next.text.strip()
|
||||
|
||||
except:
|
||||
description = content.find('hr').next.strip()
|
||||
|
||||
print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')
|
||||
|
||||
# Append values to our empty lists
|
||||
name_list.append(name)
|
||||
description_list.append(description)
|
||||
|
||||
print(f'------------------------')
|
||||
print(f'Data Retrieval Complete')
|
||||
|
||||
|
||||
# In[3]:
|
||||
|
||||
|
||||
# Directory of csv files which are taken from https://2e.aonprd.com/Weapons.aspx
|
||||
melee = pd.read_csv('melee.csv')
|
||||
ranged = pd.read_csv('ranged.csv')
|
||||
|
||||
|
||||
# In[13]:
|
||||
|
||||
|
||||
data = {'Name': name_list, 'description': description_list}
|
||||
scrape = pd.DataFrame(data)
|
||||
|
||||
|
||||
# In[16]:
|
||||
|
||||
|
||||
melee = melee.merge(scrape, how='left', on='Name')
|
||||
|
||||
|
||||
# In[17]:
|
||||
|
||||
|
||||
ranged = ranged.merge(scrape, how='left', on='Name')
|
||||
|
||||
|
||||
# In[18]:
|
||||
|
||||
|
||||
melee.to_csv('melee.csv')
|
||||
ranged.to_csv('ranged.csv')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue