#!/usr/bin/env python
# coding: utf-8
"""Scrape weapon names and descriptions from aon2e and merge them into CSVs.

Visits every weapon page on https://2e.aonprd.com (IDs 1..number_of_weapons),
collects the weapon name and its description text, then left-joins the
result onto the local ``melee.csv`` and ``ranged.csv`` files (matched on the
``Name`` column) and writes both files back in place.
"""

# In[1]:

# Dependencies
import os

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser


def _extract_name(content, weapon):
    """Return the text of the first link in *content*.

    Falls back to the placeholder ``'weapon: <id>'`` when the content
    container or the link is missing, so one malformed page does not stop
    the run.
    """
    link = content.find('a') if content is not None else None
    if link is None:
        return f'weapon: {weapon}'
    return link.text.strip()


def _extract_description(content):
    """Return the stripped text of the node that follows the first ``<hr>``.

    The node after the rule may be either a tag or a bare text node; both
    are handled. Returns an empty string when the container, the ``<hr>``
    or the following node is missing instead of raising.
    """
    rule = content.find('hr') if content is not None else None
    node = rule.next_element if rule is not None else None
    if node is None:
        return ''
    # NavigableString subclasses str, so a bare text node is stripped directly.
    if isinstance(node, str):
        return node.strip()
    return node.text.strip()


# Setting up Selenium
chrome_driver = os.path.join('..', 'chromedriver.exe')
executable_path = {'executable_path': chrome_driver}
browser = Browser('chrome', **executable_path, headless=False)

# Pandas config
pd.set_option('display.max_columns', None)

# In[2]:

# url that contains all the links
url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='

# Number of weapons
number_of_weapons = 83

# Empty lists to store the scraped values
name_list = []
description_list = []

print('Beginning Data Retrieval')
print('------------------------')

try:
    # Loop from 1 to the value in number_of_weapons
    for weapon in range(1, number_of_weapons + 1):
        url = url_weapon + str(weapon)
        browser.visit(url)
        html = browser.html
        soup = bs(html, 'html.parser')

        # Select only the content section
        content = soup.find(id='ctl00_MainContent_DetailedOutput')

        # Store the name and description
        name = _extract_name(content, weapon)
        description = _extract_description(content)

        print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')

        # Append values to our lists (kept in lockstep: one entry per page)
        name_list.append(name)
        description_list.append(description)
finally:
    # Always shut the browser down so the driver process is not leaked,
    # even when a page visit fails part-way through the run.
    browser.quit()

print('------------------------')
print('Data Retrieval Complete')

# In[3]:

# Directory of csv files which are taken from https://2e.aonprd.com/Weapons.aspx
melee = pd.read_csv('melee.csv')
ranged = pd.read_csv('ranged.csv')

# In[13]:

data = {'Name': name_list, 'description': description_list}
scrape = pd.DataFrame(data)

# In[16]:

# Left join keeps every row of the source table, scraped or not.
melee = melee.merge(scrape, how='left', on='Name')

# In[17]:

ranged = ranged.merge(scrape, how='left', on='Name')

# In[18]:

# NOTE(review): the input files are overwritten in place and the DataFrame
# index is written as an unnamed first column; re-running the script on its
# own output will therefore add extra columns. Confirm this is intended
# before changing the output format.
melee.to_csv('melee.csv')
ranged.to_csv('ranged.csv')

# In[ ]: