# pathfinder-2-sqlite-MIRROR/src/weapons/scrape.py

#!/usr/bin/env python
# coding: utf-8

# # Scrape data from aon2e and generate CSVs to import into SQLite

# In[1]:


# Dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
import os
from splinter import Browser

# Pandas display: put no limit on the number of columns shown
pd.set_option('display.max_columns', None)

# Browser automation (splinter driving Chrome): the chromedriver binary is
# looked up one directory above the current working directory, and the
# browser is started with a visible window (headless=False).
chrome_driver = os.path.join(os.pardir, 'chromedriver.exe')
executable_path = dict(executable_path=chrome_driver)
browser = Browser('chrome', headless=False, **executable_path)


# In[2]:


# Base URL of a single weapon page; the numeric weapon ID is appended to it
url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='

# Number of weapons: page IDs 1..number_of_weapons are visited
number_of_weapons = 83

# Parallel lists filled by the loop below (one entry per visited page);
# they are turned into a DataFrame later in the script
name_list = []
description_list = []

print('Beginning Data Retrieval')
print('------------------------')

try:
    # Loop from 1 to the value in number_of_weapons (inclusive)
    for weapon in range(1, number_of_weapons + 1):

        url = url_weapon + str(weapon)
        browser.visit(url)
        html = browser.html
        soup = bs(html, 'html.parser')

        # Select only the content section; find() returns None when the
        # page does not contain that element
        content = soup.find(id='ctl00_MainContent_DetailedOutput')

        # Name: text of the first link inside the content section, with a
        # placeholder built from the page ID when it cannot be found
        link = content.find('a') if content is not None else None
        if link is not None:
            name = link.text.strip()
        else:
            name = f'weapon: {weapon}'

        # Description: whatever node immediately follows the first <hr>.
        # That node may be a tag or a bare text node (a str subclass), so
        # both are handled explicitly instead of relying on a bare except.
        rule = content.find('hr') if content is not None else None
        node = rule.next_element if rule is not None else None
        if node is None:
            # No content section / no <hr> / nothing after it: record an
            # empty description rather than aborting the whole run
            description = ''
        elif isinstance(node, str):
            description = node.strip()
        else:
            description = node.text.strip()

        print(f'Processing Weapon {weapon} of {number_of_weapons} | {name}')

        # Append values to the result lists (kept index-aligned)
        name_list.append(name)
        description_list.append(description)
finally:
    # Always release the Chrome window / driver process, even when a page
    # visit fails part-way through the run
    browser.quit()

print('------------------------')
print('Data Retrieval Complete')


# In[3]:


# Weapon tables taken from https://2e.aonprd.com/Weapons.aspx, read from the
# current working directory. Both must contain a 'Name' column (merge key).
melee = pd.read_csv('melee.csv')
ranged = pd.read_csv('ranged.csv')


# In[13]:


# One row per scraped page: the weapon name and its description text
data = {'Name': name_list, 'description': description_list}
scrape = pd.DataFrame(data)


# In[16]:


# Left merge keeps every row of the weapon table; rows whose 'Name' has no
# scraped counterpart get a missing description
melee = melee.merge(scrape, how='left', on='Name')


# In[17]:


ranged = ranged.merge(scrape, how='left', on='Name')


# In[18]:


# The input files are overwritten in place. index=False keeps the RangeIndex
# out of the output; otherwise it is written as an unnamed first column and
# read back as 'Unnamed: 0' the next time this script loads the same files.
melee.to_csv('melee.csv', index=False)
ranged.to_csv('ranged.csv', index=False)


# In[ ]:
Create scrape.py 2019-08-09 01:34:26 -04:00			`#!/usr/bin/env python`
			`# coding: utf-8`

			`# # Scrape data from aon2e and generate csvs to import in to sqlite`

			`# In[1]:`


			`# Dependencies`
			`import pandas as pd`
			`from bs4 import BeautifulSoup as bs`
			`import os`
			`from splinter import Browser`

			`# Setting up Selenium`
			`chrome_driver = os.path.join('..', 'chromedriver.exe')`
			`executable_path = {'executable_path': chrome_driver}`
			`browser = Browser('chrome', **executable_path, headless=False)`

			`# Pandas config`
			`pd.set_option('display.max_columns', None)`


			`# In[2]:`


			`# url that contains all the links`
			`url_weapon = 'https://2e.aonprd.com/Weapons.aspx?ID='`

			`# Number of weapons`
			`number_of_weapons = 83`

			`# Empty lists to store the scraped values`
			`name_list = []`
			`description_list = []`

			`print(f'Beginning Data Retrieval')`
			`print(f'------------------------')`

			`# Loop from 1 to the value in weapon_number`
			`for weapon in range(1, number_of_weapons+1):`

			`url = url_weapon + str(weapon)`
			`browser.visit(url)`
			`html = browser.html`
			`soup = bs(html, 'html.parser')`

			`# Select only the content section`
			`content = soup.find(id='ctl00_MainContent_DetailedOutput')`

			`try:`
			`# Store the name and description`
			`name = content.find('a').text.strip()`

			`except:`
			`name = f'weapon: {weapon}'`

			`try:`
			`description = content.find('hr').next.text.strip()`

			`except:`
			`description = content.find('hr').next.strip()`

			`print(f'Processing Weapon {weapon} of {number_of_weapons} \| {name}')`

			`# Append values to our empty lists`
			`name_list.append(name)`
			`description_list.append(description)`

			`print(f'------------------------')`
			`print(f'Data Retrieval Complete')`


			`# In[3]:`


			`# Directory of csv files which are taken from https://2e.aonprd.com/Weapons.aspx`
			`melee = pd.read_csv('melee.csv')`
			`ranged = pd.read_csv('ranged.csv')`


			`# In[13]:`


			`data = {'Name': name_list, 'description': description_list}`
			`scrape = pd.DataFrame(data)`


			`# In[16]:`


			`melee = melee.merge(scrape, how='left', on='Name')`


			`# In[17]:`


			`ranged = ranged.merge(scrape, how='left', on='Name')`


			`# In[18]:`


			`melee.to_csv('melee.csv')`
			`ranged.to_csv('ranged.csv')`


			`# In[ ]:`