142 lines
3.9 KiB
Python
142 lines
3.9 KiB
Python
|
#!/usr/bin/env python
|
||
|
# coding: utf-8
|
||
|
|
||
|
# In[28]:
|
||
|
|
||
|
|
||
|
from bs4 import BeautifulSoup as bs
|
||
|
from bs4 import NavigableString, Tag
|
||
|
import csv
|
||
|
from dataclasses import dataclass
|
||
|
from os import path
|
||
|
import pandas as pd
|
||
|
import re
|
||
|
# import requests
|
||
|
from ruamel import yaml
|
||
|
from splinter import Browser
|
||
|
from typing import List # Dict
|
||
|
# Notebook-only convenience: ask Jupyter to autosave every 300 seconds.
try:
    get_ipython().run_line_magic('autosave', '300')
except NameError:
    # get_ipython is only injected by IPython/Jupyter. When this exported
    # notebook is run as a plain script there is nothing to autosave, so the
    # magic is skipped instead of aborting the run.
    pass
|
||
|
|
||
|
|
||
|
# ## Dataclasses
|
||
|
|
||
|
# In[2]:
|
||
|
|
||
|
|
||
|
@dataclass(frozen=True)
class Feat:
    """Immutable record of one feat lookup.

    Field order is part of the interface: instances are built positionally.
    """

    # Feat name exactly as it was read from the yaml data.
    name_yaml: str
    # Name read from the result page heading; '' when it could not be read.
    name_aon: str
    # Level read from the result page heading; -1 when it could not be read.
    level: int
    # Search URL that was visited for this feat.
    url_search: str
    # URL the browser ended up on after following the best-match button.
    url_aon: str
    # True when the page name equals the (cleaned) yaml name.
    match: bool
|
||
|
|
||
|
|
||
|
# ## Functions
|
||
|
|
||
|
# In[3]:
|
||
|
|
||
|
|
||
|
def feat_names(path: str) -> List[str]:
    """Return the names of all feats listed in a yaml file.

    Args:
        path: Location of the yaml file. The document must have a top-level
            ``feat`` list whose records each carry a ``name`` key.
            (Inside this function the parameter shadows the ``os.path``
            module imported at file level.)

    Returns:
        The ``name`` value of every record under ``feat``, in file order.
    """
    # Parse first and close the file before handing the data to pandas.
    with open(path, 'r', encoding='utf8') as file:
        data = yaml.safe_load(file)
    # pd.json_normalize is the supported entry point; the old
    # pd.io.json.json_normalize alias is deprecated and gone from newer pandas.
    feats = pd.json_normalize(data, 'feat')
    return feats['name'].to_list()
|
||
|
|
||
|
|
||
|
# In[4]:
|
||
|
|
||
|
|
||
|
def feat_level(name_yaml: str) -> Feat:
    """Search 2e.aonprd.com for a feat and return what the result page shows.

    A trailing parenthesised qualifier is stripped from ``name_yaml``, the
    site search is opened for the remaining name, every ``TableList``
    checkbox 0-12 except number 6 is unticked, and the "best match" button is
    followed. The feat name and level are then read from the
    ``<h1 class="title">`` heading of the page the browser lands on.

    Uses the module-level ``browser`` object, which must already be open.

    Args:
        name_yaml: Feat name as it appears in the yaml data.

    Returns:
        A ``Feat`` holding the original name, the name and level found on the
        page, both URLs, and whether the page name equals the cleaned name.
        ``name_aon`` is ``''`` and ``level`` is ``-1`` when the heading (or
        the relevant part of it) is missing or not in the expected form.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Raw string: '\(' in a normal literal is an invalid escape sequence.
    name_clean: str = re.sub(r' \([A-Z, a-z]*\)$', '', name_yaml)
    # Percent-encode the term so characters such as '&', '#' or '+' in a feat
    # name cannot corrupt the query string.
    url_search: str = (
        'https://2e.aonprd.com/Search.aspx?query=' + quote(name_clean, safe='')
    )
    browser.visit(url_search)

    # Leave only checkbox 6 ticked.
    # NOTE(review): presumably checkbox 6 is the "Feats" category - confirm.
    for number in range(13):
        if number != 6:
            browser.uncheck(f'ctl00$MainContent$TableList${number}')
    # NOTE(review): click_link_by_id appears to be deprecated in newer
    # splinter releases - confirm against the installed version.
    browser.click_link_by_id('ctl00_MainContent_btnBestMatch')
    url_aon: str = browser.url

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs(browser.html, 'html.parser')
    title = soup.find('h1', 'title')

    name_aon: str = ''
    level: int = -1
    if title is not None:
        if title.a is not None:
            name_aon = title.a.text
        try:
            # The span text is split on spaces and the second token is the level.
            level = int(title.span.text.split(' ')[1].strip())
        except (AttributeError, IndexError, ValueError):
            # No span, no second token, or a non-numeric token: keep the
            # sentinel rather than aborting the whole scrape.
            level = -1

    match: bool = name_aon == name_clean
    return Feat(name_yaml, name_aon, level, url_search, url_aon, match)
|
||
|
|
||
|
|
||
|
# ## Execution
|
||
|
|
||
|
# In[5]:
|
||
|
|
||
|
|
||
|
# Scrape the level of every feat named in the yaml data.
names = feat_names(path.join('..', 'data', 'yaml', 'feats.yaml'))
feats = []
counter: int = 0  # stays 0 when there is nothing to scrape
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
# try/finally: shut the headless browser down even when a lookup raises
# part-way through, instead of leaving the process running.
try:
    print('############################################################')
    print('Beginning level scraping')
    print('############################################################')
    for counter, name in enumerate(names, start=1):
        print(f'Name: {name} {counter}/{len(names)}')
        # Append before printing so a partial run still keeps this result.
        feats.append(feat_level(name))
        print(f'AON name: {feats[-1].name_aon}')
        print(f'Level: {feats[-1].level}')
        print(f'Match: {feats[-1].match}')
        print(f'AON url: {feats[-1].url_aon}')
        print(f'Search url: {feats[-1].url_search}')
        print('############################################################')
    print('End of level scraping')
finally:
    browser.quit()
|
||
|
|
||
|
|
||
|
# In[21]:
|
||
|
|
||
|
|
||
|
# Resume an interrupted run: only names that have no entry in feats yet.
names2 = names[len(feats):]
if names2:
    counter2: int = 0
    browser = Browser('chrome', **executable_path, headless=True)
    # try/finally: shut the headless browser down even when a lookup raises
    # part-way through, instead of leaving the process running.
    try:
        print('############################################################')
        print('Beginning level scraping')
        print('############################################################')
        for counter2, name in enumerate(names2, start=1):
            print(f'Name: {name} {counter2}/{len(names2)}')
            # Append before printing so a partial run still keeps this result.
            feats.append(feat_level(name))
            print(f'AON name: {feats[-1].name_aon}')
            print(f'Level: {feats[-1].level}')
            print(f'Match: {feats[-1].match}')
            print(f'AON url: {feats[-1].url_aon}')
            print(f'Search url: {feats[-1].url_search}')
            print('############################################################')
        print('End of level scraping')
    finally:
        browser.quit()
|
||
|
|
||
|
|
||
|
# In[31]:
|
||
|
|
||
|
|
||
|
# Write every scraped feat to feat_level.csv: a header row, then one row per feat.
# encoding='utf8' matches how the yaml input is read, so names with non-ASCII
# characters cannot fail under a narrower locale default encoding.
with open('feat_level.csv', mode='w', newline='', encoding='utf8') as csv_file:
    # Only the csv module itself is imported, so the writer must be
    # referenced as csv.writer (a bare writer is undefined).
    csv_writer = csv.writer(
        csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    csv_writer.writerow(
        ['name', 'name_aon', 'level', 'url_search', 'url_aon', 'match']
    )
    for feat in feats:
        csv_writer.writerow([
            feat.name_yaml,
            feat.name_aon,
            feat.level,
            feat.url_search,
            feat.url_aon,
            feat.match,
        ])
|
||
|
|
||
|
|
||
|
# In[ ]:
|
||
|
|
||
|
|
||
|
|
||
|
|