pathfinder-2-sqlite-MIRROR/deprecated/deprecated-old-scripts-dir/feat_level.py

#!/usr/bin/env python
# coding: utf-8
# In[28]:
from bs4 import BeautifulSoup as bs
from bs4 import NavigableString, Tag
import csv
from dataclasses import dataclass
from os import path
import pandas as pd
import re
# import requests
from ruamel import yaml
from splinter import Browser
from typing import List # Dict
get_ipython().run_line_magic('autosave', '300')
# ## Dataclasses
# In[2]:
@dataclass(frozen=True)
class Feat:
    """One scraped feat: the YAML name, the name and level found on AoN, and the URLs used."""
    name_yaml: str
    name_aon: str
    level: int
    url_search: str
    url_aon: str
    match: bool
# ## Functions
# In[3]:
def feat_names(path: str) -> List[str]:
    """Accepts a string path to a YAML file and returns a list of the feat names."""
    with open(path, 'r', encoding='utf8') as file:
        feats = pd.io.json.json_normalize(yaml.safe_load(file), 'feat')
    return feats['name'].to_list()
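# In[ ]:
# Minimal sanity-check sketch, assuming feats.yaml has a top-level 'feat' list of
# mappings with a 'name' key (inferred from the json_normalize record path above;
# the real schema and the sample feat names below are assumptions).
_sample_yaml = (
    'feat:\n'
    '  - name: Power Attack\n'
    '  - name: Sudden Charge (Fighter)\n'
)
print(pd.io.json.json_normalize(yaml.safe_load(_sample_yaml), 'feat')['name'].to_list())
# Under that assumed schema this prints: ['Power Attack', 'Sudden Charge (Fighter)']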
# In[4]:
def feat_level(name_yaml: str) -> Feat:
    """Accepts the name of a feat and returns a Feat dataclass."""
    # Strip a trailing parenthetical such as " (Fighter)" before searching.
    name_clean: str = re.sub(r' \([A-Z, a-z]*\)$', '', name_yaml)
    url_search: str = 'https://2e.aonprd.com/Search.aspx?query=' + name_clean
    browser.visit(url_search)
    # Uncheck every search-category checkbox on the AoN form except number 6,
    # then jump straight to the best match within the remaining category.
    for number in range(0, 13):
        if number != 6:
            browser.uncheck(f'ctl00$MainContent$TableList${number}')
    browser.click_link_by_id('ctl00_MainContent_btnBestMatch')
    url_aon: str = browser.url
    soup: bs = bs(browser.html, 'html.parser')
    try:
        name_aon: str = soup.find('h1', 'title').a.text
    except AttributeError:
        name_aon = ''
    match: bool = name_aon == name_clean
    try:
        # The title span reads e.g. "Feat 2"; the second token is the level.
        level: int = int(soup.find('h1', 'title').span.text.split(' ')[1].strip())
    except (AttributeError, IndexError, ValueError):
        level = -1
    return Feat(name_yaml, name_aon, level, url_search, url_aon, match)
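# In[ ]:
# Minimal sketch of the level-parsing step above, assuming the AoN title span
# holds text like 'Feat 2' (an assumption about the site's markup): the second
# whitespace-separated token is parsed as the integer level.
_example_title_span = 'Feat 2'
print(int(_example_title_span.split(' ')[1].strip()))  # -> 2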
# ## Execution
# In[5]:
names = feat_names(path.join('..', 'data', 'yaml', 'feats.yaml'))
feats = []
counter: int = 0
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
print('############################################################')
print('Beginning level scraping')
print('############################################################')
for name in names:
    counter += 1
    print(f'Name: {name} {counter}/{len(names)}')
    feats.append(feat_level(name))
    print(f'AON name: {feats[-1].name_aon}')
    print(f'Level: {feats[-1].level}')
    print(f'Match: {feats[-1].match}')
    print(f'AON url: {feats[-1].url_aon}')
    print(f'Search url: {feats[-1].url_search}')
print('############################################################')
print('End of level scraping')
browser.quit()
# In[21]:
# Second pass for resuming after an interrupted run: scrape only the names that
# do not yet have an entry in feats, using a fresh browser session.
names2 = names[len(feats):]
if names2:
    counter2: int = 0
    browser = Browser('chrome', **executable_path, headless=True)
    print('############################################################')
    print('Beginning level scraping (remaining names)')
    print('############################################################')
    for name in names2:
        counter2 += 1
        print(f'Name: {name} {counter2}/{len(names2)}')
        feats.append(feat_level(name))
        print(f'AON name: {feats[-1].name_aon}')
        print(f'Level: {feats[-1].level}')
        print(f'Match: {feats[-1].match}')
        print(f'AON url: {feats[-1].url_aon}')
        print(f'Search url: {feats[-1].url_search}')
    print('############################################################')
    print('End of level scraping')
    browser.quit()
# In[31]:
with open('feat_level.csv', mode='w', newline='', encoding='utf8') as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(['name', 'name_aon', 'level', 'url_search', 'url_aon', 'match'])
    for feat in feats:
        csv_writer.writerow([feat.name_yaml, feat.name_aon, feat.level, feat.url_search, feat.url_aon, feat.match])
# In[ ]:
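# Optional spot check, assuming pandas parses the True/False values in the
# 'match' column back to booleans when reloading the CSV written above: list
# rows whose scraped AoN name did not match the cleaned YAML name.
mismatches = pd.read_csv('feat_level.csv')
print(mismatches[~mismatches['match']][['name', 'name_aon', 'level', 'url_aon']])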