Customise for backgrounds

merge-requests/34/head
Brian Haley 2019-08-16 18:40:48 -04:00
parent 6ad5787a9d
commit 9217748e15
1 changed files with 25 additions and 10 deletions

View File

@ -54,21 +54,36 @@
" content = soup.find(id='ctl00_MainContent_DetailedOutput')\n",
"\n",
" try:\n",
" # Store the name and description\n",
" name = content.find('h1', class_='title')\n",
" name.span.decompose()\n",
" name = name.text\n",
" # Store the name\n",
" name = content.find('h1', class_='title').a.text.strip()\n",
" name\n",
"\n",
" except:\n",
" name = f'name: {page}'\n",
"\n",
" try:\n",
" # Start the loop after the link to the book\n",
" start = content.find('a', class_='external-link').next_sibling\n",
" description = ''\n",
" start = content.find('hr')\n",
" for e in start.next_siblings:\n",
" if isinstance(e, Tag):\n",
" description = description + e.text.strip()\n",
" if e.name == 'br':\n",
" if e.next_sibling.name == 'br':\n",
" # If this element and the next one are both br, skip this\n",
" # iteration; the pair is handled by the elif on the next br\n",
" continue\n",
" elif e.previous_sibling.name == 'br':\n",
" # If this element and the previous one are br\n",
" # and the next is not, append a paragraph break (\\n\\n)\n",
" description = description + ' \\n\\n '\n",
" else:\n",
" # If there is just one br append \\n\n",
" description = description + ' \\n '\n",
" else:\n",
" # Append the text inside the element\n",
" description = description + e.text.strip()\n",
" elif isinstance(e, NavigableString):\n",
" # Since it is just text, append it as-is\n",
" description = description + e\n",
"\n",
" except:\n",
@ -97,10 +112,10 @@
"outputs": [],
"source": [
"# scrape the descriptions\n",
"url_background = 'https://2e.aonprd.com/Equipment.aspx?ID='\n",
"number_background = 65 #65 to scrape\n",
"url_background = 'https://2e.aonprd.com/Backgrounds.aspx?ID='\n",
"number_background = 50 # number to scrape\n",
"\n",
"description_background = scrape_description(url_gear, number_gear)"
"description_background = scrape_description(url_background, number_background)"
]
},
{
@ -109,7 +124,7 @@
"metadata": {},
"outputs": [],
"source": [
"gear.to_csv('background.csv')"
"description_background.to_csv('background.csv', encoding='UTF-8', index=False)"
]
},
{