Customise for backgrounds

merge-requests/34/head
Brian Haley 2019-08-16 18:40:48 -04:00
parent 6ad5787a9d
commit 9217748e15
1 changed file with 25 additions and 10 deletions

View File

@ -54,21 +54,36 @@
" content = soup.find(id='ctl00_MainContent_DetailedOutput')\n", " content = soup.find(id='ctl00_MainContent_DetailedOutput')\n",
"\n", "\n",
" try:\n", " try:\n",
" # Store the name and description\n", " # Store the name\n",
" name = content.find('h1', class_='title')\n", " name = content.find('h1', class_='title').a.text.strip()\n",
" name.span.decompose()\n", " name\n",
" name = name.text\n",
"\n", "\n",
" except:\n", " except:\n",
" name = f'name: {page}'\n", " name = f'name: {page}'\n",
"\n", "\n",
" try:\n", " try:\n",
" # Start the loop after the link to the book\n",
" start = content.find('a', class_='external-link').next_sibling\n",
" description = ''\n", " description = ''\n",
" start = content.find('hr')\n",
" for e in start.next_siblings:\n", " for e in start.next_siblings:\n",
" if isinstance(e, Tag):\n", " if isinstance(e, Tag):\n",
" description = description + e.text.strip()\n", " if e.name == 'br':\n",
" if e.next_sibling.name == 'br':\n",
" # If the next 2 elements are br skip this\n",
" # loop it will be handled in the elif\n",
" continue\n",
" elif e.previous_sibling.name == 'br':\n",
" # If this element and the previous are br\n",
" # and the next is not append \\n\n",
" description = description + ' \\n\\n '\n",
" else:\n",
" # If there is just one br append \\n\n",
" description = description + ' \\n '\n",
" else:\n",
" # Append the text inside the element\n",
" description = description + e.text.strip()\n",
" elif isinstance(e, NavigableString):\n", " elif isinstance(e, NavigableString):\n",
" # Since it is just a text append it\n",
" description = description + e\n", " description = description + e\n",
"\n", "\n",
" except:\n", " except:\n",
@ -97,10 +112,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# scrape the descriptions\n", "# scrape the descriptions\n",
"url_background = 'https://2e.aonprd.com/Equipment.aspx?ID='\n", "url_background = 'https://2e.aonprd.com/Backgrounds.aspx?ID='\n",
"number_background = 65 #65 to scrape\n", "number_background = 50 # number to scrape\n",
"\n", "\n",
"description_background = scrape_description(url_gear, number_gear)" "description_background = scrape_description(url_background, number_background)"
] ]
}, },
{ {
@ -109,7 +124,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"gear.to_csv('background.csv')" "description_background.to_csv('background.csv', encoding='UTF-8', index=False)"
] ]
}, },
{ {