Customise for backgrounds
parent
6ad5787a9d
commit
9217748e15
|
@ -54,21 +54,36 @@
|
|||
" content = soup.find(id='ctl00_MainContent_DetailedOutput')\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Store the name and description\n",
|
||||
" name = content.find('h1', class_='title')\n",
|
||||
" name.span.decompose()\n",
|
||||
" name = name.text\n",
|
||||
" # Store the name\n",
|
||||
" name = content.find('h1', class_='title').a.text.strip()\n",
|
||||
" name\n",
|
||||
"\n",
|
||||
" except:\n",
|
||||
" name = f'name: {page}'\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Start the loop after the link to the book\n",
|
||||
" start = content.find('a', class_='external-link').next_sibling\n",
|
||||
" description = ''\n",
|
||||
" start = content.find('hr')\n",
|
||||
" for e in start.next_siblings:\n",
|
||||
" if isinstance(e, Tag):\n",
|
||||
" description = description + e.text.strip()\n",
|
||||
" if e.name == 'br':\n",
|
||||
" if e.next_sibling.name == 'br':\n",
|
||||
"                        # If this element and the next are both br, skip this\n",
|
||||
"                        # iteration; the pair is handled by the elif below\n",
|
||||
" continue\n",
|
||||
" elif e.previous_sibling.name == 'br':\n",
|
||||
" # If this element and the previous are br\n",
|
||||
"                        # and the next is not, append a blank line (\\n\\n)\n",
|
||||
" description = description + ' \\n\\n '\n",
|
||||
" else:\n",
|
||||
" # If there is just one br append \\n\n",
|
||||
" description = description + ' \\n '\n",
|
||||
" else:\n",
|
||||
" # Append the text inside the element\n",
|
||||
" description = description + e.text.strip()\n",
|
||||
" elif isinstance(e, NavigableString):\n",
|
||||
" # Since it is just a text append it\n",
|
||||
" description = description + e\n",
|
||||
"\n",
|
||||
" except:\n",
|
||||
|
@ -97,10 +112,10 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# scrape the descriptions\n",
|
||||
"url_background = 'https://2e.aonprd.com/Equipment.aspx?ID='\n",
|
||||
"number_background = 65 #65 to scrape\n",
|
||||
"url_background = 'https://2e.aonprd.com/Backgrounds.aspx?ID='\n",
|
||||
"number_background = 50 # number to scrape\n",
|
||||
"\n",
|
||||
"description_background = scrape_description(url_gear, number_gear)"
|
||||
"description_background = scrape_description(url_background, number_background)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -109,7 +124,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gear.to_csv('background.csv')"
|
||||
"description_background.to_csv('background.csv', encoding='UTF-8', index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue