Customise for backgrounds
							parent
							
								
									6ad5787a9d
								
							
						
					
					
						commit
						9217748e15
					
				|  | @ -54,21 +54,36 @@ | ||||||
|     "        content = soup.find(id='ctl00_MainContent_DetailedOutput')\n", |     "        content = soup.find(id='ctl00_MainContent_DetailedOutput')\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "        try:\n", |     "        try:\n", | ||||||
|     "            # Store the name and description\n", |     "            # Store the name\n", | ||||||
|     "            name = content.find('h1', class_='title')\n", |     "            name = content.find('h1', class_='title').a.text.strip()\n", | ||||||
|     "            name.span.decompose()\n", |     "            name\n", | ||||||
|     "            name = name.text\n", |  | ||||||
|     "\n", |     "\n", | ||||||
|     "        except:\n", |     "        except:\n", | ||||||
|     "            name = f'name: {page}'\n", |     "            name = f'name: {page}'\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "        try:\n", |     "        try:\n", | ||||||
|  |     "            # Start the loop after the link to the book\n", | ||||||
|  |     "            start = content.find('a', class_='external-link').next_sibling\n", | ||||||
|     "            description = ''\n", |     "            description = ''\n", | ||||||
|     "            start = content.find('hr')\n", |  | ||||||
|     "            for e in start.next_siblings:\n", |     "            for e in start.next_siblings:\n", | ||||||
|     "                if isinstance(e, Tag):\n", |     "                if isinstance(e, Tag):\n", | ||||||
|     "                    description = description + e.text.strip()\n", |     "                    if e.name == 'br':\n", | ||||||
|  |     "                        if e.next_sibling.name == 'br':\n", | ||||||
|  |     "                            # If the next 2 elements are br skip this\n", | ||||||
|  |     "                            # loop it will be handled in the elif\n", | ||||||
|  |     "                            continue\n", | ||||||
|  |     "                        elif e.previous_sibling.name == 'br':\n", | ||||||
|  |     "                            # If this element and the previous are br\n", | ||||||
|  |     "                            # and the next is not append \\n\n", | ||||||
|  |     "                            description = description + ' \\n\\n '\n", | ||||||
|  |     "                        else:\n", | ||||||
|  |     "                            # If there is just one br append \\n\n", | ||||||
|  |     "                            description =  description + ' \\n '\n", | ||||||
|  |     "                    else:\n", | ||||||
|  |     "                        # Append the text inside the element\n", | ||||||
|  |     "                        description = description + e.text.strip()\n", | ||||||
|     "                elif isinstance(e, NavigableString):\n", |     "                elif isinstance(e, NavigableString):\n", | ||||||
|  |     "                    # Since it is just a text append it\n", | ||||||
|     "                    description = description + e\n", |     "                    description = description + e\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "        except:\n", |     "        except:\n", | ||||||
|  | @ -97,10 +112,10 @@ | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# scrape the descriptions\n", |     "# scrape the descriptions\n", | ||||||
|     "url_background = 'https://2e.aonprd.com/Equipment.aspx?ID='\n", |     "url_background = 'https://2e.aonprd.com/Backgrounds.aspx?ID='\n", | ||||||
|     "number_background = 65 #65 to scrape\n", |     "number_background = 50 # number to scrape\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "description_background = scrape_description(url_gear, number_gear)" |     "description_background = scrape_description(url_background, number_background)" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|  | @ -109,7 +124,7 @@ | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "gear.to_csv('background.csv')" |     "description_background.to_csv('background.csv', encoding='UTF-8', index=False)" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue