Working initial scrape
parent
6bd325b08c
commit
336fa74125
|
@ -0,0 +1,135 @@
|
||||||
|
# Created by .ignore support plugin (hsz.mobi)
|
||||||
|
### Python template
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# celery beat schedule file
|
||||||
|
celerybeat-schedule
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
### Example user template template
|
||||||
|
### Example user template
|
||||||
|
|
||||||
|
# IntelliJ project files
|
||||||
|
.idea
|
||||||
|
*.iml
|
||||||
|
out
|
||||||
|
gen
|
|
@ -0,0 +1,12 @@
|
||||||
|
[[source]]
|
||||||
|
name = "pypi"
|
||||||
|
url = "https://pypi.org/simple"
|
||||||
|
verify_ssl = true
|
||||||
|
|
||||||
|
[dev-packages]
|
||||||
|
|
||||||
|
[packages]
|
||||||
|
scrapy = "*"
|
||||||
|
|
||||||
|
[requires]
|
||||||
|
python_version = "3.8"
|
|
@ -0,0 +1,324 @@
|
||||||
|
{
|
||||||
|
"_meta": {
|
||||||
|
"hash": {
|
||||||
|
"sha256": "0248cce57c7a462bbed0c9ce5d4a84b92d0e146d394de44d71af073668ad2c1d"
|
||||||
|
},
|
||||||
|
"pipfile-spec": 6,
|
||||||
|
"requires": {
|
||||||
|
"python_version": "3.8"
|
||||||
|
},
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"name": "pypi",
|
||||||
|
"url": "https://pypi.org/simple",
|
||||||
|
"verify_ssl": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"attrs": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c",
|
||||||
|
"sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"
|
||||||
|
],
|
||||||
|
"version": "==19.3.0"
|
||||||
|
},
|
||||||
|
"automat": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33",
|
||||||
|
"sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"
|
||||||
|
],
|
||||||
|
"version": "==20.2.0"
|
||||||
|
},
|
||||||
|
"cffi": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:001bf3242a1bb04d985d63e138230802c6c8d4db3668fb545fb5005ddf5bb5ff",
|
||||||
|
"sha256:00789914be39dffba161cfc5be31b55775de5ba2235fe49aa28c148236c4e06b",
|
||||||
|
"sha256:028a579fc9aed3af38f4892bdcc7390508adabc30c6af4a6e4f611b0c680e6ac",
|
||||||
|
"sha256:14491a910663bf9f13ddf2bc8f60562d6bc5315c1f09c704937ef17293fb85b0",
|
||||||
|
"sha256:1cae98a7054b5c9391eb3249b86e0e99ab1e02bb0cc0575da191aedadbdf4384",
|
||||||
|
"sha256:2089ed025da3919d2e75a4d963d008330c96751127dd6f73c8dc0c65041b4c26",
|
||||||
|
"sha256:2d384f4a127a15ba701207f7639d94106693b6cd64173d6c8988e2c25f3ac2b6",
|
||||||
|
"sha256:337d448e5a725bba2d8293c48d9353fc68d0e9e4088d62a9571def317797522b",
|
||||||
|
"sha256:399aed636c7d3749bbed55bc907c3288cb43c65c4389964ad5ff849b6370603e",
|
||||||
|
"sha256:3b911c2dbd4f423b4c4fcca138cadde747abdb20d196c4a48708b8a2d32b16dd",
|
||||||
|
"sha256:3d311bcc4a41408cf5854f06ef2c5cab88f9fded37a3b95936c9879c1640d4c2",
|
||||||
|
"sha256:62ae9af2d069ea2698bf536dcfe1e4eed9090211dbaafeeedf5cb6c41b352f66",
|
||||||
|
"sha256:66e41db66b47d0d8672d8ed2708ba91b2f2524ece3dee48b5dfb36be8c2f21dc",
|
||||||
|
"sha256:675686925a9fb403edba0114db74e741d8181683dcf216be697d208857e04ca8",
|
||||||
|
"sha256:7e63cbcf2429a8dbfe48dcc2322d5f2220b77b2e17b7ba023d6166d84655da55",
|
||||||
|
"sha256:8a6c688fefb4e1cd56feb6c511984a6c4f7ec7d2a1ff31a10254f3c817054ae4",
|
||||||
|
"sha256:8c0ffc886aea5df6a1762d0019e9cb05f825d0eec1f520c51be9d198701daee5",
|
||||||
|
"sha256:95cd16d3dee553f882540c1ffe331d085c9e629499ceadfbda4d4fde635f4b7d",
|
||||||
|
"sha256:99f748a7e71ff382613b4e1acc0ac83bf7ad167fb3802e35e90d9763daba4d78",
|
||||||
|
"sha256:b8c78301cefcf5fd914aad35d3c04c2b21ce8629b5e4f4e45ae6812e461910fa",
|
||||||
|
"sha256:c420917b188a5582a56d8b93bdd8e0f6eca08c84ff623a4c16e809152cd35793",
|
||||||
|
"sha256:c43866529f2f06fe0edc6246eb4faa34f03fe88b64a0a9a942561c8e22f4b71f",
|
||||||
|
"sha256:cab50b8c2250b46fe738c77dbd25ce017d5e6fb35d3407606e7a4180656a5a6a",
|
||||||
|
"sha256:cef128cb4d5e0b3493f058f10ce32365972c554572ff821e175dbc6f8ff6924f",
|
||||||
|
"sha256:cf16e3cf6c0a5fdd9bc10c21687e19d29ad1fe863372b5543deaec1039581a30",
|
||||||
|
"sha256:e56c744aa6ff427a607763346e4170629caf7e48ead6921745986db3692f987f",
|
||||||
|
"sha256:e577934fc5f8779c554639376beeaa5657d54349096ef24abe8c74c5d9c117c3",
|
||||||
|
"sha256:f2b0fa0c01d8a0c7483afd9f31d7ecf2d71760ca24499c8697aeb5ca37dc090c"
|
||||||
|
],
|
||||||
|
"version": "==1.14.0"
|
||||||
|
},
|
||||||
|
"constantly": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35",
|
||||||
|
"sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d"
|
||||||
|
],
|
||||||
|
"version": "==15.1.0"
|
||||||
|
},
|
||||||
|
"cryptography": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:091d31c42f444c6f519485ed528d8b451d1a0c7bf30e8ca583a0cac44b8a0df6",
|
||||||
|
"sha256:18452582a3c85b96014b45686af264563e3e5d99d226589f057ace56196ec78b",
|
||||||
|
"sha256:1dfa985f62b137909496e7fc182dac687206d8d089dd03eaeb28ae16eec8e7d5",
|
||||||
|
"sha256:1e4014639d3d73fbc5ceff206049c5a9a849cefd106a49fa7aaaa25cc0ce35cf",
|
||||||
|
"sha256:22e91636a51170df0ae4dcbd250d318fd28c9f491c4e50b625a49964b24fe46e",
|
||||||
|
"sha256:3b3eba865ea2754738616f87292b7f29448aec342a7c720956f8083d252bf28b",
|
||||||
|
"sha256:651448cd2e3a6bc2bb76c3663785133c40d5e1a8c1a9c5429e4354201c6024ae",
|
||||||
|
"sha256:726086c17f94747cedbee6efa77e99ae170caebeb1116353c6cf0ab67ea6829b",
|
||||||
|
"sha256:844a76bc04472e5135b909da6aed84360f522ff5dfa47f93e3dd2a0b84a89fa0",
|
||||||
|
"sha256:88c881dd5a147e08d1bdcf2315c04972381d026cdb803325c03fe2b4a8ed858b",
|
||||||
|
"sha256:96c080ae7118c10fcbe6229ab43eb8b090fccd31a09ef55f83f690d1ef619a1d",
|
||||||
|
"sha256:a0c30272fb4ddda5f5ffc1089d7405b7a71b0b0f51993cb4e5dbb4590b2fc229",
|
||||||
|
"sha256:bb1f0281887d89617b4c68e8db9a2c42b9efebf2702a3c5bf70599421a8623e3",
|
||||||
|
"sha256:c447cf087cf2dbddc1add6987bbe2f767ed5317adb2d08af940db517dd704365",
|
||||||
|
"sha256:c4fd17d92e9d55b84707f4fd09992081ba872d1a0c610c109c18e062e06a2e55",
|
||||||
|
"sha256:d0d5aeaedd29be304848f1c5059074a740fa9f6f26b84c5b63e8b29e73dfc270",
|
||||||
|
"sha256:daf54a4b07d67ad437ff239c8a4080cfd1cc7213df57d33c97de7b4738048d5e",
|
||||||
|
"sha256:e993468c859d084d5579e2ebee101de8f5a27ce8e2159959b6673b418fd8c785",
|
||||||
|
"sha256:f118a95c7480f5be0df8afeb9a11bd199aa20afab7a96bcf20409b411a3a85f0"
|
||||||
|
],
|
||||||
|
"version": "==2.9.2"
|
||||||
|
},
|
||||||
|
"cssselect": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf",
|
||||||
|
"sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"
|
||||||
|
],
|
||||||
|
"version": "==1.1.0"
|
||||||
|
},
|
||||||
|
"hyperlink": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:4288e34705da077fada1111a24a0aa08bb1e76699c9ce49876af722441845654",
|
||||||
|
"sha256:ab4a308feb039b04f855a020a6eda3b18ca5a68e6d8f8c899cbe9e653721d04f"
|
||||||
|
],
|
||||||
|
"version": "==19.0.0"
|
||||||
|
},
|
||||||
|
"idna": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb",
|
||||||
|
"sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa"
|
||||||
|
],
|
||||||
|
"version": "==2.9"
|
||||||
|
},
|
||||||
|
"incremental": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f",
|
||||||
|
"sha256:7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3"
|
||||||
|
],
|
||||||
|
"version": "==17.5.0"
|
||||||
|
},
|
||||||
|
"lxml": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:06d4e0bbb1d62e38ae6118406d7cdb4693a3fa34ee3762238bcb96c9e36a93cd",
|
||||||
|
"sha256:0701f7965903a1c3f6f09328c1278ac0eee8f56f244e66af79cb224b7ef3801c",
|
||||||
|
"sha256:1f2c4ec372bf1c4a2c7e4bb20845e8bcf8050365189d86806bad1e3ae473d081",
|
||||||
|
"sha256:4235bc124fdcf611d02047d7034164897ade13046bda967768836629bc62784f",
|
||||||
|
"sha256:5828c7f3e615f3975d48f40d4fe66e8a7b25f16b5e5705ffe1d22e43fb1f6261",
|
||||||
|
"sha256:585c0869f75577ac7a8ff38d08f7aac9033da2c41c11352ebf86a04652758b7a",
|
||||||
|
"sha256:5d467ce9c5d35b3bcc7172c06320dddb275fea6ac2037f72f0a4d7472035cea9",
|
||||||
|
"sha256:63dbc21efd7e822c11d5ddbedbbb08cd11a41e0032e382a0fd59b0b08e405a3a",
|
||||||
|
"sha256:7bc1b221e7867f2e7ff1933165c0cec7153dce93d0cdba6554b42a8beb687bdb",
|
||||||
|
"sha256:8620ce80f50d023d414183bf90cc2576c2837b88e00bea3f33ad2630133bbb60",
|
||||||
|
"sha256:8a0ebda56ebca1a83eb2d1ac266649b80af8dd4b4a3502b2c1e09ac2f88fe128",
|
||||||
|
"sha256:90ed0e36455a81b25b7034038e40880189169c308a3df360861ad74da7b68c1a",
|
||||||
|
"sha256:95e67224815ef86924fbc2b71a9dbd1f7262384bca4bc4793645794ac4200717",
|
||||||
|
"sha256:afdb34b715daf814d1abea0317b6d672476b498472f1e5aacbadc34ebbc26e89",
|
||||||
|
"sha256:b4b2c63cc7963aedd08a5f5a454c9f67251b1ac9e22fd9d72836206c42dc2a72",
|
||||||
|
"sha256:d068f55bda3c2c3fcaec24bd083d9e2eede32c583faf084d6e4b9daaea77dde8",
|
||||||
|
"sha256:d5b3c4b7edd2e770375a01139be11307f04341ec709cf724e0f26ebb1eef12c3",
|
||||||
|
"sha256:deadf4df349d1dcd7b2853a2c8796593cc346600726eff680ed8ed11812382a7",
|
||||||
|
"sha256:df533af6f88080419c5a604d0d63b2c33b1c0c4409aba7d0cb6de305147ea8c8",
|
||||||
|
"sha256:e4aa948eb15018a657702fee0b9db47e908491c64d36b4a90f59a64741516e77",
|
||||||
|
"sha256:e5d842c73e4ef6ed8c1bd77806bf84a7cb535f9c0cf9b2c74d02ebda310070e1",
|
||||||
|
"sha256:ebec08091a22c2be870890913bdadd86fcd8e9f0f22bcb398abd3af914690c15",
|
||||||
|
"sha256:edc15fcfd77395e24543be48871c251f38132bb834d9fdfdad756adb6ea37679",
|
||||||
|
"sha256:f2b74784ed7e0bc2d02bd53e48ad6ba523c9b36c194260b7a5045071abbb1012",
|
||||||
|
"sha256:fa071559f14bd1e92077b1b5f6c22cf09756c6de7139370249eb372854ce51e6",
|
||||||
|
"sha256:fd52e796fee7171c4361d441796b64df1acfceb51f29e545e812f16d023c4bbc",
|
||||||
|
"sha256:fe976a0f1ef09b3638778024ab9fb8cde3118f203364212c198f71341c0715ca"
|
||||||
|
],
|
||||||
|
"markers": "python_version != '3.4'",
|
||||||
|
"version": "==4.5.0"
|
||||||
|
},
|
||||||
|
"parsel": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:4da4262ba4605573b6b72a5f557616a2fc9dee7a47a1efad562752a28d366723",
|
||||||
|
"sha256:74f8e9d3b345b14cb1416bd777a03982cde33a74d8b32e0c71e651d07d41d40a"
|
||||||
|
],
|
||||||
|
"version": "==1.5.2"
|
||||||
|
},
|
||||||
|
"protego": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4"
|
||||||
|
],
|
||||||
|
"version": "==0.1.16"
|
||||||
|
},
|
||||||
|
"pyasn1": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d",
|
||||||
|
"sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"
|
||||||
|
],
|
||||||
|
"version": "==0.4.8"
|
||||||
|
},
|
||||||
|
"pyasn1-modules": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e",
|
||||||
|
"sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"
|
||||||
|
],
|
||||||
|
"version": "==0.2.8"
|
||||||
|
},
|
||||||
|
"pycparser": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
|
||||||
|
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
|
||||||
|
],
|
||||||
|
"version": "==2.20"
|
||||||
|
},
|
||||||
|
"pydispatcher": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf",
|
||||||
|
"sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433"
|
||||||
|
],
|
||||||
|
"version": "==2.0.5"
|
||||||
|
},
|
||||||
|
"pyhamcrest": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316",
|
||||||
|
"sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"
|
||||||
|
],
|
||||||
|
"version": "==2.0.2"
|
||||||
|
},
|
||||||
|
"pyopenssl": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:621880965a720b8ece2f1b2f54ea2071966ab00e2970ad2ce11d596102063504",
|
||||||
|
"sha256:9a24494b2602aaf402be5c9e30a0b82d4a5c67528fe8fb475e3f3bc00dd69507"
|
||||||
|
],
|
||||||
|
"version": "==19.1.0"
|
||||||
|
},
|
||||||
|
"queuelib": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:42b413295551bdc24ed9376c1a2cd7d0b1b0fa4746b77b27ca2b797a276a1a17",
|
||||||
|
"sha256:ff43b5b74b9266f8df4232a8f768dc4d67281a271905e2ed4a3689d4d304cd02"
|
||||||
|
],
|
||||||
|
"version": "==1.5.0"
|
||||||
|
},
|
||||||
|
"scrapy": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:640aea0f9be9b055f5cfec5ab78ee88bb37a5be3809b138329bd2af51392ec7f",
|
||||||
|
"sha256:883ab2dcb6cafb22c7448616baeb5b4d7bedef57ca6a9a5f4e902cb1d6e7aeca"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==2.1.0"
|
||||||
|
},
|
||||||
|
"service-identity": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36",
|
||||||
|
"sha256:0858a54aabc5b459d1aafa8a518ed2081a285087f349fe3e55197989232e2e2d"
|
||||||
|
],
|
||||||
|
"version": "==18.1.0"
|
||||||
|
},
|
||||||
|
"six": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
|
||||||
|
"sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
|
||||||
|
],
|
||||||
|
"version": "==1.14.0"
|
||||||
|
},
|
||||||
|
"twisted": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:040eb6641125d2a9a09cf198ec7b83dd8858c6f51f6770325ed9959c00f5098f",
|
||||||
|
"sha256:147780b8caf21ba2aef3688628eaf13d7e7fe02a86747cd54bfaf2140538f042",
|
||||||
|
"sha256:158ddb80719a4813d292293ac44ba41d8b56555ed009d90994a278237ee63d2c",
|
||||||
|
"sha256:2182000d6ffc05d269e6c03bfcec8b57e20259ca1086180edaedec3f1e689292",
|
||||||
|
"sha256:25ffcf37944bdad4a99981bc74006d735a678d2b5c193781254fbbb6d69e3b22",
|
||||||
|
"sha256:3281d9ce889f7b21bdb73658e887141aa45a102baf3b2320eafcfba954fcefec",
|
||||||
|
"sha256:356e8d8dd3590e790e3dba4db139eb8a17aca64b46629c622e1b1597a4a92478",
|
||||||
|
"sha256:70952c56e4965b9f53b180daecf20a9595cf22b8d0935cd3bd664c90273c3ab2",
|
||||||
|
"sha256:7408c6635ee1b96587289283ebe90ee15dbf9614b05857b446055116bc822d29",
|
||||||
|
"sha256:7c547fd0215db9da8a1bc23182b309e84a232364cc26d829e9ee196ce840b114",
|
||||||
|
"sha256:894f6f3cfa57a15ea0d0714e4283913a5f2511dbd18653dd148eba53b3919797",
|
||||||
|
"sha256:94ac3d55a58c90e2075c5fe1853f2aa3892b73e3bf56395f743aefde8605eeaa",
|
||||||
|
"sha256:a58e61a2a01e5bcbe3b575c0099a2bcb8d70a75b1a087338e0c48dd6e01a5f15",
|
||||||
|
"sha256:c09c47ff9750a8e3aa60ad169c4b95006d455a29b80ad0901f031a103b2991cd",
|
||||||
|
"sha256:ca3a0b8c9110800e576d89b5337373e52018b41069bc879f12fa42b7eb2d0274",
|
||||||
|
"sha256:cd1dc5c85b58494138a3917752b54bb1daa0045d234b7c132c37a61d5483ebad",
|
||||||
|
"sha256:cdbc4c7f0cd7a2218b575844e970f05a1be1861c607b0e048c9bceca0c4d42f7",
|
||||||
|
"sha256:d267125cc0f1e8a0eed6319ba4ac7477da9b78a535601c49ecd20c875576433a",
|
||||||
|
"sha256:d72c55b5d56e176563b91d11952d13b01af8725c623e498db5507b6614fc1e10",
|
||||||
|
"sha256:d95803193561a243cb0401b0567c6b7987d3f2a67046770e1dccd1c9e49a9780",
|
||||||
|
"sha256:e92703bed0cc21d6cb5c61d66922b3b1564015ca8a51325bd164a5e33798d504",
|
||||||
|
"sha256:f058bd0168271de4dcdc39845b52dd0a4a2fecf5f1246335f13f5e96eaebb467",
|
||||||
|
"sha256:f3c19e5bd42bbe4bf345704ad7c326c74d3fd7a1b3844987853bef180be638d4"
|
||||||
|
],
|
||||||
|
"version": "==20.3.0"
|
||||||
|
},
|
||||||
|
"w3lib": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:847704b837b2b973cddef6938325d466628e6078266bc2e1f7ac49ba85c34823",
|
||||||
|
"sha256:8b1854fef570b5a5fc84d960e025debd110485d73fd283580376104762774315"
|
||||||
|
],
|
||||||
|
"version": "==1.21.0"
|
||||||
|
},
|
||||||
|
"zope.interface": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0103cba5ed09f27d2e3de7e48bb320338592e2fabc5ce1432cf33808eb2dfd8b",
|
||||||
|
"sha256:14415d6979356629f1c386c8c4249b4d0082f2ea7f75871ebad2e29584bd16c5",
|
||||||
|
"sha256:1ae4693ccee94c6e0c88a4568fb3b34af8871c60f5ba30cf9f94977ed0e53ddd",
|
||||||
|
"sha256:1b87ed2dc05cb835138f6a6e3595593fea3564d712cb2eb2de963a41fd35758c",
|
||||||
|
"sha256:269b27f60bcf45438e8683269f8ecd1235fa13e5411de93dae3b9ee4fe7f7bc7",
|
||||||
|
"sha256:27d287e61639d692563d9dab76bafe071fbeb26818dd6a32a0022f3f7ca884b5",
|
||||||
|
"sha256:39106649c3082972106f930766ae23d1464a73b7d30b3698c986f74bf1256a34",
|
||||||
|
"sha256:40e4c42bd27ed3c11b2c983fecfb03356fae1209de10686d03c02c8696a1d90e",
|
||||||
|
"sha256:461d4339b3b8f3335d7e2c90ce335eb275488c587b61aca4b305196dde2ff086",
|
||||||
|
"sha256:4f98f70328bc788c86a6a1a8a14b0ea979f81ae6015dd6c72978f1feff70ecda",
|
||||||
|
"sha256:558a20a0845d1a5dc6ff87cd0f63d7dac982d7c3be05d2ffb6322a87c17fa286",
|
||||||
|
"sha256:562dccd37acec149458c1791da459f130c6cf8902c94c93b8d47c6337b9fb826",
|
||||||
|
"sha256:5e86c66a6dea8ab6152e83b0facc856dc4d435fe0f872f01d66ce0a2131b7f1d",
|
||||||
|
"sha256:60a207efcd8c11d6bbeb7862e33418fba4e4ad79846d88d160d7231fcb42a5ee",
|
||||||
|
"sha256:645a7092b77fdbc3f68d3cc98f9d3e71510e419f54019d6e282328c0dd140dcd",
|
||||||
|
"sha256:6874367586c020705a44eecdad5d6b587c64b892e34305bb6ed87c9bbe22a5e9",
|
||||||
|
"sha256:74bf0a4f9091131de09286f9a605db449840e313753949fe07c8d0fe7659ad1e",
|
||||||
|
"sha256:7b726194f938791a6691c7592c8b9e805fc6d1b9632a833b9c0640828cd49cbc",
|
||||||
|
"sha256:8149ded7f90154fdc1a40e0c8975df58041a6f693b8f7edcd9348484e9dc17fe",
|
||||||
|
"sha256:8cccf7057c7d19064a9e27660f5aec4e5c4001ffcf653a47531bde19b5aa2a8a",
|
||||||
|
"sha256:911714b08b63d155f9c948da2b5534b223a1a4fc50bb67139ab68b277c938578",
|
||||||
|
"sha256:a5f8f85986197d1dd6444763c4a15c991bfed86d835a1f6f7d476f7198d5f56a",
|
||||||
|
"sha256:a744132d0abaa854d1aad50ba9bc64e79c6f835b3e92521db4235a1991176813",
|
||||||
|
"sha256:af2c14efc0bb0e91af63d00080ccc067866fb8cbbaca2b0438ab4105f5e0f08d",
|
||||||
|
"sha256:b054eb0a8aa712c8e9030065a59b5e6a5cf0746ecdb5f087cca5ec7685690c19",
|
||||||
|
"sha256:b0becb75418f8a130e9d465e718316cd17c7a8acce6fe8fe07adc72762bee425",
|
||||||
|
"sha256:b1d2ed1cbda2ae107283befd9284e650d840f8f7568cb9060b5466d25dc48975",
|
||||||
|
"sha256:ba4261c8ad00b49d48bbb3b5af388bb7576edfc0ca50a49c11dcb77caa1d897e",
|
||||||
|
"sha256:d1fe9d7d09bb07228650903d6a9dc48ea649e3b8c69b1d263419cc722b3938e8",
|
||||||
|
"sha256:d7804f6a71fc2dda888ef2de266727ec2f3915373d5a785ed4ddc603bbc91e08",
|
||||||
|
"sha256:da2844fba024dd58eaa712561da47dcd1e7ad544a257482392472eae1c86d5e5",
|
||||||
|
"sha256:dcefc97d1daf8d55199420e9162ab584ed0893a109f45e438b9794ced44c9fd0",
|
||||||
|
"sha256:dd98c436a1fc56f48c70882cc243df89ad036210d871c7427dc164b31500dc11",
|
||||||
|
"sha256:e74671e43ed4569fbd7989e5eecc7d06dc134b571872ab1d5a88f4a123814e9f",
|
||||||
|
"sha256:eb9b92f456ff3ec746cd4935b73c1117538d6124b8617bc0fe6fda0b3816e345",
|
||||||
|
"sha256:ebb4e637a1fb861c34e48a00d03cffa9234f42bef923aec44e5625ffb9a8e8f9",
|
||||||
|
"sha256:ef739fe89e7f43fb6494a43b1878a36273e5924869ba1d866f752c5812ae8d58",
|
||||||
|
"sha256:f40db0e02a8157d2b90857c24d89b6310f9b6c3642369852cdc3b5ac49b92afc",
|
||||||
|
"sha256:f68bf937f113b88c866d090fea0bc52a098695173fc613b055a17ff0cf9683b6",
|
||||||
|
"sha256:fb55c182a3f7b84c1a2d6de5fa7b1a05d4660d866b91dbf8d74549c57a1499e8"
|
||||||
|
],
|
||||||
|
"version": "==5.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"develop": {}
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Define here the models for your scraped items
|
||||||
|
#
|
||||||
|
# See documentation in:
|
||||||
|
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||||
|
|
||||||
|
import scrapy
|
||||||
|
|
||||||
|
|
||||||
|
class Creature(scrapy.Item):
|
||||||
|
# define the fields for your item here like:
|
||||||
|
name = scrapy.Field()
|
||||||
|
family = scrapy.Field()
|
||||||
|
level = scrapy.Field()
|
||||||
|
publisher = scrapy.Field()
|
||||||
|
source = scrapy.Field()
|
||||||
|
url = scrapy.Field()
|
|
@ -0,0 +1,103 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Define here the models for your spider middleware
|
||||||
|
#
|
||||||
|
# See documentation in:
|
||||||
|
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||||
|
|
||||||
|
from scrapy import signals
|
||||||
|
|
||||||
|
|
||||||
|
class CreaturesSpiderMiddleware:
|
||||||
|
# Not all methods need to be defined. If a method is not defined,
|
||||||
|
# scrapy acts as if the spider middleware does not modify the
|
||||||
|
# passed objects.
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_crawler(cls, crawler):
|
||||||
|
# This method is used by Scrapy to create your spiders.
|
||||||
|
s = cls()
|
||||||
|
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
|
||||||
|
return s
|
||||||
|
|
||||||
|
def process_spider_input(self, response, spider):
|
||||||
|
# Called for each response that goes through the spider
|
||||||
|
# middleware and into the spider.
|
||||||
|
|
||||||
|
# Should return None or raise an exception.
|
||||||
|
return None
|
||||||
|
|
||||||
|
def process_spider_output(self, response, result, spider):
|
||||||
|
# Called with the results returned from the Spider, after
|
||||||
|
# it has processed the response.
|
||||||
|
|
||||||
|
# Must return an iterable of Request, dict or Item objects.
|
||||||
|
for i in result:
|
||||||
|
yield i
|
||||||
|
|
||||||
|
def process_spider_exception(self, response, exception, spider):
|
||||||
|
# Called when a spider or process_spider_input() method
|
||||||
|
# (from other spider middleware) raises an exception.
|
||||||
|
|
||||||
|
# Should return either None or an iterable of Request, dict
|
||||||
|
# or Item objects.
|
||||||
|
pass
|
||||||
|
|
||||||
|
def process_start_requests(self, start_requests, spider):
|
||||||
|
# Called with the start requests of the spider, and works
|
||||||
|
# similarly to the process_spider_output() method, except
|
||||||
|
# that it doesn’t have a response associated.
|
||||||
|
|
||||||
|
# Must return only requests (not items).
|
||||||
|
for r in start_requests:
|
||||||
|
yield r
|
||||||
|
|
||||||
|
def spider_opened(self, spider):
|
||||||
|
spider.logger.info('Spider opened: %s' % spider.name)
|
||||||
|
|
||||||
|
|
||||||
|
class CreaturesDownloaderMiddleware:
|
||||||
|
# Not all methods need to be defined. If a method is not defined,
|
||||||
|
# scrapy acts as if the downloader middleware does not modify the
|
||||||
|
# passed objects.
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_crawler(cls, crawler):
|
||||||
|
# This method is used by Scrapy to create your spiders.
|
||||||
|
s = cls()
|
||||||
|
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
|
||||||
|
return s
|
||||||
|
|
||||||
|
def process_request(self, request, spider):
|
||||||
|
# Called for each request that goes through the downloader
|
||||||
|
# middleware.
|
||||||
|
|
||||||
|
# Must either:
|
||||||
|
# - return None: continue processing this request
|
||||||
|
# - or return a Response object
|
||||||
|
# - or return a Request object
|
||||||
|
# - or raise IgnoreRequest: process_exception() methods of
|
||||||
|
# installed downloader middleware will be called
|
||||||
|
return None
|
||||||
|
|
||||||
|
def process_response(self, request, response, spider):
|
||||||
|
# Called with the response returned from the downloader.
|
||||||
|
|
||||||
|
# Must either;
|
||||||
|
# - return a Response object
|
||||||
|
# - return a Request object
|
||||||
|
# - or raise IgnoreRequest
|
||||||
|
return response
|
||||||
|
|
||||||
|
def process_exception(self, request, exception, spider):
|
||||||
|
# Called when a download handler or a process_request()
|
||||||
|
# (from other downloader middleware) raises an exception.
|
||||||
|
|
||||||
|
# Must either:
|
||||||
|
# - return None: continue processing this exception
|
||||||
|
# - return a Response object: stops process_exception() chain
|
||||||
|
# - return a Request object: stops process_exception() chain
|
||||||
|
pass
|
||||||
|
|
||||||
|
def spider_opened(self, spider):
|
||||||
|
spider.logger.info('Spider opened: %s' % spider.name)
|
|
@ -0,0 +1,11 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Define your item pipelines here
|
||||||
|
#
|
||||||
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||||
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||||
|
|
||||||
|
|
||||||
|
class CreaturesPipeline:
|
||||||
|
def process_item(self, item, spider):
|
||||||
|
return item
|
|
@ -0,0 +1,90 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Scrapy settings for creatures project
|
||||||
|
#
|
||||||
|
# For simplicity, this file contains only settings considered important or
|
||||||
|
# commonly used. You can find more settings consulting the documentation:
|
||||||
|
#
|
||||||
|
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||||
|
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||||
|
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||||
|
|
||||||
|
BOT_NAME = 'creatures'
|
||||||
|
|
||||||
|
SPIDER_MODULES = ['creatures.spiders']
|
||||||
|
NEWSPIDER_MODULE = 'creatures.spiders'
|
||||||
|
|
||||||
|
|
||||||
|
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||||
|
#USER_AGENT = 'creatures (+http://www.yourdomain.com)'
|
||||||
|
|
||||||
|
# Obey robots.txt rules
|
||||||
|
ROBOTSTXT_OBEY = True
|
||||||
|
|
||||||
|
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||||
|
#CONCURRENT_REQUESTS = 32
|
||||||
|
|
||||||
|
# Configure a delay for requests for the same website (default: 0)
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||||
|
# See also autothrottle settings and docs
|
||||||
|
#DOWNLOAD_DELAY = 3
|
||||||
|
# The download delay setting will honor only one of:
|
||||||
|
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||||
|
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||||
|
|
||||||
|
# Disable cookies (enabled by default)
|
||||||
|
#COOKIES_ENABLED = False
|
||||||
|
|
||||||
|
# Disable Telnet Console (enabled by default)
|
||||||
|
#TELNETCONSOLE_ENABLED = False
|
||||||
|
|
||||||
|
# Override the default request headers:
|
||||||
|
#DEFAULT_REQUEST_HEADERS = {
|
||||||
|
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
|
# 'Accept-Language': 'en',
|
||||||
|
#}
|
||||||
|
|
||||||
|
# Enable or disable spider middlewares
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||||
|
#SPIDER_MIDDLEWARES = {
|
||||||
|
# 'creatures.middlewares.CreaturesSpiderMiddleware': 543,
|
||||||
|
#}
|
||||||
|
|
||||||
|
# Enable or disable downloader middlewares
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||||
|
#DOWNLOADER_MIDDLEWARES = {
|
||||||
|
# 'creatures.middlewares.CreaturesDownloaderMiddleware': 543,
|
||||||
|
#}
|
||||||
|
|
||||||
|
# Enable or disable extensions
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||||
|
#EXTENSIONS = {
|
||||||
|
# 'scrapy.extensions.telnet.TelnetConsole': None,
|
||||||
|
#}
|
||||||
|
|
||||||
|
# Configure item pipelines
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||||
|
#ITEM_PIPELINES = {
|
||||||
|
# 'creatures.pipelines.CreaturesPipeline': 300,
|
||||||
|
#}
|
||||||
|
|
||||||
|
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||||
|
#AUTOTHROTTLE_ENABLED = True
|
||||||
|
# The initial download delay
|
||||||
|
#AUTOTHROTTLE_START_DELAY = 5
|
||||||
|
# The maximum download delay to be set in case of high latencies
|
||||||
|
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||||
|
# The average number of requests Scrapy should be sending in parallel to
|
||||||
|
# each remote server
|
||||||
|
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||||
|
# Enable showing throttling stats for every response received:
|
||||||
|
#AUTOTHROTTLE_DEBUG = False
|
||||||
|
|
||||||
|
# Enable and configure HTTP caching (disabled by default)
|
||||||
|
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||||
|
#HTTPCACHE_ENABLED = True
|
||||||
|
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||||
|
#HTTPCACHE_DIR = 'httpcache'
|
||||||
|
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||||
|
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
|
@ -0,0 +1,4 @@
|
||||||
|
# This package will contain the spiders of your Scrapy project
|
||||||
|
#
|
||||||
|
# Please refer to the documentation for information on how to create and manage
|
||||||
|
# your spiders.
|
|
@ -0,0 +1,31 @@
|
||||||
|
import scrapy
|
||||||
|
import datetime as dt
|
||||||
|
from creatures.items import Creature
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
class CreateListSpider(scrapy.Spider):
|
||||||
|
name = "creatures"
|
||||||
|
start_urls = ["https://pf2.d20pfsrd.com/monster"]
|
||||||
|
|
||||||
|
def parse(self, response):
|
||||||
|
rows = response.xpath('//table[@id="archive-data-table"]'
|
||||||
|
'/tr')
|
||||||
|
creatures: List[Creature] = []
|
||||||
|
for row in rows:
|
||||||
|
name = row.xpath('.//td[1]/a/text()').get()
|
||||||
|
family = row.xpath('.//td[2]/a/text()').get()
|
||||||
|
level = row.xpath('.//td[3]/a/text()').get()
|
||||||
|
publisher = row.xpath('.//td[4]/a/text()').get()
|
||||||
|
source = row.xpath('.//td[5]/a/text()').get()
|
||||||
|
url = row.xpath('.//td[1]/a/@href').get()
|
||||||
|
creatures.append(Creature(name=name,
|
||||||
|
family=family,
|
||||||
|
level=level,
|
||||||
|
publisher=publisher,
|
||||||
|
source=source,
|
||||||
|
url=url,))
|
||||||
|
yield {
|
||||||
|
'date': dt.datetime.now(tz=dt.timezone.utc),
|
||||||
|
'data': creatures,
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Automatically created by: scrapy startproject
|
||||||
|
#
|
||||||
|
# For more information about the [deploy] section see:
|
||||||
|
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||||
|
|
||||||
|
[settings]
|
||||||
|
default = creatures.settings
|
||||||
|
|
||||||
|
[deploy]
|
||||||
|
#url = http://localhost:6800/
|
||||||
|
project = creatures
|
Loading…
Reference in New Issue