diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..4a98e6b --- /dev/null +++ b/Pipfile @@ -0,0 +1,25 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +beautifulsoup4 = "~=4.12" +selenium = "~=4.17" +tenacity = "~=8.2" +fire = "~=0.5" +jsonlines = "~=4.0" +pandas = "~=2.0" +requests = "~=2.31" +chardet = "~=5.2" +html5lib = "~=1.1" +xmltodict = "~=0.13" + +[dev-packages] +black = "24.2.0" + +[requires] +python_version = "3.12" + +[scripts] +format = "black ." diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..e136217 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,504 @@ +{ + "_meta": { + "hash": { + "sha256": "9e74674769f104a01d72eec7f149b84f2949d7efa69cb3789d76f722a9ee47e0" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.12" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "attrs": { + "hashes": [ + "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", + "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", + "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" + ], + "index": "pypi", + "markers": "python_full_version >= '3.6.0'", + "version": "==4.12.3" + }, + "certifi": { + "hashes": [ + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.2.2" + }, + "chardet": { + "hashes": [ + "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", + "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970" + ], + "index": "pypi", + "markers": 
"python_version >= '3.7'", + "version": "==5.2.0" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + 
"sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", 
+ "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + 
"sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "fire": { + "hashes": [ + "sha256:a6b0d49e98c8963910021f92bba66f65ab440da2982b78eb1bbf95a0a34aacc6" + ], + "index": "pypi", + "version": "==0.5.0" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "html5lib": { + "hashes": [ + "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d", + "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f" + ], + "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.1" + }, + "idna": { + "hashes": [ + 
"sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", + "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" + ], + "markers": "python_version >= '3.5'", + "version": "==3.6" + }, + "jsonlines": { + "hashes": [ + "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", + "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==4.0.0" + }, + "numpy": { + "hashes": [ + "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", + "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", + "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", + "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", + "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", + "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", + "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea", + "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c", + "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", + "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", + "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be", + "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", + "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", + "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", + "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", + "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd", + "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c", + "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", + "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0", + 
"sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c", + "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", + "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", + "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", + "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6", + "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", + "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", + "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30", + "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", + "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", + "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", + "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", + "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", + "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764", + "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", + "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3", + "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f" + ], + "markers": "python_version >= '3.12'", + "version": "==1.26.4" + }, + "outcome": { + "hashes": [ + "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", + "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0.post0" + }, + "pandas": { + "hashes": [ + "sha256:04f6ec3baec203c13e3f8b139fb0f9f86cd8c0b94603ae3ae8ce9a422e9f5bee", + "sha256:06cf591dbaefb6da9de8472535b185cba556d0ce2e6ed28e21d919704fef1a9e", + "sha256:0ab90f87093c13f3e8fa45b48ba9f39181046e8f3317d3aadb2fffbb1b978572", + "sha256:0f573ab277252ed9aaf38240f3b54cfc90fff8e5cab70411ee1d03f5d51f3944", + 
"sha256:101d0eb9c5361aa0146f500773395a03839a5e6ecde4d4b6ced88b7e5a1a6403", + "sha256:11940e9e3056576ac3244baef2fedade891977bcc1cb7e5cc8f8cc7d603edc89", + "sha256:1ba21b1d5c0e43416218db63037dbe1a01fc101dc6e6024bcad08123e48004ab", + "sha256:4aa1d8707812a658debf03824016bf5ea0d516afdea29b7dc14cf687bc4d4ec6", + "sha256:4acf681325ee1c7f950d058b05a820441075b0dd9a2adf5c4835b9bc056bf4fb", + "sha256:53680dc9b2519cbf609c62db3ed7c0b499077c7fefda564e330286e619ff0dd9", + "sha256:739cc70eaf17d57608639e74d63387b0d8594ce02f69e7a0b046f117974b3019", + "sha256:76f27a809cda87e07f192f001d11adc2b930e93a2b0c4a236fde5429527423be", + "sha256:7d2ed41c319c9fb4fd454fe25372028dfa417aacb9790f68171b2e3f06eae8cd", + "sha256:88ecb5c01bb9ca927ebc4098136038519aa5d66b44671861ffab754cae75102c", + "sha256:8df8612be9cd1c7797c93e1c5df861b2ddda0b48b08f2c3eaa0702cf88fb5f88", + "sha256:94e714a1cca63e4f5939cdce5f29ba8d415d85166be3441165edd427dc9f6bc0", + "sha256:9bd8a40f47080825af4317d0340c656744f2bfdb6819f818e6ba3cd24c0e1397", + "sha256:9d1265545f579edf3f8f0cb6f89f234f5e44ba725a34d86535b1a1d38decbccc", + "sha256:a935a90a76c44fe170d01e90a3594beef9e9a6220021acfb26053d01426f7dc2", + "sha256:af5d3c00557d657c8773ef9ee702c61dd13b9d7426794c9dfeb1dc4a0bf0ebc7", + "sha256:c2ce852e1cf2509a69e98358e8458775f89599566ac3775e70419b98615f4b06", + "sha256:c38ce92cb22a4bea4e3929429aa1067a454dcc9c335799af93ba9be21b6beb51", + "sha256:c391f594aae2fd9f679d419e9a4d5ba4bce5bb13f6a989195656e7dc4b95c8f0", + "sha256:c70e00c2d894cb230e5c15e4b1e1e6b2b478e09cf27cc593a11ef955b9ecc81a", + "sha256:df0c37ebd19e11d089ceba66eba59a168242fc6b7155cba4ffffa6eccdfb8f16", + "sha256:e97fbb5387c69209f134893abc788a6486dbf2f9e511070ca05eed4b930b1b02", + "sha256:f02a3a6c83df4026e55b63c1f06476c9aa3ed6af3d89b4f04ea656ccdaaaa359", + "sha256:f821213d48f4ab353d20ebc24e4faf94ba40d76680642fb7ce2ea31a3ad94f9b", + "sha256:f9d3558d263073ed95e46f4650becff0c5e1ffe0fc3a015de3c79283dfbdb3df" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": 
"==2.2.1" + }, + "pysocks": { + "hashes": [ + "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", + "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", + "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" + ], + "version": "==1.7.1" + }, + "python-dateutil": { + "hashes": [ + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.0.post0" + }, + "pytz": { + "hashes": [ + "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", + "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319" + ], + "version": "==2024.1" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "selenium": { + "hashes": [ + "sha256:a11f67afa8bfac6b77e148c987b33f6b14eb1cae4d352722a75de1f26e3f0ae2", + "sha256:b24a3cdd2d47c29832e81345bfcde0c12bb608738013e53c781b211b418df241" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==4.18.1" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "sniffio": { + "hashes": [ + "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", + "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.1" + }, + "sortedcontainers": { + "hashes": [ + 
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", + "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0" + ], + "version": "==2.4.0" + }, + "soupsieve": { + "hashes": [ + "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", + "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" + ], + "markers": "python_version >= '3.8'", + "version": "==2.5" + }, + "tenacity": { + "hashes": [ + "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a", + "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==8.2.3" + }, + "termcolor": { + "hashes": [ + "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63", + "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a" + ], + "markers": "python_version >= '3.8'", + "version": "==2.4.0" + }, + "trio": { + "hashes": [ + "sha256:c3bd3a4e3e3025cd9a2241eae75637c43fe0b9e88b4c97b9161a55b9e54cd72c", + "sha256:ffa09a74a6bf81b84f8613909fb0beaee84757450183a7a2e0b47b455c0cac5d" + ], + "markers": "python_version >= '3.8'", + "version": "==0.24.0" + }, + "trio-websocket": { + "hashes": [ + "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f", + "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638" + ], + "markers": "python_version >= '3.7'", + "version": "==0.11.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475", + "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb" + ], + "markers": "python_version >= '3.8'", + "version": "==4.10.0" + }, + "tzdata": { + "hashes": [ + "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd", + "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252" + ], + "markers": "python_version >= '2'", + "version": 
"==2024.1" + }, + "urllib3": { + "extras": [ + "socks" + ], + "hashes": [ + "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", + "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.1" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" + }, + "wsproto": { + "hashes": [ + "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", + "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==1.2.0" + }, + "xmltodict": { + "hashes": [ + "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56", + "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852" + ], + "index": "pypi", + "markers": "python_version >= '3.4'", + "version": "==0.13.0" + } + }, + "develop": { + "black": { + "hashes": [ + "sha256:057c3dc602eaa6fdc451069bd027a1b2635028b575a6c3acfd63193ced20d9c8", + "sha256:08654d0797e65f2423f850fc8e16a0ce50925f9337fb4a4a176a7aa4026e63f8", + "sha256:163baf4ef40e6897a2a9b83890e59141cc8c2a98f2dda5080dc15c00ee1e62cd", + "sha256:1e08fb9a15c914b81dd734ddd7fb10513016e5ce7e6704bdd5e1251ceee51ac9", + "sha256:4dd76e9468d5536abd40ffbc7a247f83b2324f0c050556d9c371c2b9a9a95e31", + "sha256:4f9de21bafcba9683853f6c96c2d515e364aee631b178eaa5145fc1c61a3cc92", + "sha256:61a0391772490ddfb8a693c067df1ef5227257e72b0e4108482b8d41b5aee13f", + "sha256:6981eae48b3b33399c8757036c7f5d48a535b962a7c2310d19361edeef64ce29", + "sha256:7e53a8c630f71db01b28cd9602a1ada68c937cbf2c333e6ed041390d6968faf4", + "sha256:810d445ae6069ce64030c78ff6127cd9cd178a9ac3361435708b907d8a04c693", + "sha256:93601c2deb321b4bad8f95df408e3fb3943d85012dddb6121336b8e24a0d1218", + 
"sha256:992e451b04667116680cb88f63449267c13e1ad134f30087dec8527242e9862a", + "sha256:9db528bccb9e8e20c08e716b3b09c6bdd64da0dd129b11e160bf082d4642ac23", + "sha256:a0057f800de6acc4407fe75bb147b0c2b5cbb7c3ed110d3e5999cd01184d53b0", + "sha256:ba15742a13de85e9b8f3239c8f807723991fbfae24bad92d34a2b12e81904982", + "sha256:bce4f25c27c3435e4dace4815bcb2008b87e167e3bf4ee47ccdc5ce906eb4894", + "sha256:ca610d29415ee1a30a3f30fab7a8f4144e9d34c89a235d81292a1edb2b55f540", + "sha256:d533d5e3259720fdbc1b37444491b024003e012c5173f7d06825a77508085430", + "sha256:d84f29eb3ee44859052073b7636533ec995bd0f64e2fb43aeceefc70090e752b", + "sha256:e37c99f89929af50ffaf912454b3e3b47fd64109659026b678c091a4cd450fb2", + "sha256:e8a6ae970537e67830776488bca52000eaa37fa63b9988e8c487458d9cd5ace6", + "sha256:faf2ee02e6612577ba0181f4347bcbcf591eb122f7841ae5ba233d12c39dcb4d" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==24.2.0" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "mypy-extensions": { + "hashes": [ + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.0" + }, + "packaging": { + "hashes": [ + "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", + "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2" + }, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "platformdirs": { + "hashes": [ 
+ "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", + "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" + ], + "markers": "python_version >= '3.8'", + "version": "==4.2.0" + } + } +} diff --git a/README.md b/README.md index 36ddc0e..adff6e5 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ CLI tool and resources enabling efficient and consistent retrieval of corporate and financial data from the SEC. + ## What is EDGAR? EDGAR is a database of corporate filings maintained by the United States Securities and Exchange Commission (SEC). @@ -10,7 +11,7 @@ Whether you are looking to study the fundamentals of your favorite stocks, or to But there's a catch. -To _programatically_ access EDGAR data in a consistent and reliable manner is a complex problem. +To _programmatically_ access EDGAR data in a consistent and reliable manner is a complex problem. Most people who have found solutions to this problem charge a fee for it, or only provide limited free access to the obtained data. This tool was initially developed as part of the Bellingcat Tech Fellowship program, we hope it helps you utilise this incredible, free resource. @@ -25,6 +26,11 @@ git clone https://github.com/bellingcat/EDGAR.git cd EDGAR ``` +### Option 1: Use a Python Virtual Environment + +This method is best if you just want to use EDGAR. See option 2, using Pipenv, +if you would like to contribute. + Create a virtual environment ```bash @@ -46,6 +52,52 @@ Then install the dependencies with pip install -r requirements.txt ``` +### Option 2: Use Pipenv to Manage Your Python Virtual Environment + +This workflow is better for those looking to contribute to EDGAR. + +
+ Click to expand + +[Pipenv](https://pipenv.pypa.io/en/latest/) is a Python virtualenv management tool. +It automatically creates and manages a virtualenv for the project, handles adding and removing packages, +and generates deterministic builds via `Pipfile.lock`. + +The recommended Python version for this project is Python 3.12, which you can download from https://www.python.org/downloads/ + +You can install and activate your Pipenv-managed virtual environment by running the following: + +```shell +pip3.12 install pipenv # Installs Pipenv +pipenv install --dev # Installs all prod & dev dependencies +pipenv shell # Activates the Python virtual environment +``` + +You can always leave the virtual environment by typing `exit` in your terminal (`pipenv shell` starts a subshell, so `deactivate` alone would leave that subshell running). +```shell +exit +``` + +You can see all available `Pipenv` commands by typing `pipenv` in your terminal. +Type `pipenv scripts` to see available scripts. + +```shell +(EDGAR) ➜ EDGAR git:(main) ✗ pipenv scripts +Command Script +------- ------- +format black . +``` + +Which you can then run via `pipenv run