From 7c77ad0b078702918882c1fd0976419642daad24 Mon Sep 17 00:00:00 2001 From: Germey Date: Sun, 30 Jun 2024 11:08:48 +0800 Subject: [PATCH] validate testing ip --- README.md | 11 ---------- build.yaml | 18 ---------------- docker-compose.yml | 5 ++--- proxypool/processors/tester.py | 39 +++++++++++++++++++++++----------- 4 files changed, 29 insertions(+), 44 deletions(-) delete mode 100644 build.yaml diff --git a/README.md b/README.md index d01d7a08..e1435763 100644 --- a/README.md +++ b/README.md @@ -74,12 +74,6 @@ proxypool | 2020-02-19 17:09:46,596 INFO success: tester entered RUNNING stat 这时候访问 [http://localhost:5555/random](http://localhost:5555/random) 即可获取一个随机可用代理。 -当然你也可以选择自己 Build,直接运行如下命令即可: - -``` -docker-compose -f build.yaml up -``` - 如果下载速度特别慢,可以自行修改 Dockerfile,修改: ```diff @@ -347,11 +341,6 @@ class Daili66Crawler(BaseCrawler): 本项目提供了 Kubernetes 部署脚本,如需部署到 Kubernetes,请参考 [kubernetes](./kubernetes)。 -## 待开发 - -- [ ] 前端页面管理 -- [ ] 使用情况统计分析 - 如有一起开发的兴趣可以在 Issue 留言,非常感谢! ## LICENSE diff --git a/build.yaml b/build.yaml deleted file mode 100644 index 74b2fd0b..00000000 --- a/build.yaml +++ /dev/null @@ -1,18 +0,0 @@ -version: "3" -services: - redis4proxypool: - image: redis:alpine - container_name: redis4proxypool - ports: - - "6374:6379" - proxypool: - build: . - image: "germey/proxypool:master" - container_name: proxypool - ports: - - "5555:5555" - restart: always - # volumes: - # - proxypool/crawlers/private:/app/proxypool/crawlers/private - environment: - PROXYPOOL_REDIS_CONNECTION_STRING: redis://@redis4proxypool:6379/0 diff --git a/docker-compose.yml b/docker-compose.yml index cf367f42..4e4d5936 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,16 +3,15 @@ services: redis4proxypool: image: redis:alpine container_name: redis4proxypool - # ports: - # - "6374:6379" proxypool: + build: . image: "germey/proxypool:master" container_name: proxypool ports: - "5555:5555" restart: always # volumes: - # - proxypool/crawlers/private:/app/proxypool/crawlers/private + # - proxypool/crawlers/private:~/proxypool/crawlers/private environment: PROXYPOOL_REDIS_HOST: redis4proxypool diff --git a/proxypool/processors/tester.py b/proxypool/processors/tester.py index 470259a9..6937af8c 100644 --- a/proxypool/processors/tester.py +++ b/proxypool/processors/tester.py @@ -45,27 +45,33 @@ async def test(self, proxy: Proxy): logger.debug(f'testing {proxy.string()}') # if TEST_ANONYMOUS is True, make sure that # the proxy has the effect of hiding the real IP + # logger.debug(f'TEST_ANONYMOUS {TEST_ANONYMOUS}') if TEST_ANONYMOUS: url = 'https://httpbin.org/ip' async with session.get(url, timeout=TEST_TIMEOUT) as response: resp_json = await response.json() origin_ip = resp_json['origin'] + # logger.debug(f'origin ip is {origin_ip}') async with session.get(url, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT) as response: resp_json = await response.json() anonymous_ip = resp_json['origin'] + logger.debug(f'anonymous ip is {anonymous_ip}') assert origin_ip != anonymous_ip assert proxy.host == anonymous_ip async with session.get(TEST_URL, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT, allow_redirects=False) as response: if response.status in TEST_VALID_STATUS: if TEST_DONT_SET_MAX_SCORE: - logger.debug(f'proxy {proxy.string()} is valid, remain current score') + logger.debug( + f'proxy {proxy.string()} is valid, remain current score') else: self.redis.max(proxy) - logger.debug(f'proxy {proxy.string()} is valid, set max score') + logger.debug( + f'proxy {proxy.string()} is valid, set max score') else: self.redis.decrease(proxy) - logger.debug(f'proxy {proxy.string()} is invalid, decrease score') + logger.debug( + f'proxy {proxy.string()} is invalid, decrease score') # if independent tester class found, create new set of storage and do the extra test for tester in self.testers: key = tester.key @@ -82,18 +88,25 @@ async def test(self, proxy: Proxy): is_valid = await tester.parse(resp_text, test_url, proxy.string()) if is_valid: if tester.test_dont_set_max_score: - logger.info(f'key[{key}] proxy {proxy.string()} is valid, remain current score') + logger.info( + f'key[{key}] proxy {proxy.string()} is valid, remain current score') else: - self.redis.max(proxy, key, tester.proxy_score_max) - logger.info(f'key[{key}] proxy {proxy.string()} is valid, set max score') + self.redis.max( + proxy, key, tester.proxy_score_max) + logger.info( + f'key[{key}] proxy {proxy.string()} is valid, set max score') else: - self.redis.decrease(proxy, tester.key, tester.proxy_score_min) - logger.info(f'key[{key}] proxy {proxy.string()} is invalid, decrease score') + self.redis.decrease( + proxy, tester.key, tester.proxy_score_min) + logger.info( + f'key[{key}] proxy {proxy.string()} is invalid, decrease score') except EXCEPTIONS: self.redis.decrease(proxy) - [self.redis.decrease(proxy, tester.key, tester.proxy_score_min) for tester in self.testers] - logger.debug(f'proxy {proxy.string()} is invalid, decrease score') + [self.redis.decrease(proxy, tester.key, tester.proxy_score_min) + for tester in self.testers] + logger.debug( + f'proxy {proxy.string()} is invalid, decrease score') @logger.catch def run(self): @@ -107,10 +120,12 @@ def run(self): logger.debug(f'{count} proxies to test') cursor = 0 while True: - logger.debug(f'testing proxies use cursor {cursor}, count {TEST_BATCH}') + logger.debug( + f'testing proxies use cursor {cursor}, count {TEST_BATCH}') cursor, proxies = self.redis.batch(cursor, count=TEST_BATCH) if proxies: - tasks = [self.loop.create_task(self.test(proxy)) for proxy in proxies] + tasks = [self.loop.create_task( + self.test(proxy)) for proxy in proxies] self.loop.run_until_complete(asyncio.wait(tasks)) if not cursor: break