Skip to content

Commit

Permalink
validate testing ip
Browse files Browse the repository at this point in the history
  • Loading branch information
Germey committed Jun 30, 2024
1 parent 78b3244 commit 7c77ad0
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 44 deletions.
11 changes: 0 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,6 @@ proxypool | 2020-02-19 17:09:46,596 INFO success: tester entered RUNNING stat

这时候访问 [http://localhost:5555/random](http://localhost:5555/random) 即可获取一个随机可用代理。

当然你也可以选择自己 Build,直接运行如下命令即可:

```
docker-compose -f build.yaml up
```

如果下载速度特别慢,可以自行修改 Dockerfile,修改:

```diff
Expand Down Expand Up @@ -347,11 +341,6 @@ class Daili66Crawler(BaseCrawler):
本项目提供了 Kubernetes 部署脚本,如需部署到 Kubernetes,请参考 [kubernetes](./kubernetes)。
## 待开发
- [ ] 前端页面管理
- [ ] 使用情况统计分析
如有一起开发的兴趣可以在 Issue 留言,非常感谢!
## LICENSE
Expand Down
18 changes: 0 additions & 18 deletions build.yaml

This file was deleted.

5 changes: 2 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@ services:
redis4proxypool:
image: redis:alpine
container_name: redis4proxypool
# ports:
# - "6374:6379"
proxypool:
build: .
image: "germey/proxypool:master"
container_name: proxypool
ports:
- "5555:5555"
restart: always
# volumes:
# - proxypool/crawlers/private:/app/proxypool/crawlers/private
# - proxypool/crawlers/private:~/proxypool/crawlers/private
environment:
PROXYPOOL_REDIS_HOST: redis4proxypool

39 changes: 27 additions & 12 deletions proxypool/processors/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,33 @@ async def test(self, proxy: Proxy):
logger.debug(f'testing {proxy.string()}')
# if TEST_ANONYMOUS is True, make sure that
# the proxy has the effect of hiding the real IP
# logger.debug(f'TEST_ANONYMOUS {TEST_ANONYMOUS}')
if TEST_ANONYMOUS:
url = 'https://httpbin.org/ip'
async with session.get(url, timeout=TEST_TIMEOUT) as response:
resp_json = await response.json()
origin_ip = resp_json['origin']
# logger.debug(f'origin ip is {origin_ip}')
async with session.get(url, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT) as response:
resp_json = await response.json()
anonymous_ip = resp_json['origin']
logger.debug(f'anonymous ip is {anonymous_ip}')
assert origin_ip != anonymous_ip
assert proxy.host == anonymous_ip
async with session.get(TEST_URL, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT,
allow_redirects=False) as response:
if response.status in TEST_VALID_STATUS:
if TEST_DONT_SET_MAX_SCORE:
logger.debug(f'proxy {proxy.string()} is valid, remain current score')
logger.debug(
f'proxy {proxy.string()} is valid, remain current score')
else:
self.redis.max(proxy)
logger.debug(f'proxy {proxy.string()} is valid, set max score')
logger.debug(
f'proxy {proxy.string()} is valid, set max score')
else:
self.redis.decrease(proxy)
logger.debug(f'proxy {proxy.string()} is invalid, decrease score')
logger.debug(
f'proxy {proxy.string()} is invalid, decrease score')
# if independent tester class found, create new set of storage and do the extra test
for tester in self.testers:
key = tester.key
Expand All @@ -82,18 +88,25 @@ async def test(self, proxy: Proxy):
is_valid = await tester.parse(resp_text, test_url, proxy.string())
if is_valid:
if tester.test_dont_set_max_score:
logger.info(f'key[{key}] proxy {proxy.string()} is valid, remain current score')
logger.info(
f'key[{key}] proxy {proxy.string()} is valid, remain current score')
else:
self.redis.max(proxy, key, tester.proxy_score_max)
logger.info(f'key[{key}] proxy {proxy.string()} is valid, set max score')
self.redis.max(
proxy, key, tester.proxy_score_max)
logger.info(
f'key[{key}] proxy {proxy.string()} is valid, set max score')
else:
self.redis.decrease(proxy, tester.key, tester.proxy_score_min)
logger.info(f'key[{key}] proxy {proxy.string()} is invalid, decrease score')
self.redis.decrease(
proxy, tester.key, tester.proxy_score_min)
logger.info(
f'key[{key}] proxy {proxy.string()} is invalid, decrease score')

except EXCEPTIONS:
self.redis.decrease(proxy)
[self.redis.decrease(proxy, tester.key, tester.proxy_score_min) for tester in self.testers]
logger.debug(f'proxy {proxy.string()} is invalid, decrease score')
[self.redis.decrease(proxy, tester.key, tester.proxy_score_min)
for tester in self.testers]
logger.debug(
f'proxy {proxy.string()} is invalid, decrease score')

@logger.catch
def run(self):
Expand All @@ -107,10 +120,12 @@ def run(self):
logger.debug(f'{count} proxies to test')
cursor = 0
while True:
logger.debug(f'testing proxies use cursor {cursor}, count {TEST_BATCH}')
logger.debug(
f'testing proxies use cursor {cursor}, count {TEST_BATCH}')
cursor, proxies = self.redis.batch(cursor, count=TEST_BATCH)
if proxies:
tasks = [self.loop.create_task(self.test(proxy)) for proxy in proxies]
tasks = [self.loop.create_task(
self.test(proxy)) for proxy in proxies]
self.loop.run_until_complete(asyncio.wait(tasks))
if not cursor:
break
Expand Down

0 comments on commit 7c77ad0

Please sign in to comment.