From b02a9efcf40522d63d8e24d9e757ec2739af286f Mon Sep 17 00:00:00 2001 From: niuxiaozu Date: Mon, 17 Jun 2024 15:34:03 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9download=E6=88=90=E5=8A=9F?= =?UTF-8?q?=E5=90=8E=E5=AF=B9=E7=8A=B6=E6=80=81=E7=A0=81=E7=9A=84=E5=A4=84?= =?UTF-8?q?=E7=90=86=EF=BC=8C=E5=A6=82=E6=9E=9C=E7=8A=B6=E6=80=81=E7=A0=81?= =?UTF-8?q?=E4=B8=8D=E8=A2=ABsite.acceptStatCode=E6=8E=A5=E6=94=B6?= =?UTF-8?q?=E7=9A=84=E8=AF=9D=E5=B0=B1=E7=AE=97=E5=A4=B1=E8=B4=A5=EF=BC=8C?= =?UTF-8?q?=E8=BF=9B=E8=A1=8CdoCycleRetry=E9=87=8D=E8=AF=95=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E3=80=82=E5=B9=B6=E4=B8=94=E5=8A=A0=E5=85=A5=E9=98=9F?= =?UTF-8?q?=E5=88=97=E5=89=8D=E5=85=88sleep=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/java/us/codecraft/webmagic/Spider.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 11a671f7a..69d6e8325 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -456,6 +456,11 @@ private void onDownloadSuccess(Request request, Page page) { } } else { logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode()); + if (site.getCycleRetryTimes() != 0) { + sleep(site.getRetrySleepTime()); + // for cycle retry + doCycleRetry(request); + } } sleep(site.getSleepTime()); return; @@ -465,6 +470,7 @@ private void onDownloaderFail(Request request) { if (site.getCycleRetryTimes() == 0) { sleep(site.getSleepTime()); } else { + sleep(site.getRetrySleepTime()); // for cycle retry doCycleRetry(request); } @@ -477,11 +483,10 @@ private void doCycleRetry(Request request) { } else { int cycleTriedTimes = (Integer) cycleTriedTimesObject; cycleTriedTimes++; - if (cycleTriedTimes < site.getCycleRetryTimes()) { + if (site.getCycleRetryTimes() < 0 || cycleTriedTimes < site.getCycleRetryTimes()) { addRequest(SerializationUtils.clone(request).setPriority(0).putExtra(Request.CYCLE_TRIED_TIMES, cycleTriedTimes)); } } - sleep(site.getRetrySleepTime()); } protected void sleep(int time) {