From 0262f1fb9213c6a800792a7d66720da43189481b Mon Sep 17 00:00:00 2001 From: Stanislav Schmidt Date: Fri, 10 Dec 2021 17:13:37 +0100 Subject: [PATCH] Don't error out in rma_all for incomplete response --- src/atldld/requests.py | 21 ++++++++++++++++++--- tests/test_requests.py | 9 ++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/atldld/requests.py b/src/atldld/requests.py index bfffae3..9fbd530 100644 --- a/src/atldld/requests.py +++ b/src/atldld/requests.py @@ -181,14 +181,29 @@ def rma_all(rma_parameters: RMAParameters) -> list: f'Expected total_rows to be {total_rows} but got {status["total_rows"]}' ) - # Each new request should yield new data. If no data was received, then - # something must have gone wrong. + # We haven't got all the rows yet, but the query yielded zero results. + # This could be due to an arbitrary server-side problem, but we know + # of at least one query for which the number of returned results is + # consistently smaller than the number of reported results: + # RMAParameters( + # "SectionDataSet", + # criteria={ + # "specimen": {"donor": {"age": {"days": 56}}}, + # "probes": {"orientation": {"name": "Antisense"}} + # }, + # ) + # this gives total_rows=50476 but len(msg)=50274. + # Previously we would raise an error when this happened, but since this + # is reproducible we now only print a warning. if status["num_rows"] == 0: - raise RuntimeError("No data received") + break pos += status["num_rows"] msg += new_msg + if not len(msg) == total_rows: + logger.warning("The server sent only %d of %d results", len(msg), total_rows) + return msg diff --git a/tests/test_requests.py b/tests/test_requests.py index 0ae067d..758e386 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -14,6 +14,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. 
+import logging import re import pytest @@ -241,7 +242,7 @@ def test_inconsistent_total_rows(self): rma_all(params) @responses.activate - def test_no_data_received(self): + def test_incomplete_data_received(self, caplog): params = RMAParameters("my-model") # Can at most fetch 25_000 in one request msg = list(range(26_000)) @@ -259,12 +260,14 @@ def test_no_data_received(self): "success": True, "id": 0, "start_row": len(msg_1), - "num_rows": 0, # this should always be greater than 0 + "num_rows": 0, # this should normally always be greater than 0 "total_rows": len(msg), "msg": msg_2, } responses.add(responses.GET, re.compile(""), json=return_json_1) responses.add(responses.GET, re.compile(""), json=return_json_2) - with pytest.raises(RuntimeError, match="No data received"): + with caplog.at_level(logging.WARNING, logger="atldld.requests"): rma_all(params) + + assert "The server sent only" in caplog.text