Skip to content

Commit

Permalink
ENH: look at prior versions index when crdc_series_uuid is used
Browse files: browse the repository at this point in the history
  • Loading branch information
fedorov committed Oct 7, 2024
1 parent 510bbc8 commit 51242bf
Showing 1 changed file with 61 additions and 5 deletions.
66 changes: 61 additions & 5 deletions idc_index/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,6 +76,9 @@ def __init__(self):
self.previous_versions_index_path = (
idc_index_data.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH
)
file_path = idc_index_data.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH

self.previous_versions_index = pd.read_parquet(file_path)

# self.index = self.index.astype(str).replace("nan", "")
self.index["series_size_MB"] = self.index["series_size_MB"].astype(float)
Expand Down Expand Up @@ -678,7 +681,28 @@ def _validate_update_manifest_and_get_download_size(
manifest_df.columns = ["manifest_cp_cmd"]

# create a copy of the index
index_df_copy = self.index
index_df_copy = self.index[
[
"SeriesInstanceUID",
"series_aws_url",
"series_size_MB",
"PatientID",
"collection_id",
"Modality",
"StudyInstanceUID",
]
]
previous_versions_index_df_copy = self.previous_versions_index[
[
"SeriesInstanceUID",
"series_aws_url",
"series_size_MB",
"PatientID",
"collection_id",
"Modality",
"StudyInstanceUID",
]
]

# use default hierarchy
if dirTemplate is not None:
Expand Down Expand Up @@ -773,7 +797,7 @@ def _validate_update_manifest_and_get_download_size(
series_size_MB,
{hierarchy} AS path,
FROM
'{self.previous_versions_index_path}' pvip
previous_versions_index_df_copy pvip
),
index_temp AS (
Expand Down Expand Up @@ -1435,7 +1459,6 @@ def citations_from_selection(
patientId=None,
studyInstanceUID=None,
seriesInstanceUID=None,
crdc_series_uuid=None,
citation_format=CITATION_FORMAT_APA,
):
"""Get the list of publications that should be cited/attributed for the specific collection, patient (case) ID, study or series UID.
Expand All @@ -1450,13 +1473,14 @@ def citations_from_selection(
Returns:
List of citations in the requested format.
"""

result_df = self._safe_filter_by_selection(
self.index,
collection_id=collection_id,
patientId=patientId,
studyInstanceUID=studyInstanceUID,
seriesInstanceUID=seriesInstanceUID,
crdc_series_uuid=crdc_series_uuid,
crdc_series_uuid=None,
)

citations = []
Expand Down Expand Up @@ -1533,8 +1557,40 @@ def download_from_selection(

downloadDir = self._check_create_directory(downloadDir)

if crdc_series_uuid is not None:
download_df = pd.concat(
[
self.index[
[
"PatientID",
"collection_id",
"Modality",
"StudyInstanceUID",
"SeriesInstanceUID",
"crdc_series_uuid",
"series_aws_url",
"series_size_MB",
]
],
self.previous_versions_index[
[
"PatientID",
"collection_id",
"Modality",
"StudyInstanceUID",
"SeriesInstanceUID",
"crdc_series_uuid",
"series_aws_url",
"series_size_MB",
]
],
],
)
else:
download_df = self.index

result_df = self._safe_filter_by_selection(
self.index,
download_df,
collection_id=collection_id,
patientId=patientId,
studyInstanceUID=studyInstanceUID,
Expand Down

0 comments on commit 51242bf

Please sign in to comment.