Skip to content
This repository has been archived by the owner on Apr 11, 2024. It is now read-only.

Commit

Permalink
[Fetch Migration] Filter out system indices for metadata migration
Browse files Browse the repository at this point in the history
Also includes a minor optimization to short-circuit logic when there are no source indices found.

Signed-off-by: Kartik Ganesh <[email protected]>
  • Loading branch information
kartg committed Oct 25, 2023
1 parent 8ef0514 commit 0378cba
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
17 changes: 11 additions & 6 deletions FetchMigration/python/index_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,18 @@
def fetch_all_indices(endpoint: EndpointInfo) -> dict:
actual_endpoint = endpoint.url + __ALL_INDICES_ENDPOINT
resp = requests.get(actual_endpoint, auth=endpoint.auth, verify=endpoint.verify_ssl)
# Remove internal settings
result = dict(resp.json())
for index in result:
for setting in __INTERNAL_SETTINGS_KEYS:
index_settings = result[index][SETTINGS_KEY]
if __INDEX_KEY in index_settings:
index_settings[__INDEX_KEY].pop(setting, None)
print(result)
for index in list(result.keys()):
# Remove system indices
if index.startswith("."):
del result[index]
# Remove internal settings
else:
for setting in __INTERNAL_SETTINGS_KEYS:
index_settings = result[index][SETTINGS_KEY]
if __INDEX_KEY in index_settings:
index_settings[__INDEX_KEY].pop(setting, None)
return result


Expand Down
4 changes: 4 additions & 0 deletions FetchMigration/python/metadata_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,12 @@ def run(args: MetadataMigrationParams) -> MetadataMigrationResult:
# We expect the Data Prepper pipeline to only have a single top-level value
pipeline_config = next(iter(dp_config.values()))
validate_pipeline_config(pipeline_config)
result = MetadataMigrationResult()
# Fetch EndpointInfo and indices
source_endpoint_info, source_indices = compute_endpoint_and_fetch_indices(pipeline_config, SOURCE_KEY)
# If source indices is empty, return immediately
if len(source_indices.keys()) == 0:
return result
target_endpoint_info, target_indices = compute_endpoint_and_fetch_indices(pipeline_config, SINK_KEY)
# Compute index differences and print report
diff = get_index_differences(source_indices, target_indices)
Expand Down
12 changes: 11 additions & 1 deletion FetchMigration/python/tests/test_index_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,17 @@ class TestSearchEndpoint(unittest.TestCase):
@responses.activate
def test_fetch_all_indices(self):
# Set up GET response
responses.get(test_constants.SOURCE_ENDPOINT + "*", json=test_constants.BASE_INDICES_DATA)
test_data = copy.deepcopy(test_constants.BASE_INDICES_DATA)
# Add system index
test_data[".system-index"] = {
test_constants.SETTINGS_KEY: {
test_constants.INDEX_KEY: {
test_constants.NUM_SHARDS_SETTING: 1,
test_constants.NUM_REPLICAS_SETTING: 1
}
}
}
responses.get(test_constants.SOURCE_ENDPOINT + "*", json=test_data)
# Now send request
index_data = index_operations.fetch_all_indices(EndpointInfo(test_constants.SOURCE_ENDPOINT))
self.assertEqual(3, len(index_data.keys()))
Expand Down

0 comments on commit 0378cba

Please sign in to comment.