Skip to content

Commit

Permalink
Adding action, referenceAnnDataFile to default mixpanel props, fixing…
Browse files Browse the repository at this point in the history
… trigger value
  • Loading branch information
bistline committed Sep 17, 2024
1 parent 91f10c7 commit 497564d
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 10 deletions.
23 changes: 15 additions & 8 deletions ingest/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
MONGO_CONNECTION = MongoConnection()


def init(study_id, study_file_id, user_metric_uuid=None):
def init(study_id, study_file_id, user_metric_uuid=None, action=None):
global __metric_properties

study = Study(study_id)
study_file = StudyFile(study_file_id)
__metric_properties = MetricProperties(study, study_file, user_metric_uuid)
__metric_properties = MetricProperties(study, study_file, user_metric_uuid, action)


def set_parent_event_name(event_name):
Expand All @@ -39,7 +39,7 @@ class MetricProperties:
# This is a generic write-only log token, not a secret
USER_ID = "2f30ec50-a04d-4d43-8fd1-b136a2045079"

def __init__(self, study, study_file, user_uuid=None):
def __init__(self, study, study_file, user_uuid=None, action=None):
distinct_id = user_uuid if user_uuid else MetricProperties.USER_ID
self.__properties = {
"distinct_id": distinct_id,
Expand All @@ -50,7 +50,11 @@ def __init__(self, study, study_file, user_uuid=None):
"trigger": study_file.trigger,
"logger": "ingest-pipeline",
"appId": "single-cell-portal",
"action": action
}
# merge in referenceAnnDataFile if necessary
if study_file.file_type == 'AnnData':
self.__properties["referenceAnnDataFile"] = study_file.is_reference_anndata

def get_properties(self):
return self.__properties
Expand Down Expand Up @@ -171,12 +175,15 @@ def study_file(self, study_file_id):
self.file_type = self.study_file["file_type"]
self.file_size = self.study_file["upload_file_size"]
self.file_name = self.study_file["name"]
upload_trigger = self.study_file.get("options", {}).get("upload_trigger")
# when set, remote_location is the name of the file in the bucket
if self.study_file.get("remote_location") is not None:
if self.study_file["remote_location"] == "":
self.trigger = 'upload'
else:
self.trigger = 'sync'
if upload_trigger is not None:
self.trigger = upload_trigger
elif self.study_file["remote_location"] is not None:
self.trigger = 'upload' if self.study_file["remote_location"] == "" else 'sync'
# indicate trigger state for tests/mocks
else:
self.trigger = 'not set'

if self.study_file["file_type"] == 'AnnData':
self.is_reference_anndata = self.study_file.get("ann_data_file_info", {}).get("reference_file")
11 changes: 11 additions & 0 deletions ingest/ingest_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ class IngestPipeline:
"../schema/alexandria_convention/alexandria_convention_schema.json"
)

# array of actions to use when reporting to Mixpanel
ACTION_NAMES = [
'ingest_cluster', 'ingest_cell_metadata', 'ingest_expression', 'ingest_anndata', 'ingest_subsample',
'ingest_differential_expression', 'differential_expression', 'render_expression_arrays', 'rank_genes'
]

# Logger provides more details for trouble shooting
dev_logger = setup_logger(__name__, "log.txt", format="support_configs")
user_logger = setup_logger(__name__ + ".usr", "user_log.txt", level=logging.ERROR)
Expand Down Expand Up @@ -786,6 +792,10 @@ def exit_pipeline(ingest, status, status_cell_metadata, arguments):
sys.exit(os.EX_DATAERR)
sys.exit(1)

def get_action_from_args(arguments):
"""Get the action from list of arguments denoting which data type is being ingested/extracted"""
action = list(set(IngestPipeline.ACTION_NAMES) & set(arguments))
return action[0] if action else ""

def main() -> None:
"""Enables running Ingest Pipeline via CLI
Expand All @@ -811,6 +821,7 @@ def main() -> None:
arguments["study_id"],
arguments["study_file_id"],
arguments["user_metrics_uuid"],
get_action_from_args(arguments)
)
ingest = IngestPipeline(**arguments)
status, status_cell_metadata = run_ingest(ingest, arguments, parsed_args)
Expand Down
20 changes: 20 additions & 0 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
IngestPipeline,
exit_pipeline,
run_ingest,
get_action_from_args
)
from expression_files.expression_files import GeneExpression

Expand Down Expand Up @@ -793,6 +794,25 @@ def test_extract_processed_matrix_from_anndata(self):
except:
print(f"Error while deleting file : {file}")

def test_get_action_from_args(self):
args = [
"--study-id",
"5d276a50421aa9117c982845",
"--study-file-id",
"5dd5ae25421aa910a723a337",
"ingest_subsample",
"--cluster-file",
"../tests/data/good_subsample_cluster.csv",
"--name",
"cluster1",
"--cell-metadata-file",
"../tests/data/test_cell_metadata.csv",
"--subsample",
]
self.assertEqual("ingest_subsample", get_action_from_args(args))
bad_args = ["foo", "bar", "bing"]
self.assertEqual("", get_action_from_args(bad_args))


if __name__ == "__main__":
unittest.main()
37 changes: 35 additions & 2 deletions tests/test_metrics_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def mock_post_event(props):
props = json.loads(props)
expected_props = {
"properties": {
"action": "ingest_expression",
"studyAccession": "SCP123",
"fileName": "File_name.txt",
"fileType": "Expression matrix",
Expand All @@ -29,6 +30,7 @@ def mock_post_event(props):
"logger": "ingest-pipeline",
"appId": "single-cell-portal",
"status": "success",
"trigger": "bucket"
}
}
if props["event"] == "file-validation":
Expand Down Expand Up @@ -56,6 +58,8 @@ class MetricsServiceTestCase(unittest.TestCase):
"file_type": "Expression matrix",
"upload_file_size": 400,
"name": "File_name.txt",
"remote_location": "File_name.txt",
"options": { "upload_trigger": "bucket" }
}
]
# Build client mock with functions and return values needed to query
Expand Down Expand Up @@ -94,7 +98,7 @@ def test_log(self, mock_execute_ingest, mock_MONGO_CONNECTION, mock_post_event):
"dense",
]
# Initialize global variables
config.init("5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337")
config.init("5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337", None, 'ingest_expression')
config.set_parent_event_name("ingest-pipeline:expression:ingest")
IngestTestCase.execute_ingest(args)
metrics_model = config.get_metric_properties()
Expand All @@ -108,10 +112,39 @@ def test_log(self, mock_execute_ingest, mock_MONGO_CONNECTION, mock_post_event):
# args.insert(3).append(user_metrics_uuid)
# Initialize global variables
config.init(
"5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337", user_metrics_uuid
"5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337", user_metrics_uuid, 'ingest_expression'
)
config.set_parent_event_name("ingest-pipeline:expression:ingest")
IngestTestCase.execute_ingest(args)
metrics_model = config.get_metric_properties()
# Log Mixpanel events
MetricsService.log(config.get_parent_event_name(), metrics_model)

@patch("config.MONGO_CONNECTION")
def test_init(self, mock_mongo_conn):
client_values = {}
client_values["study_accessions"] = MagicMock()
client_values["study_accessions"].find.return_value = [{"accession": "SCP123"}]
client_values["study_files"] = MagicMock()
client_values["study_files"].find.return_value = [
{
"file_type": "AnnData",
"upload_file_size": 400,
"name": "matrix.h5ad",
"remote_location": "",
"ann_data_file_info": {
"reference_file": False
}
}
]
# Build client mock with functions and return values needed to query
client_mock = MagicMock()
client_mock.__getitem__.side_effect = client_values.__getitem__
mock_mongo_conn._client = client_mock
config.init(
"5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337", user_metrics_uuid, "ingest_anndata"
)
props = config.get_metric_properties().get_properties()
self.assertEqual("ingest_anndata", props["action"])
self.assertEqual("upload", props["trigger"])
self.assertFalse(props["referenceAnnDataFile"])

0 comments on commit 497564d

Please sign in to comment.