Skip to content

Commit

Permalink
feat: big query procedure for comparing validator versions (#770)
Browse files Browse the repository at this point in the history
  • Loading branch information
cka-y authored Oct 22, 2024
1 parent d869227 commit 12eedfb
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
88 changes: 88 additions & 0 deletions bigquery/compare-validation-reports.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
WITH
validation_report AS (
SELECT
*,
summary.validatorVersion
FROM
`${project_id}.data_analytics.gtfs_validation_reports_*`
),
most_recent_reports AS (
SELECT
t1.*
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY feedId ORDER BY validatedAt DESC) AS row_num
FROM
validation_report
WHERE
summary.validatorVersion = current_version
) AS t1
WHERE row_num = 1
),
old_version_reports AS (
SELECT
t1.*
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY feedId ORDER BY validatedAt DESC) AS row_num
FROM
validation_report
WHERE
summary.validatorVersion = previous_version
) AS t1
WHERE row_num = 1
),
most_recent_notices AS (
SELECT
feedId,
datasetId,
notice.code,
notice.severity,
notice.totalNotices
FROM
most_recent_reports,
UNNEST(notices) AS notice
),
old_version_notices AS (
SELECT
feedId,
datasetId,
notice.code,
notice.severity,
notice.totalNotices
FROM
old_version_reports,
UNNEST(notices) AS notice
),
merged_reports AS (
SELECT
COALESCE(most_recent_notices.feedId, old_version_notices.feedId) AS feedId,
COALESCE(most_recent_notices.code, old_version_notices.code) AS code,
old_version_notices.severity AS severity_previous,
most_recent_notices.severity AS severity_current,
IFNULL(old_version_notices.totalNotices, 0) AS totalNotices_previous,
IFNULL(most_recent_notices.totalNotices, 0) AS totalNotices_current
FROM
old_version_notices
FULL OUTER JOIN
most_recent_notices
ON
old_version_notices.feedId = most_recent_notices.feedId
AND old_version_notices.code = most_recent_notices.code
)

SELECT
feedId AS `Feed ID`,
code AS `Code`,
COALESCE(severity_previous, severity_current) AS `Severity`,
totalNotices_previous AS `Total Previous`,
totalNotices_current AS `Total Current`,
GREATEST(totalNotices_current - totalNotices_previous, 0) AS `New Notices`,
GREATEST(totalNotices_previous - totalNotices_current, 0) AS `Dropped Notices`
FROM
merged_reports
ORDER BY
`New Notices` DESC,
`Dropped Notices` DESC
32 changes: 32 additions & 0 deletions infra/metrics/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,38 @@ resource "google_cloudfunctions2_function" "gbfs_preprocessed_analytics" {

}

# 2.8 Compare validation reports routine
resource "google_bigquery_routine" "compare_validation_reports_function" {
dataset_id = var.dataset_id
routine_id = "compare_validation_reports"
routine_type = "TABLE_VALUED_FUNCTION"

language = "SQL"
definition_body = templatefile("${path.module}/../../bigquery/compare-validation-reports.sql", {
project_id = var.project_id
})
arguments {
name = "previous_version"
data_type = jsonencode({ "typeKind" : "STRING" })
}
arguments {
name = "current_version"
data_type = jsonencode({ "typeKind" : "STRING" })
}
return_table_type = jsonencode({
"columns": [
{ "name": "Feed ID", "type": { "typeKind": "STRING" } },
{ "name": "Code", "type": { "typeKind": "STRING" } },
{ "name": "Severity", "type": { "typeKind": "STRING" } },
{ "name": "Total Previous", "type": { "typeKind": "INT64" } },
{ "name": "Total Current", "type": { "typeKind": "INT64" } },
{ "name": "New Notices", "type": { "typeKind": "INT64" } },
{ "name": "Dropped Notices", "type": { "typeKind": "INT64" } }
]
})
}



# Grant permissions to the service account
# 1. BigQuery roles
Expand Down

0 comments on commit 12eedfb

Please sign in to comment.