Skip to content

Commit

Permalink
Merge pull request #5157 from kobotoolbox/TASK-869-group-at-fetch
Browse files Browse the repository at this point in the history
Group submission access logs
  • Loading branch information
noliveleger authored Oct 7, 2024
2 parents 98a6cba + b93b0f0 commit 6e4e3c2
Show file tree
Hide file tree
Showing 9 changed files with 557 additions and 102 deletions.
63 changes: 62 additions & 1 deletion kobo/apps/audit_log/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from django.conf import settings
from django.db import models
from django.db.models import Case, Count, F, Min, Value, When
from django.db.models.functions import Cast, Concat, Trunc
from django.utils import timezone

from kobo.apps.kobo_auth.shortcuts import User
Expand All @@ -10,6 +12,7 @@
from kpi.constants import (
ACCESS_LOG_LOGINAS_AUTH_TYPE,
ACCESS_LOG_SUBMISSION_AUTH_TYPE,
ACCESS_LOG_SUBMISSION_GROUP_AUTH_TYPE,
ACCESS_LOG_UNKNOWN_AUTH_TYPE,
)
from kpi.fields.kpi_uid import UUID_LENGTH
Expand Down Expand Up @@ -124,6 +127,64 @@ def create(self, **kwargs):
**kwargs,
)

def with_group_key(self):
    """
    Annotate every access log with a `group_key` string.

    Submission logs (those whose `metadata.auth_type` equals
    ACCESS_LOG_SUBMISSION_AUTH_TYPE) get `date_created` truncated to the
    hour, cast to text, followed by `user_uid`; this is what lets
    submissions be grouped by user by hour. Every other log simply gets
    its own `id`, cast to text, so it is never grouped with anything else.
    """
    # creation time rounded down to the hour, rendered as a string
    hour_created = Cast(
        Trunc('date_created', 'hour'),
        output_field=models.CharField(),
    )
    # submissions: hour created + user_uid
    submission_key = When(
        metadata__auth_type=ACCESS_LOG_SUBMISSION_AUTH_TYPE,
        then=Concat(hour_created, 'user_uid'),
    )
    # everything else: the key is just the log's own id
    own_key = Cast('id', output_field=models.CharField())
    return self.annotate(group_key=Case(submission_key, default=own_key))

def with_submissions_grouped(self):
    """
    Return a minimal representation with submissions grouped by user by hour.

    Each result is a `values()` row holding `user__username`, `object_id`,
    `user_uid` and `group_key`, plus three annotations:

    - `count`: number of logs in the group (1 for anything that is not a
      submission, since those have a group key of their own)
    - `metadata`: `{'auth_type': ACCESS_LOG_SUBMISSION_GROUP_AUTH_TYPE}` for
      submission groups, the log's own metadata otherwise
    - `date_created`: the earliest `date_created` within the group
    """
    # submission groups get replacement metadata; other logs keep theirs
    grouped_metadata = Case(
        When(
            metadata__auth_type=ACCESS_LOG_SUBMISSION_AUTH_TYPE,
            then=Value(
                {'auth_type': ACCESS_LOG_SUBMISSION_GROUP_AUTH_TYPE},
                models.JSONField(),
            ),
        ),
        default=F('metadata'),
    )

    # NOTE(review): select_related() probably has no effect once values()
    # is applied below -- confirm before removing it.
    keyed_logs = self.with_group_key().select_related('user')

    # Having 'group_key' among the values is what makes the aggregates
    # below apply per group. Fields such as action and log_type are left
    # out for performance and clarity: they are the same for all of
    # these logs.
    rows = keyed_logs.values(
        'user__username', 'object_id', 'user_uid', 'group_key'
    )
    return rows.annotate(
        count=Count('pk'),
        metadata=grouped_metadata,
        date_created=Min('date_created'),
    )


class AccessLog(AuditLog):
objects = AccessLogManager()
Expand Down Expand Up @@ -157,7 +218,7 @@ def create_from_request(
)
is_submission = (
request.resolver_match is not None
and request.resolver_match.url_name == 'submissions'
and request.resolver_match.url_name in ['submissions', 'submissions-list']
and request.method == 'POST'
)
# a regular login may have an anonymous user as _cached_user, ignore that
Expand Down
21 changes: 20 additions & 1 deletion kobo/apps/audit_log/serializers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from django.contrib.auth import get_user_model
from rest_framework import serializers

from .models import AuditAction, AuditLog
from kpi.fields import RelativePrefixHyperlinkedRelatedField
from .models import AuditLog


class AuditLogSerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -45,3 +46,21 @@ def get_date_created(self, audit_log):

def get_username(self, audit_log):
    """Return the username of the user attached to `audit_log`."""
    log_user = audit_log.user
    return log_user.username


class AccessLogSerializer(serializers.Serializer):
    """
    Read-only style serializer for access log rows.

    Rows are accessed by key (see `get_date_created`) and the user fields
    read the flattened `user__username` entry.
    NOTE(review): presumably the rows are dicts produced by a `.values()`
    query -- verify against the view that uses this serializer.
    """

    # hyperlink to the user, looked up by username
    user = RelativePrefixHyperlinkedRelatedField(
        source='user__username',
        view_name='user-kpi-detail',
        lookup_field='user__username',
        lookup_url_kwarg='username',
        read_only=True,
    )
    date_created = serializers.SerializerMethodField()
    username = serializers.CharField(source='user__username')
    metadata = serializers.JSONField()
    user_uid = serializers.CharField()
    count = serializers.IntegerField()

    def get_date_created(self, audit_log):
        """Format the row's `date_created` as `YYYY-MM-DDTHH:MM:SSZ`."""
        # NOTE(review): the literal 'Z' suffix assumes the value is in UTC
        # -- confirm.
        timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
        created = audit_log['date_created']
        return created.strftime(timestamp_format)
Loading

0 comments on commit 6e4e3c2

Please sign in to comment.