Skip to content

Commit

Permalink
Merge pull request #4479 from freelawproject/4230_search_query_sugges…
Browse files Browse the repository at this point in the history
…ted_changes

4230 Create SearchQuery model to store user's queries
  • Loading branch information
mlissner authored Oct 15, 2024
2 parents c1bfbd9 + 118ee00 commit 3aad9a6
Show file tree
Hide file tree
Showing 6 changed files with 482 additions and 158 deletions.
41 changes: 41 additions & 0 deletions cl/lib/search_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from datetime import date, datetime, timedelta
from math import ceil
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from urllib.parse import parse_qs, urlencode

Expand Down Expand Up @@ -39,6 +40,7 @@
Court,
OpinionCluster,
RECAPDocument,
SearchQuery,
)

HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan")
Expand Down Expand Up @@ -1199,3 +1201,42 @@ async def clean_up_recap_document_file(item: RECAPDocument) -> None:
item.page_count = None
item.is_available = False
await item.asave()


def store_search_query(request: HttpRequest, search_results: dict) -> None:
    """Save a user's search query in a SearchQuery record.

    :param request: The request object. Its user (if authenticated) and its
    URL-encoded GET parameters are stored on the record.
    :param search_results: The dict returned by the `do_search` or
    `do_es_search` functions.
    :return: None
    """
    # `failed` is a non-nullable BooleanField, so coerce the flag to a real
    # boolean: a missing "error" key (None) or a non-boolean truthy value
    # must not be handed to the ORM as-is.
    is_error = bool(search_results.get("error"))
    # Only results that carry "results_details" are treated as Elasticsearch.
    results_details = search_results.get("results_details")
    is_es_search = results_details is not None

    search_query = SearchQuery(
        user=None if request.user.is_anonymous else request.user,
        get_params=request.GET.urlencode(),
        failed=is_error,
        query_time_ms=None,
        hit_cache=False,
        source=SearchQuery.WEBSITE,
        engine=SearchQuery.ELASTICSEARCH if is_es_search else SearchQuery.SOLR,
    )
    if is_error:
        # Leave `query_time_ms` as None if there is an error
        search_query.save()
        return

    if is_es_search:
        search_query.query_time_ms = ceil(results_details[0])
        # do_es_search returns 1 as query time if the micro cache was hit
        search_query.hit_cache = search_query.query_time_ms == 1
    else:
        # Solr searches are not cached unless a cache_key is passed
        # No cache_key is passed for the endpoints we are storing
        search_query.query_time_ms = ceil(
            search_results["results"].object_list.QTime
        )

    search_query.save()
95 changes: 95 additions & 0 deletions cl/search/migrations/0036_add_searchquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Generated by Django 5.1.1 on 2024-10-14 15:20

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    # Creates the SearchQuery model, which records one row per search query
    # together with its timing, cache, failure and engine information.

    # Must run after the previous `search` migration and after the
    # (swappable) user model exists, because of the `user` foreign key below.
    dependencies = [
        ("search", "0035_pghistory_v3_4_0_trigger_update"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="SearchQuery",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "source",
                    models.SmallIntegerField(
                        choices=[(1, "Website"), (2, "API request")],
                        help_text="The interface used to perform the query.",
                    ),
                ),
                (
                    "get_params",
                    models.TextField(
                        help_text="The GET parameters of the search query."
                    ),
                ),
                (
                    # Nullable: the only field besides `user` that may be
                    # left empty.
                    "query_time_ms",
                    models.IntegerField(
                        help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response.",
                        null=True,
                    ),
                ),
                (
                    "hit_cache",
                    models.BooleanField(
                        help_text="Whether the query hit the cache or not."
                    ),
                ),
                (
                    "failed",
                    models.BooleanField(
                        help_text="True if there was an error executing the query."
                    ),
                ),
                (
                    "engine",
                    models.SmallIntegerField(
                        choices=[(1, "Elasticsearch"), (2, "Solr")],
                        help_text="The engine that executed the search",
                    ),
                ),
                (
                    "date_created",
                    models.DateTimeField(
                        auto_now_add=True,
                        help_text="Datetime when the record was created.",
                    ),
                ),
                (
                    # Optional link to the user; CASCADE means deleting the
                    # user also deletes their stored queries.
                    "user",
                    models.ForeignKey(
                        blank=True,
                        help_text="The user who performed this search query.",
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="search_queries",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                # Index on the creation timestamp.
                "indexes": [
                    models.Index(
                        fields=["date_created"],
                        name="search_sear_date_cr_c5fff9_idx",
                    )
                ],
            },
        ),
    ]
8 changes: 8 additions & 0 deletions cl/search/migrations/0036_add_searchquery.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--
-- Create model SearchQuery
--
-- Open the transaction explicitly so that the table, its foreign key and its
-- indexes are created atomically and the closing COMMIT has a transaction
-- to end.
BEGIN;
CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "source" smallint NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NULL, "hit_cache" boolean NOT NULL, "failed" boolean NOT NULL, "engine" smallint NOT NULL, "date_created" timestamp with time zone NOT NULL, "user_id" integer NULL);
ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED;
CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id");
CREATE INDEX "search_sear_date_cr_c5fff9_idx" ON "search_searchquery" ("date_created");
COMMIT;
54 changes: 54 additions & 0 deletions cl/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz
from asgiref.sync import sync_to_async
from celery.canvas import chain
from django.contrib.auth.models import User
from django.contrib.contenttypes.fields import GenericRelation
from django.contrib.postgres.indexes import HashIndex
from django.core.exceptions import ValidationError
Expand Down Expand Up @@ -44,6 +45,7 @@
from cl.lib.string_utils import trunc
from cl.lib.utils import deepgetattr
from cl.search.docket_sources import DocketSources
from cl.users.models import User

HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan")

Expand Down Expand Up @@ -3921,3 +3923,55 @@ class SEARCH_TYPES:
(PARENTHETICAL, "Parenthetical"),
)
ALL_TYPES = [OPINION, RECAP, ORAL_ARGUMENT, PEOPLE]


class SearchQuery(models.Model):
    """A record of a single search query.

    Stores the raw GET parameters of the query, the interface it came from,
    the engine that executed it, how long it took, whether it hit the cache
    and whether it failed.
    """

    # Interfaces a query can come from.
    WEBSITE = 1
    API = 2
    SOURCES = (
        (WEBSITE, "Website"),
        (API, "API request"),
    )
    # Search engines that can execute a query.
    ELASTICSEARCH = 1
    SOLR = 2
    ENGINES = (
        (ELASTICSEARCH, "Elasticsearch"),
        (SOLR, "Solr"),
    )
    # Nullable so a query can be stored without an associated user.
    user = models.ForeignKey(
        User,
        help_text="The user who performed this search query.",
        related_name="search_queries",
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    source = models.SmallIntegerField(
        help_text="The interface used to perform the query.", choices=SOURCES
    )
    get_params = models.TextField(
        help_text="The GET parameters of the search query."
    )
    # Nullable so a query can be stored without a timing value.
    query_time_ms = models.IntegerField(
        help_text="The milliseconds to execute the query, as returned in "
        "the ElasticSearch or Solr response.",
        null=True,
    )
    hit_cache = models.BooleanField(
        help_text="Whether the query hit the cache or not."
    )
    failed = models.BooleanField(
        help_text="True if there was an error executing the query."
    )
    engine = models.SmallIntegerField(
        help_text="The engine that executed the search", choices=ENGINES
    )
    date_created = models.DateTimeField(
        help_text="Datetime when the record was created.",
        auto_now_add=True,
    )

    class Meta:
        indexes = [
            models.Index(fields=["date_created"]),
        ]

    def __str__(self) -> str:
        """Return a short label instead of the default "SearchQuery object"."""
        return (
            f"{self.pk}: {self.get_source_display()} query "
            f"via {self.get_engine_display()}"
        )
Loading

0 comments on commit 3aad9a6

Please sign in to comment.