Skip to content

Commit

Permalink
SmartSearch tuning
Browse files Browse the repository at this point in the history
* Introduced per-weighted-facet default weight
* Introduced a cap on matched resources ($matchesLimit parameter of the
  search() method) to avoid hopelessly long queries
* Restored skipping of resources with any facet weight of 0
  • Loading branch information
zozlak committed Mar 15, 2024
1 parent a7602ef commit 8996ac6
Showing 1 changed file with 99 additions and 89 deletions.
188 changes: 99 additions & 89 deletions src/acdhOeaw/arche/lib/SmartSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ class SmartSearch {
* @var array<string, object>
*/
private array $rangeFacets = [];
private float $facetsDefaultWeight = 1.0;
private float $exactWeight = 10.0;
private float $langWeight = 10.0;
private string $namedEntitiesProperty = RDF::RDF_TYPE;
Expand Down Expand Up @@ -112,8 +111,9 @@ public function setPropertyWeights(array $weights,
*/
public function setWeightedFacets(array $facets, float $defaultWeight = 1.0): self {
foreach ($facets as $i) {
$i->weights ??= null;
$i->type ??= '';
$i->weights ??= null;
$i->type ??= '';
$i->defaultWeight ??= $defaultWeight;
if (is_object($i->weights)) {
$i->weights = (array) $i->weights;
}
Expand All @@ -122,8 +122,7 @@ public function setWeightedFacets(array $facets, float $defaultWeight = 1.0): se
}
}
unset($i);
$this->facets = $facets;
$this->facetsDefaultWeight = $defaultWeight;
$this->facets = $facets;
return $this;
}

Expand Down Expand Up @@ -200,7 +199,7 @@ public function search(string $phrase, string $language = '',
bool $inBinary = true, array $allowedProperties = [],
array $searchTerms = [],
?SearchTerm $spatialTerm = null,
array $parentIds = []): void {
array $parentIds = [], int $matchesLimit = 10000): void {
if ($this->pdo->inTransaction()) {
$this->pdo->rollBack();
}
Expand All @@ -215,9 +214,9 @@ public function search(string $phrase, string $language = '',

// FILTERS
// filters are applied both
$filterExp = '';
$filterExp = '';
if ($filteredSearch) {
$filterExp = "WHERE EXISTS (SELECT 1 FROM filters WHERE id = s.id)";
$filterExp = "WHERE EXISTS (SELECT 1 FROM filters WHERE id = s.id)";
$filterQuery = "CREATE TEMPORARY TABLE filters AS (\nSELECT id FROM\n";
$filterParam = [];
$n = 0;
Expand All @@ -241,7 +240,7 @@ public function search(string $phrase, string $language = '',
}
$filterQuery .= ")\n";
$this->queryLog?->debug((string) (new QueryPart($filterQuery, $filterParam)));
$t = microtime(true);
$t = microtime(true);
$filterQuery = $this->pdo->prepare($filterQuery);
$filterQuery->execute($filterParam);
$this->queryLog?->debug('Execution time ' . microtime(true) - $t);
Expand Down Expand Up @@ -275,7 +274,6 @@ public function search(string $phrase, string $language = '',
$searchParam = array_merge($searchParam, $tmpQuery->param);
}
$matchQuery = substr($matchQuery, 0, -2) . "\n"; // get rid of the final comma

// INITIAL SEARCH
if (!$indexSearch && $filteredSearch) {
$searchQuery .= "
Expand Down Expand Up @@ -317,8 +315,8 @@ public function search(string $phrase, string $language = '',
FROM $tmpQuery->query $inBinaryF $propsFilter
)
";
$searchParam = array_merge($searchParam, $tmpQuery->param, $propsParam);
$curTab = 'search1';
$searchParam = array_merge($searchParam, $tmpQuery->param, $propsParam);
$curTab = 'search1';
} else {
$propsFilter = '';
$propsParam = [];
Expand Down Expand Up @@ -360,14 +358,14 @@ public function search(string $phrase, string $language = '',
$phrase, $this->exactWeight, $langParam, $this->langWeight,
SearchTerm::escapeFts($phrase)
],
$propsParam
$propsParam
);
$curTab = 'search1';
$curTab = 'search1';

// SPATIAL SEARCH
if ($spatialTerm !== null) {
$tmpQuery = $spatialTerm->getSqlQuery($baseUrl, $idProp, []);
$inBinaryF = $inBinary ? '' : 'AND ss.id IS NULL';
$tmpQuery = $spatialTerm->getSqlQuery($baseUrl, $idProp, []);
$inBinaryF = $inBinary ? '' : 'AND ss.id IS NULL';
$searchQuery .= ",
search1s AS (
SELECT *
Expand All @@ -377,13 +375,14 @@ public function search(string $phrase, string $language = '',
)
";
$searchParam = array_merge($searchParam, $tmpQuery->param);
$curTab = 'search1s';
$curTab = 'search1s';
}
}

if (!$linkNamedEntities) {
$searchQuery .= "SELECT * FROM $curTab s $filterExp\n";
$matchQuery .= "
$searchQuery .= "SELECT * FROM $curTab s $filterExp ORDER BY weight_m DESC LIMIT ?\n";
$searchParam[] = $matchesLimit;
$matchQuery .= "
SELECT
s.id, s.ftsid, s.property,
null::text AS link_property, null::text AS facet, null::text AS value,
Expand All @@ -392,40 +391,44 @@ public function search(string $phrase, string $language = '',
search9 s
LEFT JOIN weights_p w ON s.property = w.value
";
$matchParam[] = $this->propDefaultWeight;
$matchParam[] = $this->propDefaultWeight;
} else {
// LINK TO NAMED ENTITIES
$neIn = substr(str_repeat('?, ', count($this->namedEntitiesValues)), 0, -2);
$searchQuery .= "
SELECT
id,
ftsid,
property,
NULL::text AS link_property,
weight_m * coalesce(weight_p, ?) AS weight
FROM
$curTab s
LEFT JOIN weights_p w ON s.property = w.value
$filterExp
UNION
SELECT
s.id,
t.ftsid,
t.property,
s.property AS link_property,
t.weight_m * coalesce(t.weight_p, ?) * coalesce(wne.weight_ne, ?) AS weight
FROM
(
SELECT DISTINCT ON (id) *
FROM
$curTab s
LEFT JOIN weights_p w ON s.property = w.value
ORDER BY id, coalesce(weight_p, ?) * weight_m DESC
) t
JOIN metadata mne ON t.id = mne.id AND mne.property = ? AND mne.value IN ($neIn)
JOIN relations s ON t.id = s.target_id
LEFT JOIN weights_ne wne ON s.property = wne.value
$filterExp
SELECT * FROM (
SELECT
id,
ftsid,
property,
NULL::text AS link_property,
weight_m * coalesce(weight_p, ?) AS weight
FROM
$curTab s
LEFT JOIN weights_p w ON s.property = w.value
$filterExp
UNION
SELECT
s.id,
t.ftsid,
t.property,
s.property AS link_property,
t.weight_m * coalesce(t.weight_p, ?) * coalesce(wne.weight_ne, ?) AS weight
FROM
(
SELECT DISTINCT ON (id) *
FROM
$curTab s
LEFT JOIN weights_p w ON s.property = w.value
ORDER BY id, coalesce(weight_p, ?) * weight_m DESC
) t
JOIN metadata mne ON t.id = mne.id AND mne.property = ? AND mne.value IN ($neIn)
JOIN relations s ON t.id = s.target_id
LEFT JOIN weights_ne wne ON s.property = wne.value
$filterExp
) t
ORDER BY weight DESC
LIMIT ?
";
$searchParam = array_merge(
$searchParam,
Expand All @@ -436,11 +439,12 @@ public function search(string $phrase, string $language = '',
$this->propDefaultWeight, // subselect of second union part
$this->namedEntitiesProperty // join with mne
],
$this->namedEntitiesValues // join with mne
$this->namedEntitiesValues, // join with mne
[$matchesLimit],
);
$matchQuery .= "SELECT id, ftsid, property, link_property, null::text AS facet, null::text AS value, weight FROM search9\n";
$matchQuery .= "SELECT id, ftsid, property, link_property, null::text AS facet, null::text AS value, weight FROM search9\n";
}
$this->queryLog?->debug((string)(new QueryPart($searchQuery, $searchParam)));
$this->queryLog?->debug((string) (new QueryPart($searchQuery, $searchParam)));
$t = microtime(true);
$searchQuery = $this->pdo->prepare($searchQuery);
$searchQuery->execute($searchParam);
Expand All @@ -449,20 +453,20 @@ public function search(string $phrase, string $language = '',

// ORDINARY FACETS DATA
foreach ($this->facets as $mn => $facet) {
$srcTab = 'metadata';
$valCol = 'value';
$srcTab = 'metadata';
$valCol = 'value';
if ($facet->type === 'object') {
$srcTab = 'relations';
$valCol = 'target_id';
$srcTab = 'relations';
$valCol = 'target_id';
}
$weightQuery = '';
$weightValue = 'null::float';
if (is_array($facet->weights)){
$weightQuery = "LEFT JOIN weights_$mn w ON m.$valCol = w.value";
$weightValue = "coalesce(w.weight_$mn, ?)";
$matchParam[] = $this->facetsDefaultWeight;
if (is_array($facet->weights)) {
$weightQuery = "LEFT JOIN weights_$mn w ON m.$valCol = w.value";
$weightValue = "coalesce(w.weight_$mn, ?)";
$matchParam[] = $facet->defaultWeight;
}
$matchQuery .= "UNION
$matchQuery .= "UNION
SELECT
s.id,
null::bigint as fstid,
Expand All @@ -481,8 +485,8 @@ public function search(string $phrase, string $language = '',
// RANGE FACETS DATA
$rangeFilterExp = 'AND' . substr($filterExp, 5);
foreach ($this->rangeFacets as $facetKey => $facet) {
$minPlch = substr(str_repeat(', ?', count($facet->start)), 2);
$maxPlch = substr(str_repeat(', ?', count($facet->end)), 2);
$minPlch = substr(str_repeat(', ?', count($facet->start)), 2);
$maxPlch = substr(str_repeat(', ?', count($facet->end)), 2);
$matchQuery .= "UNION
SELECT
s.id,
Expand Down Expand Up @@ -511,15 +515,21 @@ public function search(string $phrase, string $language = '',
GROUP BY 1
) t2 USING (id)
";
$matchParam = array_merge($matchParam, [$facetKey], $facet->start, $facet->end);
$matchParam = array_merge($matchParam, [$facetKey], $facet->start, $facet->end);
}

$this->queryLog?->debug((string) (new QueryPart($matchQuery, $matchParam)));
$this->pdo->beginTransaction();
$t = microtime(true);
$t = microtime(true);
$matchQuery = $this->pdo->prepare($matchQuery);
$matchQuery->execute($matchParam);
$this->queryLog?->debug('Execution time ' . (microtime(true) - $t));

$query = "DELETE FROM " . self::TEMPTABNAME . " WHERE id IN (SELECT id FROM " . self::TEMPTABNAME . " WHERE weight = 0)";
$this->queryLog?->debug($query);
$t = microtime(true);
$this->pdo->query($query);
$this->queryLog?->debug('Execution time ' . microtime(true) - $t);
}

/**
Expand Down Expand Up @@ -573,8 +583,8 @@ public function getSearchPage(int $page, int $pageSize,
$param[] = $offset;
$param[] = $pageSize;
$this->queryLog?->debug(new QueryPart($query, $param));
$t = microtime(true);
$query = $this->pdo->prepare($query);
$t = microtime(true);
$query = $this->pdo->prepare($query);
$query->execute($param);
$this->queryLog?->debug('Execution time ' . (microtime(true) - $t));

Expand Down Expand Up @@ -620,20 +630,20 @@ public function getSearchPage(int $page, int $pageSize,
$this->schema->searchMatch, RDF::XSD_ANY_URI,
$this->schema->searchWeight, RDF::XSD_FLOAT,
];
$t = microtime(true);
$t = microtime(true);
$this->queryLog?->debug(new QueryPart($query, $param));
$query = $this->pdo->prepare($query);
$query = $this->pdo->prepare($query);
$query->execute($param);
$this->queryLog?->debug('Execution time ' . (microtime(true) - $t));
while ($row = $query->fetchObject()) {
while ($row = $query->fetchObject()) {
yield $row;
}

// metadata of matched resources
$query = "SELECT id FROM _page";
$t = microtime(true);
$t = microtime(true);
$query = $this->repo->getPdoStatementBySqlQuery($query, [], $config);
$this->queryLog?->debug('Execution time ' . (microtime(true) - $t));
$this->queryLog->debug('Execution time ' . (microtime(true) - $t));
while ($row = $query->fetchObject()) {
yield $row;
}
Expand All @@ -646,7 +656,7 @@ public function getSearchPage(int $page, int $pageSize,
*/
public function getSearchFacets(string $prefLang = ''): array {
$stats = [];
$t = microtime(true);
$t = microtime(true);

// MATCH PROPERTY
$query = $this->pdo->query("
Expand All @@ -662,14 +672,14 @@ public function getSearchFacets(string $prefLang = ''): array {
$values = $query->fetchAll(PDO::FETCH_OBJ);
if (count($values) > 0) {
$stats['property'] = [
'continues' => false,
'values' => $values,
'continuous' => false,
'values' => $values,
];
}

// LINK PROPERTY
if ($this->linkNamedEntities()) {
$query = $this->pdo->query("
$query = $this->pdo->query("
SELECT
link_property AS value,
link_property AS label,
Expand All @@ -682,8 +692,8 @@ public function getSearchFacets(string $prefLang = ''): array {
$values = $query->fetchAll(PDO::FETCH_OBJ);
if (count($values) > 0) {
$stats['linkProperty'] = [
'continues' => false,
'values' => $values,
'continuous' => false,
'values' => $values,
];
}
}
Expand All @@ -694,12 +704,12 @@ public function getSearchFacets(string $prefLang = ''): array {
foreach ($this->facets as $facet) {
if ($facet->type === 'object') {
$objectFacets[] = $facet->property;
}else{
} else {
$valueFacets[] = $facet->property;
}
$stats[$facet->property] = [
'values' => [],
'continues' => false,
'values' => [],
'continuous' => false,
];
}
// object facets
Expand Down Expand Up @@ -731,7 +741,7 @@ public function getSearchFacets(string $prefLang = ''): array {
[$this->schema->label, $prefLang]
);
$query->execute($param);
while($row = $query->fetchObject()) {
while ($row = $query->fetchObject()) {
$facet = $row->facet;
unset($row->facet);
$stats[$facet]['values'][] = $row;
Expand All @@ -751,9 +761,9 @@ public function getSearchFacets(string $prefLang = ''): array {
ORDER BY 1, 4 DESC
");
$query->execute($valueFacets);
while($row = $query->fetchObject()) {
$row->value = is_numeric($row->value) ? (float) $row->value : $row->value;
$facet = $row->facet;
while ($row = $query->fetchObject()) {
$row->value = is_numeric($row->value) ? (float) $row->value : $row->value;
$facet = $row->facet;
unset($row->facet);
$stats[$facet]['values'][] = $row;
}
Expand Down Expand Up @@ -826,10 +836,10 @@ public function getSearchFacets(string $prefLang = ''): array {
$query->execute($param);
$values = $query->fetchAll(PDO::FETCH_OBJ);
$stats[$fid] = [
'continues' => true,
'values' => $values,
'min' => (float) reset($values)?->lower,
'max' => (float) end($values)?->upper,
'continuous' => true,
'values' => $values,
'min' => (float) reset($values)?->lower,
'max' => (float) end($values)?->upper,
];
}

Expand Down

0 comments on commit 8996ac6

Please sign in to comment.