Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate record and text in full text search #2152

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions application/Module.php
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,8 @@ public function searchFulltext(ZendEvent $event)
}
$qb = $event->getParam('queryBuilder');

$match = 'MATCH(omeka_fulltext_search.title, omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search)';
$match = '(MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record) AGAINST (:omeka_fulltext_search) > 0 OR MATCH(omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search) > 0)';
$matchOrder = '(MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record) AGAINST (:omeka_fulltext_search) OR MATCH(omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search))';

if ('api.search.query' === $event->getName()) {

Expand All @@ -727,7 +728,7 @@ public function searchFulltext(ZendEvent $event)
$qb->innerJoin('Omeka\Entity\FulltextSearch', 'omeka_fulltext_search', 'WITH', $joinConditions);

// Filter out resources with no similarity.
$qb->andWhere(sprintf('%s > 0', $match));
$qb->andWhere($match);

// Set visibility constraints.
$acl = $this->getServiceLocator()->get('Omeka\Acl');
Expand All @@ -754,7 +755,7 @@ public function searchFulltext(ZendEvent $event)

if (isset($query['sort_by_default']) || !$qb->getDQLPart('orderBy')) {
$sortOrder = 'asc' === $query['sort_order'] ? 'ASC' : 'DESC';
$qb->orderBy($match, $sortOrder);
$qb->orderBy($matchOrder, $sortOrder);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ public function __construct(?\Closure $initializer = null, ?\Closure $cloner = n
public function __sleep()
{
if ($this->__isInitialized__) {
return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'text'];
return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'record', 'text'];
}

return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'text'];
return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'record', 'text'];
}

/**
Expand Down Expand Up @@ -273,6 +273,28 @@ public function getTitle()
return parent::getTitle();
}

/**
* {@inheritDoc}
*
* Proxy override: runs the proxy initializer (when one is set) and then
* forwards the call to the parent entity's setRecord().
*/
public function setRecord($record)
{

// Short-circuit call: the initializer closure is invoked only when it is
// truthy; it receives this proxy, the method name and the call arguments.
// NOTE(review): presumably this triggers Doctrine's lazy-loading - confirm.
$this->__initializer__ && $this->__initializer__->__invoke($this, 'setRecord', [$record]);

return parent::setRecord($record);
}

/**
* {@inheritDoc}
*
* Proxy override: runs the proxy initializer (when one is set) and then
* returns whatever the parent entity's getRecord() returns.
*/
public function getRecord()
{

// Short-circuit call: the initializer closure is invoked only when it is
// truthy; it receives this proxy, the method name and an empty argument list.
// NOTE(review): presumably this triggers Doctrine's lazy-loading - confirm.
$this->__initializer__ && $this->__initializer__->__invoke($this, 'getRecord', []);

return parent::getRecord();
}

/**
* {@inheritDoc}
*/
Expand Down
5 changes: 4 additions & 1 deletion application/data/install/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ CREATE TABLE `fulltext_search` (
`owner_id` int DEFAULT NULL,
`is_public` tinyint(1) NOT NULL,
`title` longtext COLLATE utf8mb4_unicode_ci,
`record` longtext COLLATE utf8mb4_unicode_ci,
`text` longtext COLLATE utf8mb4_unicode_ci,
PRIMARY KEY (`id`,`resource`),
KEY `IDX_AA31FE4A7E3C61F9` (`owner_id`),
FULLTEXT KEY `IDX_AA31FE4A2B36786B3B8BA7C7` (`title`,`text`),
KEY `is_public` (`is_public`),
FULLTEXT KEY `IDX_AA31FE4A2B36786B9B349F91` (`title`,`record`),
FULLTEXT KEY `IDX_AA31FE4A3B8BA7C7` (`text`),
CONSTRAINT `FK_AA31FE4A7E3C61F9` FOREIGN KEY (`owner_id`) REFERENCES `user` (`id`) ON DELETE SET NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `item` (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php declare(strict_types=1);

namespace Omeka\Db\Migrations;

use Doctrine\DBAL\Connection;
use Omeka\Db\Migration\MigrationInterface;

class AddIndexFullTextIsPublic implements MigrationInterface
{
    /**
     * Add a plain index on the `is_public` column of `fulltext_search`.
     *
     * The statement is run on a best-effort basis: any exception it raises
     * is swallowed, on the assumption that it means the index is already
     * present.
     *
     * @param Connection $conn Connection the ALTER TABLE statement is run on.
     */
    public function up(Connection $conn)
    {
        $statement = 'ALTER TABLE `fulltext_search` ADD INDEX `is_public` (`is_public`);';

        try {
            $conn->executeStatement($statement);
        } catch (\Exception $exception) {
            // Deliberately ignored: the index is assumed to exist already.
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php declare(strict_types=1);

namespace Omeka\Db\Migrations;

use Doctrine\DBAL\Connection;
use Laminas\ServiceManager\ServiceLocatorInterface;
use Omeka\Db\Migration\ConstructedMigrationInterface;
use Omeka\Job\Dispatcher as JobDispatcher;

class SeparateRecordAndTextForFullText implements ConstructedMigrationInterface
{
    /**
     * Dispatcher used to queue the reindexing job once the schema is updated.
     *
     * @var \Omeka\Job\Dispatcher
     */
    private $jobDispatcher;

    /**
     * @param JobDispatcher $jobDispatcher Dispatcher used by up() to queue the reindex job.
     */
    public function __construct(JobDispatcher $jobDispatcher)
    {
        $this->jobDispatcher = $jobDispatcher;
    }

    /**
     * Split the fulltext index into a (title, record) index and a (text) index.
     *
     * The table is emptied first, the `record` column is added after `title`,
     * the combined (title, text) fulltext index is dropped and the two new
     * fulltext indexes are created. Because the table is left empty, a
     * fulltext reindexing job is dispatched at the end.
     *
     * @param Connection $conn Connection the schema statements are run on.
     */
    public function up(Connection $conn)
    {
        // Run each statement on its own: sending several statements in one
        // call depends on driver multi-statement support and may hide an
        // error raised by any statement after the first one.
        $statements = [
            'TRUNCATE TABLE `fulltext_search`',
            "ALTER TABLE `fulltext_search` ADD `record` longtext COLLATE 'utf8mb4_unicode_ci' NULL AFTER `title`",
            'ALTER TABLE `fulltext_search` DROP INDEX `IDX_AA31FE4A2B36786B3B8BA7C7`',
            'ALTER TABLE `fulltext_search` ADD FULLTEXT `IDX_AA31FE4A2B36786B9B349F91` (`title`, `record`)',
            'ALTER TABLE `fulltext_search` ADD FULLTEXT `IDX_AA31FE4A3B8BA7C7` (`text`)',
        ];
        foreach ($statements as $statement) {
            $conn->executeStatement($statement);
        }

        // The table was truncated above, so every resource has to be indexed
        // again. Use the core fulltext indexing job rather than a job that
        // belongs to an optional module and may not be installed.
        $this->jobDispatcher->dispatch(\Omeka\Job\IndexFulltextSearch::class);
    }

    /**
     * Build the migration with the job dispatcher taken from the service locator.
     *
     * @param ServiceLocatorInterface $services
     * @return self
     */
    public static function create(ServiceLocatorInterface $services)
    {
        return new self($services->get(\Omeka\Job\Dispatcher::class));
    }
}
17 changes: 16 additions & 1 deletion application/src/Api/Adapter/AbstractResourceEntityAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -715,15 +715,29 @@ public function getFulltextTitle($resource)
return $resource->getTitle();
}

/**
* Get the "record" fulltext of the passed resource.
*
* Delegates to getFulltext() with the type 'record'.
*
* @param mixed $resource
* @return string
*/
public function getFulltextRecord($resource)
{
return $this->getFulltext($resource, 'record');
}

/**
* Get the "text" fulltext of the passed resource.
*
* Delegates to getFulltext() with the type 'text'.
*
* @param mixed $resource
* @return string
*/
public function getFulltextText($resource)
{
return $this->getFulltext($resource, 'text');
}

protected function getFulltext($resource, string $type)
{
$services = $this->getServiceLocator();
$dataTypes = $services->get('Omeka\DataTypeManager');
$view = $services->get('ViewRenderer');
$eventManager = $this->getEventManager();

$criteria = Criteria::create()->where(Criteria::expr()->eq('isPublic', true));
$args = $eventManager->prepareArgs(['resource' => $resource, 'criteria' => $criteria]);
$args = $eventManager->prepareArgs([
'resource' => $resource,
'type' => $type,
'criteria' => $criteria,
]);
$event = new Event('api.get_fulltext_text.value_criteria', $this, $args);
$eventManager->triggerEvent($event);
$criteria = $args['criteria'];
Expand All @@ -738,6 +752,7 @@ public function getFulltextText($resource)
$valueAnnotationCriteria = Criteria::create()->where(Criteria::expr()->eq('isPublic', true));
$args = $eventManager->prepareArgs([
'resource' => $resource,
'type' => $type,
'value' => $value,
'criteria' => $valueAnnotationCriteria,
]);
Expand Down
10 changes: 9 additions & 1 deletion application/src/Api/Adapter/FulltextSearchableInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ public function getFulltextIsPublic($resource);
public function getFulltextTitle($resource);

/**
* Get the the text of the passed resource.
* Get the record of the passed resource.
*
* @param mixed $resource
* @return string
*/
public function getFulltextRecord($resource);

/**
* Get the raw text (transcription, OCR, etc.) of the passed resource.
*
* @param mixed $resource
* @return string
Expand Down
16 changes: 16 additions & 0 deletions application/src/Api/Adapter/ItemAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,22 @@ public function preprocessBatchUpdate(array $data, Request $request)
return $data;
}

/**
 * Get the record fulltext of an item, including the record of its media.
 *
 * The item's own record comes first, followed by the record of each of its
 * media in iteration order. Null and empty-string entries are dropped and
 * the remaining ones are joined with a newline.
 *
 * @param mixed $resource
 * @return string
 */
public function getFulltextRecord($resource)
{
    // The item's own record, as built by the parent adapter.
    $collected = [parent::getFulltextRecord($resource)];

    // One entry per attached media, built by the media adapter.
    $mediaAdapter = $this->getAdapter('media');
    foreach ($resource->getMedia() as $media) {
        $collected[] = $mediaAdapter->getFulltextRecord($media);
    }

    // Keep only entries that are neither null nor the empty string.
    $kept = [];
    foreach ($collected as $entry) {
        if (null === $entry || '' === $entry) {
            continue;
        }
        $kept[] = $entry;
    }

    return implode("\n", $kept);
}

public function getFulltextText($resource)
{
$texts = [];
Expand Down
12 changes: 12 additions & 0 deletions application/src/Api/Adapter/MediaAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,18 @@ public function preprocessBatchUpdate(array $data, Request $request)
return $data;
}

/**
 * Get the record fulltext of a media.
 *
 * Starts from the record built by the parent adapter. When the media's
 * renderer is fulltext searchable and provides a getFulltextRecord()
 * method, its output is appended after a single space.
 *
 * @param mixed $resource
 * @return string
 */
public function getFulltextRecord($resource)
{
    $renderer = $this->getServiceLocator()
        ->get('Omeka\Media\Renderer\Manager')
        ->get($resource->getRenderer());
    $fulltextRecord = parent::getFulltextRecord($resource);
    // NOTE(review): the renderer interface, as far as it is visible here,
    // declares only getFulltextText(). An instanceof check alone therefore
    // does not guarantee that getFulltextRecord() exists, and calling it on
    // a renderer without that method would be a fatal error.
    if ($renderer instanceof FulltextSearchableInterface
        && method_exists($renderer, 'getFulltextRecord')
    ) {
        $fulltextRecord .= ' ' . $renderer->getFulltextRecord($this->getRepresentation($resource));
    }
    return $fulltextRecord;
}

public function getFulltextText($resource)
{
$renderer = $this->getServiceLocator()
Expand Down
5 changes: 5 additions & 0 deletions application/src/Api/Adapter/SitePageAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,11 @@ public function getFulltextTitle($resource)
return $resource->getTitle();
}

/**
* Get the record fulltext of a site page.
*
* Always returns an empty string: this adapter contributes nothing to the
* record part of the fulltext index.
*
* @param mixed $resource
* @return string
*/
public function getFulltextRecord($resource)
{
return '';
}

public function getFulltextText($resource)
{
$services = $this->getServiceLocator();
Expand Down
19 changes: 18 additions & 1 deletion application/src/Entity/FulltextSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
* @Entity
* @Table(
* indexes={
* @Index(columns={"title", "text"}, flags={"fulltext"})
* @Index(name="is_public", columns={"is_public"}),
* @Index(columns={"title", "record"}, flags={"fulltext"}),
* @Index(columns={"text"}, flags={"fulltext"})
* }
* )
*/
Expand Down Expand Up @@ -39,6 +41,11 @@ class FulltextSearch
*/
protected $title;

/**
* @Column(type="text", nullable=true)
*/
protected $record;

/**
* @Column(type="text", nullable=true)
*/
Expand Down Expand Up @@ -94,6 +101,16 @@ public function getTitle()
return $this->title;
}

/**
* Set the value of the `record` property.
*
* @param string|null $record
*/
public function setRecord($record)
{
$this->record = $record;
}

/**
* Get the value of the `record` property.
*
* @return string|null
*/
public function getRecord()
{
return $this->record;
}

public function setText($text)
{
$this->text = $text;
Expand Down
2 changes: 1 addition & 1 deletion application/src/Job/IndexFulltextSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public function perform()

// First delete all rows from the fulltext table to clear out the
// resources that don't belong.
$conn->executeStatement('DELETE FROM `fulltext_search`');
$conn->executeStatement('TRUNCATE TABLE `fulltext_search`');

// Then iterate through all resource types and index the ones that are
// fulltext searchable. Note that we don't index "resource" and "value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ interface FulltextSearchableInterface
/**
* Get the text of the passed media.
*
* @param Media $media
* @param MediaRepresentation $media
* @return string
*/
public function getFulltextText(MediaRepresentation $media);
Expand Down
7 changes: 4 additions & 3 deletions application/src/Stdlib/FulltextSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,19 @@ public function save(ResourceInterface $resource, AdapterInterface $adapter)
$ownerId = $owner ? $owner->getId() : null;

$sql = 'INSERT INTO `fulltext_search` (
`id`, `resource`, `owner_id`, `is_public`, `title`, `text`
`id`, `resource`, `owner_id`, `is_public`, `title`, `record`, `text`
) VALUES (
:id, :resource, :owner_id, :is_public, :title, :text
:id, :resource, :owner_id, :is_public, :title, :record, :text
) ON DUPLICATE KEY UPDATE
`owner_id` = :owner_id, `is_public` = :is_public, `title` = :title, `text` = :text';
`owner_id` = :owner_id, `is_public` = :is_public, `title` = :title, `record` = :record, `text` = :text';
$stmt = $this->conn->prepare($sql);

$stmt->bindValue('id', $resourceId, PDO::PARAM_INT);
$stmt->bindValue('resource', $resourceName, PDO::PARAM_STR);
$stmt->bindValue('owner_id', $ownerId, PDO::PARAM_INT);
$stmt->bindValue('is_public', $adapter->getFulltextIsPublic($resource), PDO::PARAM_BOOL);
$stmt->bindValue('title', $adapter->getFulltextTitle($resource), PDO::PARAM_STR);
$stmt->bindValue('record', $adapter->getFulltextRecord($resource), PDO::PARAM_STR);
$stmt->bindValue('text', $adapter->getFulltextText($resource), PDO::PARAM_STR);
$stmt->executeStatement();
}
Expand Down
Loading