Skip to content

Commit

Permalink
Merge pull request #394 from Flowpack/fix-node-move-issues
Browse files Browse the repository at this point in the history
BUGFIX: Moving nodes must not lead to duplicated documents
  • Loading branch information
daniellienert authored May 4, 2022
2 parents 294d42c + 579d4be commit 307b252
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 97 deletions.
45 changes: 0 additions & 45 deletions Classes/Domain/Model/TargetContextPath.php

This file was deleted.

7 changes: 7 additions & 0 deletions Classes/Driver/AbstractIndexerDriver.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* source code.
*/

use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier\DocumentIdentifierGeneratorInterface;
use Neos\ContentRepository\Domain\Model\NodeInterface;
use Neos\Flow\Annotations as Flow;
use Neos\Flow\Log\Utility\LogEnvironment;
Expand All @@ -28,6 +29,12 @@ abstract class AbstractIndexerDriver extends AbstractDriver
*/
protected $nodeTypeMappingBuilder;

/**
* @Flow\Inject
* @var DocumentIdentifierGeneratorInterface
*/
protected $documentIdentifierGenerator;

/**
* Whether the node is configured as fulltext root.
*
Expand Down
15 changes: 7 additions & 8 deletions Classes/Driver/Version6/IndexerDriver.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
* source code.
*/

use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Domain\Model\TargetContextPath;
use Neos\Flow\Annotations as Flow;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Indexer\NodeIndexer;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Driver\AbstractIndexerDriver;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Driver\IndexerDriverInterface;
use Flowpack\ElasticSearch\Domain\Model\Document as ElasticSearchDocument;
use Neos\ContentRepository\Domain\Model\NodeInterface;
use Neos\Flow\Annotations as Flow;
use Neos\Flow\Log\Utility\LogEnvironment;

/**
Expand Down Expand Up @@ -81,7 +81,10 @@ public function document(string $indexName, NodeInterface $node, ElasticSearchDo

/**
* {@inheritdoc}
* @throws \Neos\Flow\Persistence\Exception\IllegalObjectTypeException
* @param NodeInterface $node
* @param array $fulltextIndexOfNode
* @param string|null $targetWorkspaceName
* @return array
*/
public function fulltext(NodeInterface $node, array $fulltextIndexOfNode, string $targetWorkspaceName = null): array
{
Expand All @@ -90,11 +93,7 @@ public function fulltext(NodeInterface $node, array $fulltextIndexOfNode, string
return [];
}

$closestFulltextNodeContextPath = $closestFulltextNode->getContextPath();
if ($targetWorkspaceName !== null) {
$closestFulltextNodeContextPath = (string)(new TargetContextPath($node, $targetWorkspaceName, $closestFulltextNodeContextPath));
}
$closestFulltextNodeDocumentIdentifier = sha1($closestFulltextNodeContextPath);
$closestFulltextNodeDocumentIdentifier = $this->documentIdentifierGenerator->generate($closestFulltextNode, $targetWorkspaceName);

if ($closestFulltextNode->isRemoved()) {
// fulltext root is removed, abort silently...
Expand Down
67 changes: 24 additions & 43 deletions Classes/Indexer/NodeIndexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\ErrorHandling\ErrorStorageInterface;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Exception;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DimensionsService;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier\DocumentIdentifierGeneratorInterface;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\IndexNameService;
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\NodeTypeIndexingConfiguration;
use Flowpack\ElasticSearch\Domain\Model\Document as ElasticSearchDocument;
Expand Down Expand Up @@ -56,13 +57,6 @@ class NodeIndexer extends AbstractNodeIndexer implements BulkNodeIndexerInterfac
*/
protected $nodeTypeMappingBuilder;

/**
* Optional postfix for the index, e.g. to have different indexes by timestamp.
*
* @var string
*/
protected $indexNamePostfix = '';

/**
* @Flow\Inject
* @var ErrorHandlingService
Expand Down Expand Up @@ -129,18 +123,6 @@ class NodeIndexer extends AbstractNodeIndexer implements BulkNodeIndexerInterfac
*/
protected $indexConfiguration;

/**
* The current Elasticsearch bulk request, in the format required by http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html
*
* @var array
*/
protected $currentBulkRequest = [];

/**
* @var boolean
*/
protected $bulkProcessing = false;

/**
* @Flow\Inject
* @var DimensionsService
Expand All @@ -153,12 +135,30 @@ class NodeIndexer extends AbstractNodeIndexer implements BulkNodeIndexerInterfac
*/
protected $nodeTypeIndexingConfiguration;

/**
* @Flow\Inject
* @var DocumentIdentifierGeneratorInterface
*/
protected $documentIdentifierGenerator;

/**
* @Flow\Inject
* @var ErrorStorageInterface
*/
protected $errorStorage;

/**
* The current Elasticsearch bulk request, in the format required by http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html
*/
protected array $currentBulkRequest = [];

/**
* Optional postfix for the index, e.g. to have different indexes by timestamp.
*/
protected string $indexNamePostfix = '';

protected bool $bulkProcessing = false;

public function setDimensions(array $dimensionsValues): void
{
$this->searchClient->setDimensions($dimensionsValues);
Expand Down Expand Up @@ -243,7 +243,7 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null): voi
}
}

$documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
$documentIdentifier = $this->documentIdentifierGenerator->generate($node, $targetWorkspaceName);
$nodeType = $node->getNodeType();

$mappingType = $this->getIndex()->findType($nodeType->getName());
Expand Down Expand Up @@ -335,34 +335,14 @@ protected function toBulkRequest(NodeInterface $node, array $requests = null): v
$this->flushIfNeeded();
}

/**
* Returns a stable identifier for the Elasticsearch document representing the node
*
* @param NodeInterface $node
* @param string $targetWorkspaceName
* @return string
* @throws IllegalObjectTypeException
*/
protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null): string
{
$contextPath = $node->getContextPath();

if ($targetWorkspaceName !== null) {
$contextPath = (string)(new TargetContextPath($node, $targetWorkspaceName, $contextPath));
}

return sha1($contextPath);
}

/**
* Schedule node removal into the current bulk request.
*
* @param NodeInterface $node
* @param string $targetWorkspaceName
* @param string|null $targetWorkspaceName
* @return void
* @throws Exception
* @throws FilesException
* @throws IllegalObjectTypeException
* @throws \Flowpack\ElasticSearch\Exception
*/
public function removeNode(NodeInterface $node, string $targetWorkspaceName = null): void
Expand All @@ -380,7 +360,7 @@ public function removeNode(NodeInterface $node, string $targetWorkspaceName = nu
}
}

$documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
$documentIdentifier = $this->documentIdentifierGenerator->generate($node, $targetWorkspaceName);

$this->toBulkRequest($node, $this->documentDriver->delete($node, $documentIdentifier));
$this->toBulkRequest($node, $this->indexerDriver->fulltext($node, [], $targetWorkspaceName));
Expand All @@ -390,8 +370,8 @@ public function removeNode(NodeInterface $node, string $targetWorkspaceName = nu

/**
* @throws Exception
* @throws Exception\ConfigurationException
* @throws \Flowpack\ElasticSearch\Exception
* @throws FilesException
*/
protected function flushIfNeeded(): void
{
Expand Down Expand Up @@ -550,6 +530,7 @@ public function updateIndexAlias(): void
* Update the main alias to allow to query all indices at once
* @throws Exception
* @throws Exception\ConfigurationException
* @throws ApiException
*/
public function updateMainAlias(): void
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php
declare(strict_types=1);

namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier;

/*
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
*
* (c) Contributors of the Neos Project - www.neos.io
*
* This package is Open Source Software. For the full copyright and license
* information, please view the LICENSE file which was distributed with this
* source code.
*/

use Neos\ContentRepository\Domain\Model\NodeInterface;

interface DocumentIdentifierGeneratorInterface
{
/**
* Generates a stable identifier out of the given node
*
* @param NodeInterface $node
* @param string|null $targetWorkspaceName
* @return string
*/
public function generate(NodeInterface $node, ?string $targetWorkspaceName = null): string;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php
declare(strict_types=1);

namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier;

/*
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
*
* (c) Contributors of the Neos Project - www.neos.io
*
* This package is Open Source Software. For the full copyright and license
* information, please view the LICENSE file which was distributed with this
* source code.
*/

use Neos\ContentRepository\Domain\Model\NodeInterface;

class NodeIdentifierBasedDocumentIdentifierGenerator implements DocumentIdentifierGeneratorInterface
{
public function generate(NodeInterface $node, ?string $targetWorkspaceName = null): string
{
$workspaceName = $targetWorkspaceName ?: $node->getWorkspace()->getName();
$nodeIdentifier = $node->getIdentifier();

return sha1($nodeIdentifier . $workspaceName);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php
declare(strict_types=1);

namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier;

/*
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
*
* (c) Contributors of the Neos Project - www.neos.io
*
* This package is Open Source Software. For the full copyright and license
* information, please view the LICENSE file which was distributed with this
* source code.
*/

use Neos\ContentRepository\Domain\Model\NodeInterface;

class NodePathBasedDocumentIdentifierGenerator implements DocumentIdentifierGeneratorInterface
{
public function generate(NodeInterface $node, ?string $targetWorkspaceName = null): string
{
$contextPath = $node->getContextPath();

if ($targetWorkspaceName !== null) {
$contextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $contextPath);
}

return sha1($contextPath);
}
}
3 changes: 3 additions & 0 deletions Configuration/Objects.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ Neos\ContentRepository\Search\Indexer\NodeIndexerInterface:
Neos\ContentRepository\Search\AssetExtraction\AssetExtractorInterface:
className: 'Flowpack\ElasticSearch\ContentRepositoryAdaptor\AssetExtraction\IngestAttachmentAssetExtractor'

'Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier\DocumentIdentifierGeneratorInterface':
className: 'Flowpack\ElasticSearch\ContentRepositoryAdaptor\Service\DocumentIdentifier\NodePathBasedDocumentIdentifierGenerator'

Flowpack\ElasticSearch\ContentRepositoryAdaptor\Driver\QueryInterface:
scope: prototype
factoryObjectName: 'Flowpack\ElasticSearch\ContentRepositoryAdaptor\Factory\QueryFactory'
Expand Down
40 changes: 39 additions & 1 deletion Tests/Functional/Indexer/NodeIndexerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,45 @@ public function nodeMoveIsHandledCorrectly(): void
$testNode = $this->setupCrAndIndexTestNode();
self::assertTrue($this->nodeExistsInIndex($testNode), 'Node was not successfully indexed.');

// $testNode->moveInto();
$testNode2 = $this->siteNode->getNode('test-node-2');

// move this node (test-node-1) into test-node-2
$testNode->setProperty('title', 'changed');
$testNode->moveInto($testNode2);

// re-index
$this->nodeIndexer->indexNode($testNode);
$this->nodeIndexer->flush();
sleep(1);

// check if we do have more than one single occurrence (nodeExistsInIndex will check that indirectly)
self::assertTrue($this->nodeExistsInIndex($testNode), 'Node was not successfully indexed.');

// check the node path in es after indexing
$pathInEs = $this->getNeosPathOfNodeInIndex($testNode);
self::assertNotNull($pathInEs, 'Node does not exist after indexing');
self::assertEquals($pathInEs, $testNode->getPath(), 'Wrong node path in elasticsearch after indexing');
}

/**
* Fetch the node path (stored in elasticsearch) of the given node
*/
private function getNeosPathOfNodeInIndex(NodeInterface $node): ?string
{
$this->searchClient->setContextNode($this->siteNode);
/** @var FilteredQuery $query */
$query = $this->objectManager->get(QueryInterface::class);
$query->queryFilter('term', ['neos_node_identifier' => $node->getIdentifier()]);

$result = $this->nodeIndexer->getIndex()->request('GET', '/_search', [], $query->toArray())->getTreatedContent();

$firstHit = current(Arrays::getValueByPath($result, 'hits.hits'));

if ($firstHit === false) {
return null;
}

return Arrays::getValueByPath($firstHit, '_source.neos_path');
}

/**
Expand Down

0 comments on commit 307b252

Please sign in to comment.