Skip to content

Commit

Permalink
Merge pull request #434 from jackalope/1-to-2
Browse files Browse the repository at this point in the history
1 to 2
  • Loading branch information
dbu committed Jan 8, 2024
2 parents 10ab71f + c2c2098 commit 7809647
Show file tree
Hide file tree
Showing 7 changed files with 347 additions and 85 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ Changelog
1.x
===

1.11.1
------

* Fix regression of 1.11.0: Reference deletion should work regardless of upper or lowercase of the type names in XML data.

1.11.0
------

* Improve delete properties performance by replace DOMDocument with `xml_parse`.

1.10.1
------

Expand Down
113 changes: 34 additions & 79 deletions src/Jackalope/Transport/DoctrineDBAL/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use Jackalope\Transport\BaseTransport;
use Jackalope\Transport\DoctrineDBAL\Query\QOMWalker;
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser;
use Jackalope\Transport\DoctrineDBAL\XmlPropsRemover\XmlPropsRemover;
use Jackalope\Transport\NodeTypeManagementInterface;
use Jackalope\Transport\QueryInterface as QueryTransport;
use Jackalope\Transport\StandardNodeTypes;
Expand Down Expand Up @@ -1190,6 +1191,7 @@ public function getNode(string $path): \stdClass

$nestedNodes = $this->getNodesData($rows);
$node = array_shift($nestedNodes);

foreach ($nestedNodes as $nestedPath => $nested) {
$relativePath = PathHelper::relativizePath($nestedPath, $path);
$this->nestNode($node, $nested, explode('/', $relativePath));
Expand Down Expand Up @@ -1547,20 +1549,16 @@ public function deleteProperties(array $operations): void
$nodesByPath = [];
foreach ($operations as $op) {
$nodePath = PathHelper::getParentPath($op->srcPath);
$propertyName = PathHelper::getNodeName($op->srcPath);
if (!array_key_exists($nodePath, $nodesByPath)) {
$nodesByPath[$nodePath] = [];
}
$nodesByPath[$nodePath][] = $op->srcPath;
$nodesByPath[$nodePath][$propertyName] = $propertyName;
}

// Doing the actual removal
foreach ($nodesByPath as $nodePath => $pathsToDelete) {
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException('No item found at '.$nodePath);
}
$this->removePropertiesFromNode($nodeId, $pathsToDelete);
foreach ($nodesByPath as $nodePath => $propertiesToDelete) {
$this->removePropertiesFromNode($nodePath, $propertiesToDelete);
}
}

Expand All @@ -1578,108 +1576,65 @@ public function deletePropertyImmediately(string $path): void
private function deleteProperty(string $path): void
{
$this->assertLoggedIn();

$nodePath = PathHelper::getParentPath($path);
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException('No item found at '.$path);
}
$this->removePropertiesFromNode($nodeId, [$path]);
$propertyName = PathHelper::getNodeName($path);
$this->removePropertiesFromNode($nodePath, [$propertyName]);
}

/**
* Removes a list of properties from a given node.
*
* @param array<string> $paths Path belonging to that node that should be deleted
* @param array<string> $propertiesToDelete Path belonging to that node that should be deleted
*/
private function removePropertiesFromNode(string|int $nodeId, array $paths): void
private function removePropertiesFromNode(string $nodePath, array $propertiesToDelete): void
{
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException("No item found at $nodePath");
}

$query = 'SELECT props FROM phpcr_nodes WHERE id = ?';
$xml = $this->getConnection()->fetchOne($query, [$nodeId]);

$dom = new \DOMDocument('1.0', 'UTF-8');
$dom->loadXML($xml);
$xpath = new \DOMXPath($dom);

$found = false;
foreach ($paths as $path) {
$propertyName = PathHelper::getNodeName($path);
$tablesToDeleteReferencesFrom = [];
$deleteBinary = false;
foreach ($xpath->query(sprintf('//*[@sv:name="%s"]', $propertyName)) as $propertyNode) {
\assert($propertyNode instanceof \DOMElement);
$found = true;
// would be nice to have the property object to ask for type
// but its in state deleted, would mean lots of refactoring
if (!$propertyNode->hasAttribute('sv:type')) {
continue;
}

$type = strtolower($propertyNode->getAttribute('sv:type'));
switch ($type) {
case 'reference':
$tablesToDeleteReferencesFrom[] = $this->referenceTables[PropertyType::REFERENCE];
break;
case 'weakreference':
$tablesToDeleteReferencesFrom[] = $this->referenceTables[PropertyType::WEAKREFERENCE];
break;
case 'binary':
$deleteBinary = true;
break;
default:
// nothing to do
}

// Doing the XML removal
$propertyNode->parentNode->removeChild($propertyNode);
}

if (!$found) {
$nodePath = PathHelper::getParentPath($path);
throw new ItemNotFoundException("Node $nodePath has no property $propertyName");
}
$xmlPropsRemover = new XmlPropsRemover($xml, $propertiesToDelete);
[$xml, $references, $binaries] = $xmlPropsRemover->removeProps();

// Deleting the references
foreach (array_unique($tablesToDeleteReferencesFrom) as $table) {
foreach ($references as $type => $propertyNames) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
foreach ($propertyNames as $propertyName) {
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeQuery($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
"Can not delete references for property $propertyName from `$table`",
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
}
if ($deleteBinary) {
try {
$query = "DELETE FROM {$this->binaryTable} WHERE node_id = ? AND property_name = ?";
$this->getConnection()->executeQuery($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
"Can not delete binaries for property $propertyName from `{$this->binaryTable}`",
$e->getCode(),
$e
);
}
}
foreach ($binaries as $propertyName) {
try {
$query = "DELETE FROM {$this->binaryTable} WHERE node_id = ? AND property_name = ?";
$this->getConnection()->executeQuery($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
"Can not delete binaries for property $propertyName from `{$this->binaryTable}`",
$e->getCode(),
$e
);
}
}

$xml = $dom->saveXML();

$query = 'UPDATE phpcr_nodes SET props = ? WHERE id = ?';
$params = [$xml, $nodeId];

try {
$this->getConnection()->executeQuery($query, $params);
} catch (DBALException $e) {
throw new RepositoryException(
"Unexpected exception while updating properties of node with id $nodeId",
$e->getCode(),
$e
);
throw new RepositoryException("Unexpected exception while updating properties of $nodePath", $e->getCode(), $e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,18 @@ public function parse(): \stdClass
{
$this->data = new \stdClass();

$parser = xml_parser_create();
$parser = \xml_parser_create();

xml_set_element_handler(
\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

xml_set_character_data_handler($parser, [$this, 'dataHandler']);
\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

xml_parse($parser, $this->xml, true);
xml_parser_free($parser);
\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
<?php

namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover;

use PHPCR\PropertyType;

/**
* @internal
*/
class XmlPropsRemover
{
private string $xml;

/**
* @var string[]
*/
private array $propertyNames;

private bool $skipCurrentTag = false;

private string $newXml = '';

private string $newStartTag = '';

/**
* @var string[]
*/
private array $weakReferences = [];

/**
* @var string[]
*/
private array $binaries = [];

/**
* @var string[]
*/
private array $references = [];

public function __construct(string $xml, array $propertyNames)
{
$this->xml = $xml;
$this->propertyNames = $propertyNames;
}

/**
* @example [$xml, $references, $binaries] = $xmlPropsRemover->removeProps($xml, $propertiesToDelete);
*
* @return array{
* 0: string,
* 1: array{
* reference: string[],
* weakreference: string[],
* },
* 2: string[]
* } An array with the new xml (0) and the references (1) which requires to be removed
*/
public function removeProps(): array
{
$this->newXml = '<?xml version="1.0" encoding="UTF-8"?>'.PHP_EOL;
$this->references = [];
$this->weakReferences = [];
$this->binaries = [];
$this->newStartTag = '';
$this->skipCurrentTag = false;

$parser = \xml_parser_create();

\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

return [
$this->newXml.PHP_EOL,
[
'reference' => $this->references,
'weakreference' => $this->weakReferences,
],
$this->binaries,
];
}

/**
* @param mixed[] $attrs
*/
private function startHandler(\XMLParser $parser, string $name, array $attrs): void
{
if ($this->skipCurrentTag) {
return;
}

if ('SV:PROPERTY' === $name) {
$svName = $attrs['SV:NAME'];

if (\in_array((string) $svName, $this->propertyNames, true)) {
$this->skipCurrentTag = true;
// use PHPCR PropertyType to avoid encoding issues
switch (PropertyType::valueFromName($attrs['SV:TYPE'])) {
case PropertyType::REFERENCE:
$this->references[] = $svName;
break;
case PropertyType::WEAKREFERENCE:
$this->weakReferences[] = $svName;
break;
case PropertyType::BINARY:
$this->binaries[] = $svName;
break;
}

return;
}
}

$tag = '<'.\strtolower($name);
foreach ($attrs as $key => $value) {
$tag .= ' '.\strtolower($key) // there is no case key which requires escaping for performance reasons we avoid it so
.'="'
.\htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
.'"';
}
$tag .= '>';

$this->newXml .= $this->newStartTag;
$this->newStartTag = $tag; // handling self closing tags in endHandler
}

private function endHandler(\XMLParser $parser, string $name): void
{
if ('SV:PROPERTY' === $name && $this->skipCurrentTag) {
$this->skipCurrentTag = false;

return;
}

if ($this->skipCurrentTag) {
return;
}

if ($this->newStartTag) {
// if the tag is not rendered to newXml it can be a self-closing tag
$this->newXml .= \substr($this->newStartTag, 0.0, -1).'/>';
$this->newStartTag = '';

return;
}

$this->newXml .= '</'.\strtolower($name).'>';
}

private function dataHandler(\XMLParser $parser, string $data): void
{
if ($this->skipCurrentTag) {
return;
}

if ('' !== $data) {
$this->newXml .= $this->newStartTag; // non-empty data means no self closing tag so render tag now
$this->newStartTag = '';
$this->newXml .= \htmlspecialchars($data, ENT_XML1, 'UTF-8');
}
}
}

0 comments on commit 7809647

Please sign in to comment.