From 703dded20c3b0b59a00c1da87f9d183b766861d0 Mon Sep 17 00:00:00 2001 From: Oliver Hader Date: Mon, 28 Nov 2022 17:37:08 +0100 Subject: [PATCH] [!!!][FEATURE] Allow to use custom output rules (#98) * [!!!][TASK] Trigger deprecation error when missing Behavior instance * [!!!][FEATURE] Allow to use custom output rules As a consequence, it is required to have `Behavior` available in `Sanitizer`. As fall back and for the time being, this is not a hard requirement - but it will change in future versions of this library. --- README.md | 8 ++- UPGRADING.md | 4 ++ phpunit.xml | 1 + src/Builder/CommonBuilder.php | 2 +- src/Sanitizer.php | 57 ++++++++++++++-- src/Serializer/Rules.php | 110 ++++++++++++++++++++++++++++++ src/Serializer/RulesInterface.php | 42 ++++++++++++ tests/ScenarioTest.php | 21 ++++++ 8 files changed, 237 insertions(+), 8 deletions(-) create mode 100644 src/Serializer/Rules.php create mode 100644 src/Serializer/RulesInterface.php diff --git a/README.md b/README.md index 595cf29..c2cfa55 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ $behavior = (new Behavior()) ); $visitors = [new CommonVisitor($behavior)]; -$sanitizer = new Sanitizer(...$visitors); +$sanitizer = new Sanitizer($behavior, ...$visitors); $html = <<< EOH
@@ -108,11 +108,15 @@ will result in the following sanitized output
``` -### Changes +### :information_source: Changes * since `v2.1.0` newly introduced nodes `Behavior\Comment` and `Behavior\CdataSection` are enabled per default for backward compatibility reasons, use e.g. `$behavior->withoutNodes(new Behavior\Comment())` to remove them (later versions of this package won't have this fallback anymore) +* since `v2.1.0` it is suggested to provide a `\TYPO3\HtmlSanitizer\Behavior` when creating a + new instance of `\TYPO3\HtmlSanitizer\Sanitizer`, e.g. `new Sanitizer($behavior, ...$visitors)` + +Find more details on all changes in [UPGRADING.md](UPGRADING.md). ### `Behavior` flags diff --git a/UPGRADING.md b/UPGRADING.md index affb0d5..6375e75 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -7,3 +7,7 @@ * deprecated `\TYPO3\HtmlSanitizer\Behavior\NodeException::getNode()`, use `\TYPO3\HtmlSanitizer\Behavior\NodeException::getDomNode()` instead * deprecated property `\TYPO3\HtmlSanitizer\Sanitizer::$root`, superfluous - don't use it anymore +* requirement to provide instance of `\TYPO3\HtmlSanitizer\Behavior` when creating a + new instance of `\TYPO3\HtmlSanitizer\Sanitizer` (for backward compatibility, this + is not a hard requirement yet, but already issue an `E_USER_DEPRECATED` PHP error), + adjust to use `new Sanitizer($behavior, ...$visitors)` diff --git a/phpunit.xml b/phpunit.xml index 3ba7de5..3e458bd 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,6 +1,7 @@ createBehavior(); $visitor = new CommonVisitor($behavior); - return new Sanitizer($visitor); + return new Sanitizer($behavior, $visitor); } protected function createBehavior(): Behavior diff --git a/src/Sanitizer.php b/src/Sanitizer.php index 2f1312a..f7760f9 100644 --- a/src/Sanitizer.php +++ b/src/Sanitizer.php @@ -18,6 +18,8 @@ use DOMNode; use DOMNodeList; use Masterminds\HTML5; +use TYPO3\HtmlSanitizer\Serializer\Rules; +use TYPO3\HtmlSanitizer\Serializer\RulesInterface; use TYPO3\HtmlSanitizer\Visitor\VisitorInterface; /** @@ -49,6 +51,11 @@ class Sanitizer */ protected $visitors = []; + /** + * @var ?Behavior + */ + protected $behavior = null; + /** * @var HTML5 */ @@ -65,10 +72,30 @@ class Sanitizer */ protected $context; - public function __construct(VisitorInterface ...$visitors) + /** + * @param Behavior|VisitorInterface[] $items + * + * @todo use `__construct(Behavior $behavior, VisitorInterface ...$visitors)` + * (which would have been a breaking change with a PHP fatal error) + */ + public function __construct(...$items) { - $this->visitors = $visitors; + $this->visitors = []; + foreach ($items as $item) { + if ($item instanceof VisitorInterface) { + $this->visitors[] = $item; + } elseif ($item instanceof Behavior && $this->behavior === null) { + $this->behavior = $item; + } + } $this->parser = $this->createParser(); + + if (!$this->behavior instanceof Behavior) { + trigger_error( + 'Add `Behavior` when creating new `Sanitizer` instances, e.g. `new Sanitizer($behavior, $visitor)`', + E_USER_DEPRECATED + ); + } } public function sanitize(string $html, InitiatorInterface $initiator = null): string @@ -77,7 +104,10 @@ public function sanitize(string $html, InitiatorInterface $initiator = null): st // @todo drop deprecated property $this->root = $root; $this->handle($root, $initiator); - return $this->serialize($root); + $rules = $this->createRules($initiator); + $serialized = $this->serialize($root, $rules); + $this->closeRulesStream($rules); + return $serialized; } protected function parse(string $html): DOMDocumentFragment @@ -94,9 +124,13 @@ protected function handle(DOMNode $domNode, InitiatorInterface $initiator = null return $domNode; } - protected function serialize(DOMNode $document): string + /** + * Custom implementation of `\Masterminds\HTML5::save` and `\Masterminds\HTML5::saveHTML`. + */ + protected function serialize(DOMNode $domNode, RulesInterface $rules): string { - return $this->parser->saveHTML($document); + $rules->traverse($domNode); + return stream_get_contents($rules->getStream(), -1, 0); } protected function beforeTraverse(): void @@ -164,6 +198,19 @@ protected function replaceNode(DOMNode $source, ?DOMNode $target): ?DOMNode return $target; } + protected function createRules(InitiatorInterface $initiator = null): Rules + { + $stream = fopen('php://temp', 'wb'); + return (new Rules($stream, self::mastermindsDefaultOptions)) + ->withBehavior($this->behavior ?? new Behavior()) + ->withInitiator($initiator); + } + + protected function closeRulesStream(RulesInterface $rules): bool + { + return fclose($rules->getStream()); + } + protected function createParser(): HTML5 { return new HTML5(self::mastermindsDefaultOptions); diff --git a/src/Serializer/Rules.php b/src/Serializer/Rules.php new file mode 100644 index 0000000..d8502eb --- /dev/null +++ b/src/Serializer/Rules.php @@ -0,0 +1,110 @@ +options = $options; + $target->behavior = $behavior; + return $target; + } + + /** + * @param resource $output + * @param array $options + */ + public function __construct($output, $options = []) + { + $this->options = (array)$options; + parent::__construct($output, $this->options); + } + + public function withBehavior(Behavior $behavior): self + { + if ($this->behavior === $behavior) { + return $this; + } + $target = clone $this; + $target->behavior = $behavior; + return $target; + } + + public function withInitiator(?InitiatorInterface $initiator): self + { + if ($this->initiator === $initiator) { + return $this; + } + $target = clone $this; + $target->initiator = $initiator; + return $target; + } + + public function traverse(DOMNode $domNode): void + { + $traverser = new Traverser($domNode, $this->out, $this, $this->options); + $traverser->walk(); + // release the traverser to avoid cyclic references and allow PHP + // to free memory without waiting for gc_collect_cycles + $this->unsetTraverser(); + } + + /** + * @return resource + */ + public function getStream() + { + return $this->out; + } + + public function getOptions(): array + { + return $this->options; + } +} diff --git a/src/Serializer/RulesInterface.php b/src/Serializer/RulesInterface.php new file mode 100644 index 0000000..e8ab6ea --- /dev/null +++ b/src/Serializer/RulesInterface.php @@ -0,0 +1,42 @@ +expectDeprecation(); + $this->expectDeprecationMessage( + 'Add `Behavior` when creating new `Sanitizer` instances, e.g. `new Sanitizer($behavior, $visitor)`' + ); + $behavior = new Behavior(); + $visitor = new CommonVisitor($behavior); + new Sanitizer($visitor); + } + public static function allTagsAreRemovedOnMissingDeclarationDataProvider(): array { return [ @@ -42,6 +56,7 @@ public function allTagsAreRemovedOnMissingDeclaration(string $payload, string $e { $behavior = new Behavior(); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -101,6 +116,7 @@ public function tagFlagsAreProcessed(int $flags, string $payload, string $expect ); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -186,6 +202,7 @@ public function tagIsHandled(Behavior\NodeHandler $nodeHandler, string $payload, ->withName('scenario-test') ->withNodes($nodeHandler); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -234,6 +251,7 @@ public function commentsAreHandled(bool $allowed, int $flags, string $payload, s $comment = new Behavior\Comment(); $behavior = $allowed ? $behavior->withNodes($comment) : $behavior->withoutNodes($comment); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -282,6 +300,7 @@ public function cdataSectionsAreHandled(bool $allowed, int $flags, string $paylo $cdataSection = new Behavior\CdataSection(); $behavior = $allowed ? $behavior->withNodes($cdataSection) : $behavior->withoutNodes($cdataSection); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -338,6 +357,7 @@ public function isJsonLdScriptAllowed(): void ); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload)); @@ -396,6 +416,7 @@ public function iframeSandboxIsAllowed(): void ); $sanitizer = new Sanitizer( + $behavior, new CommonVisitor($behavior) ); self::assertSame($expectation, $sanitizer->sanitize($payload));