Skip to content

Commit

Permalink
[!!!][FEATURE] Allow to use custom output rules (#98)
Browse files Browse the repository at this point in the history
* [!!!][TASK] Trigger deprecation error when missing Behavior instance
* [!!!][FEATURE] Allow to use custom output rules

As a consequence, it is required to have `Behavior` available in
`Sanitizer`. As a fallback and for the time being, this is not a
hard requirement - but it will change in future versions of
this library.
  • Loading branch information
ohader authored Nov 28, 2022
1 parent b97ef20 commit 703dded
Show file tree
Hide file tree
Showing 8 changed files with 237 additions and 8 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ $behavior = (new Behavior())
);

$visitors = [new CommonVisitor($behavior)];
$sanitizer = new Sanitizer(...$visitors);
$sanitizer = new Sanitizer($behavior, ...$visitors);

$html = <<< EOH
<div id="main">
Expand All @@ -108,11 +108,15 @@ will result in the following sanitized output
</div>
```

### Changes
### :information_source: Changes

* since `v2.1.0` newly introduced nodes `Behavior\Comment` and `Behavior\CdataSection` are enabled per
default for backward compatibility reasons, use e.g. `$behavior->withoutNodes(new Behavior\Comment())`
to remove them (later versions of this package won't have this fallback anymore)
* since `v2.1.0` it is suggested to provide a `\TYPO3\HtmlSanitizer\Behavior` when creating a
new instance of `\TYPO3\HtmlSanitizer\Sanitizer`, e.g. `new Sanitizer($behavior, ...$visitors)`

Find more details on all changes in [UPGRADING.md](UPGRADING.md).

### `Behavior` flags

Expand Down
4 changes: 4 additions & 0 deletions UPGRADING.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@
* deprecated `\TYPO3\HtmlSanitizer\Behavior\NodeException::getNode()`,
use `\TYPO3\HtmlSanitizer\Behavior\NodeException::getDomNode()` instead
* deprecated property `\TYPO3\HtmlSanitizer\Sanitizer::$root`, superfluous - don't use it anymore
* requirement to provide an instance of `\TYPO3\HtmlSanitizer\Behavior` when creating a
new instance of `\TYPO3\HtmlSanitizer\Sanitizer` (for backward compatibility, this
is not a hard requirement yet, but omitting it already triggers an `E_USER_DEPRECATED` PHP error),
adjust to use `new Sanitizer($behavior, ...$visitors)`
1 change: 1 addition & 0 deletions phpunit.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8" ?>
<phpunit
bootstrap="vendor/autoload.php"
convertDeprecationsToExceptions="true"
backupGlobals="true"
cacheResult="false"
colors="true"
Expand Down
2 changes: 1 addition & 1 deletion src/Builder/CommonBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public function build(): Sanitizer
{
$behavior = $this->createBehavior();
$visitor = new CommonVisitor($behavior);
return new Sanitizer($visitor);
return new Sanitizer($behavior, $visitor);
}

protected function createBehavior(): Behavior
Expand Down
57 changes: 52 additions & 5 deletions src/Sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
use DOMNode;
use DOMNodeList;
use Masterminds\HTML5;
use TYPO3\HtmlSanitizer\Serializer\Rules;
use TYPO3\HtmlSanitizer\Serializer\RulesInterface;
use TYPO3\HtmlSanitizer\Visitor\VisitorInterface;

/**
Expand Down Expand Up @@ -49,6 +51,11 @@ class Sanitizer
*/
protected $visitors = [];

/**
* @var ?Behavior
*/
protected $behavior = null;

/**
* @var HTML5
*/
Expand All @@ -65,10 +72,30 @@ class Sanitizer
*/
protected $context;

public function __construct(VisitorInterface ...$visitors)
/**
* @param Behavior|VisitorInterface[] $items
*
* @todo use `__construct(Behavior $behavior, VisitorInterface ...$visitors)`
* (which would have been a breaking change with a PHP fatal error)
*/
public function __construct(...$items)
{
$this->visitors = $visitors;
$this->visitors = [];
foreach ($items as $item) {
if ($item instanceof VisitorInterface) {
$this->visitors[] = $item;
} elseif ($item instanceof Behavior && $this->behavior === null) {
$this->behavior = $item;
}
}
$this->parser = $this->createParser();

if (!$this->behavior instanceof Behavior) {
trigger_error(
'Add `Behavior` when creating new `Sanitizer` instances, e.g. `new Sanitizer($behavior, $visitor)`',
E_USER_DEPRECATED
);
}
}

public function sanitize(string $html, InitiatorInterface $initiator = null): string
Expand All @@ -77,7 +104,10 @@ public function sanitize(string $html, InitiatorInterface $initiator = null): st
// @todo drop deprecated property
$this->root = $root;
$this->handle($root, $initiator);
return $this->serialize($root);
$rules = $this->createRules($initiator);
$serialized = $this->serialize($root, $rules);
$this->closeRulesStream($rules);
return $serialized;
}

protected function parse(string $html): DOMDocumentFragment
Expand All @@ -94,9 +124,13 @@ protected function handle(DOMNode $domNode, InitiatorInterface $initiator = null
return $domNode;
}

protected function serialize(DOMNode $document): string
/**
* Custom implementation of `\Masterminds\HTML5::save` and `\Masterminds\HTML5::saveHTML`.
*/
protected function serialize(DOMNode $domNode, RulesInterface $rules): string
{
return $this->parser->saveHTML($document);
$rules->traverse($domNode);
return stream_get_contents($rules->getStream(), -1, 0);
}

protected function beforeTraverse(): void
Expand Down Expand Up @@ -164,6 +198,19 @@ protected function replaceNode(DOMNode $source, ?DOMNode $target): ?DOMNode
return $target;
}

/**
 * Creates the output rules that are used to serialize the sanitized DOM.
 *
 * A new `php://temp` stream is opened on every invocation and handed to
 * the rules; the caller is responsible for closing it again
 * (see `closeRulesStream()`). In case no `Behavior` has been assigned to
 * this sanitizer, a plain `new Behavior()` is used instead.
 *
 * @param InitiatorInterface|null $initiator passed on to the rules as is
 * @return Rules
 */
protected function createRules(?InitiatorInterface $initiator = null): Rules
{
    $stream = fopen('php://temp', 'wb');
    return (new Rules($stream, self::mastermindsDefaultOptions))
        ->withBehavior($this->behavior ?? new Behavior())
        ->withInitiator($initiator);
}

/**
 * Closes the stream exposed by the given rules.
 *
 * @param RulesInterface $rules
 * @return bool the result of `fclose()`
 */
protected function closeRulesStream(RulesInterface $rules): bool
{
    $stream = $rules->getStream();
    return fclose($stream);
}

protected function createParser(): HTML5
{
return new HTML5(self::mastermindsDefaultOptions);
Expand Down
110 changes: 110 additions & 0 deletions src/Serializer/Rules.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
<?php

declare(strict_types=1);

/*
* This file is part of the TYPO3 project.
*
* It is free software; you can redistribute it and/or modify it under the terms
* of the MIT License (MIT). For the full copyright and license information,
* please read the LICENSE file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

namespace TYPO3\HtmlSanitizer\Serializer;

use DOMNode;
use Masterminds\HTML5\Serializer\OutputRules;
use Masterminds\HTML5\Serializer\Traverser;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\InitiatorInterface;

/**
 * Output rules based on the Masterminds `OutputRules`, which additionally
 * carry the `Behavior` and the (optional) initiator.
 *
 * `withBehavior()` and `withInitiator()` do not modify the current instance,
 * they return a clone in case the value actually changes.
 */
class Rules extends OutputRules implements RulesInterface
{
    /**
     * Options as handed to the constructor, passed on to the `Traverser`.
     *
     * @var array
     */
    protected $options;

    /**
     * @var ?Traverser
     */
    protected $traverser;

    /**
     * @var ?Behavior
     */
    protected $behavior;

    /**
     * @var ?InitiatorInterface
     */
    protected $initiator;

    /**
     * Creates new rules and assigns the given behavior right away.
     *
     * @param Behavior $behavior
     * @param resource $output
     * @param array $options
     * @return self
     */
    public static function create(Behavior $behavior, $output, array $options = []): self
    {
        // `$options` are stored by the constructor already,
        // only the behavior needs to be assigned here
        $target = new self($output, $options);
        $target->behavior = $behavior;
        return $target;
    }

    /**
     * @param resource $output handed to the parent `OutputRules` constructor
     * @param array $options kept in `$this->options` (cast to array) and handed to the parent as well
     */
    public function __construct($output, $options = [])
    {
        $this->options = (array)$options;
        parent::__construct($output, $this->options);
    }

    /**
     * Provides rules using the given behavior.
     *
     * @return self the current instance in case the very same behavior
     *              is assigned already, a modified clone otherwise
     */
    public function withBehavior(Behavior $behavior): self
    {
        if ($this->behavior === $behavior) {
            return $this;
        }
        $target = clone $this;
        $target->behavior = $behavior;
        return $target;
    }

    /**
     * Provides rules using the given initiator (`null` is a valid value).
     *
     * @return self the current instance in case the very same initiator
     *              is assigned already, a modified clone otherwise
     */
    public function withInitiator(?InitiatorInterface $initiator): self
    {
        if ($this->initiator === $initiator) {
            return $this;
        }
        $target = clone $this;
        $target->initiator = $initiator;
        return $target;
    }

    /**
     * Walks the given DOM node with a new `Traverser` that uses these
     * rules, the current output stream and the stored options.
     */
    public function traverse(DOMNode $domNode): void
    {
        $traverser = new Traverser($domNode, $this->out, $this, $this->options);
        try {
            $traverser->walk();
        } finally {
            // release the traverser to avoid cyclic references and allow PHP
            // to free memory without waiting for gc_collect_cycles - this has
            // to happen as well in case walking was aborted by an exception
            $this->unsetTraverser();
        }
    }

    /**
     * @return resource
     */
    public function getStream()
    {
        return $this->out;
    }

    /**
     * @return array options as stored by the constructor
     */
    public function getOptions(): array
    {
        return $this->options;
    }
}
42 changes: 42 additions & 0 deletions src/Serializer/RulesInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php

declare(strict_types=1);

/*
* This file is part of the TYPO3 project.
*
* It is free software; you can redistribute it and/or modify it under the terms
* of the MIT License (MIT). For the full copyright and license information,
* please read the LICENSE file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

namespace TYPO3\HtmlSanitizer\Serializer;

use DOMNode;
use Masterminds\HTML5\Serializer\RulesInterface as MastermindsRulesInterface;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\InitiatorInterface;

/**
 * Rules that can be handed to the sanitizer's serialization step; extends
 * the rules contract of `masterminds/html5` by behavior, initiator,
 * traversal and access to the underlying stream and options.
 */
interface RulesInterface extends MastermindsRulesInterface
{
    /**
     * Provides rules that use the given behavior.
     *
     * @return self
     */
    public function withBehavior(Behavior $behavior);

    /**
     * Provides rules that use the given initiator, `null` is allowed.
     *
     * @return self
     */
    public function withInitiator(?InitiatorInterface $initiator);

    /**
     * Traverses the given DOM node using these rules.
     */
    public function traverse(DOMNode $domNode): void;

    /**
     * Provides the stream resource of these rules.
     *
     * @return resource
     */
    public function getStream();

    /**
     * Provides the options of these rules.
     */
    public function getOptions(): array;
}
21 changes: 21 additions & 0 deletions tests/ScenarioTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@

class ScenarioTest extends TestCase
{
/**
 * Creating a `Sanitizer` with visitors only (no `Behavior` among the
 * constructor arguments) is expected to trigger a deprecation.
 *
 * @test
 */
public function missingBehaviorTriggersDeprecationError(): void
{
    $expectedMessage = 'Add `Behavior` when creating new `Sanitizer` instances, e.g. `new Sanitizer($behavior, $visitor)`';
    $this->expectDeprecation();
    $this->expectDeprecationMessage($expectedMessage);
    // the behavior is handed to the visitor only, but not to the sanitizer
    new Sanitizer(new CommonVisitor(new Behavior()));
}

public static function allTagsAreRemovedOnMissingDeclarationDataProvider(): array
{
return [
Expand All @@ -42,6 +56,7 @@ public function allTagsAreRemovedOnMissingDeclaration(string $payload, string $e
{
$behavior = new Behavior();
$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -101,6 +116,7 @@ public function tagFlagsAreProcessed(int $flags, string $payload, string $expect
);

$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -186,6 +202,7 @@ public function tagIsHandled(Behavior\NodeHandler $nodeHandler, string $payload,
->withName('scenario-test')
->withNodes($nodeHandler);
$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -234,6 +251,7 @@ public function commentsAreHandled(bool $allowed, int $flags, string $payload, s
$comment = new Behavior\Comment();
$behavior = $allowed ? $behavior->withNodes($comment) : $behavior->withoutNodes($comment);
$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -282,6 +300,7 @@ public function cdataSectionsAreHandled(bool $allowed, int $flags, string $paylo
$cdataSection = new Behavior\CdataSection();
$behavior = $allowed ? $behavior->withNodes($cdataSection) : $behavior->withoutNodes($cdataSection);
$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -338,6 +357,7 @@ public function isJsonLdScriptAllowed(): void
);

$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down Expand Up @@ -396,6 +416,7 @@ public function iframeSandboxIsAllowed(): void
);

$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
Expand Down

0 comments on commit 703dded

Please sign in to comment.