Skip to content

Commit

Permalink
[SECURITY] Mitigate XSS in CDATA and HTML raw text elements (#106)
Browse files Browse the repository at this point in the history
* [SECURITY] Mitigate potential XSS in CDATA sections

Due to a parsing issue in upstream package `masterminds/html5`,
malicious markup used in a sequence with special HTML CDATA sections
cannot be filtered and sanitized.

This change mitigates the parsing problem by converting all
CDATA section contents to DOM text nodes like show below:

- from `<![CDATA[<any><span data-value="value"></any>*/]]>`
+ to   `&lt;any&gt;&lt;span data-value="value"&gt;&lt;/any&gt;*/`

In case an individual builder us used, which does not inherit behavior
declarations from `TYPO3\HtmlSanitizer\Builder\CommonBuilder`, those
implementations need to be adjusted manually.

* [SECURITY] Ensure HTML raw text elements are processed

Upstream package `masterminds/html5` provides HTML raw text elements
(`script`, `style`, `noframes`, `noembed` and (void element) `iframe`)
as `DOMText` nodes. Since those text nodes have not been processed by
`typo3/html-sanitizer`, sanitization was not applied to mentioned
HTML tag names.

None of them were defined in the default builder configuration,
that's why only custom behaviors, using one of those tag names,
were vulnerable to cross-site scripting.

In case any of those tags shall be processed as raw text
(for instance this would be reasonable for `script type="ld+json"`),
the new `Behavior\Tag::ALLOW_INSECURE_RAW_TEXT` flag needs to be
used explicitly.

Security-References: CVE-2022-23499
  • Loading branch information
ohader authored Dec 13, 2022
1 parent 16ff534 commit 385741b
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 3 deletions.
11 changes: 11 additions & 0 deletions src/Behavior/Tag.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class Tag implements NodeInterface
*/
const ALLOW_CHILDREN = 8;

/**
* whether this tag is allowed to be serialized as raw text (e.g. for `<script>` elements)
* see https://html.spec.whatwg.org/multipage/syntax.html#raw-text-elements
*/
const ALLOW_INSECURE_RAW_TEXT = 16;

/**
* @var string
*/
Expand Down Expand Up @@ -112,6 +118,11 @@ public function shallAllowChildren(): bool
return ($this->flags & self::ALLOW_CHILDREN) === self::ALLOW_CHILDREN;
}

public function shallAllowInsecureRawText(): bool
{
return ($this->flags & self::ALLOW_INSECURE_RAW_TEXT) === self::ALLOW_INSECURE_RAW_TEXT;
}

/**
* @return Attr|null
*/
Expand Down
23 changes: 22 additions & 1 deletion src/Builder/CommonBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@

namespace TYPO3\HtmlSanitizer\Builder;

use DOMNode;
use DOMText;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Behavior\Attr\UriAttrValueBuilder;
use TYPO3\HtmlSanitizer\Behavior\NodeInterface;
use TYPO3\HtmlSanitizer\Sanitizer;
use TYPO3\HtmlSanitizer\Visitor\CommonVisitor;

Expand Down Expand Up @@ -80,7 +83,8 @@ protected function createBehavior(): Behavior
->withName('common')
->withTags(...array_values($this->createBasicTags()))
->withTags(...array_values($this->createMediaTags()))
->withTags(...array_values($this->createTableTags()));
->withTags(...array_values($this->createTableTags()))
->withNodes(...array_values($this->createSpecialNodes()));
}

protected function createBasicTags(): array
Expand Down Expand Up @@ -207,6 +211,23 @@ protected function createTableTags(): array
return $tags;
}

/**
* @return array<string, Behavior\NodeInterface>
*/
protected function createSpecialNodes(): array
{
$nodes = [];
$nodes['#cdata-section'] = (new Behavior\NodeHandler(
new Behavior\CdataSection(),
new Behavior\Handler\ClosureHandler(
static function (NodeInterface $node, $domNode) {
return $domNode !== null ? new DOMText(trim($domNode->nodeValue)) : null;
}
)
));
return $nodes;
}

/**
* @return Behavior\Attr[]
*/
Expand Down
91 changes: 91 additions & 0 deletions src/Serializer/Rules.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

namespace TYPO3\HtmlSanitizer\Serializer;

use DOMCharacterData;
use DOMElement;
use DOMNode;
use Masterminds\HTML5\Elements;
use Masterminds\HTML5\Serializer\OutputRules;
use Masterminds\HTML5\Serializer\Traverser;
use TYPO3\HtmlSanitizer\Behavior;
Expand Down Expand Up @@ -113,4 +116,92 @@ public function getOptions(): array
{
return $this->options;
}

public function element($domNode)
{
if (!$domNode instanceof DOMElement) {
return;
}
// process non-raw-text elements and `<svg>` or `<math>` elements as usual
if (!$this->isRawText($domNode)
|| in_array($this->resolveNodeName($domNode), ['svg', 'math'], true)
) {
parent::element($domNode);
return;
}

$this->openTag($domNode);
if ($this->shallAllowInsecureRawText($domNode)) {
// the potentially insecure case, not encoding node data
foreach ($domNode->childNodes as $child) {
if ($child instanceof DOMCharacterData) {
$this->wr($child->data);
} elseif ($child instanceof DOMElement) {
$this->element($child);
}
}
} elseif ($domNode->hasChildNodes()) {
// enforce encoding for those raw text elements (different to original implementation)
$this->traverser->children($domNode->childNodes);
}
if (!$this->isVoid($domNode)) {
$this->closeTag($domNode);
}
}

public function text($domNode)
{
if (!$domNode instanceof DOMNode) {
return;
}
// @todo if allowed as text raw element
$parentDomNode = $domNode->parentNode ?? null;
if (!$this->isRawText($parentDomNode) || !$this->shallAllowInsecureRawText($parentDomNode)) {
$this->wr($this->enc($domNode->data));
return;
}
// the potentially insecure case, not encoding node data
$this->wr($domNode->data);
}

/**
* If the element has a declared namespace in the HTML, MathML or
* SVG namespaces, we use the localName instead of the tagName.
*/
protected function resolveNodeName(DOMElement $domNode): string
{
return $this->traverser->isLocalElement($domNode) ? $domNode->localName : $domNode->tagName;
}

/**
* @param DOMNode|null $domNode
*/
protected function shallAllowInsecureRawText($domNode): bool
{
if (!$domNode instanceof DOMNode || !$this->behavior instanceof Behavior) {
return false;
}
$tag = $this->behavior->getTag($domNode->nodeName);
return $tag instanceof Behavior\Tag && $tag->shallAllowInsecureRawText();
}

/**
* @param DOMNode|null $domNode
*/
protected function isRawText($domNode): bool
{
return $domNode instanceof DOMNode
&& !empty($domNode->tagName)
&& Elements::isA($domNode->localName, Elements::TEXT_RAW);
}

/**
* @param DOMNode|null $domNode
*/
protected function isVoid($domNode): bool
{
return $domNode instanceof DOMNode
&& !empty($domNode->tagName)
&& Elements::isA($domNode->localName, Elements::VOID_TAG);
}
}
26 changes: 25 additions & 1 deletion tests/CommonBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,36 @@ public function isSanitizedDataProvider(): array
],
'#910' => [
'<![CDATA[ #cdata ]]>',
'<![CDATA[ #cdata ]]>',
'#cdata',
],
'#911' => [
'#text',
'#text',
],
'#921' => [
'<![CDATA[<any><span data-value="value"></any>*/]]>',
'&lt;any&gt;&lt;span data-value="value"&gt;&lt;/any&gt;*/',
],
'#930' => [
'<br><any>value</any></br>',
'<br>&lt;any&gt;value&lt;/any&gt;<br>',
],
'#931' => [
'<hr><any>value</any></hr>',
'<hr>&lt;any&gt;value&lt;/any&gt;',
],
'#932' => [
'<wbr><any>value</any></wbr>',
'<wbr>&lt;any&gt;value&lt;/any&gt;',
],
'#933' => [
'<source><any>value</any></source>',
'<source>&lt;any&gt;value&lt;/any&gt;',
],
'#934' => [
'<img src="/typo3.org/logo.svg"><any>value</any></img>',
'<img src="/typo3.org/logo.svg">&lt;any&gt;value&lt;/any&gt;',
],
];
}

Expand Down
61 changes: 60 additions & 1 deletion tests/ScenarioTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

use DOMNode;
use DOMText;
use Masterminds\HTML5\Elements;
use PHPUnit\Framework\TestCase;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Behavior\Attr\UriAttrValueBuilder;
Expand Down Expand Up @@ -92,6 +93,21 @@ public static function tagFlagsAreProcessedDataProvider(): array
'<script><!-- --></script><script data-test="test"><!-- --></script>',
'<script><!-- test --></script><script data-test="test"><!-- test --></script>',
]),
implode("\n", [
'',
'<script>test</script><script data-test="test">test</script>',
'<script>&lt;!-- --&gt;</script><script data-test="test">&lt;!-- --&gt;</script>',
'<script>&lt;!-- test --&gt;</script><script data-test="test">&lt;!-- test --&gt;</script>',
]),
],
[
Behavior\Tag::ALLOW_CHILDREN | Behavior\Tag::PURGE_WITHOUT_CHILDREN | Behavior\Tag::ALLOW_INSECURE_RAW_TEXT,
implode("\n", [
'<script></script><script data-test="test"></script>',
'<script>test</script><script data-test="test">test</script>',
'<script><!-- --></script><script data-test="test"><!-- --></script>',
'<script><!-- test --></script><script data-test="test"><!-- test --></script>',
]),
implode("\n", [
'',
'<script>test</script><script data-test="test">test</script>',
Expand Down Expand Up @@ -308,6 +324,46 @@ public function cdataSectionsAreHandled(bool $allowed, int $flags, string $paylo
self::assertSame($expectation, $sanitizer->sanitize($payload));
}

public static function rawTextElementsAreHandledDataProvider(): \Generator
{
foreach (Elements::$html5 as $name => $flags) {
if (($flags & Elements::TEXT_RAW) !== Elements::TEXT_RAW) {
continue;
}
yield $name => [
sprintf('<%1$s><any>value</any></%1$s>', $name),
sprintf('<%1$s>&lt;any&gt;value&lt;/any&gt;</%1$s>', $name),
];
};
}

/**
* @test
* @dataProvider rawTextElementsAreHandledDataProvider
*/
public function rawTextElementsAreHandled(string $payload, string $expectation)
{
$elements = array_filter(
Elements::$html5,
static function (int $flags) {
return ($flags & Elements::TEXT_RAW) === Elements::TEXT_RAW;
}
);
$behavior = (new Behavior())
->withName('scenario-test')
->withTags(...array_map(
static function (string $name) {
return new Behavior\Tag($name, Behavior\Tag::ALLOW_CHILDREN);
},
array_keys($elements)
));
$sanitizer = new Sanitizer(
$behavior,
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
}

/**
* @test
*/
Expand All @@ -331,6 +387,7 @@ public function isJsonLdScriptAllowed()
// rest is keep, since `type` attr value matches and child content is given
'8:<script type="application/ld+json">alert(4)</script>',
'9:<script type="application/ld+json">{"@id": "https://github.com/TYPO3/html-sanitizer"}</script>',
'10:<script type="application/ld+json">{{"@type":"Answer","text":"Usually the answer is <b>42</b>."}</script>',
]);
$expectation = implode("\n" , [
'1:',
Expand All @@ -342,6 +399,7 @@ public function isJsonLdScriptAllowed()
'7:',
'8:<script type="application/ld+json">alert(4)</script>',
'9:<script type="application/ld+json">{"@id": "https://github.com/TYPO3/html-sanitizer"}</script>',
'10:<script type="application/ld+json">{{"@type":"Answer","text":"Usually the answer is <b>42</b>."}</script>',
]);

$behavior = (new Behavior())
Expand All @@ -350,7 +408,8 @@ public function isJsonLdScriptAllowed()
->withTags(
(new Behavior\Tag(
'script',
Behavior\Tag::PURGE_WITHOUT_ATTRS | Behavior\Tag::PURGE_WITHOUT_CHILDREN | Behavior\Tag::ALLOW_CHILDREN
Behavior\Tag::PURGE_WITHOUT_ATTRS | Behavior\Tag::PURGE_WITHOUT_CHILDREN
| Behavior\Tag::ALLOW_CHILDREN | Behavior\Tag::ALLOW_INSECURE_RAW_TEXT
))->addAttrs(
(new Behavior\Attr('id')),
(new Behavior\Attr('type', Behavior\Attr::MANDATORY))
Expand Down

0 comments on commit 385741b

Please sign in to comment.