forked from symfony/html-sanitizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHtmlSanitizer.php
127 lines (104 loc) · 4.38 KB
/
HtmlSanitizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\HtmlSanitizer;
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;
/**
 * Sanitizes HTML strings according to an HtmlSanitizerConfig.
 *
 * The input is parsed with the configured parser, the resulting tree is
 * walked by a DomVisitor built for the target context, and the visited
 * tree is rendered back to a string.
 *
 * @author Titouan Galopin <[email protected]>
 */
final class HtmlSanitizer implements HtmlSanitizerInterface
{
    private HtmlSanitizerConfig $config;
    private ParserInterface $parser;

    /**
     * DOM visitors already built, indexed by context, so that each context
     * is only configured once per sanitizer instance.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param ParserInterface|null $parser Parser used to read the input; a MastermindsParser is created when omitted
     */
    public function __construct(HtmlSanitizerConfig $config, ?ParserInterface $parser = null)
    {
        $this->config = $config;
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the given input using the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input for the context associated with $element.
     *
     * The element name is normalized with StringSanitizer::htmlLower() and looked
     * up in W3CReference::CONTEXTS_MAP; unknown elements use the body context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(
            W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY,
            $input
        );
    }

    /**
     * Runs the sanitization pipeline for one context.
     *
     * Returns an empty string when the (possibly truncated) input is not valid
     * UTF-8, when the parser yields nothing, or when the visitor yields nothing.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build a DOM visitor
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        // Prevent DOS attack induced by extremely long HTML strings
        $input = $this->truncateToMaxLength($input);

        // Only operate on valid UTF-8 strings. This is necessary to prevent cross
        // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        // Parse as HTML
        if (!$parsed = $this->parser->parse($input)) {
            return '';
        }

        // Visit the DOM tree and render the sanitized nodes
        return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
    }

    /**
     * Cuts the input down to the configured maximum length (in bytes).
     *
     * A maximum length of -1 disables the limit. The cut is moved back to the
     * previous UTF-8 character boundary so that truncating valid UTF-8 never
     * leaves a dangling partial character, which would otherwise make the
     * subsequent UTF-8 validation reject the whole input.
     */
    private function truncateToMaxLength(string $input): string
    {
        $maxLength = $this->config->getMaxInputLength();
        if (-1 === $maxLength || \strlen($input) <= $maxLength) {
            return $input;
        }

        // $input[$cut] is the first byte that gets dropped. If it is a UTF-8
        // continuation byte (10xxxxxx), the cut falls inside a character. A
        // character has at most 3 continuation bytes, so 3 steps back are enough
        // for valid input and keep the work bounded for invalid input.
        $cut = $maxLength;
        for ($step = 0; $step < 3 && $cut > 0 && 0x80 === (\ord($input[$cut]) & 0xC0); ++$step) {
            --$cut;
        }

        return substr($input, 0, $cut);
    }

    /**
     * Tells whether the given string is valid UTF-8 (the empty string is valid).
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8:
        // the empty pattern matches (1) any valid string and returns false otherwise.
        return '' === $html || 1 === preg_match('//u', $html);
    }

    /**
     * Builds the DOM visitor for a context from the configured elements.
     *
     * In the head context only elements listed in W3CReference::HEAD_ELEMENTS
     * are kept; in any other context only elements NOT listed there are kept.
     * Allowed elements map to their allowed attributes, blocked elements to false.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        $keepHeadElements = W3CReference::CONTEXT_HEAD === $context;
        $elementsConfig = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if ($keepHeadElements === \array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$allowedElement] = $allowedAttributes;
            }
        }

        // Blocked elements are assigned after the allowed ones, so an element
        // present in both lists ends up as false.
        foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
            if ($keepHeadElements === \array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$blockedElement] = false;
            }
        }

        return new DomVisitor($this->config, $elementsConfig);
    }
}