1 <?php
2 /**
3 * Part of the Joomla Framework Filter Package
4 *
5 * @copyright Copyright (C) 2005 - 2016 Open Source Matters, Inc. All rights reserved.
6 * @license GNU General Public License version 2 or later; see LICENSE
7 */
8
9 namespace Joomla\Filter;
10
11 use Joomla\String\StringHelper;
12
13 /**
14 * InputFilter is a class for filtering input from any data source
15 *
16 * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
17 * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
18 *
19 * @since 1.0
20 */
21 class InputFilter
22 {
23 /**
* Defines that the InputFilter instance should use a whitelist method for sanitising tags.
25 *
26 * @var integer
27 * @since 1.3.0
28 */
29 const TAGS_WHITELIST = 0;
30
31 /**
* Defines that the InputFilter instance should use a blacklist method for sanitising tags.
33 *
34 * @var integer
35 * @since 1.3.0
36 */
37 const TAGS_BLACKLIST = 1;
38
39 /**
* Defines that the InputFilter instance should use a whitelist method for sanitising attributes.
41 *
42 * @var integer
43 * @since 1.3.0
44 */
45 const ATTR_WHITELIST = 0;
46
47 /**
* Defines that the InputFilter instance should use a blacklist method for sanitising attributes.
49 *
50 * @var integer
51 * @since 1.3.0
52 */
53 const ATTR_BLACKLIST = 1;
54
55 /**
56 * A container for InputFilter instances.
57 *
58 * @var InputFilter[]
59 * @since 1.0
60 * @deprecated 1.2.0
61 */
62 protected static $instances = array();
63
64 /**
* The array of user-defined tags; $tagsMethod decides whether it is treated as a whitelist or a blacklist.
66 *
67 * @var array
68 * @since 1.0
69 */
70 public $tagsArray;
71
72 /**
* The array of user-defined tag attributes; $attrMethod decides whether it is treated as a whitelist or a blacklist.
74 *
75 * @var array
76 * @since 1.0
77 */
78 public $attrArray;
79
80 /**
81 * The method for sanitising tags
82 *
83 * @var integer
84 * @since 1.0
85 */
86 public $tagsMethod;
87
88 /**
89 * The method for sanitising attributes
90 *
91 * @var integer
92 * @since 1.0
93 */
94 public $attrMethod;
95
/**
 * A special list of blacklisted chars
 *
 * These are entity-style character names which cleanAttributes() removes from attribute
 * names after entity decoding. Every entry is an entity name: a literal ':' must not be
 * listed here, otherwise namespaced attribute names (e.g. xml:lang) lose their colon.
 *
 * @var    array
 * @since  1.0
 */
private $blacklistedChars = array(
    '&tab;',
    '&space;',
    '&colon;',
    '&column;',
);
108
109 /**
110 * A flag for XSS checks. Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
111 *
112 * @var integer
113 * @since 1.0
114 */
115 public $xssAuto;
116
117 /**
118 * The list of the default blacklisted tags.
119 *
120 * @var array
121 * @since 1.0
122 */
123 public $tagBlacklist = array(
124 'applet',
125 'body',
126 'bgsound',
127 'base',
128 'basefont',
129 'embed',
130 'frame',
131 'frameset',
132 'head',
133 'html',
134 'id',
135 'iframe',
136 'ilayer',
137 'layer',
138 'link',
139 'meta',
140 'name',
141 'object',
142 'script',
143 'style',
144 'title',
145 'xml',
146 );
147
148 /**
149 * The list of the default blacklisted tag attributes. All event handlers implicit.
150 *
151 * @var array
152 * @since 1.0
153 */
154 public $attrBlacklist = array(
155 'action',
156 'background',
157 'codebase',
158 'dynsrc',
159 'formaction',
160 'lowsrc',
161 );
162
/**
 * Constructor for InputFilter class.
 *
 * The tag and attribute lists are coerced to arrays and lower-cased before being stored,
 * so later lookups can compare against lower-cased names.
 *
 * @param   array    $tagsArray   List of user-defined tags
 * @param   array    $attrArray   List of user-defined attributes
 * @param   integer  $tagsMethod  WhiteList method = 0, BlackList method = 1
 * @param   integer  $attrMethod  WhiteList method = 0, BlackList method = 1
 * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
 *
 * @since   1.0
 */
public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = self::TAGS_WHITELIST, $attrMethod = self::ATTR_WHITELIST,
    $xssAuto = 1)
{
    // Normalise the user supplied lists to lower case while storing them
    $this->tagsArray = array_map('strtolower', (array) $tagsArray);
    $this->attrArray = array_map('strtolower', (array) $attrArray);

    // The remaining settings are stored exactly as given
    $this->tagsMethod = $tagsMethod;
    $this->attrMethod = $attrMethod;
    $this->xssAuto    = $xssAuto;
}
188
/**
 * Method to be called by another php script. Processes for XSS and
 * specified bad code.
 *
 * For every named filter except ARRAY and RAW, an array input is filtered element by
 * element and returned as a re-indexed list; any other input is filtered as a single value.
 * Values are cast to string before being matched, stripped or trimmed.
 *
 * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
 * @param   string  $type    The return type for the variable:
 *                           INT:       An integer, or an array of integers,
 *                           UINT:      An unsigned integer, or an array of unsigned integers,
 *                           FLOAT:     A floating point number, or an array of floating point numbers,
 *                           BOOLEAN:   A boolean value,
 *                           WORD:      A string containing A-Z or underscores only (not case sensitive),
 *                           ALNUM:     A string containing A-Z or 0-9 only (not case sensitive),
 *                           CMD:       A string containing A-Z, 0-9, underscores, periods or hyphens (not case sensitive),
 *                           BASE64:    A string containing A-Z, 0-9, forward slashes, plus or equals (not case sensitive),
 *                           STRING:    A fully decoded and sanitised string (default),
 *                           HTML:      A sanitised string,
 *                           ARRAY:     An array,
 *                           PATH:      A sanitised file path, or an array of sanitised file paths,
 *                           TRIM:      A string trimmed from normal, non-breaking and multibyte spaces
 *                           USERNAME:  Do not use (use an application specific filter),
 *                           RAW:       The raw string is returned with no filtering,
 *                           unknown:   An unknown filter will act like STRING. If the input is an array it will return an
 *                                      array of fully decoded and sanitised strings.
 *
 * @return  mixed  'Cleaned' version of input parameter
 *
 * @since   1.0
 */
public function clean($source, $type = 'string')
{
    // Handle the type constraint cases
    switch (strtoupper($type))
    {
        case 'INT':
        case 'INTEGER':
            return $this->filterEach($source, 'matchInteger', false);

        case 'UINT':
            return $this->filterEach($source, 'matchInteger', true);

        case 'FLOAT':
        case 'DOUBLE':
            return $this->filterEach($source, 'matchFloat');

        case 'BOOL':
        case 'BOOLEAN':
            return $this->filterEach($source, 'toBoolean');

        case 'WORD':
            return $this->filterEach($source, 'stripPattern', '/[^A-Z_]/i');

        case 'ALNUM':
            return $this->filterEach($source, 'stripPattern', '/[^A-Z0-9]/i');

        case 'CMD':
            return $this->filterEach($source, 'stripCommand');

        case 'BASE64':
            return $this->filterEach($source, 'stripPattern', '/[^A-Z0-9\/+=]/i');

        case 'STRING':
            return $this->filterEach($source, 'stripMarkup', true);

        case 'HTML':
            return $this->filterEach($source, 'stripMarkup', false);

        case 'ARRAY':
            return (array) $source;

        case 'PATH':
            return $this->filterEach($source, 'matchPath');

        case 'TRIM':
            return $this->filterEach($source, 'trimSpaces');

        case 'USERNAME':
            return $this->filterEach($source, 'stripPattern', '/[\x00-\x1F\x7F<>"\'%&]/');

        case 'RAW':
            return $source;
    }

    // Unknown filter: for arrays, keep the keys and only filter the string elements
    if (is_array($source))
    {
        foreach ($source as $key => $value)
        {
            // Filter element for XSS and other 'bad' code etc.
            if (is_string($value))
            {
                $source[$key] = $this->remove($this->decode($value));
            }
        }

        return $source;
    }

    // Or a non-empty string?
    if (is_string($source) && !empty($source))
    {
        // Filter source for XSS and other 'bad' code etc.
        return $this->remove($this->decode($source));
    }

    // Not an array or string... return the passed parameter
    return $source;
}

/**
 * Apply one of the private single-value filters to a value, or to each element of an array.
 *
 * @param   mixed   $source  A single value, or an array of values (returned re-indexed)
 * @param   string  $method  Name of the private helper that filters one value
 * @param   mixed   $option  Optional second argument handed to the helper
 *
 * @return  mixed  The filtered value, or a list of filtered values
 */
private function filterEach($source, $method, $option = null)
{
    if (!is_array($source))
    {
        return $this->$method($source, $option);
    }

    $result = array();

    // Iterate through the array
    foreach ($source as $eachValue)
    {
        $result[] = $this->$method($eachValue, $option);
    }

    return $result;
}

/**
 * Extract the first (optionally signed) integer found in a value; 0 when there is none.
 *
 * @param   mixed    $value     The value to inspect
 * @param   boolean  $unsigned  True to return the absolute value
 *
 * @return  integer
 */
private function matchInteger($value, $unsigned)
{
    preg_match('/[-+]?[0-9]+/', (string) $value, $matches);

    if (!isset($matches[0]))
    {
        return 0;
    }

    $number = (int) $matches[0];

    return $unsigned ? abs($number) : $number;
}

/**
 * Extract the first floating point number found in a value; 0 when there is none.
 *
 * @param   mixed  $value  The value to inspect
 *
 * @return  float|integer
 */
private function matchFloat($value)
{
    preg_match('/[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/', (string) $value, $matches);

    return isset($matches[0]) ? (float) $matches[0] : 0;
}

/**
 * Cast a value to boolean.
 *
 * @param   mixed  $value  The value to cast
 *
 * @return  boolean
 */
private function toBoolean($value)
{
    return (bool) $value;
}

/**
 * Remove every character matched by a regular expression from a value.
 *
 * @param   mixed   $value    The value to filter
 * @param   string  $pattern  Regular expression matching the characters to remove
 *
 * @return  string
 */
private function stripPattern($value, $pattern)
{
    return (string) preg_replace($pattern, '', (string) $value);
}

/**
 * Reduce a value to the CMD character set and drop any leading periods.
 *
 * @param   mixed  $value  The value to filter
 *
 * @return  string
 */
private function stripCommand($value)
{
    return ltrim($this->stripPattern($value, '/[^A-Z0-9_\.-]/i'), '.');
}

/**
 * Run a value through the tag/attribute filter, optionally decoding entities first.
 *
 * @param   mixed    $value   The value to filter
 * @param   boolean  $decode  True to decode HTML entities before filtering (STRING), false for HTML
 *
 * @return  string
 */
private function stripMarkup($value, $decode)
{
    $value = (string) $value;

    if ($decode)
    {
        $value = $this->decode($value);
    }

    return (string) $this->remove($value);
}

/**
 * Return a value only when the whole of it matches the path pattern; otherwise an empty string.
 *
 * @param   mixed  $value  The value to check
 *
 * @return  string
 */
private function matchPath($value)
{
    $pattern = '/^[A-Za-z0-9_\/-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';

    preg_match($pattern, (string) $value, $matches);

    return isset($matches[0]) ? (string) $matches[0] : '';
}

/**
 * Trim a value with trim(), then trim the byte sequences E3 80 80 and C2 A0 via StringHelper::trim().
 *
 * @param   mixed  $value  The value to trim
 *
 * @return  string
 */
private function trimSpaces($value)
{
    $cleaned = trim((string) $value);
    $cleaned = StringHelper::trim($cleaned, chr(0xE3) . chr(0x80) . chr(0x80));

    return StringHelper::trim($cleaned, chr(0xC2) . chr(0xA0));
}
535
/**
 * Function to determine if contents of an attribute are safe
 *
 * The name and value are lower-cased and the value is entity-decoded before checking.
 * An attribute is reported as bad when it is a style attribute whose value contains
 * "expression", or when its value contains a script-like scheme (javascript, vbscript,
 * livescript, behaviour, mocha) followed by a colon. The colon is also recognised in its
 * "&colon;" and "&column;" entity spellings.
 *
 * @param   array  $attrSubSet  A 2 element array for attribute's name, value
 *
 * @return  boolean  True if bad code is detected
 *
 * @since   1.0
 */
public static function checkAttribute($attrSubSet)
{
    $quoteStyle = version_compare(PHP_VERSION, '5.4', '>=') ? ENT_QUOTES | ENT_HTML401 : ENT_QUOTES;

    $name  = strtolower($attrSubSet[0]);
    $value = html_entity_decode(strtolower($attrSubSet[1]), $quoteStyle, 'UTF-8');

    // CSS expressions are only meaningful inside a style attribute
    if ($name === 'style' && strpos($value, 'expression') !== false)
    {
        return true;
    }

    // "!== 0" also reports a failed match (false) as bad, so errors fail closed
    return preg_match('/(?:(?:java|vb|live)script|behaviour|mocha)(?::|&colon;|&column;)/', $value) !== 0;
}
555
/**
 * Internal method to iteratively remove all unwanted tags and attributes
 *
 * cleanTags() is applied repeatedly until a pass returns its input unchanged, which
 * provides nested tag protection.
 *
 * @param   string  $source  Input string to be 'cleaned'
 *
 * @return  string  'Cleaned' version of input parameter
 *
 * @since   1.0
 */
protected function remove($source)
{
    while (true)
    {
        $previous = $source;
        $source   = $this->cleanTags($previous);

        // Stop as soon as a pass no longer changes anything
        if ($previous == $source)
        {
            return $source;
        }
    }
}
577
/**
 * Internal method to strip a string of certain tags
 *
 * Walks the string from one '<' to the next. Text outside tags is copied to the output
 * unchanged. A tag is dropped when its name is not a plain alphanumeric name or, with
 * xssAuto enabled, when it is in $tagBlacklist. Remaining tags are kept or dropped
 * according to $tagsArray / $tagsMethod, and kept opening tags are rebuilt from the
 * attributes that cleanAttributes() lets through.
 *
 * @param   string  $source  Input string to be 'cleaned'
 *
 * @return  string  'Cleaned' version of input parameter
 *
 * @since   1.0
 */
protected function cleanTags($source)
{
    // First, pre-process this for illegal characters inside attribute values
    $source = $this->escapeAttributeValues($source);

    // In the beginning we don't really have a tag, so everything is postTag
    $preTag = null;
    $postTag = $source;
    $currentSpace = false;

    // Initialise to an empty string so the variable is always defined
    $attr = '';

    // Is there a tag? If so it will certainly start with a '<'.
    $tagOpen_start = StringHelper::strpos($source, '<');

    while ($tagOpen_start !== false)
    {
        // Move the text before the tag to the output; $postTag now begins at the '<'
        $preTag .= StringHelper::substr($postTag, 0, $tagOpen_start);
        $postTag = StringHelper::substr($postTag, $tagOpen_start);
        $fromTagOpen = StringHelper::substr($postTag, 1);
        $tagOpen_end = StringHelper::strpos($fromTagOpen, '>');

        // Check for mal-formed tag where we have a second '<' before the first '>'
        // NOTE(review): $tagOpen_start was measured against $postTag before it was re-sliced above,
        // whereas the tag now sits at offset 0 of $postTag -- confirm that reusing the old offset
        // here and in the two branches below is intended.
        $nextOpenTag = (StringHelper::strlen($postTag) > $tagOpen_start) ? StringHelper::strpos($postTag, '<', $tagOpen_start + 1) : false;

        if (($nextOpenTag !== false) && ($nextOpenTag < $tagOpen_end))
        {
            // At this point we have a mal-formed tag -- remove the offending open
            $postTag = StringHelper::substr($postTag, 0, $tagOpen_start) . StringHelper::substr($postTag, $tagOpen_start + 1);
            $tagOpen_start = StringHelper::strpos($postTag, '<');
            continue;
        }

        // Let's catch any non-terminated tags and skip over them
        if ($tagOpen_end === false)
        {
            $postTag = StringHelper::substr($postTag, $tagOpen_start + 1);
            $tagOpen_start = StringHelper::strpos($postTag, '<');
            continue;
        }

        // Do we have a nested tag?
        $tagOpen_nested = StringHelper::strpos($fromTagOpen, '<');

        if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end))
        {
            // Copy everything up to the nested '<' to the output and restart from there
            $preTag .= StringHelper::substr($postTag, 0, ($tagOpen_nested + 1));
            $postTag = StringHelper::substr($postTag, ($tagOpen_nested + 1));
            $tagOpen_start = StringHelper::strpos($postTag, '<');
            continue;
        }

        // Let's get some information about our tag and setup attribute pairs
        // NOTE(review): the value assigned to $tagOpen_nested here does not appear to be read
        // again before it is overwritten on the next iteration.
        $tagOpen_nested = (StringHelper::strpos($fromTagOpen, '<') + $tagOpen_start + 1);
        $currentTag = StringHelper::substr($fromTagOpen, 0, $tagOpen_end);
        $tagLength = StringHelper::strlen($currentTag);
        $tagLeft = $currentTag;
        $attrSet = array();
        $currentSpace = StringHelper::strpos($tagLeft, ' ');

        // Are we an open tag or a close tag?
        if (StringHelper::substr($currentTag, 0, 1) == '/')
        {
            // Close Tag: the name is the first word without its leading slash
            $isCloseTag = true;
            list ($tagName) = explode(' ', $currentTag);
            $tagName = StringHelper::substr($tagName, 1);
        }
        else
        {
            // Open Tag: the name is the first word
            $isCloseTag = false;
            list ($tagName) = explode(' ', $currentTag);
        }

        /*
         * Exclude all "non-regular" tagnames
         * OR no tagname
         * OR remove if xssauto is on and tag is blacklisted
         */
        if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName))
            || (!$tagName)
            || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto)))
        {
            // Skip past the tag body plus its '<' and '>' (hence + 2)
            $postTag = StringHelper::substr($postTag, ($tagLength + 2));
            $tagOpen_start = StringHelper::strpos($postTag, '<');

            // Strip tag
            continue;
        }

        /*
         * Time to grab any attributes from the tag... need this section in
         * case attributes have spaces in the values.
         */
        while ($currentSpace !== false)
        {
            $attr = '';
            $fromSpace = StringHelper::substr($tagLeft, ($currentSpace + 1));
            $nextEqual = StringHelper::strpos($fromSpace, '=');
            $nextSpace = StringHelper::strpos($fromSpace, ' ');
            $openQuotes = StringHelper::strpos($fromSpace, '"');
            $closeQuotes = StringHelper::strpos(StringHelper::substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;

            $startAtt = '';
            $startAttPosition = 0;

            // Locate an equals sign followed by an opening double quote, ignoring whitespace around the '='
            if (preg_match('#\s*=\s*\"#', $fromSpace, $matches, PREG_OFFSET_CAPTURE))
            {
                // We have found an attribute, convert its byte position to a UTF-8 string length, using non-multibyte substr()
                $stringBeforeAttr = substr($fromSpace, 0, $matches[0][1]);
                $startAttPosition = StringHelper::strlen($stringBeforeAttr);
                $startAtt = $matches[0][0];
                $closeQuotePos = StringHelper::strpos(
                    StringHelper::substr($fromSpace, ($startAttPosition + StringHelper::strlen($startAtt))), '"'
                );

                // Recompute the positions found above relative to the matched ="
                $closeQuotes = $closeQuotePos + $startAttPosition + StringHelper::strlen($startAtt);
                $nextEqual = $startAttPosition + StringHelper::strpos($startAtt, '=');
                $openQuotes = $startAttPosition + StringHelper::strpos($startAtt, '"');
                $nextSpace = StringHelper::strpos(StringHelper::substr($fromSpace, $closeQuotes), ' ') + $closeQuotes;
            }

            // Do we have an attribute to process? [check for equal sign]
            if ($fromSpace != '/' && (($nextEqual && $nextSpace && $nextSpace < $nextEqual) || !$nextEqual))
            {
                if (!$nextEqual)
                {
                    $attribEnd = StringHelper::strpos($fromSpace, '/') - 1;
                }
                else
                {
                    $attribEnd = $nextSpace - 1;
                }

                // If there is an ending, use this, if not, do not worry.
                if ($attribEnd > 0)
                {
                    $fromSpace = StringHelper::substr($fromSpace, $attribEnd + 1);
                }
            }

            if (StringHelper::strpos($fromSpace, '=') !== false)
            {
                /*
                 * If the attribute value is wrapped in quotes we need to grab the substring from the closing quote,
                 * otherwise grab until the next space.
                 */
                if (($openQuotes !== false)
                    && (StringHelper::strpos(StringHelper::substr($fromSpace, ($openQuotes + 1)), '"') !== false))
                {
                    $attr = StringHelper::substr($fromSpace, 0, ($closeQuotes + 1));
                }
                else
                {
                    $attr = StringHelper::substr($fromSpace, 0, $nextSpace);
                }
            }
            else
            // No more equal signs so add any extra text in the tag into the attribute array [eg. checked]
            {
                if ($fromSpace != '/')
                {
                    $attr = StringHelper::substr($fromSpace, 0, $nextSpace);
                }
            }

            // Last Attribute Pair
            if (!$attr && $fromSpace != '/')
            {
                $attr = $fromSpace;
            }

            // Add attribute pair to the attribute array
            $attrSet[] = $attr;

            // Move search point and continue iteration
            $tagLeft = StringHelper::substr($fromSpace, StringHelper::strlen($attr));
            $currentSpace = StringHelper::strpos($tagLeft, ' ');
        }

        // Is our tag in the user input array?
        $tagFound = in_array(strtolower($tagName), $this->tagsArray);

        // Keep the tag when it is listed in whitelist mode, or not listed in blacklist mode
        if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod))
        {
            // Reconstruct tag with allowed attributes
            if (!$isCloseTag)
            {
                // Open or single tag
                $attrSet = $this->cleanAttributes($attrSet);
                $preTag .= '<' . $tagName;

                for ($i = 0, $count = count($attrSet); $i < $count; $i++)
                {
                    $preTag .= ' ' . $attrSet[$i];
                }

                // Reformat single tags to XHTML: self-close unless a matching closing tag follows
                if (StringHelper::strpos($fromTagOpen, '</' . $tagName))
                {
                    $preTag .= '>';
                }
                else
                {
                    $preTag .= ' />';
                }
            }
            else
            // Closing tag
            {
                $preTag .= '</' . $tagName . '>';
            }
        }

        // Find next tag's start and continue iteration
        $postTag = StringHelper::substr($postTag, ($tagLength + 2));
        $tagOpen_start = StringHelper::strpos($postTag, '<');
    }

    // Append any code after the end of tags and return
    if ($postTag != '<')
    {
        $preTag .= $postTag;
    }

    return $preTag;
}
818
/**
 * Internal method to strip a tag of certain attributes
 *
 * Each entry is split into a name and a value. The name is lower-cased, entity-decoded and
 * stripped of whitespace and blacklisted character names; the value is trimmed and stripped
 * of "&#", newlines, double quotes, wrapping single quotes and slashes. Entries without a
 * value, blacklisted or "on*" names (when xssAuto is on), values flagged by checkAttribute()
 * and names rejected by $attrArray / $attrMethod are dropped.
 *
 * @param   array  $attrSet  Array of attribute pairs to filter
 *
 * @return  array  Filtered array of attribute pairs
 *
 * @since   1.0
 */
protected function cleanAttributes($attrSet)
{
    $cleaned    = array();
    $quoteStyle = version_compare(PHP_VERSION, '5.4', '>=') ? ENT_QUOTES | ENT_HTML401 : ENT_QUOTES;
    $total      = count($attrSet);

    // Iterate through attribute pairs
    for ($index = 0; $index < $total; $index++)
    {
        $rawPair = $attrSet[$index];

        // Skip blank spaces
        if (!$rawPair)
        {
            continue;
        }

        // Split into name/value pairs
        $pair = explode('=', trim($rawPair), 2);

        // Take the last word of the name part in case a valueless attribute precedes it
        $nameWords = explode(' ', trim($pair[0]));
        $name      = strtolower(array_pop($nameWords));

        // Remove all spaces as valid attributes does not have spaces.
        $name = html_entity_decode($name, $quoteStyle, 'UTF-8');
        $name = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $name);
        $name = preg_replace('/\s+/u', '', $name);

        // Replace special blacklisted chars here
        $name = str_replace($this->blacklistedChars, '', $name);

        // Remove all "non-regular" attribute names AND blacklisted attributes
        $irregular   = !preg_match('/[a-z]*$/i', $name);
        $blacklisted = $this->xssAuto
            && (in_array(strtolower($name), $this->attrBlacklist) || substr($name, 0, 2) == 'on');

        if ($irregular || $blacklisted)
        {
            continue;
        }

        // Attributes without a value are dropped
        if (!isset($pair[1]))
        {
            continue;
        }

        // XSS attribute value filtering: trim, then strip "&#", newlines and double quotes
        $value = trim($pair[1]);
        $value = str_replace('&#', '', $value);
        $value = preg_replace('/[\n\r]/', '', $value);
        $value = str_replace('"', '', $value);

        // Drop single quotes wrapping the value (Single quotes shouldn't be used to pad attr values)
        if (substr($value, 0, 1) == "'" && substr($value, -1) == "'")
        {
            $value = substr($value, 1, -1);
        }

        // Strip slashes
        $value = stripslashes($value);

        // Autostrip script tags
        if (static::checkAttribute(array($name, $value)))
        {
            continue;
        }

        // Is our attribute in the user input array?
        $attrFound = in_array(strtolower($name), $this->attrArray);

        // Keep it only when listed in whitelist mode, or not listed in blacklist mode
        if ($attrFound === (bool) $this->attrMethod)
        {
            continue;
        }

        // An empty value yields name="" and "0" yields name="0"
        $cleaned[] = $name . '="' . $value . '"';
    }

    return $cleaned;
}
933
/**
 * Try to convert to plaintext
 *
 * Decodes HTML entities, including both single and double quotes, as UTF-8.
 *
 * @param   string  $source  The source string.
 *
 * @return  string  Plaintext string
 *
 * @since   1.0
 * @deprecated  This method will be removed once support for PHP 5.3 is discontinued.
 */
protected function decode($source)
{
    $plainText = html_entity_decode($source, ENT_QUOTES, 'UTF-8');

    return $plainText;
}
948
/**
 * Escape < > and " inside attribute values
 *
 * Scans for tags containing a quoted attribute value, replaces the three characters with
 * their HTML entities inside each value, and passes the value through
 * stripCssExpressions(). All positions are kept as UTF-8 character offsets; byte offsets
 * reported by preg_match() are converted before use.
 *
 * @param   string  $source  The source string.
 *
 * @return  string  Filtered string
 *
 * @since   1.0
 */
protected function escapeAttributeValues($source)
{
    $alreadyFiltered = '';
    $remainder       = $source;
    $badChars        = array('<', '"', '>');
    $escapedChars    = array('&lt;', '&quot;', '&gt;');

    // Process each portion based on presence of =" and "<space>, "/>, or ">
    // See if there are any more attributes to process
    while (preg_match('#<[^>]*?=\s*?(\"|\')#s', $remainder, $matches, PREG_OFFSET_CAPTURE))
    {
        // We have found a tag with an attribute, convert its byte position to a UTF-8 string length, using non-multibyte substr()
        $stringBeforeTag = substr($remainder, 0, $matches[0][1]);
        $tagPosition     = StringHelper::strlen($stringBeforeTag);

        // Get the character length before the attribute value
        $nextBefore = $tagPosition + StringHelper::strlen($matches[0][0]);

        // Figure out if we have a single or double quote and look for the matching closing quote
        // Closing quote should be "/>, ">, "<space>, or " at the end of the string
        $quote     = StringHelper::substr($matches[0][0], -1);
        $pregMatch = ($quote == '"') ? '#(\"\s*/\s*>|\"\s*>|\"\s+|\"$)#' : "#(\'\s*/\s*>|\'\s*>|\'\s+|\'$)#";

        // Get the portion after attribute value
        $attributeValueRemainder = StringHelper::substr($remainder, $nextBefore);

        if (preg_match($pregMatch, $attributeValueRemainder, $matches, PREG_OFFSET_CAPTURE))
        {
            // We have a closing quote, convert its byte position to a UTF-8 string length, using non-multibyte substr()
            $stringBeforeQuote = substr($attributeValueRemainder, 0, $matches[0][1]);
            $closeQuoteChars   = StringHelper::strlen($stringBeforeQuote);

            // Use the character count, not the byte offset: $nextBefore is a character offset
            $nextAfter = $nextBefore + $closeQuoteChars;
        }
        else
        {
            // No closing quote
            $nextAfter = StringHelper::strlen($remainder);
        }

        // Get the actual attribute value
        $attributeValue = StringHelper::substr($remainder, $nextBefore, $nextAfter - $nextBefore);

        // Escape bad chars
        $attributeValue   = str_replace($badChars, $escapedChars, $attributeValue);
        $attributeValue   = $this->stripCssExpressions($attributeValue);
        $alreadyFiltered .= StringHelper::substr($remainder, 0, $nextBefore) . $attributeValue . $quote;

        // Continue after the closing quote
        $remainder = StringHelper::substr($remainder, $nextAfter + 1);
    }

    // At this point, we just have to return the $alreadyFiltered and the $remainder
    return $alreadyFiltered . $remainder;
}
1010
/**
 * Remove CSS Expressions in the form of <property>:expression(...)
 *
 * Comments are stripped first so that an expression split by a comment is still
 * recognised. The check is case-insensitive and also matches at the very start of the
 * string. When an expression is found, the comment-stripped string is returned with every
 * ":expression" removed; otherwise the source is returned untouched.
 *
 * @param   string  $source  The source string.
 *
 * @return  string  Filtered string
 *
 * @since   1.0
 */
protected function stripCssExpressions($source)
{
    // Strip any comments out (in the form of /*...*/), including comments spanning several lines
    $test = preg_replace('#\/\*.*\*\/#Us', '', $source);

    // Test the stripped string for :expression followed by a '(', ignoring case
    if (!preg_match('#:expression\s*\(#i', $test))
    {
        // Not found, so we are done
        return $source;
    }

    // If found, remove :expression
    return str_ireplace(':expression', '', $test);
}
1042 }
1043