File joomla/filter/input.php | Joomla! Framework TM

   1 <?php
   2 /**
   3  * @package     Joomla.Platform
   4  * @subpackage  Filter
   5  *
   6  * @copyright   Copyright (C) 2005 - 2017 Open Source Matters, Inc. All rights reserved.
   7  * @license     GNU General Public License version 2 or later; see LICENSE
   8  */
   9 
  10 defined('JPATH_PLATFORM') or die;
  11 
  12 use Joomla\Filter\InputFilter;
  13 use Joomla\String\StringHelper;
  14 
  15 /**
  16  * JFilterInput is a class for filtering input from any data source
  17  *
  18  * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
  19  * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
  20  *
  21  * @since  11.1
  22  */
  23 class JFilterInput extends InputFilter
  24 {
  25     /**
  26      * A flag for Unicode Supplementary Characters (4-byte Unicode character) stripping.
  27      *
  28      * @var    integer
  29      *
  30      * @since  3.5
  31      */
  32     public $stripUSC = 0;
  33 
  34     /**
  35      * Constructor for inputFilter class. Only first parameter is required.
  36      *
  37      * @param   array    $tagsArray   List of user-defined tags
  38      * @param   array    $attrArray   List of user-defined attributes
  39      * @param   integer  $tagsMethod  WhiteList method = 0, BlackList method = 1
  40      * @param   integer  $attrMethod  WhiteList method = 0, BlackList method = 1
  41      * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  42      * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0, ask the database driver = -1
  43      *
  44      * @since   11.1
  45      */
  46     public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = -1)
  47     {
  48         // Make sure user defined arrays are in lowercase
  49         $tagsArray = array_map('strtolower', (array) $tagsArray);
  50         $attrArray = array_map('strtolower', (array) $attrArray);
  51 
  52         // Assign member variables
  53         $this->tagsArray = $tagsArray;
  54         $this->attrArray = $attrArray;
  55         $this->tagsMethod = $tagsMethod;
  56         $this->attrMethod = $attrMethod;
  57         $this->xssAuto = $xssAuto;
  58         $this->stripUSC = $stripUSC;
  59         /**
  60          * If Unicode Supplementary Characters stripping is not set we have to check with the database driver. If the
  61          * driver does not support USCs (i.e. there is no utf8mb4 support) we will enable USC stripping.
  62          */
  63         if ($this->stripUSC === -1)
  64         {
  65             try
  66             {
  67                 // Get the database driver
  68                 $db = JFactory::getDbo();
  69 
  70                 // This trick is required to let the driver determine the utf-8 multibyte support
  71                 $db->connect();
  72 
  73                 // And now we can decide if we should strip USCs
  74                 $this->stripUSC = $db->hasUTF8mb4Support() ? 0 : 1;
  75             }
  76             catch (RuntimeException $e)
  77             {
  78                 // Could not connect to MySQL. Strip USC to be on the safe side.
  79                 $this->stripUSC = 1;
  80             }
  81         }
  82     }
  83 
  84     /**
  85      * Returns an input filter object, only creating it if it doesn't already exist.
  86      *
  87      * @param   array    $tagsArray   List of user-defined tags
  88      * @param   array    $attrArray   List of user-defined attributes
  89      * @param   integer  $tagsMethod  WhiteList method = 0, BlackList method = 1
  90      * @param   integer  $attrMethod  WhiteList method = 0, BlackList method = 1
  91      * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  92      * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0, ask the database driver = -1
  93      *
  94      * @return  JFilterInput  The JFilterInput object.
  95      *
  96      * @since   11.1
  97      */
  98     public static function &getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = -1)
  99     {
 100         $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));
 101 
 102         if (empty(self::$instances[$sig]))
 103         {
 104             self::$instances[$sig] = new JFilterInput($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto, $stripUSC);
 105         }
 106 
 107         return self::$instances[$sig];
 108     }
 109 
 110     /**
 111      * Method to be called by another php script. Processes for XSS and
 112      * specified bad code.
 113      *
 114      * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
 115      * @param   string  $type    The return type for the variable:
 116      *                           INT:       An integer, or an array of integers,
 117      *                           UINT:      An unsigned integer, or an array of unsigned integers,
 118      *                           FLOAT:     A floating point number, or an array of floating point numbers,
 119      *                           BOOLEAN:   A boolean value,
 120      *                           WORD:      A string containing A-Z or underscores only (not case sensitive),
 121      *                           ALNUM:     A string containing A-Z or 0-9 only (not case sensitive),
 122      *                           CMD:       A string containing A-Z, 0-9, underscores, periods or hyphens (not case sensitive),
 123      *                           BASE64:    A string containing A-Z, 0-9, forward slashes, plus or equals (not case sensitive),
 124      *                           STRING:    A fully decoded and sanitised string (default),
 125      *                           HTML:      A sanitised string,
 126      *                           ARRAY:     An array,
 127      *                           PATH:      A sanitised file path, or an array of sanitised file paths,
 128      *                           TRIM:      A string trimmed from normal, non-breaking and multibyte spaces
 129      *                           USERNAME:  Do not use (use an application specific filter),
 130      *                           RAW:       The raw string is returned with no filtering,
 131      *                           unknown:   An unknown filter will act like STRING. If the input is an array it will return an
 132      *                                      array of fully decoded and sanitised strings.
 133      *
 134      * @return  mixed  'Cleaned' version of input parameter
 135      *
 136      * @since   11.1
 137      */
 138     public function clean($source, $type = 'string')
 139     {
 140         // Strip Unicode Supplementary Characters when requested to do so
 141         if ($this->stripUSC)
 142         {
 143             // Alternatively: preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xE2\xAF\x91", $source) but it'd be slower.
 144             $source = $this->stripUSC($source);
 145         }
 146 
 147         // Handle the type constraint cases
 148         switch (strtoupper($type))
 149         {
 150             case 'INT':
 151             case 'INTEGER':
 152                 $pattern = '/[-+]?[0-9]+/';
 153 
 154                 if (is_array($source))
 155                 {
 156                     $result = array();
 157 
 158                     // Itterate through the array
 159                     foreach ($source as $eachString)
 160                     {
 161                         preg_match($pattern, (string) $eachString, $matches);
 162                         $result[] = isset($matches[0]) ? (int) $matches[0] : 0;
 163                     }
 164                 }
 165                 else
 166                 {
 167                     preg_match($pattern, (string) $source, $matches);
 168                     $result = isset($matches[0]) ? (int) $matches[0] : 0;
 169                 }
 170 
 171                 break;
 172             case 'UINT':
 173                 $pattern = '/[-+]?[0-9]+/';
 174 
 175                 if (is_array($source))
 176                 {
 177                     $result = array();
 178 
 179                     // Itterate through the array
 180                     foreach ($source as $eachString)
 181                     {
 182                         preg_match($pattern, (string) $eachString, $matches);
 183                         $result[] = isset($matches[0]) ? abs((int) $matches[0]) : 0;
 184                     }
 185                 }
 186                 else
 187                 {
 188                     preg_match($pattern, (string) $source, $matches);
 189                     $result = isset($matches[0]) ? abs((int) $matches[0]) : 0;
 190                 }
 191 
 192                 break;
 193             case 'FLOAT':
 194             case 'DOUBLE':
 195                 $pattern = '/[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/';
 196 
 197                 if (is_array($source))
 198                 {
 199                     $result = array();
 200 
 201                     // Itterate through the array
 202                     foreach ($source as $eachString)
 203                     {
 204                         preg_match($pattern, (string) $eachString, $matches);
 205                         $result[] = isset($matches[0]) ? (float) $matches[0] : 0;
 206                     }
 207                 }
 208                 else
 209                 {
 210                     preg_match($pattern, (string) $source, $matches);
 211                     $result = isset($matches[0]) ? (float) $matches[0] : 0;
 212                 }
 213 
 214                 break;
 215             case 'BOOL':
 216             case 'BOOLEAN':
 217 
 218                 if (is_array($source))
 219                 {
 220                     $result = array();
 221 
 222                     // Iterate through the array
 223                     foreach ($source as $eachString)
 224                     {
 225                         $result[] = (bool) $eachString;
 226                     }
 227                 }
 228                 else
 229                 {
 230                     $result = (bool) $source;
 231                 }
 232 
 233                 break;
 234             case 'WORD':
 235                 $pattern = '/[^A-Z_]/i';
 236 
 237                 if (is_array($source))
 238                 {
 239                     $result = array();
 240 
 241                     // Iterate through the array
 242                     foreach ($source as $eachString)
 243                     {
 244                         $result[] = (string) preg_replace($pattern, '', $eachString);
 245                     }
 246                 }
 247                 else
 248                 {
 249                     $result = (string) preg_replace($pattern, '', $source);
 250                 }
 251 
 252                 break;
 253             case 'ALNUM':
 254                 $pattern = '/[^A-Z0-9]/i';
 255 
 256                 if (is_array($source))
 257                 {
 258                     $result = array();
 259 
 260                     // Iterate through the array
 261                     foreach ($source as $eachString)
 262                     {
 263                         $result[] = (string) preg_replace($pattern, '', $eachString);
 264                     }
 265                 }
 266                 else
 267                 {
 268                     $result = (string) preg_replace($pattern, '', $source);
 269                 }
 270 
 271                 break;
 272             case 'CMD':
 273                 $pattern = '/[^A-Z0-9_\.-]/i';
 274 
 275                 if (is_array($source))
 276                 {
 277                     $result = array();
 278 
 279                     // Iterate through the array
 280                     foreach ($source as $eachString)
 281                     {
 282                         $cleaned  = (string) preg_replace($pattern, '', $eachString);
 283                         $result[] = ltrim($cleaned, '.');
 284                     }
 285                 }
 286                 else
 287                 {
 288                     $result = (string) preg_replace($pattern, '', $source);
 289                     $result = ltrim($result, '.');
 290                 }
 291 
 292                 break;
 293             case 'BASE64':
 294                 $pattern = '/[^A-Z0-9\/+=]/i';
 295 
 296                 if (is_array($source))
 297                 {
 298                     $result = array();
 299 
 300                     // Iterate through the array
 301                     foreach ($source as $eachString)
 302                     {
 303                         $result[] = (string) preg_replace($pattern, '', $eachString);
 304                     }
 305                 }
 306                 else
 307                 {
 308                     $result = (string) preg_replace($pattern, '', $source);
 309                 }
 310 
 311                 break;
 312             case 'STRING':
 313 
 314                 if (is_array($source))
 315                 {
 316                     $result = array();
 317 
 318                     // Iterate through the array
 319                     foreach ($source as $eachString)
 320                     {
 321                         $result[] = (string) $this->remove($this->decode((string) $eachString));
 322                     }
 323                 }
 324                 else
 325                 {
 326                     $result = (string) $this->remove($this->decode((string) $source));
 327                 }
 328 
 329                 break;
 330             case 'HTML':
 331 
 332                 if (is_array($source))
 333                 {
 334                     $result = array();
 335 
 336                     // Iterate through the array
 337                     foreach ($source as $eachString)
 338                     {
 339                         $result[] = (string) $this->remove((string) $eachString);
 340                     }
 341                 }
 342                 else
 343                 {
 344                     $result = (string) $this->remove((string) $source);
 345                 }
 346 
 347                 break;
 348             case 'ARRAY':
 349                 $result = (array) $source;
 350 
 351                 break;
 352             case 'PATH':
 353                 $pattern = '/^[A-Za-z0-9_\/-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';
 354 
 355                 if (is_array($source))
 356                 {
 357                     $result = array();
 358 
 359                     // Itterate through the array
 360                     foreach ($source as $eachString)
 361                     {
 362                         preg_match($pattern, (string) $eachString, $matches);
 363                         $result[] = isset($matches[0]) ? (string) $matches[0] : '';
 364                     }
 365                 }
 366                 else
 367                 {
 368                     preg_match($pattern, $source, $matches);
 369                     $result = isset($matches[0]) ? (string) $matches[0] : '';
 370                 }
 371 
 372                 break;
 373             case 'TRIM':
 374 
 375                 if (is_array($source))
 376                 {
 377                     $result = array();
 378 
 379                     // Iterate through the array
 380                     foreach ($source as $eachString)
 381                     {
 382                         $cleaned  = (string) trim($eachString);
 383                         $cleaned  = StringHelper::trim($cleaned, chr(0xE3) . chr(0x80) . chr(0x80));
 384                         $result[] = StringHelper::trim($cleaned, chr(0xC2) . chr(0xA0));
 385                     }
 386                 }
 387                 else
 388                 {
 389                     $result = (string) trim($source);
 390                     $result = StringHelper::trim($result, chr(0xE3) . chr(0x80) . chr(0x80));
 391                     $result = StringHelper::trim($result, chr(0xC2) . chr(0xA0));
 392                 }
 393 
 394                 break;
 395             case 'USERNAME':
 396                 $pattern = '/[\x00-\x1F\x7F<>"\'%&]/';
 397 
 398                 if (is_array($source))
 399                 {
 400                     $result = array();
 401 
 402                     // Iterate through the array
 403                     foreach ($source as $eachString)
 404                     {
 405                         $result[] = (string) preg_replace($pattern, '', $eachString);
 406                     }
 407                 }
 408                 else
 409                 {
 410                     $result = (string) preg_replace($pattern, '', $source);
 411                 }
 412 
 413                 break;
 414             case 'RAW':
 415                 $result = $source;
 416 
 417                 break;
 418             default:
 419 
 420                 // Are we dealing with an array?
 421                 if (is_array($source))
 422                 {
 423                     foreach ($source as $key => $value)
 424                     {
 425                         // Filter element for XSS and other 'bad' code etc.
 426                         if (is_string($value))
 427                         {
 428                             $source[$key] = $this->_remove($this->_decode($value));
 429                         }
 430                     }
 431                     $result = $source;
 432                 }
 433                 else
 434                 {
 435                     // Or a string?
 436                     if (is_string($source) && !empty($source))
 437                     {
 438                         // Filter source for XSS and other 'bad' code etc.
 439                         $result = $this->_remove($this->_decode($source));
 440                     }
 441                     else
 442                     {
 443                         // Not an array or string... return the passed parameter
 444                         $result = $source;
 445                     }
 446                 }
 447 
 448                 break;
 449         }
 450 
 451         return $result;
 452     }
 453 
 454     /**
 455      * Function to punyencode utf8 mail when saving content
 456      *
 457      * @param   string  $text  The strings to encode
 458      *
 459      * @return  string  The punyencoded mail
 460      *
 461      * @since   3.5
 462      */
 463     public function emailToPunycode($text)
 464     {
 465         $pattern = '/(("mailto:)+[\w\.\-\+]+\@[^"?]+\.+[^."?]+("|\?))/';
 466 
 467         if (preg_match_all($pattern, $text, $matches))
 468         {
 469             foreach ($matches[0] as $match)
 470             {
 471                 $match  = (string) str_replace(array('?', '"'), '', $match);
 472                 $text   = (string) str_replace($match, JStringPunycode::emailToPunycode($match), $text);
 473             }
 474         }
 475 
 476         return $text;
 477     }
 478 
 479     /**
 480      * Checks an uploaded for suspicious naming and potential PHP contents which could indicate a hacking attempt.
 481      *
 482      * The options you can define are:
 483      * null_byte                   Prevent files with a null byte in their name (buffer overflow attack)
 484      * forbidden_extensions        Do not allow these strings anywhere in the file's extension
 485      * php_tag_in_content          Do not allow `<?php` tag in content
 486      * shorttag_in_content         Do not allow short tag `<?` in content
 487      * shorttag_extensions         Which file extensions to scan for short tags in content
 488      * fobidden_ext_in_content     Do not allow forbidden_extensions anywhere in content
 489      * php_ext_content_extensions  Which file extensions to scan for .php in content
 490      *
 491      * This code is an adaptation and improvement of Admin Tools' UploadShield feature,
 492      * relicensed and contributed by its author.
 493      *
 494      * @param   array  $file     An uploaded file descriptor
 495      * @param   array  $options  The scanner options (see the code for details)
 496      *
 497      * @return  boolean  True of the file is safe
 498      *
 499      * @since   3.4
 500      */
 501     public static function isSafeFile($file, $options = array())
 502     {
 503         $defaultOptions = array(
 504 
 505             // Null byte in file name
 506             'null_byte'                  => true,
 507 
 508             // Forbidden string in extension (e.g. php matched .php, .xxx.php, .php.xxx and so on)
 509             'forbidden_extensions'       => array(
 510                 'php', 'phps', 'pht', 'phtml', 'php3', 'php4', 'php5', 'php6', 'php7', 'inc', 'pl', 'cgi', 'fcgi', 'java', 'jar', 'py',
 511             ),
 512 
 513             // <?php tag in file contents
 514             'php_tag_in_content'         => true,
 515 
 516             // <? tag in file contents
 517             'shorttag_in_content'        => true,
 518 
 519             // Which file extensions to scan for short tags
 520             'shorttag_extensions'        => array(
 521                 'inc', 'phps', 'class', 'php3', 'php4', 'php5', 'txt', 'dat', 'tpl', 'tmpl',
 522             ),
 523 
 524             // Forbidden extensions anywhere in the content
 525             'fobidden_ext_in_content'    => true,
 526 
 527             // Which file extensions to scan for .php in the content
 528             'php_ext_content_extensions' => array('zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa'),
 529         );
 530 
 531         $options = array_merge($defaultOptions, $options);
 532 
 533         // Make sure we can scan nested file descriptors
 534         $descriptors = $file;
 535 
 536         if (isset($file['name']) && isset($file['tmp_name']))
 537         {
 538             $descriptors = self::decodeFileData(
 539                 array(
 540                     $file['name'],
 541                     $file['type'],
 542                     $file['tmp_name'],
 543                     $file['error'],
 544                     $file['size'],
 545                 )
 546             );
 547         }
 548 
 549         // Handle non-nested descriptors (single files)
 550         if (isset($descriptors['name']))
 551         {
 552             $descriptors = array($descriptors);
 553         }
 554 
 555         // Scan all descriptors detected
 556         foreach ($descriptors as $fileDescriptor)
 557         {
 558             if (!isset($fileDescriptor['name']))
 559             {
 560                 // This is a nested descriptor. We have to recurse.
 561                 if (!self::isSafeFile($fileDescriptor, $options))
 562                 {
 563                     return false;
 564                 }
 565 
 566                 continue;
 567             }
 568 
 569             $tempNames     = $fileDescriptor['tmp_name'];
 570             $intendedNames = $fileDescriptor['name'];
 571 
 572             if (!is_array($tempNames))
 573             {
 574                 $tempNames = array($tempNames);
 575             }
 576 
 577             if (!is_array($intendedNames))
 578             {
 579                 $intendedNames = array($intendedNames);
 580             }
 581 
 582             $len = count($tempNames);
 583 
 584             for ($i = 0; $i < $len; $i++)
 585             {
 586                 $tempName     = array_shift($tempNames);
 587                 $intendedName = array_shift($intendedNames);
 588 
 589                 // 1. Null byte check
 590                 if ($options['null_byte'])
 591                 {
 592                     if (strstr($intendedName, "\x00"))
 593                     {
 594                         return false;
 595                     }
 596                 }
 597 
 598                 // 2. PHP-in-extension check (.php, .php.xxx[.yyy[.zzz[...]]], .xxx[.yyy[.zzz[...]]].php)
 599                 if (!empty($options['forbidden_extensions']))
 600                 {
 601                     $explodedName = explode('.', $intendedName);
 602                     $explodedName = array_reverse($explodedName);
 603                     array_pop($explodedName);
 604                     $explodedName = array_map('strtolower', $explodedName);
 605 
 606                     /*
 607                      * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 608                      * be set, i.e. they should have unique values.
 609                      */
 610                     foreach ($options['forbidden_extensions'] as $ext)
 611                     {
 612                         if (in_array($ext, $explodedName))
 613                         {
 614                             return false;
 615                         }
 616                     }
 617                 }
 618 
 619                 // 3. File contents scanner (PHP tag in file contents)
 620                 if ($options['php_tag_in_content']
 621                     || $options['shorttag_in_content']
 622                     || ($options['fobidden_ext_in_content'] && !empty($options['forbidden_extensions'])))
 623                 {
 624                     $fp = @fopen($tempName, 'r');
 625 
 626                     if ($fp !== false)
 627                     {
 628                         $data = '';
 629 
 630                         while (!feof($fp))
 631                         {
 632                             $data .= @fread($fp, 131072);
 633 
 634                             if ($options['php_tag_in_content'] && stristr($data, '<?php'))
 635                             {
 636                                 return false;
 637                             }
 638 
 639                             if ($options['shorttag_in_content'])
 640                             {
 641                                 $suspiciousExtensions = $options['shorttag_extensions'];
 642 
 643                                 if (empty($suspiciousExtensions))
 644                                 {
 645                                     $suspiciousExtensions = array(
 646                                         'inc', 'phps', 'class', 'php3', 'php4', 'txt', 'dat', 'tpl', 'tmpl',
 647                                     );
 648                                 }
 649 
 650                                 /*
 651                                  * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 652                                  * be set, i.e. they should have unique values.
 653                                  */
 654                                 $collide = false;
 655 
 656                                 foreach ($suspiciousExtensions as $ext)
 657                                 {
 658                                     if (in_array($ext, $explodedName))
 659                                     {
 660                                         $collide = true;
 661 
 662                                         break;
 663                                     }
 664                                 }
 665 
 666                                 if ($collide)
 667                                 {
 668                                     // These are suspicious text files which may have the short tag (<?) in them
 669                                     if (strstr($data, '<?'))
 670                                     {
 671                                         return false;
 672                                     }
 673                                 }
 674                             }
 675 
 676                             if ($options['fobidden_ext_in_content'] && !empty($options['forbidden_extensions']))
 677                             {
 678                                 $suspiciousExtensions = $options['php_ext_content_extensions'];
 679 
 680                                 if (empty($suspiciousExtensions))
 681                                 {
 682                                     $suspiciousExtensions = array(
 683                                         'zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa',
 684                                     );
 685                                 }
 686 
 687                                 /*
 688                                  * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 689                                  * be set, i.e. they should have unique values.
 690                                  */
 691                                 $collide = false;
 692 
 693                                 foreach ($suspiciousExtensions as $ext)
 694                                 {
 695                                     if (in_array($ext, $explodedName))
 696                                     {
 697                                         $collide = true;
 698 
 699                                         break;
 700                                     }
 701                                 }
 702 
 703                                 if ($collide)
 704                                 {
 705                                     /*
 706                                      * These are suspicious text files which may have an executable
 707                                      * file extension in them
 708                                      */
 709                                     foreach ($options['forbidden_extensions'] as $ext)
 710                                     {
 711                                         if (strstr($data, '.' . $ext))
 712                                         {
 713                                             return false;
 714                                         }
 715                                     }
 716                                 }
 717                             }
 718 
 719                             /*
 720                              * This makes sure that we don't accidentally skip a <?php tag if it's across
 721                              * a read boundary, even on multibyte strings
 722                              */
 723                             $data = substr($data, -10);
 724                         }
 725 
 726                         fclose($fp);
 727                     }
 728                 }
 729             }
 730         }
 731 
 732         return true;
 733     }
 734 
 735     /**
 736      * Method to decode a file data array.
 737      *
 738      * @param   array  $data  The data array to decode.
 739      *
 740      * @return  array
 741      *
 742      * @since   3.4
 743      */
 744     protected static function decodeFileData(array $data)
 745     {
 746         $result = array();
 747 
 748         if (is_array($data[0]))
 749         {
 750             foreach ($data[0] as $k => $v)
 751             {
 752                 $result[$k] = self::decodeFileData(array($data[0][$k], $data[1][$k], $data[2][$k], $data[3][$k], $data[4][$k]));
 753             }
 754 
 755             return $result;
 756         }
 757 
 758         return array('name' => $data[0], 'type' => $data[1], 'tmp_name' => $data[2], 'error' => $data[3], 'size' => $data[4]);
 759     }
 760 
 761     /**
 762      * Internal method to iteratively remove all unwanted tags and attributes
 763      *
 764      * @param   string  $source  Input string to be 'cleaned'
 765      *
 766      * @return  string  'Cleaned' version of input parameter
 767      *
 768      * @since       11.1
 769      * @deprecated  4.0 Use JFilterInput::remove() instead
 770      */
 771     protected function _remove($source)
 772     {
 773         return $this->remove($source);
 774     }
 775 
 776     /**
 777      * Internal method to iteratively remove all unwanted tags and attributes
 778      *
 779      * @param   string  $source  Input string to be 'cleaned'
 780      *
 781      * @return  string  'Cleaned' version of input parameter
 782      *
 783      * @since   3.5
 784      */
 785     protected function remove($source)
 786     {
 787         // Check for invalid UTF-8 byte sequence
 788         if (!preg_match('//u', $source))
 789         {
 790             // String contains invalid byte sequence, remove it
 791             $source = htmlspecialchars_decode(htmlspecialchars($source, ENT_IGNORE, 'UTF-8'));
 792         }
 793 
 794         // Iteration provides nested tag protection
 795         do
 796         {
 797             $temp = $source;
 798             $source = $this->_cleanTags($source);
 799         }
 800         while ($temp !== $source);
 801 
 802         return $source;
 803     }
 804 
 805     /**
 806      * Internal method to strip a string of certain tags
 807      *
 808      * @param   string  $source  Input string to be 'cleaned'
 809      *
 810      * @return  string  'Cleaned' version of input parameter
 811      *
 812      * @since       11.1
 813      * @deprecated  4.0 Use JFilterInput::cleanTags() instead
 814      */
 815     protected function _cleanTags($source)
 816     {
 817         return $this->cleanTags($source);
 818     }
 819 
 820     /**
 821      * Internal method to strip a string of certain tags
 822      *
 823      * @param   string  $source  Input string to be 'cleaned'
 824      *
 825      * @return  string  'Cleaned' version of input parameter
 826      *
 827      * @since   3.5
 828      */
 829     protected function cleanTags($source)
 830     {
 831         // First, pre-process this for illegal characters inside attribute values
 832         $source = $this->_escapeAttributeValues($source);
 833 
 834         // In the beginning we don't really have a tag, so result is empty
 835         $result = '';
 836         $offset = 0;
 837         $length = strlen($source);
 838 
 839         // Is there a tag? If so it will certainly start with a '<'.
 840         $tagOpenStartOffset = strpos($source, '<');
 841 
 842         // Is there any close tag
 843         $tagOpenEndOffset = strpos($source, '>');
 844 
 845         while ($offset < $length)
 846         {
 847             // Preserve '>' character which exists before related '<'
 848             if ($tagOpenEndOffset !== false && ($tagOpenStartOffset === false || $tagOpenEndOffset < $tagOpenStartOffset))
 849             {
 850                 $result .= substr($source, $offset, $tagOpenEndOffset - $offset) . '>';
 851                 $offset  = $tagOpenEndOffset + 1;
 852 
 853                 // Search for a new closing indicator
 854                 $tagOpenEndOffset = strpos($source, '>', $offset);
 855 
 856                 continue;
 857             }
 858 
 859             // Add safe text appearing before the '<'
 860             if ($tagOpenStartOffset > $offset)
 861             {
 862                 $result .= substr($source, $offset, $tagOpenStartOffset - $offset);
 863                 $offset  = $tagOpenStartOffset;
 864             }
 865 
 866             // There is no more tags
 867             if ($tagOpenStartOffset === false && $tagOpenEndOffset === false)
 868             {
 869                 $result .= substr($source, $offset, $length - $offset);
 870                 $offset  = $length;
 871 
 872                 break;
 873             }
 874 
 875             // Remove every '<' character if '>' does not exists or we have '<>'
 876             if ($tagOpenStartOffset !== false && $tagOpenEndOffset === false || $tagOpenStartOffset + 1 == $tagOpenEndOffset)
 877             {
 878                 $offset++;
 879 
 880                 // Search for a new opening indicator
 881                 $tagOpenStartOffset = strpos($source, '<', $offset);
 882 
 883                 continue;
 884             }
 885 
 886             // Check for mal-formed tag where we have a second '<' before the '>'
 887             $nextOpenStartOffset = strpos($source, '<', $tagOpenStartOffset + 1);
 888 
 889             if ($nextOpenStartOffset !== false && $nextOpenStartOffset < $tagOpenEndOffset)
 890             {
 891                 // At this point we have a mal-formed tag, skip previous '<'
 892                 $offset++;
 893 
 894                 // Set a new opening indicator position
 895                 $tagOpenStartOffset = $nextOpenStartOffset;
 896 
 897                 continue;
 898             }
 899 
 900             // Let's get some information about our tag and setup attribute pairs
 901             // Now we have something like 'span class="" style=""', '/span', 'br/', 'br /' or 'hr disabled /'
 902             $tagContent = substr($source, $offset + 1, $tagOpenEndOffset - 1 - $offset);
 903 
 904             // All ASCII whitespaces replace by 0x20
 905             $tagNormalized = preg_replace('/\s/', ' ', $tagContent);
 906             $tagLength     = strlen($tagContent);
 907             $spaceOffset   = strpos($tagNormalized, ' ');
 908 
 909             // Are we an open tag or a close tag?
 910             $isClosingTag     = $tagContent[0] === '/' ? 1 : 0;
 911             $isSelfClosingTag = substr($tagContent, -1) === '/' ? 1 : 0;
 912 
 913             if ($spaceOffset !== false)
 914             {
 915                 $tagName = substr($tagContent, $isClosingTag, $spaceOffset - $isClosingTag);
 916             }
 917             else
 918             {
 919                 $tagName = substr($tagContent, $isClosingTag, $tagLength - $isClosingTag - $isSelfClosingTag);
 920             }
 921 
 922             /*
 923              * Exclude all "non-regular" tagnames
 924              * OR no tagname
 925              * OR remove if xssauto is on and tag is blacklisted
 926              */
 927             if (!$tagName
 928                 || !preg_match("/^[a-z][a-z0-9]*$/i", $tagName)
 929                 || ($this->xssAuto && in_array(strtolower($tagName), $this->tagBlacklist)))
 930             {
 931                 $offset += $tagLength + 2;
 932 
 933                 $tagOpenStartOffset = strpos($source, '<', $offset);
 934                 $tagOpenEndOffset   = strpos($source, '>', $offset);
 935 
 936                 // Strip tag
 937                 continue;
 938             }
 939 
 940             $attrSet = array();
 941 
 942             /*
 943              * Time to grab any attributes from the tag... need this section in
 944              * case attributes have spaces in the values.
 945              */
 946             while ($spaceOffset !== false && $spaceOffset + 1 < $tagLength)
 947             {
 948                 $attrStartOffset = $spaceOffset + 1;
 949 
 950                 // Find position of equal and open quote
 951                 if (preg_match('#= *(")[^"]*(")#', $tagNormalized, $matches, PREG_OFFSET_CAPTURE, $attrStartOffset))
 952                 {
 953                     $equalOffset     = $matches[0][1];
 954                     $quote1Offset    = $matches[1][1];
 955                     $quote2Offset    = $matches[2][1];
 956                     $nextSpaceOffset = strpos($tagNormalized, ' ', $quote2Offset);
 957                 }
 958                 else
 959                 {
 960                     $equalOffset     = strpos($tagNormalized, '=', $attrStartOffset);
 961                     $quote1Offset    = strpos($tagNormalized, '"', $attrStartOffset);
 962                     $nextSpaceOffset = strpos($tagNormalized, ' ', $attrStartOffset);
 963 
 964                     if ($quote1Offset !== false)
 965                     {
 966                         $quote2Offset = strpos($tagNormalized, '"', $quote1Offset + 1);
 967                     }
 968                     else
 969                     {
 970                         $quote2Offset = false;
 971                     }
 972                 }
 973 
 974                 // Do we have an attribute to process? [check for equal sign]
 975                 if ($tagContent[$attrStartOffset] !== '/'
 976                     && ($equalOffset && $nextSpaceOffset && $nextSpaceOffset < $equalOffset || !$equalOffset))
 977                 {
 978                     // Search for attribute without value, ex: 'checked/' or 'checked '
 979                     if ($nextSpaceOffset)
 980                     {
 981                         $attrEndOffset = $nextSpaceOffset;
 982                     }
 983                     else
 984                     {
 985                         $attrEndOffset = strpos($tagContent, '/', $attrStartOffset);
 986 
 987                         if ($attrEndOffset === false)
 988                         {
 989                             $attrEndOffset = $tagLength;
 990                         }
 991                     }
 992 
 993                     // If there is an ending, use this, if not, do not worry.
 994                     if ($attrEndOffset > $attrStartOffset)
 995                     {
 996                         $attrSet[] = substr($tagContent, $attrStartOffset, $attrEndOffset - $attrStartOffset);
 997                     }
 998                 }
 999                 elseif ($equalOffset !== false)
1000                 {
1001                     /*
1002                      * If the attribute value is wrapped in quotes we need to grab the substring from
1003                      * the closing quote, otherwise grab until the next space.
1004                      */
1005                     if ($quote1Offset !== false && $quote2Offset !== false)
1006                     {
1007                         // Add attribute, ex: 'class="body abc"'
1008                         $attrSet[] = substr($tagContent, $attrStartOffset, $quote2Offset + 1 - $attrStartOffset);
1009                     }
1010                     else
1011                     {
1012                         if ($nextSpaceOffset)
1013                         {
1014                             $attrEndOffset = $nextSpaceOffset;
1015                         }
1016                         else
1017                         {
1018                             $attrEndOffset = $tagLength;
1019                         }
1020 
1021                         // Add attribute, ex: 'class=body'
1022                         $attrSet[] = substr($tagContent, $attrStartOffset, $attrEndOffset - $attrStartOffset);
1023                     }
1024                 }
1025 
1026                 $spaceOffset = $nextSpaceOffset;
1027             }
1028 
1029             // Is our tag in the user input array?
1030             $tagFound = in_array(strtolower($tagName), $this->tagsArray);
1031 
1032             // If the tag is allowed let's append it to the output string.
1033             if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod))
1034             {
1035                 // Reconstruct tag with allowed attributes
1036                 if ($isClosingTag)
1037                 {
1038                     $result .= "</$tagName>";
1039                 }
1040                 else
1041                 {
1042                     $attrSet = $this->_cleanAttributes($attrSet);
1043 
1044                     // Open or single tag
1045                     $result .= '<' . $tagName;
1046 
1047                     if ($attrSet)
1048                     {
1049                         $result .= ' ' . implode(' ', $attrSet);
1050                     }
1051 
1052                     // Reformat single tags to XHTML
1053                     if (strpos($source, "</$tagName>", $tagOpenStartOffset) !== false)
1054                     {
1055                         $result .= '>';
1056                     }
1057                     else
1058                     {
1059                         $result .= ' />';
1060                     }
1061                 }
1062             }
1063 
1064             $offset += $tagLength + 2;
1065 
1066             if ($offset < $length)
1067             {
1068                 // Find next tag's start and continue iteration
1069                 $tagOpenStartOffset = strpos($source, '<', $offset);
1070                 $tagOpenEndOffset   = strpos($source, '>', $offset);
1071             }
1072         }
1073 
1074         return $result;
1075     }
1076 
1077     /**
1078      * Internal method to strip a tag of certain attributes
1079      *
1080      * @param   array  $attrSet  Array of attribute pairs to filter
1081      *
1082      * @return  array  Filtered array of attribute pairs
1083      *
1084      * @since       11.1
1085      * @deprecated  4.0 Use JFilterInput::cleanAttributes() instead
1086      */
1087     protected function _cleanAttributes($attrSet)
1088     {
1089         return $this->cleanAttributes($attrSet);
1090     }
1091 
1092     /**
1093      * Escape < > and " inside attribute values
1094      *
1095      * @param   string  $source  The source string.
1096      *
1097      * @return  string  Filtered string
1098      *
1099      * @since    3.5
1100      */
1101     protected function escapeAttributeValues($source)
1102     {
1103         $alreadyFiltered = '';
1104         $remainder = $source;
1105         $badChars = array('<', '"', '>');
1106         $escapedChars = array('&lt;', '&quot;', '&gt;');
1107 
1108         /*
1109          * Process each portion based on presence of =" and "<space>, "/>, or ">
1110          * See if there are any more attributes to process
1111          */
1112         while (preg_match('#<[^>]*?=\s*?(\"|\')#s', $remainder, $matches, PREG_OFFSET_CAPTURE))
1113         {
1114             // Get the portion before the attribute value
1115             $quotePosition = $matches[0][1];
1116             $nextBefore = $quotePosition + strlen($matches[0][0]);
1117 
1118             /*
1119              * Figure out if we have a single or double quote and look for the matching closing quote
1120              * Closing quote should be "/>, ">, "<space>, or " at the end of the string
1121              */
1122             $quote = substr($matches[0][0], -1);
1123             $pregMatch = ($quote == '"') ? '#(\"\s*/\s*>|\"\s*>|\"\s+|\"$)#' : "#(\'\s*/\s*>|\'\s*>|\'\s+|\'$)#";
1124 
1125             // Get the portion after attribute value
1126             if (preg_match($pregMatch, substr($remainder, $nextBefore), $matches, PREG_OFFSET_CAPTURE))
1127             {
1128                 // We have a closing quote
1129                 $nextAfter = $nextBefore + $matches[0][1];
1130             }
1131             else
1132             {
1133                 // No closing quote
1134                 $nextAfter = strlen($remainder);
1135             }
1136 
1137             // Get the actual attribute value
1138             $attributeValue = substr($remainder, $nextBefore, $nextAfter - $nextBefore);
1139 
1140             // Escape bad chars
1141             $attributeValue = str_replace($badChars, $escapedChars, $attributeValue);
1142             $attributeValue = $this->_stripCSSExpressions($attributeValue);
1143             $alreadyFiltered .= substr($remainder, 0, $nextBefore) . $attributeValue . $quote;
1144             $remainder = substr($remainder, $nextAfter + 1);
1145         }
1146 
1147         // At this point, we just have to return the $alreadyFiltered and the $remainder
1148         return $alreadyFiltered . $remainder;
1149     }
1150 
1151     /**
1152      * Try to convert to plaintext
1153      *
1154      * @param   string  $source  The source string.
1155      *
1156      * @return  string  Plaintext string
1157      *
1158      * @since       11.1
1159      * @deprecated  4.0 Use JFilterInput::decode() instead
1160      */
1161     protected function _decode($source)
1162     {
1163         return $this->decode($source);
1164     }
1165 
1166     /**
1167      * Try to convert to plaintext
1168      *
1169      * @param   string  $source  The source string.
1170      *
1171      * @return  string  Plaintext string
1172      *
1173      * @since   3.5
1174      */
1175     protected function decode($source)
1176     {
1177         static $ttr;
1178 
1179         if (!is_array($ttr))
1180         {
1181             // Entity decode
1182             $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');
1183 
1184             foreach ($trans_tbl as $k => $v)
1185             {
1186                 $ttr[$v] = utf8_encode($k);
1187             }
1188         }
1189 
1190         $source = strtr($source, $ttr);
1191 
1192         // Convert decimal
1193         $source = preg_replace_callback('/&#(\d+);/m', function($m)
1194         {
1195             return utf8_encode(chr($m[1]));
1196         }, $source
1197         );
1198 
1199         // Convert hex
1200         $source = preg_replace_callback('/&#x([a-f0-9]+);/mi', function($m)
1201         {
1202             return utf8_encode(chr(hexdec($m[1])));
1203         }, $source
1204         );
1205 
1206         return $source;
1207     }
1208 
1209     /**
1210      * Escape < > and " inside attribute values
1211      *
1212      * @param   string  $source  The source string.
1213      *
1214      * @return  string  Filtered string
1215      *
1216      * @since       11.1
1217      * @deprecated  4.0 Use JFilterInput::escapeAttributeValues() instead
1218      */
1219     protected function _escapeAttributeValues($source)
1220     {
1221         return $this->escapeAttributeValues($source);
1222     }
1223 
1224     /**
1225      * Remove CSS Expressions in the form of `<property>:expression(...)`
1226      *
1227      * @param   string  $source  The source string.
1228      *
1229      * @return  string  Filtered string
1230      *
1231      * @since       11.1
1232      * @deprecated  4.0 Use JFilterInput::stripCSSExpressions() instead
1233      */
1234     protected function _stripCSSExpressions($source)
1235     {
1236         return $this->stripCSSExpressions($source);
1237     }
1238 
1239     /**
1240      * Recursively strip Unicode Supplementary Characters from the source. Not: objects cannot be filtered.
1241      *
1242      * @param   mixed  $source  The data to filter
1243      *
1244      * @return  mixed  The filtered result
1245      *
1246      * @since  3.5
1247      */
1248     protected function stripUSC($source)
1249     {
1250         if (is_object($source))
1251         {
1252             return $source;
1253         }
1254 
1255         if (is_array($source))
1256         {
1257             $filteredArray = array();
1258 
1259             foreach ($source as $k => $v)
1260             {
1261                 $filteredArray[$k] = $this->stripUSC($v);
1262             }
1263 
1264             return $filteredArray;
1265         }
1266 
1267         return preg_replace('/[\xF0-\xF7].../s', "\xE2\xAF\x91", $source);
1268     }
1269 }
1270
Namespaces

Classes

Interfaces

Exceptions

Constants

Functions