1 <?php
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
44
45 46 47 48 49 50 51 52 53
/**
 * Sanitizes feed data (plain text, HTML, XHTML and IRIs) before it is
 * returned to the caller.
 *
 * For markup, the data is loaded into a DOMDocument, optionally has comments,
 * selected tags and selected attributes removed, has URL-carrying attributes
 * made absolute, and can have images routed through a caching image handler.
 */
class SimplePie_Sanitize
{
    /**
     * Base URL that replace_urls() resolves against.
     * Overwritten by every sanitize() call that processes HTML/XHTML.
     */
    public $base;

    /**
     * Registry used to reach the Misc, Cache and File classes.
     * Must be injected with set_registry() before any code path that
     * resolves URLs, changes encoding or caches images is used.
     */
    public $registry;

    // Options
    public $remove_div = true;
    public $image_handler = '';
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    public $encode_instead_of_strip = false;
    public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
    public $strip_comments = false;
    public $output_encoding = 'UTF-8';
    public $enable_cache = true;
    public $cache_location = './cache';
    public $cache_name_function = 'md5';
    public $timeout = 10;
    public $useragent = '';
    public $force_fsockopen = false;
    public $replace_url_attributes = null;

    /**
     * Initialise the element/attribute map used for URL replacement with
     * its default value.
     */
    public function __construct()
    {
        // A null argument selects the built-in default map.
        $this->set_url_replacements(null);
    }

    /**
     * Choose whether the wrapping <div> is removed from sanitized markup.
     *
     * @param bool $enable True to drop the wrapper, false to keep a bare <div>
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the URL prefix that cached images are served through.
     *
     * @param string|false $page Handler prefix; any falsy value disables the handler
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Inject the registry used to call into the Misc, Cache and File classes.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Receive the cache configuration.
     *
     * @param bool   $enable_cache        Whether image caching is allowed
     * @param string $cache_location      Ignored when falsy
     * @param string $cache_name_function Callable name used to derive cache keys; ignored when falsy
     * @param string $cache_class         Accepted for signature compatibility; not used by this class
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        // isset() is false only for null, so an explicit null leaves the current setting alone.
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Receive the settings used when fetching remote images.
     *
     * Falsy arguments leave the corresponding setting unchanged.
     *
     * @param string $file_class      Accepted for signature compatibility; not used by this class
     * @param mixed  $timeout         Stored as a string
     * @param string $useragent
     * @param bool   $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            // The string cast is kept from the original: it preserves fractional values.
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            // This is a flag, so keep it a boolean like its declared default.
            $this->force_fsockopen = (bool) $force_fsockopen;
        }
    }

    /**
     * Set the tags that are stripped from markup.
     *
     * @param array|string|false $tags Array of tag names, a comma-separated
     *                                 string of tag names, or a falsy value
     *                                 to disable tag stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if ($tags)
        {
            if (is_array($tags))
            {
                $this->strip_htmltags = $tags;
            }
            else
            {
                $this->strip_htmltags = explode(',', $tags);
            }
        }
        else
        {
            $this->strip_htmltags = false;
        }
    }

    /**
     * Choose whether stripped tags are emitted as visible text instead of
     * being removed.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the attributes that are stripped from every element.
     *
     * @param array|string|false $attribs Array of attribute names, a
     *                                    comma-separated string, or a falsy
     *                                    value to disable attribute stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->strip_attributes = $attribs;
            }
            else
            {
                $this->strip_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->strip_attributes = false;
        }
    }

    /**
     * Choose whether comment nodes are removed from markup.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the encoding that sanitize() converts its result to.
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set the element => attribute(s) pairs whose values are URLs that must
     * be made absolute against the base URL.
     *
     * Defaults to a@href, area@href, blockquote@cite, del@cite, form@action,
     * img@longdesc, img@src, input@src, ins@cite and q@cite.
     *
     * @param array|null $element_attribute Map of element name to attribute
     *                                      name (or array of names); null
     *                                      selects the default map
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Sanitize a piece of feed data according to its construct type.
     *
     * @param string $data Raw data
     * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL for resolving relative URLs
     * @return string Sanitized data; the trimmed input is returned untouched
     *                when it is empty and the IRI flag is not set
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);
        if ($data === '' && !($type & SIMPLEPIE_CONSTRUCT_IRI))
        {
            return $data;
        }

        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
        {
            // Treat the data as HTML if it contains an entity reference or a closing tag.
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
            {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            }
            else
            {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
        {
            $data = base64_decode($data);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
        {
            $document = new DOMDocument();
            $document->encoding = 'UTF-8';
            $data = $this->preprocess($data, $type);

            // Parser warnings about malformed feed markup are routed to a silencing handler.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $document->loadHTML($data);
            restore_error_handler();

            if ($this->strip_comments)
            {
                $xpath = new DOMXPath($document);
                $comments = $xpath->query('//comment()');

                foreach ($comments as $comment)
                {
                    $comment->parentNode->removeChild($comment);
                }
            }

            if ($this->strip_htmltags)
            {
                foreach ($this->strip_htmltags as $tag)
                {
                    $this->strip_tag($tag, $document, $type);
                }
            }

            if ($this->strip_attributes)
            {
                foreach ($this->strip_attributes as $attrib)
                {
                    $this->strip_attr($attrib, $document);
                }
            }

            // replace_urls() reads the base URL from $this->base.
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes)
            {
                $this->replace_urls($document, $element, $attributes);
            }

            // Route images through the image handler, but only when caching is enabled.
            if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
            {
                $this->cache_images($document);
            }

            // Drop the doctype that preprocess() added.
            if ($document->firstChild instanceof DOMDocumentType)
            {
                $document->removeChild($document->firstChild);
            }

            // Promote the first node inside <body> to be the document root, so
            // that serialising the document yields just that node.
            $body = $document->getElementsByTagName('body')->item(0);
            $real_body = ($body !== null) ? $body->childNodes->item(0) : null;
            if ($real_body !== null && $document->firstChild !== null)
            {
                $document->replaceChild($real_body, $document->firstChild);
                $data = trim($document->saveHTML());
            }
            else
            {
                // Stripping can leave <body> empty (e.g. input that was only a
                // <script> or a comment); there is then nothing to output.
                $data = '';
            }

            if ($this->remove_div)
            {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\/div>$/', '', $data);
            }
            else
            {
                // Keep the wrapper but discard any attributes on it.
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
            if ($absolute !== false)
            {
                $data = $absolute;
            }
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
        {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding !== 'UTF-8')
        {
            $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
        }

        return $data;
    }

    /**
     * Point every <img src> at the image handler, fetching and caching the
     * image first when it is not cached yet.
     *
     * An image whose fetch or cache write fails keeps its original src.
     *
     * @param DOMDocument $document Document to modify in place
     */
    protected function cache_images($document)
    {
        $images = $document->getElementsByTagName('img');
        foreach ($images as $img)
        {
            if (!$img->hasAttribute('src'))
            {
                continue;
            }

            $src = $img->getAttribute('src');
            $image_url = call_user_func($this->cache_name_function, $src);
            $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

            if ($cache->load())
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
                continue;
            }

            // REMOTE_ADDR is not defined outside a web request (CLI, cron),
            // so only forward it when it exists.
            // NOTE(review): assumes the File class accepts an empty header array - confirm.
            $headers = isset($_SERVER['REMOTE_ADDR']) ? array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']) : array();
            $file = $this->registry->create('File', array($src, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen));

            // `===` binds tighter than `&`, so the bit test must be parenthesised.
            $not_remote = ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0;
            $status_ok = $file->status_code === 200 || ($file->status_code > 206 && $file->status_code < 300);

            if ($file->success && ($not_remote || $status_ok))
            {
                if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                {
                    $img->setAttribute('src', $this->image_handler . $image_url);
                }
                else
                {
                    trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                }
            }
        }
    }

    /**
     * Wrap a markup fragment in a complete document so DOMDocument can
     * parse it as UTF-8.
     *
     * @param string $html Markup fragment
     * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Complete document source
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        // True whenever any flag other than XHTML is set in $type.
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            // Give the fragment a single wrapping element; sanitize() later
            // promotes the first child of <body> to be the document root.
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make the given attributes of every $tag element absolute against
     * $this->base.
     *
     * Elements whose tag is in the strip list are skipped.
     *
     * @param DOMDocument  $document   Document to modify in place
     * @param string       $tag        Element name
     * @param string|array $attributes Attribute name or list of names
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
        {
            return;
        }

        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element)
        {
            foreach ($attributes as $attribute)
            {
                if (!$element->hasAttribute($attribute))
                {
                    continue;
                }

                $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                // A false result leaves the attribute as it was.
                if ($value !== false)
                {
                    $element->setAttribute($attribute, $value);
                }
            }
        }
    }

    /**
     * Regex-replace callback that strips or encodes one matched tag.
     *
     * Not called from within this class.
     *
     * @param array $match Match array; index 0 is the whole match, 1 the tag
     *                     name, 2 the attribute text, 3 and 4 are read as
     *                     content
     *                     NOTE(review): the capture-group layout comes from
     *                     the caller's pattern, which is not visible here -
     *                     confirm.
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            if ($keeps_content)
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "<$match[1]$match[2]>$match[3]</$match[1]>";
            }

            return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
        }

        if ($keeps_content)
        {
            return $match[4];
        }

        return '';
    }

    /**
     * Strip every $tag element found under <body>.
     *
     * - With encode_instead_of_strip: the element is replaced by its children,
     *   surrounded (except for script/style) by text nodes spelling out the
     *   start and end tags.
     * - Otherwise script/style elements are removed with their content, and
     *   any other element is replaced by its children.
     *
     * @param string      $tag      Element name
     * @param DOMDocument $document Document to modify in place
     * @param int         $type     Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, $document, $type)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('body//' . $tag);
        $is_script_or_style = in_array($tag, array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // For elements other than script and style, show the start tag as text.
                if (!$is_script_or_style)
                {
                    $text = '<' . $tag;
                    if ($element->hasAttributes())
                    {
                        $attrs = array();
                        foreach ($element->attributes as $name => $attr)
                        {
                            $value = $attr->value;

                            // Compare against '' rather than using empty(),
                            // which would also treat the value "0" as missing.
                            if ($value === '')
                            {
                                if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                                {
                                    // XHTML has no minimised attributes: name="name".
                                    $value = $name;
                                }
                                elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                                {
                                    // HTML allows the bare attribute name.
                                    $attrs[] = $name;
                                    continue;
                                }
                            }

                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild(new DOMText($text));
                }

                // Appending a child to the fragment detaches it from the
                // element, so taking the first child repeatedly drains it.
                while ($element->firstChild !== null)
                {
                    $fragment->appendChild($element->firstChild);
                }

                if (!$is_script_or_style)
                {
                    $fragment->appendChild(new DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        }

        if ($is_script_or_style)
        {
            // The content of these elements is not displayable, so drop it too.
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }

        foreach ($elements as $element)
        {
            $fragment = $document->createDocumentFragment();
            while ($element->firstChild !== null)
            {
                $fragment->appendChild($element->firstChild);
            }

            $element->parentNode->replaceChild($fragment, $element);
        }
    }

    /**
     * Remove the attribute $attrib from every element that carries it.
     *
     * @param string      $attrib   Attribute name
     * @param DOMDocument $document Document to modify in place
     */
    protected function strip_attr($attrib, $document)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }
}
550