1 <?php
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
44
45 46 47 48 49 50 51 52
/**
 * Finds a feed starting from an already-fetched file.
 *
 * If the file is not itself a feed, its body is parsed as HTML (when the DOM
 * extension is available) and candidate URLs are collected from `link`, `a`
 * and `area` elements. Candidates are fetched through the registry's `File`
 * factory until one is recognised as a feed or the fetch limit is reached.
 */
class SimplePie_Locator
{
    /** User agent forwarded to every File the locator creates. */
    public $useragent;

    /** Timeout forwarded to every File the locator creates. */
    public $timeout;

    /** The file the search starts from. */
    public $file;

    /** Links whose authority is empty or equals the starting file's authority. */
    public $local = array();

    /** Links whose authority differs from the starting file's authority. */
    public $elsewhere = array();

    /** Not referenced by any method of this class. */
    public $cached_entities = array();

    /** URL of the starting file; used to resolve links that precede <base>. */
    public $http_base;

    /** Base URL taken from the first usable <base href>, else $http_base. */
    public $base;

    /** Line number of the <base> element that set $base (0 when none). */
    public $base_location = 0;

    /** Number of candidate URLs fetched so far. */
    public $checked_feeds = 0;

    /** Upper bound for $checked_feeds. */
    public $max_checked_feeds = 10;

    /** DOMDocument built from the file body, or null without the DOM extension. */
    public $dom;

    protected $registry;

    /**
     * @param SimplePie_File $file              File to start the search from.
     * @param int            $timeout           Timeout passed to created Files.
     * @param string|null    $useragent         User agent passed to created Files.
     * @param int            $max_checked_feeds Maximum number of candidates to fetch.
     */
    public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
    {
        $this->file = $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;

        if (!class_exists('DOMDocument'))
        {
            $this->dom = null;
            return;
        }

        $this->dom = new DOMDocument();

        // DOMDocument::loadHTML() throws a ValueError for an empty source on
        // PHP 8; an empty document (no elements to search) is kept instead.
        $body = $this->file->body;
        if (!is_string($body) || $body === '')
        {
            return;
        }

        // Malformed markup makes libxml emit warnings; they are silenced here.
        set_error_handler(array('SimplePie_Misc', 'silence_errors'));
        $this->dom->loadHTML($body);
        restore_error_handler();
    }

    /**
     * Inject the registry used to create Files/sniffers and to call Misc helpers.
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Run the search strategies selected by $type, in order, until one succeeds.
     *
     * @param int   $type    Bitmask of SIMPLEPIE_LOCATOR_* flags.
     * @param mixed $working Receives the autodiscovery result list, or the single
     *                       file found by a later strategy. Optional.
     * @return mixed The starting file when it is already a feed, the first
     *               autodiscovered file, the file found via links, or null.
     * @throws SimplePie_Exception When DOM parsing is needed but unavailable.
     */
    public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file))
        {
            return $this->file;
        }

        // A remote document that is not HTML cannot be searched for links.
        if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
            if ($sniffer->get_type() !== 'text/html')
            {
                return null;
            }
        }

        if ($type & ~SIMPLEPIE_LOCATOR_NONE)
        {
            $this->get_base();
        }

        if (($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery()))
        {
            return $working[0];
        }

        $link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION
            | SIMPLEPIE_LOCATOR_LOCAL_BODY
            | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION
            | SIMPLEPIE_LOCATOR_REMOTE_BODY;

        if (($type & $link_strategies) && $this->get_links())
        {
            if (($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere)))
            {
                return $working;
            }

            if (($type & SIMPLEPIE_LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere)))
            {
                return $working;
            }
        }

        return null;
    }

    /**
     * Decide whether a file is a feed.
     *
     * Remote files are accepted when their sniffed content type is one of the
     * listed feed/XML MIME types; files flagged as local are always accepted.
     *
     * @param object $file File-like object exposing a `method` bitmask.
     * @return bool
     */
    public function is_feed($file)
    {
        if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $feed_types = array(
                'application/rss+xml',
                'application/rdf+xml',
                'text/rdf',
                'application/atom+xml',
                'text/xml',
                'application/xml',
            );
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));

            return in_array($sniffer->get_type(), $feed_types, true);
        }

        return (bool) ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL);
    }

    /**
     * Initialise $http_base/$base, honouring the first <base href> that resolves.
     *
     * @throws SimplePie_Exception When no DOM document is available.
     */
    public function get_base()
    {
        if ($this->dom === null)
        {
            throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
        }

        $this->http_base = $this->file->url;
        $this->base = $this->http_base;

        foreach ($this->dom->getElementsByTagName('base') as $element)
        {
            if (!$element->hasAttribute('href'))
            {
                continue;
            }

            $base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
            if ($base === false)
            {
                continue;
            }

            $this->base = $base;
            $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
            break;
        }
    }

    /**
     * Look for feeds advertised through rel attributes on link, a and area elements.
     *
     * @return array|null List of fetched feed files, or null when none was found.
     * @throws SimplePie_Exception When no DOM document is available.
     */
    public function autodiscovery()
    {
        $done = array();
        $feeds = array();

        foreach (array('link', 'a', 'area') as $tag)
        {
            $feeds = array_merge($feeds, $this->search_elements_by_tag($tag, $done, $feeds));
        }

        return empty($feeds) ? null : array_values($feeds);
    }

    /**
     * Fetch feed candidates referenced by elements named $name.
     *
     * An element is a candidate when its rel contains "feed", or contains
     * "alternate" without "stylesheet" and its type is an RSS/Atom MIME type.
     * A URL is fetched only if it was neither handled before ($done) nor
     * already accepted ($feeds).
     *
     * @param string $name  Element name to search for.
     * @param array  $done  URLs already processed; updated in place.
     * @param array  $feeds Feeds found so far, keyed by URL.
     * @return array $feeds extended with any newly accepted feeds.
     * @throws SimplePie_Exception When no DOM document is available.
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        if ($this->dom === null)
        {
            throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
        }

        $feed_types = array('application/rss+xml', 'application/atom+xml');

        foreach ($this->dom->getElementsByTagName($name) as $link)
        {
            if ($this->checked_feeds >= $this->max_checked_feeds)
            {
                break;
            }

            if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
            {
                continue;
            }

            $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));

            $href = $this->resolve_href($link);
            if ($href === false)
            {
                continue;
            }

            // Both de-duplication checks apply to every candidate, whichever
            // rel form advertised it.
            if (!in_array($href, $done, true) && !isset($feeds[$href]))
            {
                $advertised = in_array('feed', $rel, true)
                    || (in_array('alternate', $rel, true)
                        && !in_array('stylesheet', $rel, true)
                        && $link->hasAttribute('type')
                        && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), $feed_types, true));

                if ($advertised)
                {
                    $this->checked_feeds++;
                    $feed = $this->fetch_feed($href);
                    if ($feed !== null)
                    {
                        $feeds[$href] = $feed;
                    }
                }
            }

            $done[] = $href;
        }

        return $feeds;
    }

    /**
     * Collect http(s)/feed/scheme-less anchors into $local and $elsewhere.
     *
     * @return true|null True when at least one link was collected, else null.
     * @throws SimplePie_Exception When no DOM document is available.
     */
    public function get_links()
    {
        if ($this->dom === null)
        {
            throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
        }

        // The starting URL does not change while iterating, so parse it once.
        $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));

        foreach ($this->dom->getElementsByTagName('a') as $link)
        {
            if (!$link->hasAttribute('href'))
            {
                continue;
            }

            $parsed = $this->registry->call('Misc', 'parse_url', array(trim($link->getAttribute('href'))));
            if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)$/i', $parsed['scheme']))
            {
                continue;
            }

            $href = $this->resolve_href($link);
            if ($href === false)
            {
                continue;
            }

            if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
            {
                $this->local[] = $href;
            }
            else
            {
                $this->elsewhere[] = $href;
            }
        }

        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);

        if (!empty($this->local) || !empty($this->elsewhere))
        {
            return true;
        }

        return null;
    }

    /**
     * Try the URLs whose last ".xxx" segment is .rss, .rdf, .atom or .xml.
     *
     * URLs that were fetched and rejected are removed from $array.
     *
     * @param array $array Candidate URLs; modified in place.
     * @return mixed The first fetched file accepted as a feed, or null.
     */
    public function extension(&$array)
    {
        $feed_extensions = array('.rss', '.rdf', '.atom', '.xml');

        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds >= $this->max_checked_feeds)
            {
                break;
            }

            $suffix = strrchr($value, '.');
            if ($suffix === false || !in_array(strtolower($suffix), $feed_extensions, true))
            {
                continue;
            }

            $this->checked_feeds++;

            $feed = $this->fetch_feed($value);
            if ($feed !== null)
            {
                return $feed;
            }

            unset($array[$key]);
        }

        return null;
    }

    /**
     * Try the URLs that contain "rss", "rdf", "atom" or "xml" anywhere.
     *
     * URLs that were fetched and rejected are removed from $array.
     *
     * @param array $array Candidate URLs; modified in place.
     * @return mixed The first fetched file accepted as a feed, or null.
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds >= $this->max_checked_feeds)
            {
                break;
            }

            if (!preg_match('/(rss|rdf|atom|xml)/i', $value))
            {
                continue;
            }

            $this->checked_feeds++;

            $feed = $this->fetch_feed($value);
            if ($feed !== null)
            {
                return $feed;
            }

            unset($array[$key]);
        }

        return null;
    }

    /**
     * Resolve an element's href against the base URL in effect at its position.
     *
     * Elements located after the <base> element use $base; earlier ones use
     * $http_base. Without line information the element is treated as line 1.
     *
     * @param DOMElement $link Element carrying an href attribute.
     * @return mixed Result of Misc::absolutize_url (callers treat false as failure).
     */
    protected function resolve_href($link)
    {
        $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
        $reference = ($this->base_location < $line) ? $this->base : $this->http_base;

        return $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $reference));
    }

    /**
     * Fetch a candidate URL and return it only when it is an acceptable feed.
     *
     * @param string $url Absolute candidate URL.
     * @return mixed The created File, or null when the fetch failed, a remote
     *               response had an unacceptable status, or it is not a feed.
     */
    protected function fetch_feed($url)
    {
        $headers = array(
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        );
        $feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent));

        if (!$feed->success)
        {
            return null;
        }

        // Parenthesised on purpose: `===` binds tighter than `&`, so the
        // unparenthesised form never recognised a non-remote file.
        $is_remote = ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) !== 0;

        // The accepted status range (200, or 207-299) is kept as it was.
        if ($is_remote && !($feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300)))
        {
            return null;
        }

        return $this->is_feed($feed) ? $feed : null;
    }
}
372
373