1 <?php
2 /**
3 * @package Joomla.Libraries
4 * @subpackage HTML
5 *
6 * @copyright Copyright (C) 2005 - 2017 Open Source Matters, Inc. All rights reserved.
7 * @license GNU General Public License version 2 or later; see LICENSE.txt
8 */
9
10 defined('JPATH_PLATFORM') or die;
11
12 use Joomla\String\StringHelper;
13
/**
 * HTML helper class for rendering manipulated strings.
 *
 * Provides three static helpers: truncate() (character-limited truncation that can
 * keep or strip HTML), truncateComplex() (truncation measured on the rendered plain
 * text while keeping as much markup as possible) and abridge() (replaces the middle
 * of a long string with an ellipsis).
 *
 * @since  1.6
 */
abstract class JHtmlString
{
	/**
	 * Truncates text blocks over the specified character limit and closes
	 * all open HTML tags. The method will optionally not truncate an individual
	 * word, it will find the first space that is within the limit and
	 * truncate at that point. This method is UTF-8 safe.
	 *
	 * When the text is cut, an ellipsis (...) is appended. If no complete word fits
	 * in the limit while $noSplit is enabled, or the cut leaves only an unterminated
	 * tag, the result is just the ellipsis.
	 *
	 * @param   string   $text       The text to truncate.
	 * @param   integer  $length     The maximum length of the text. Zero or less disables truncation.
	 * @param   boolean  $noSplit    Don't split a word if that is where the cutoff occurs (default: true).
	 * @param   boolean  $allowHtml  Allow HTML tags in the output, and close any open tags (default: true).
	 *                               When false, tags are stripped and entities decoded before measuring.
	 *
	 * @return  string   The truncated text.
	 *
	 * @since   1.6
	 */
	public static function truncate($text, $length = 0, $noSplit = true, $allowHtml = true)
	{
		// Normalise the inputs: the strict comparisons and string offsets below would
		// otherwise misbehave for null text or numeric-string limits.
		$text   = (string) $text;
		$length = (int) $length;

		// Assume a lone open tag is invalid HTML.
		if ($length === 1 && isset($text[0]) && $text[0] === '<')
		{
			return '...';
		}

		// Check if HTML tags are allowed.
		if (!$allowHtml)
		{
			// Deal with spacing issues in the input: make sure text separated only by a tag
			// stays separated once the tag is stripped, and treat non-breaking spaces
			// (entity forms and the raw UTF-8 character) as ordinary spaces.
			$text = str_replace('>', '> ', $text);
			$text = str_replace(array('&nbsp;', '&#160;', "\xC2\xA0"), ' ', $text);
			$text = StringHelper::trim(preg_replace('#\s+#mui', ' ', $text));

			// Strip the tags from the input and decode entities.
			$text = strip_tags($text);
			$text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

			// Remove remaining extra spaces, including non-breaking spaces produced by the decoding.
			$text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);
			$text = StringHelper::trim(preg_replace('#\s+#mui', ' ', $text));
		}

		// Whether or not allowing HTML, truncate the item text if it is too long.
		if ($length > 0 && StringHelper::strlen($text) > $length)
		{
			$tmp = trim(StringHelper::substr($text, 0, $length));

			// The cut left nothing but the beginning of an unterminated tag.
			if (isset($tmp[0]) && $tmp[0] === '<' && strpos($tmp, '>') === false)
			{
				return '...';
			}

			// $noSplit true means that we do not allow splitting of words.
			if ($noSplit)
			{
				// Find the position of the last space within the allowed length.
				$offset = StringHelper::strrpos($tmp, ' ');

				// If there are no spaces we cannot keep a whole word, so the
				// ellipsis alone is returned. In that case we are done.
				if ($offset === false)
				{
					return '...';
				}

				// Keep everything up to and including that last space.
				$tmp = StringHelper::substr($tmp, 0, $offset + 1);

				if (StringHelper::strlen($tmp) > $length - 3)
				{
					$tmp = trim(StringHelper::substr($tmp, 0, StringHelper::strrpos($tmp, ' ')));
				}
			}

			if ($allowHtml)
			{
				// Check if the cut happened inside a tag. This has to be done before the
				// open tags are balanced: once closing tags are appended the string always
				// ends with '>' and the partial tag could no longer be detected.
				$lastOpen  = StringHelper::strrpos($tmp, '<');
				$lastClose = StringHelper::strrpos($tmp, '>');

				if ($lastOpen !== false && ($lastClose === false || $lastOpen > $lastClose))
				{
					$tmp = StringHelper::trim(StringHelper::substr($tmp, 0, $lastOpen));
				}

				$tmp = self::closeOpenTags($tmp);
			}

			// Only replace the text (and add the ellipsis) when the processed fragment is
			// shorter than the input.
			if ($tmp === false || strlen($text) > strlen($tmp))
			{
				$text = trim($tmp) . '...';
			}
		}

		// Clean up any internal spaces created by the processing.
		$text = str_replace(' </', '</', $text);
		$text = str_replace(' ...', '...', $text);

		return $text;
	}

	/**
	 * Appends closing tags for every element that is opened but not closed in an HTML fragment.
	 *
	 * Tag names are compared case-insensitively and void elements (which never take
	 * a closing tag) are ignored. Closing tags are appended in the reverse order of
	 * the opening tags. Malformed markup is not repaired.
	 *
	 * @param   string  $html  The HTML fragment to balance.
	 *
	 * @return  string  The fragment with the missing closing tags appended.
	 *
	 * @since   1.6
	 */
	private static function closeOpenTags($html)
	{
		// Void elements do not need a separate close tag.
		$voidTags = array(
			'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
			'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
		);

		// Put all opened tags into an array, keeping their original spelling for the output.
		preg_match_all("#<([a-z][a-z0-9]*)\b.*?(?!/)>#i", $html, $result);
		$openedTags = array();

		foreach ($result[1] as $tagName)
		{
			if (!in_array(strtolower($tagName), $voidTags, true))
			{
				$openedTags[] = $tagName;
			}
		}

		// Put all closed tags into an array (lower-cased for comparison).
		preg_match_all("#</([a-z][a-z0-9]*)\b(?:[^>]*?)>#iU", $html, $result);
		$closedTags = array_map('strtolower', $result[1]);

		// Same number of opening and closing tags: treat the fragment as balanced.
		if (count($closedTags) === count($openedTags))
		{
			return $html;
		}

		// Closing tags need to be in the reverse order of opening tags.
		foreach (array_reverse($openedTags) as $tagName)
		{
			$position = array_search(strtolower($tagName), $closedTags, true);

			if ($position === false)
			{
				$html .= '</' . $tagName . '>';
			}
			else
			{
				// This opening tag already has a partner; consume it so it is not matched twice.
				unset($closedTags[$position]);
			}
		}

		return $html;
	}

	/**
	 * Method to extend the truncate method to more complex situations
	 *
	 * The goal is to get the proper length plain text string with as much of
	 * the html intact as possible with all tags properly closed.
	 *
	 * The limit is measured against the rendered plain text: the method repeatedly
	 * calls 'string.truncate' with HTML allowed, each time raising the limit by the
	 * amount of plain text still missing, until the plain text of the HTML result
	 * matches the plain-text truncation.
	 *
	 * @param   string   $html       The content of the introtext to be truncated
	 * @param   integer  $maxLength  The maximum number of characters to render. Zero or less disables truncation.
	 * @param   boolean  $noSplit    Don't split a word if that is where the cutoff occurs (default: true).
	 *
	 * @return  string  The truncated string. If the string is truncated an ellipsis
	 *                  (...) will be appended.
	 *
	 * @note    If a maximum length of 3 or less is selected and the text has more than
	 *          that number of characters an ellipsis will be displayed.
	 *          This method will not create valid HTML from malformed HTML.
	 *
	 * @since   3.1
	 */
	public static function truncateComplex($html, $maxLength = 0, $noSplit = true)
	{
		$html      = (string) $html;
		$maxLength = (int) $maxLength;

		// Start with some basic rules. Lengths are counted in characters, like in truncate().
		$baseLength = StringHelper::strlen($html);

		// If there is no limit or the original HTML string fits in it, do nothing and return that.
		if ($maxLength <= 0 || $baseLength <= $maxLength)
		{
			return $html;
		}

		// From here on $html holds more than $maxLength (>= 1) characters, so offset 0 exists.
		$startsWithTag = $html[0] === '<';

		// Take care of short simple cases: no markup within the limit, so nothing useful fits.
		if ($maxLength <= 3 && !$startsWithTag && strpos(StringHelper::substr($html, 0, $maxLength - 1), '<') === false)
		{
			return '...';
		}

		// Deal with maximum length of 1 where the string starts with a tag: keep the
		// opening tag, close it by name (without its attributes) and add the ellipsis.
		// Anything that is not a well-formed start tag falls through to the generic handling.
		if ($maxLength === 1 && $startsWithTag && preg_match('#^<([a-z][a-z0-9]*)\b[^>]*>#i', $html, $match))
		{
			return $match[0] . '</' . $match[1] . '>...';
		}

		// First get the truncated plain text string. This is the rendered text we want to end up with.
		$ptString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, false);

		// An empty result means it's all HTML. If the plain text is shorter than the max
		// length the variable will not end in ... In both cases we use the whole string.
		if ($ptString === '' || substr($ptString, -3) !== '...')
		{
			return $html;
		}

		// Regular truncate gives us the ellipsis but we want to go back for text and tags.
		if ($ptString === '...')
		{
			$stripped = StringHelper::substr(strip_tags($html), 0, $maxLength);
			$ptString = JHtml::_('string.truncate', $stripped, $maxLength, $noSplit, false);
		}

		// We need to trim the ellipsis that truncate adds.
		$ptString = rtrim($ptString, '.');

		// Now deal with more complex truncation.
		while ($maxLength <= $baseLength)
		{
			// Get the truncated string assuming HTML is allowed.
			$htmlString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, true);

			// Nothing fits and there is no room to grow into: the ellipsis is all we can show.
			if ($htmlString === '...' && StringHelper::strlen($ptString) + 3 > $maxLength)
			{
				return $htmlString;
			}

			$htmlString = rtrim($htmlString, '.');

			// Now get the plain text from the HTML string and trim it.
			$htmlStringToPtString = JHtml::_('string.truncate', $htmlString, $maxLength, $noSplit, false);
			$htmlStringToPtString = rtrim($htmlStringToPtString, '.');

			// Number of plain text characters still missing from the HTML result, i.e. the
			// room taken by HTML tag characters in the first $maxLength characters.
			$diffLength = StringHelper::strlen($ptString) - StringHelper::strlen($htmlStringToPtString);

			// Nothing is missing any more (this includes an exact match): we are done.
			if ($diffLength <= 0)
			{
				return $htmlString . '...';
			}

			// Set new $maxlength that adjusts for the HTML tags
			$maxLength += $diffLength;
		}

		// The adjusted limit grew past the whole document, so there is nothing left to cut.
		return $html;
	}

	/**
	 * Abridges text strings over the specified character limit. The
	 * behavior will insert an ellipsis into the text replacing a section
	 * of variable size to ensure the string does not exceed the defined
	 * maximum length. This method is UTF-8 safe.
	 *
	 * For example, it transforms "Really long title" to "Really...title".
	 *
	 * If the intro length plus the ellipsis would not fit into $length, the intro is
	 * shortened so the result still respects $length. Because the ellipsis itself is
	 * three characters long, a $length below 3 yields just the ellipsis.
	 *
	 * Note that this method does not scan for HTML tags so will potentially break them.
	 *
	 * @param   string   $text    The text to abridge.
	 * @param   integer  $length  The maximum length of the text (default is 50).
	 * @param   integer  $intro   The maximum length of the intro text (default is 30).
	 *
	 * @return  string   The abridged text.
	 *
	 * @since   1.6
	 */
	public static function abridge($text, $length = 50, $intro = 30)
	{
		$length     = (int) $length;
		$textLength = StringHelper::strlen($text);

		// Nothing to do when the text already fits.
		if ($textLength <= $length)
		{
			return $text;
		}

		// Keep the intro small enough to leave room for the ellipsis.
		$intro = max(0, min((int) $intro, $length - 3));

		// Determine the remaining text length.
		$remainder = $length - ($intro + 3);

		// Extract the beginning and ending text sections.
		$beg = $intro > 0 ? StringHelper::substr($text, 0, $intro) : '';
		$end = $remainder > 0 ? StringHelper::substr($text, $textLength - $remainder) : '';

		// Build the resulting string.
		return $beg . '...' . $end;
	}
}
302