Skip to content

Commit

Permalink
Merge pull request #53 from mtibben/use_mb_funcs
Browse files Browse the repository at this point in the history
Always use multibyte string functions
  • Loading branch information
mtibben committed Feb 22, 2016
2 parents 1338bc9 + aceae14 commit a0f857c
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 18 deletions.
4 changes: 4 additions & 0 deletions composer.json
Expand Up @@ -8,5 +8,9 @@
},
"require-dev": {
"phpunit/phpunit": "~4"
},
"suggest": {
"ext-mbstring": "For best performance",
"symfony/polyfill-mbstring": "If you can't install ext-mbstring"
}
}
46 changes: 28 additions & 18 deletions src/Html2Text.php
Expand Up @@ -23,6 +23,8 @@ class Html2Text
{
const ENCODING = 'UTF-8';

protected $htmlFuncFlags;

/**
* Contains the HTML content to convert.
*
Expand Down Expand Up @@ -236,6 +238,9 @@ public function __construct($html = '', $options = array())

$this->html = $html;
$this->options = array_merge($this->options, $options);
$this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
? ENT_COMPAT
: ENT_COMPAT | ENT_HTML5;
}

/**
Expand Down Expand Up @@ -318,6 +323,16 @@ public function set_base_url($baseurl)
}

/**
 * Runs the conversion with mbstring's internal encoding temporarily set to
 * self::ENCODING, then puts the previous internal encoding back.
 *
 * The internal encoding is a process-wide setting, so it is restored on the
 * failure path as well: if doConvert() throws an exception, the original
 * encoding is reinstated before the exception is re-thrown to the caller.
 *
 * @throws \Exception Whatever doConvert() raised, re-thrown unchanged.
 */
protected function convert()
{
    $origEncoding = mb_internal_encoding();
    mb_internal_encoding(self::ENCODING);

    // try/catch + rethrow instead of `finally`: the constructor still
    // branches on PHP_VERSION_ID < 50400, and `finally` requires PHP 5.5.
    try {
        $this->doConvert();
    } catch (\Exception $e) {
        mb_internal_encoding($origEncoding);
        throw $e;
    }

    mb_internal_encoding($origEncoding);
}

protected function doConvert()
{
$this->linkList = array();

Expand Down Expand Up @@ -345,7 +360,7 @@ protected function converter(&$text)
$text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
$text = strip_tags($text);
$text = preg_replace($this->entSearch, $this->entReplace, $text);
$text = html_entity_decode($text, ENT_QUOTES, self::ENCODING);
$text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);

// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
Expand Down Expand Up @@ -395,7 +410,7 @@ protected function buildlinkList($link, $display, $linkOverride = null)
$url = $link;
} else {
$url = $this->baseurl;
if (substr($link, 0, 1) != '/') {
if (mb_substr($link, 0, 1) != '/') {
$url .= '/';
}
$url .= $link;
Expand Down Expand Up @@ -472,7 +487,7 @@ protected function convertBlockquotes(&$text)
$end = $m[1];
$len = $end - $taglen - $start;
// Get blockquote content
$body = substr($text, $start + $taglen - $diff, $len);
$body = mb_substr($text, $start + $taglen - $diff, $len);

// Set text width
$pWidth = $this->options['width'];
Expand All @@ -482,20 +497,21 @@ protected function convertBlockquotes(&$text)
$this->converter($body);
// Add citation markers and create PRE block
$body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
$body = '<pre>' . htmlspecialchars($body) . '</pre>';
$body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
// Re-set text width
$this->options['width'] = $pWidth;
// Replace content
$text = substr($text, 0, $start - $diff)
. $body . substr($text, $end + strlen($m[0]) - $diff);
$text = mb_substr($text, 0, $start - $diff)
. $body
. mb_substr($text, $end + mb_strlen($m[0]) - $diff);

$diff += $len + $taglen + strlen($m[0]) - strlen($body);
$diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
unset($body);
}
} else {
if ($level == 0) {
$start = $m[1];
$taglen = strlen($m[0]);
$taglen = mb_strlen($m[0]);
}
$level++;
}
Expand All @@ -511,7 +527,7 @@ protected function convertBlockquotes(&$text)
*/
protected function pregCallback($matches)
{
switch (strtolower($matches[1])) {
switch (mb_strtolower($matches[1])) {
case 'p':
// Replace newlines with spaces.
$para = str_replace("\n", " ", $matches[3]);
Expand Down Expand Up @@ -585,15 +601,9 @@ protected function toupper($str)
*/
protected function strtoupper($str)
{
    // Decode entities first so that entity names themselves are not
    // upper-cased; only the characters they stand for should be.
    $str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);

    // Pass the encoding explicitly so the result does not depend on the
    // current mbstring internal encoding at the time of the call.
    $str = mb_strtoupper($str, self::ENCODING);

    // Re-escape the HTML special characters with the same flags used for
    // decoding, so the single decode/encode round trip is symmetric.
    $str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);

    return $str;
}
Expand Down

0 comments on commit a0f857c

Please sign in to comment.