Merge pull request #5765 from MrPetovan/bug/php7-remove-pear-text_highlighter

[php7] Remove PHP code highlighting
This commit is contained in:
Michael Vogel 2018-09-18 18:34:46 +00:00 committed by GitHub
commit bd0515714c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 344 deletions

View file

@ -25,7 +25,6 @@ use Friendica\Util\Map;
use Friendica\Util\Network;
use Friendica\Util\ParseUrl;
use Friendica\Util\Proxy as ProxyUtils;
use League\HTMLToMarkdown\HtmlConverter;
class BBCode extends BaseObject
{
@ -348,7 +347,7 @@ class BBCode extends BaseObject
*/
public static function toPlaintext($text, $keep_urls = true)
{
$naked_text = preg_replace('/\[(.+?)\]/','', $text);
$naked_text = preg_replace('/\[(.+?)\]\s*/','', $text);
if (!$keep_urls) {
$naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
}
@ -1164,21 +1163,6 @@ class BBCode extends BaseObject
return $return;
}
/**
 * preg_replace_callback() handler for [code=language]...[/code] matches.
 *
 * @param array $match Regex match; $match[1] holds the language name and
 *                     $match[2] the text between the code tags
 * @return string The result of text_highlight() when the language is in the
 *                supported list, otherwise the text wrapped in plain
 *                [code]...[/code] tags
 */
private static function textHighlightCallback($match)
{
    // Language names are compared case-insensitively
    $language = strtolower($match[1]);

    $supported = [
        'php', 'css', 'mysql', 'sql', 'abap', 'diff', 'html', 'perl', 'ruby',
        'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh', 'bash',
    ];

    if (!in_array($language, $supported, true)) {
        // Fallback in case the language doesn't exist: keep a plain code block
        return '[code]' . $match[2] . '[/code]';
    }

    return text_highlight($match[2], $language);
}
/**
* @brief Converts a BBCode message to HTML message
*
@ -1227,6 +1211,22 @@ class BBCode extends BaseObject
return $return;
};
// Extracting multi-line code blocks before the whitespace processing
$codeblocks = [];
$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
function ($matches) use (&$codeblocks) {
$return = $matches[0];
if (strpos($matches[2], "\n") !== false) {
$return = '#codeblock-' . count($codeblocks) . '#';
$codeblocks[] = '<pre><code class="language-' . trim($matches[1]) . '">' . trim($matches[2], "\n\r") . '</code></pre>';
}
return $return;
},
$text
);
// Hide all [noparse] contained bbtags by spacefying them
// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
@ -1273,11 +1273,6 @@ class BBCode extends BaseObject
$text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text);
}
// Check for [code] text here, before the linefeeds are messed with.
// The highlighter will unescape and re-escape the content.
if (strpos($text, '[code=') !== false) {
$text = preg_replace_callback("/\[code=(.*?)\](.*?)\[\/code\]/ism", 'self::textHighlightCallback', $text);
}
// Convert new line chars to html <br /> tags
// nlbr seems to be hopelessly messed up
@ -1771,6 +1766,18 @@ class BBCode extends BaseObject
$text = self::interpolateSavedImagesIntoItemBody($text, $saved_image);
}
// Restore code blocks
$text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
function ($matches) use ($codeblocks) {
$return = $matches[0];
if (isset($codeblocks[intval($matches[1])])) {
$return = $codeblocks[$matches[1]];
}
return $return;
},
$text
);
// Clean up the HTML by loading and saving the HTML with the DOM.
// Badly structured HTML can break a whole page.
// For performance reasons do it only with activated item cache or at export.
@ -1905,23 +1912,6 @@ class BBCode extends BaseObject
// Converting images with size parameters to simple images. Markdown doesn't know it.
$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
// Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
$codeblocks = [];
$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
function ($matches) use (&$codeblocks) {
$return = $matches[0];
if (strpos($matches[2], "\n") !== false) {
$return = '#codeblock-' . count($codeblocks) . '#';
$prefix = '````' . $matches[1] . PHP_EOL;
$codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
}
return $return;
},
$text
);
// Convert it to HTML - don't try oembed
if ($for_diaspora) {
$text = self::convert($text, false, 3);
@ -1951,8 +1941,7 @@ class BBCode extends BaseObject
$stamp1 = microtime(true);
// Now convert HTML to Markdown
$converter = new HtmlConverter();
$text = $converter->convert($text);
$text = HTML::toMarkdown($text);
// unmask the special chars back to HTML
$text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
@ -1975,18 +1964,6 @@ class BBCode extends BaseObject
);
}
// Restore code blocks
$text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
function ($matches) use ($codeblocks) {
$return = '';
if (isset($codeblocks[intval($matches[1])])) {
$return = $codeblocks[$matches[1]];
}
return $return;
},
$text
);
Addon::callHooks('bb2diaspora', $text);
return $text;

View file

@ -11,6 +11,7 @@ use DOMXPath;
use Friendica\Core\Addon;
use Friendica\Util\Network;
use Friendica\Util\XML;
use League\HTMLToMarkdown\HtmlConverter;
class HTML
{
@ -122,7 +123,7 @@ class HTML
// Removing code blocks before the whitespace removal processing below
$codeblocks = [];
$message = preg_replace_callback(
'#<pre><code(?: class="([^"]*)")?>(.*)</code></pre>#iUs',
'#<pre><code(?: class="language-([^"]*)")?>(.*)</code></pre>#iUs',
function ($matches) use (&$codeblocks) {
$return = '[codeblock-' . count($codeblocks) . ']';
@ -131,7 +132,7 @@ class HTML
$prefix = '[code=' . $matches[1] . ']';
}
$codeblocks[] = $prefix . trim($matches[2]) . '[/code]';
$codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]';
return $return;
},
$message
@ -672,4 +673,19 @@ class HTML
return trim($message);
}
/**
 * Converts an HTML string to Markdown.
 *
 * The conversion is delegated to HtmlConverter with its `hard_break` option
 * switched on; that setting is chosen to maximize compatibility with
 * Diaspora rather than to follow the Markdown standards strictly.
 *
 * @param string $html HTML to convert
 * @return string Markdown produced by the converter
 */
public static function toMarkdown($html)
{
    $options = ['hard_break' => true];

    return (new HtmlConverter($options))->convert($html);
}
}

View file

@ -32,6 +32,7 @@ class Markdown extends BaseObject
$MarkdownParser = new MarkdownExtra();
$MarkdownParser->hard_wrap = $hardwrap;
$MarkdownParser->code_class_prefix = 'language-';
$html = $MarkdownParser->transform($text);
self::getApp()->save_timestamp($stamp1, "parser");