2019-04-12 04:10:31 +00:00
< ? php
2021-12-03 03:01:39 +00:00
2022-02-16 04:08:28 +00:00
namespace Code\Lib ;
2019-04-12 04:10:31 +00:00
/**
* @ brief Some functions for BB and markdown conversions
*/
2022-08-27 22:54:42 +00:00
use InvalidArgumentException ;
2019-04-12 04:10:31 +00:00
use Michelf\MarkdownExtra ;
use League\HTMLToMarkdown\HtmlConverter ;
use League\HTMLToMarkdown\Environment ;
2022-02-16 04:08:28 +00:00
use Code\Extend\Hook ;
2019-04-12 04:10:31 +00:00
require_once ( " include/event.php " );
require_once ( " include/html2bbcode.php " );
require_once ( " include/bbcode.php " );
2021-12-02 23:02:31 +00:00
class Markdown
{
2019-04-12 04:10:31 +00:00
2021-12-02 23:02:31 +00:00
/**
 * @brief Convert Markdown to bbcode.
 *
 * We don't want to support a bbcode specific markdown interpreter
 * and the markdown library we have is pretty good, but provides HTML output.
 * So we'll use that to convert to HTML, then convert the HTML back to bbcode,
 * and then clean up a few Diaspora specific constructs.
 *
 * @param string $s The message as Markdown
 * @param bool $use_zrl default false; when true, links and images are routed
 *   through the use_zrl_cb_* callbacks instead of being emitted as plain [url]
 * @param array $options default empty; the only key read here is 'preserve_lf'
 * @return string The message converted to bbcode (empty string if $s is an array)
 */
public static function to_bbcode($s, $use_zrl = false, $options = [])
{
    if (is_array($s)) {
        btlogger('markdown_to_bb called with array. ' . print_r($s, true), LOGGER_NORMAL, LOG_WARNING);
        return '';
    }

    // Carriage returns can arrive as the literal entity text "&#xD;".
    // Turn them into real CRs before the generic entity decoding below.
    $s = str_replace("&#xD;", "\r", $s);
    // NOTE(review): every "&#xD;" was already consumed by the line above, so
    // this replacement can no longer match. Kept in the original order; confirm
    // whether it was meant to run first.
    $s = str_replace("&#xD;\n&gt;", "", $s);
    $s = html_entity_decode($s, ENT_COMPAT, 'UTF-8');

    // if empty link text replace with the url
    $s = preg_replace("/\[]\((.*?)\)/ism", '[$1]($1)', $s);

    $x = [
        'text' => $s,
        'zrl' => $use_zrl,
        'options' => $options
    ];

    /**
     * @hooks markdown_to_bb_init
     *   * \e string \b text - The message as Markdown and what will get returned
     *   * \e boolean \b zrl
     *   * \e array \b options
     */
    Hook::call('markdown_to_bb_init', $x);

    $s = $x['text'];

    // Escaping the hash tags: hide '#' behind its numeric entity so the
    // Markdown parser does not see it; it is restored after the transform.
    $s = preg_replace('/#([^\s#])/', '&#35;$1', $s);

    $s = MarkdownExtra::defaultTransform($s);

    // !empty() also covers an options array that has no 'preserve_lf' key.
    if (!empty($options['preserve_lf'])) {
        $s = str_replace(["\r", "\n"], ["", '<br>'], $s);
    } else {
        $s = str_replace("\r", "", $s);
    }

    // Undo the hashtag escaping applied before the Markdown transform.
    $s = str_replace('&#35;', '#', $s);

    $s = html2bbcode($s);

    // Convert everything that looks like a link to a link.
    // The first group captures the character in front of the URL (or start of
    // line) so URLs directly preceded by ']', '=', '{' or '/' are left alone.
    $link_pattern = "/([^]={\/]|^)(https?:\/\/)([a-zA-Z0-9\pL:\/\-?&;.=_~#%\$!+,@()]+)/imu";

    if ($use_zrl) {
        if (str_contains($s, '[/img]')) {
            $s = preg_replace_callback("/\[img](.*?)\[\/img]/ism", [self::class, 'use_zrl_cb_img'], $s);
            $s = preg_replace_callback("/\[img=([0-9]*)x([0-9]*)](.*?)\[\/img]/ism", [self::class, 'use_zrl_cb_img_x'], $s);
        }
        $s = preg_replace_callback($link_pattern, [self::class, 'use_zrl_cb_link'], $s);
    } else {
        $s = preg_replace($link_pattern, '$1[url=$2$3]$2$3[/url]', $s);
    }

    // remove duplicate adjacent code tags
    $s = preg_replace("/(\[code])+(.*?)(\[\/code])+/ism", '[code]$2[/code]', $s);

    /**
     * @hooks markdown_to_bb
     *   * \e string - The already converted message as bbcode
     */
    Hook::call('markdown_to_bb', $s);

    return $s;
}
/**
 * @brief preg_replace_callback() handler that wraps a bare URL in [zrl] or [url].
 *
 * Used by to_bbcode() with a pattern whose groups are:
 * [1] the single character preceding the URL (or empty at start of line),
 * [2] the scheme ("http://" or "https://"), [3] the remainder of the URL.
 *
 * @param array $match regex match groups as described above
 * @return string the preceding character followed by the tagged URL
 */
public static function use_zrl_cb_link($match)
{
    // Check only the URL itself. $match[0] also contains the preceding
    // character from group 1, which trim() removes only when it is whitespace.
    $url = $match[2] . $match[3];
    $tag = isOWAEnabled($url) ? 'zrl' : 'url';

    return $match[1] . '[' . $tag . '=' . $url . ']' . $url . '[/' . $tag . ']';
}
/**
 * @brief preg_replace_callback() handler for [img]...[/img] elements.
 *
 * Rewrites the element to [zmg]...[/zmg] when isOWAEnabled() accepts the
 * trimmed image URL; otherwise the matched text is returned untouched.
 *
 * @param array $match [0] the whole [img] element, [1] the image URL
 * @return string
 */
public static function use_zrl_cb_img($match)
{
    if (isOWAEnabled(trim($match[1]))) {
        return '[zmg]' . $match[1] . '[/zmg]';
    }

    return $match[0];
}
/**
 * @brief preg_replace_callback() handler for sized [img=WxH]...[/img] elements.
 *
 * Rewrites the element to [zmg=WxH]...[/zmg] when isOWAEnabled() accepts the
 * trimmed image URL; otherwise the matched text is returned untouched.
 *
 * @param array $match [0] the whole element, [1] width, [2] height, [3] image URL
 * @return string
 */
public static function use_zrl_cb_img_x($match)
{
    if (! isOWAEnabled(trim($match[3]))) {
        return $match[0];
    }

    return '[zmg=' . $match[1] . 'x' . $match[2] . ']' . $match[3] . '[/zmg]';
}
2019-04-12 04:10:31 +00:00
2021-12-02 23:02:31 +00:00
/**
 * @brief preg_replace_callback() handler that flattens a bbcode [share] element.
 *
 * Produces an attribution headline (optional avatar image linked to the
 * author's profile, followed by "<author> wrote the following post <date>")
 * and appends the trimmed content of the share.
 *
 * The message_id/guid attributes were previously extracted here but never
 * used, so they are no longer read.
 *
 * @param array $match [1] the attribute string of the [share] tag,
 *   [2] the content enclosed by the tag
 * @return string bbcode text replacing the [share] element
 */
public static function from_bbcode_share($match)
{
    $attributes = $match[1];

    // Only the author attribute is URL-decoded.
    $author  = urldecode(self::share_attribute($attributes, 'author'));
    $link    = self::share_attribute($attributes, 'link');
    $avatar  = self::share_attribute($attributes, 'avatar');
    $profile = self::share_attribute($attributes, 'profile');
    $posted  = self::share_attribute($attributes, 'posted');

    $reldate = datetime_convert('UTC', date_default_timezone_get(), $posted, 'r');

    $headline = '';

    if ($avatar !== '') {
        $headline .= '[url=' . zid($profile) . '][img]' . $avatar . '[/img][/url]';
    }

    // Bob Smith wrote the following post 2 hours ago
    $fmt = sprintf(
        t('%1$s wrote the following %2$s %3$s'),
        '[url=' . zid($profile) . ']' . $author . '[/url]',
        '[url=' . zid($link) . ']' . t('post') . '[/url]',
        $reldate
    );

    $headline .= $fmt . "\n\n";

    return $headline . trim($match[2]);
}

/**
 * @brief Extract one single-quoted attribute value from a [share] attribute string.
 *
 * @param string $attributes raw attribute text, e.g. " author='x' link='y'"
 * @param string $name attribute name to look for
 * @return string the attribute value, or an empty string when it is absent
 */
private static function share_attribute($attributes, $name)
{
    $found = [];

    // Read the capture group only after a successful match. On a miss
    // preg_match() leaves $found empty, so $found[1] would be undefined.
    if (preg_match("/" . preg_quote($name, '/') . "='(.*?)'/ism", $attributes, $found) === 1) {
        return $found[1];
    }

    return '';
}
2019-04-12 04:10:31 +00:00
2021-12-02 23:02:31 +00:00
/**
 * @brief Convert bbcode to Markdown.
 *
 * The text is normalised as bbcode, rendered to HTML with bbcode(), converted
 * to Markdown with from_html(), and finally cleaned up.
 *
 * @param string $Text The message as bbcode
 * @param array $options default empty; only forwarded to the bb_to_markdown_bb hook
 * @return string The message converted to Markdown
 */
public static function from_bbcode($Text, $options = [])
{
    // Drop the "#^" marker in front of [url]/[zrl] elements, keeping the element itself.
    $bb = preg_replace('/#\^\[([zu])rl=(.*?)](.*?)\[\/([zu])rl]/i', '[$1rl=$2]$3[/$4rl]', $Text);

    // Converting images with size parameters to simple images. Markdown doesn't know it.
    $bb = preg_replace("/\[img=([0-9]*)x([0-9]*)](.*?)\[\/img]/ism", '[img]$3[/img]', $bb);

    // Replace each [share] element with an attribution headline plus its content.
    $bb = preg_replace_callback(
        "/\[share(.*?)](.*?)\[\/share]/ism",
        ['\\Code\\Lib\\Markdown', 'from_bbcode_share'],
        $bb
    );

    $x = ['bbcode' => $bb, 'options' => $options];

    /**
     * @hooks bb_to_markdown_bb
     *   * \e string \b bbcode - The message as bbcode and what will get returned
     *   * \e array \b options
     */
    Hook::call('bb_to_markdown_bb', $x);

    // bbcode -> HTML (don't try oembed), then HTML -> Markdown.
    $markdown = self::from_html(bbcode($x['bbcode'], ['tryoembed' => false]));

    // html2markdown adds backslashes in front of hashes after a new line. Remove them.
    $markdown = str_replace("\n\#", "\n#", $markdown);

    // If the text going into bbcode() has a plain URL in it, i.e.
    // with no [url] tags around it, it will come out of parseString()
    // looking like: <http://url.com>, which gets removed by strip_tags().
    // So take off the angle brackets of any such URL.
    $markdown = preg_replace("/<http(.*?)>/is", "http$1", $markdown);

    // Remove empty zrl links
    $markdown = preg_replace("/\[zrl=].*?\[\/zrl]/is", "", $markdown);

    $markdown = trim($markdown);

    /**
     * @hooks bb_to_markdown
     *   * \e string - The already converted message as Markdown and what will get returned
     */
    Hook::call('bb_to_markdown', $markdown);

    return $markdown;
}
/**
 * @brief Convert HTML text into Markdown.
 *
 * This function uses the library league/html-to-markdown for this task,
 * with a TableConverter registered in addition to the default converters.
 *
 * If the converter throws an InvalidArgumentException the failure is logged
 * and an empty string is returned.
 *
 * @param string $html The HTML code to convert
 * @param array $options converter configuration; when empty, the defaults
 *   listed in the function body are used
 * @return string Markdown representation of the given HTML text, empty on error
 */
public static function from_html($html, $options = [])
{
    if (! $options) {
        $options = [
            // 'setext' underlines H1/H2; 'atx' would emit "# Header1" / "## Header2"
            'header_style' => 'setext',
            // false would show warnings when loading malformed HTML
            'suppress_errors' => true,
            // true would strip tags that have no markdown equivalent (the tags, not their content)
            'strip_tags' => false,
            // DEPRECATED upstream: '__' selects the underlined style
            'bold_style' => '**',
            // DEPRECATED upstream: '_' selects the underlined style
            'italic_style' => '*',
            // space-separated list of DOM nodes to remove, e.g. 'meta style script'
            'remove_nodes' => '',
            // true would turn <br> into `\n` instead of `  \n`
            'hard_break' => false,
            // bullet character for <li> in a <ul>: '-', '*', or '+'
            'list_item_style' => '-',
        ];
    }

    $environment = Environment::createDefaultEnvironment($options);
    $environment->addConverter(new TableConverter());

    $converter = new HtmlConverter($environment);

    try {
        return $converter->convert($html);
    } catch (InvalidArgumentException $e) {
        logger("Invalid HTML. HTMLToMarkdown library threw an exception.");
        return '';
    }
}
2019-04-12 04:10:31 +00:00
}
// Tables are not an official part of the markdown specification.
// This interface was suggested as a workaround.
// author: Mark Hamstra
// https://github.com/Mark-H/Docs