streams/Zotlabs/Lib/MarkdownSoap.php
Klaus Weidenbach aa63c23839 💡 Add source documentation from recent conversations.
There have been some conversations in the last weeks which explained
several parts of the code, so add it to the source code documentation.
Also some other small source code documentation improvements.
2018-01-05 01:38:20 +01:00

132 lines
2.6 KiB
PHP

<?php
namespace Zotlabs\Lib;
/**
* @brief MarkdownSoap class.
*
* Purify Markdown for storage
* @code{.php}
* $x = new MarkdownSoap($string_to_be_cleansed);
* $text = $x->clean();
* @endcode
* What this does:
* 1. Extracts code blocks and privately escapes them from processing.
* 2. Runs the HTML purifier on the remaining content.
* 3. Puts back the code blocks.
* 4. Runs htmlspecialchars on the entire content for safe storage.
*
* At render time:
* @code{.php}
* $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
* $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
* @endcode
*/
class MarkdownSoap {

	/**
	 * @var string The raw Markdown text handed to the constructor.
	 */
	private $str;

	/**
	 * @var string Per-instance marker that prefixes every encoded code block.
	 */
	private $token;

	/**
	 * @brief Stores the text to clean and creates the placeholder token.
	 *
	 * @param string $s Markdown text to be cleansed
	 */
	public function __construct($s) {
		$this->str   = $s;
		$this->token = random_string(20);
	}

	/**
	 * @brief Runs the complete cleaning pipeline on the stored text.
	 *
	 * Order matters: code blocks are hidden first so the purifier does not
	 * touch them, restored afterwards, and only then is everything escaped.
	 *
	 * @return string The purified and escaped text
	 */
	public function clean() {
		$x = $this->extract_code($this->str);
		$x = $this->purify($x);
		$x = $this->putback_code($x);
		$x = $this->escape($x);

		return $x;
	}

	/**
	 * @brief Extracts code blocks and privately escapes them from processing.
	 *
	 * Every indented code block is replaced by the output of encode_code().
	 * If the regular expression engine fails, the input is returned unchanged
	 * instead of null, so the text is not lost (it is then purified as a whole).
	 *
	 * @see encode_code()
	 * @see putback_code()
	 *
	 * @param string $s
	 * @return string
	 */
	public function extract_code($s) {
		$text = preg_replace_callback('{
				(?:\n\n|\A\n?)
				(                 # $1 = the code block -- one or more lines indented by four spaces
				  (?>
					[ ]{4}        # Lines must start with four spaces (tabs are not matched here)
					.*\n+
				  )+
				)
				((?=^[ ]{0,4}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
			}xm',
			[ $this, 'encode_code' ], $s);

		// preg_replace_callback() yields null on a PCRE error.
		return ($text === null) ? $s : $text;
	}

	/**
	 * @brief Callback for extract_code(): turns a matched block into a placeholder.
	 *
	 * The whole match (including the leading newlines) is base64 encoded and
	 * wrapped as "<token>;<base64>;".
	 *
	 * @param array $matches Match array from preg_replace_callback()
	 * @return string
	 */
	public function encode_code($matches) {
		return $this->token . ';' . base64_encode($matches[0]) . ';';
	}

	/**
	 * @brief Callback for putback_code(): restores the text of one placeholder.
	 *
	 * @param array $matches Match array; index 1 holds the base64 payload
	 * @return string
	 */
	public function decode_code($matches) {
		return base64_decode($matches[1]);
	}

	/**
	 * @brief Put back the code blocks.
	 *
	 * The token is passed through preg_quote() so that it is always matched
	 * literally, whatever characters random_string() produced. If the regular
	 * expression engine fails, the input is returned unchanged instead of null.
	 *
	 * @see extract_code()
	 * @see decode_code()
	 *
	 * @param string $s
	 * @return string
	 */
	public function putback_code($s) {
		$pattern = '/' . preg_quote($this->token, '/') . ';(.*?);/';
		$text = preg_replace_callback($pattern, [ $this, 'decode_code' ], $s);

		return ($text === null) ? $s : $text;
	}

	/**
	 * @brief Runs purify_html() on the text while shielding autolinks.
	 *
	 * @see protect_autolinks()
	 * @see unprotect_autolinks()
	 *
	 * @param string $s
	 * @return string
	 */
	public function purify($s) {
		$s = $this->protect_autolinks($s);
		$s = purify_html($s);
		$s = $this->unprotect_autolinks($s);

		return $s;
	}

	/**
	 * @brief Rewrites Markdown autolinks into explicit Markdown links.
	 *
	 * "<http://example.com>" becomes "[http://example.com](http://example.com)".
	 * Presumably this keeps the purifier from treating the angle brackets as
	 * an HTML tag -- verify against purify_html().
	 *
	 * @param string $s
	 * @return string
	 */
	public function protect_autolinks($s) {
		$s = preg_replace('/\<(https?\:\/\/)(.*?)\>/', '[$1$2]($1$2)', $s);

		return $s;
	}

	/**
	 * @brief Counterpart of protect_autolinks(); currently returns its input unchanged.
	 *
	 * @param string $s
	 * @return string
	 */
	public function unprotect_autolinks($s) {
		return $s;
	}

	/**
	 * @brief Converts special characters to HTML entities for storage.
	 *
	 * Existing entities are not encoded a second time (double_encode is off).
	 * ENT_SUBSTITUTE makes invalid UTF-8 sequences come out as U+FFFD; without
	 * it htmlspecialchars() returns an empty string for such input, which
	 * would discard the entire text. Restored code blocks bypass purify_html(),
	 * so their bytes reach this method exactly as they were submitted.
	 *
	 * @param string $s
	 * @return string
	 */
	public function escape($s) {
		return htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
	}

	/**
	 * @brief Converts special HTML entities back to characters.
	 *
	 * @param string $s
	 * @return string
	 */
	public static function unescape($s) {
		return htmlspecialchars_decode($s, ENT_QUOTES);
	}
}