mirror of
https://github.com/friendica/friendica
synced 2025-01-12 19:24:42 +00:00
Merge pull request #400 from annando/master
New bb2markdown, parse_url improved
This commit is contained in:
commit
082ad499c6
9 changed files with 420 additions and 261 deletions
|
@ -156,6 +156,7 @@
|
||||||
//echo "<pre>"; var_dump($r); die();
|
//echo "<pre>"; var_dump($r); die();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
header("HTTP/1.1 404 Not Found");
|
||||||
logger('API call not implemented: '.$a->query_string." - ".print_r($_REQUEST,true));
|
logger('API call not implemented: '.$a->query_string." - ".print_r($_REQUEST,true));
|
||||||
$r = '<status><error>not implemented</error></status>';
|
$r = '<status><error>not implemented</error></status>';
|
||||||
switch($type){
|
switch($type){
|
||||||
|
@ -490,7 +491,8 @@
|
||||||
$_REQUEST['type'] = 'wall';
|
$_REQUEST['type'] = 'wall';
|
||||||
$_REQUEST['profile_uid'] = local_user();
|
$_REQUEST['profile_uid'] = local_user();
|
||||||
$_REQUEST['api_source'] = true;
|
$_REQUEST['api_source'] = true;
|
||||||
$txt = urldecode(requestdata('status'));
|
$txt = requestdata('status');
|
||||||
|
//$txt = urldecode(requestdata('status'));
|
||||||
|
|
||||||
require_once('library/HTMLPurifier.auto.php');
|
require_once('library/HTMLPurifier.auto.php');
|
||||||
require_once('include/html2bbcode.php');
|
require_once('include/html2bbcode.php');
|
||||||
|
@ -554,7 +556,8 @@
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
$_REQUEST['body'] = urldecode(requestdata('status'));
|
$_REQUEST['body'] = requestdata('status');
|
||||||
|
//$_REQUEST['body'] = urldecode(requestdata('status'));
|
||||||
|
|
||||||
$parent = requestdata('in_reply_to_status_id');
|
$parent = requestdata('in_reply_to_status_id');
|
||||||
if(ctype_digit($parent))
|
if(ctype_digit($parent))
|
||||||
|
@ -1725,4 +1728,6 @@ notifications/follow
|
||||||
notifications/leave
|
notifications/leave
|
||||||
blocks/exists
|
blocks/exists
|
||||||
blocks/blocking
|
blocks/blocking
|
||||||
|
lists
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
|
@ -190,63 +190,40 @@ function diaspora_ol($s) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function bb2diaspora($Text,$preserve_nl = false) {
|
function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||||
|
|
||||||
//////////////////////
|
// Re-enabling the converter again.
|
||||||
// An attempt was made to convert bbcode to html and then to markdown
|
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
|
||||||
// consisting of the following lines.
|
// Additionally the html code is now fixed so that lists are now working.
|
||||||
// I'm undoing this as we have a lot of bbcode constructs which
|
|
||||||
// were simply getting lost, for instance bookmark, vimeo, video, youtube, events, etc.
|
|
||||||
// We can try this again, but need a very good test sequence to verify
|
|
||||||
// all the major bbcode constructs that we use are getting through.
|
|
||||||
//////////////////////
|
|
||||||
/*
|
|
||||||
// bbcode() will convert "[*]" into "<li>" with no closing "</li>"
|
|
||||||
// Markdownify() is unable to handle these, as it makes each new
|
|
||||||
// "<li>" into a deeper nested element until it crashes. So pre-format
|
|
||||||
// the lists as Diaspora lists before sending the $Text to bbcode()
|
|
||||||
//
|
|
||||||
// Note that to get nested lists to work for Diaspora, we would need
|
|
||||||
// to define the closing tag for the list elements. So nested lists
|
|
||||||
// are going to be flattened out in Diaspora for now
|
|
||||||
|
|
||||||
$endlessloop = 0;
|
// Convert images with size parameters to simple images, because Markdown has no syntax for image sizes.
|
||||||
while ((((strpos($Text, "[/list]") !== false) && (strpos($Text, "[list") !== false)) ||
|
$Text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $Text);
|
||||||
((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) ||
|
|
||||||
((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false))) && (++$endlessloop < 20)) {
|
|
||||||
$Text = preg_replace_callback("/\[list\](.*?)\[\/list\]/is", 'diaspora_ul', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[list=1\](.*?)\[\/list\]/is", 'diaspora_ol', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[list=i\](.*?)\[\/list\]/s",'diaspora_ol', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[list=I\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[list=a\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[list=A\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[ul\](.*?)\[\/ul\]/is", 'diaspora_ul', $Text);
|
|
||||||
$Text = preg_replace_callback("/\[ol\](.*?)\[\/ol\]/is", 'diaspora_ol', $Text);
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
// the following was added on 10-January-2012 due to an inability of Diaspora's
|
||||||
|
// new javascript markdown processor to handle links with images as the link "text"
|
||||||
|
// It is not optimal and may be removed if this ability is restored in the future
|
||||||
|
if ($fordiaspora)
|
||||||
|
$Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism",
|
||||||
|
"[url]$1[/url]\n[img]$2[/img]", $Text);
|
||||||
|
|
||||||
// Convert it to HTML - don't try oembed
|
// Convert it to HTML - don't try oembed
|
||||||
// $Text = bbcode($Text, $preserve_nl, false);
|
$Text = bbcode($Text, $preserve_nl, false);
|
||||||
|
|
||||||
// Now convert HTML to Markdown
|
// Now convert HTML to Markdown
|
||||||
// $md = new Markdownify(false, false, false);
|
$md = new Markdownify(false, false, false);
|
||||||
// $Text = $md->parseString($Text);
|
$Text = $md->parseString($Text);
|
||||||
|
|
||||||
// If the text going into bbcode() has a plain URL in it, i.e.
|
// If the text going into bbcode() has a plain URL in it, i.e.
|
||||||
// with no [url] tags around it, it will come out of parseString()
|
// with no [url] tags around it, it will come out of parseString()
|
||||||
// looking like: <http://url.com>, which gets removed by strip_tags().
|
// looking like: <http://url.com>, which gets removed by strip_tags().
|
||||||
// So take off the angle brackets of any such URL
|
// So take off the angle brackets of any such URL
|
||||||
// $Text = preg_replace("/<http(.*?)>/is", "http$1", $Text);
|
$Text = preg_replace("/<http(.*?)>/is", "http$1", $Text);
|
||||||
|
|
||||||
// Remove all unconverted tags
|
// Remove all unconverted tags
|
||||||
// $Text = strip_tags($Text);
|
$Text = strip_tags($Text);
|
||||||
|
|
||||||
//////
|
|
||||||
// end of bb->html->md conversion attempt
|
|
||||||
//////
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Old routine
|
||||||
|
|
||||||
$ev = bbtoevent($Text);
|
$ev = bbtoevent($Text);
|
||||||
|
|
||||||
|
@ -422,6 +399,7 @@ function bb2diaspora($Text,$preserve_nl = false) {
|
||||||
|
|
||||||
$Text = preg_replace_callback('/\[(.*?)\]\((.*?)\)/ism','unescape_underscores_in_links',$Text);
|
$Text = preg_replace_callback('/\[(.*?)\]\((.*?)\)/ism','unescape_underscores_in_links',$Text);
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
// Remove any leading or trailing whitespace, as this will mess up
|
// Remove any leading or trailing whitespace, as this will mess up
|
||||||
// the Diaspora signature verification and cause the item to disappear
|
// the Diaspora signature verification and cause the item to disappear
|
||||||
|
|
|
@ -115,6 +115,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
|
|
||||||
$a = get_app();
|
$a = get_app();
|
||||||
|
|
||||||
|
// Move all spaces out of the tags
|
||||||
|
$Text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $Text);
|
||||||
|
$Text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $Text);
|
||||||
|
|
||||||
// Hide all [noparse] contained bbtags by spacefying them
|
// Hide all [noparse] contained bbtags by spacefying them
|
||||||
// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
|
// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
|
||||||
|
|
||||||
|
@ -313,21 +317,25 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
$Text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '<img src="$1" alt="' . t('Image/photo') . '" />', $Text);
|
$Text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '<img src="$1" alt="' . t('Image/photo') . '" />', $Text);
|
||||||
|
|
||||||
|
|
||||||
$Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="425" height="350"><a href="$1">$1</a></video>', $Text);
|
|
||||||
|
|
||||||
$Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $Text);
|
|
||||||
|
|
||||||
// Try to Oembed
|
// Try to Oembed
|
||||||
if ($tryoembed) {
|
if ($tryoembed) {
|
||||||
|
$Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="425" height="350"><a href="$1">$1</a></video>', $Text);
|
||||||
|
$Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $Text);
|
||||||
|
|
||||||
$Text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", 'tryoembed', $Text);
|
$Text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", 'tryoembed', $Text);
|
||||||
$Text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", 'tryoembed', $Text);
|
$Text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", 'tryoembed', $Text);
|
||||||
|
} else {
|
||||||
|
$Text = preg_replace("/\[video\](.*?)\[\/video\]/", '$1', $Text);
|
||||||
|
$Text = preg_replace("/\[audio\](.*?)\[\/audio\]/", '$1', $Text);
|
||||||
}
|
}
|
||||||
|
|
||||||
// html5 video and audio
|
// html5 video and audio
|
||||||
|
|
||||||
|
|
||||||
|
if ($tryoembed)
|
||||||
$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="425" height="350"><a href="$1">$1</a></iframe>', $Text);
|
$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="425" height="350"><a href="$1">$1</a></iframe>', $Text);
|
||||||
|
else
|
||||||
|
$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<a href="$1">$1</a>', $Text);
|
||||||
|
|
||||||
// Youtube extensions
|
// Youtube extensions
|
||||||
if ($tryoembed) {
|
if ($tryoembed) {
|
||||||
|
@ -340,7 +348,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
|
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
|
||||||
$Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
|
$Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
|
||||||
|
|
||||||
|
if ($tryoembed)
|
||||||
$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '<iframe width="425" height="350" src="http://www.youtube.com/embed/$1" frameborder="0" ></iframe>', $Text);
|
$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '<iframe width="425" height="350" src="http://www.youtube.com/embed/$1" frameborder="0" ></iframe>', $Text);
|
||||||
|
else
|
||||||
|
$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", "http://www.youtube.com/watch?v=$1", $Text);
|
||||||
|
|
||||||
|
|
||||||
if ($tryoembed) {
|
if ($tryoembed) {
|
||||||
|
@ -350,7 +361,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
|
|
||||||
$Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
|
$Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
|
||||||
$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
|
$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
|
||||||
|
|
||||||
|
if ($tryoembed)
|
||||||
$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '<iframe width="425" height="350" src="http://player.vimeo.com/video/$1" frameborder="0" ></iframe>', $Text);
|
$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '<iframe width="425" height="350" src="http://player.vimeo.com/video/$1" frameborder="0" ></iframe>', $Text);
|
||||||
|
else
|
||||||
|
$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", "http://vimeo.com/$1", $Text);
|
||||||
|
|
||||||
// $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '<object width="425" height="350" type="application/x-shockwave-flash" data="http://www.youtube.com/v/$1" ><param name="movie" value="http://www.youtube.com/v/$1"></param><!--[if IE]><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350" /><![endif]--></object>', $Text);
|
// $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '<object width="425" height="350" type="application/x-shockwave-flash" data="http://www.youtube.com/v/$1" ><param name="movie" value="http://www.youtube.com/v/$1"></param><!--[if IE]><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350" /><![endif]--></object>', $Text);
|
||||||
|
|
||||||
|
@ -358,6 +373,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
// oembed tag
|
// oembed tag
|
||||||
$Text = oembed_bbcode2html($Text);
|
$Text = oembed_bbcode2html($Text);
|
||||||
|
|
||||||
|
// Avoid triple linefeeds through oembed
|
||||||
|
$Text = str_replace("<br style='clear:left'></span><br /><br />", "<br style='clear:left'></span><br />", $Text);
|
||||||
|
|
||||||
// If we found an event earlier, strip out all the event code and replace with a reformatted version.
|
// If we found an event earlier, strip out all the event code and replace with a reformatted version.
|
||||||
// Replace the event-start section with the entire formatted event. The other bbcode is stripped.
|
// Replace the event-start section with the entire formatted event. The other bbcode is stripped.
|
||||||
// Summary (e.g. title) is required, earlier revisions only required description (in addition to
|
// Summary (e.g. title) is required, earlier revisions only required description (in addition to
|
||||||
|
@ -391,6 +409,25 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
|
||||||
if($saved_image)
|
if($saved_image)
|
||||||
$Text = bb_replace_images($Text, $saved_image);
|
$Text = bb_replace_images($Text, $saved_image);
|
||||||
|
|
||||||
|
// Clean up the HTML by loading and saving the HTML with the DOM
|
||||||
|
// Only do it when it has to be done - for performance reasons
|
||||||
|
if (!$tryoembed) {
|
||||||
|
$doc = new DOMDocument();
|
||||||
|
$doc->preserveWhiteSpace = false;
|
||||||
|
|
||||||
|
$Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
|
||||||
|
|
||||||
|
$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
|
||||||
|
@$doc->loadHTML($doctype."<html><body>".$Text."</body></html>");
|
||||||
|
|
||||||
|
$Text = $doc->saveHTML();
|
||||||
|
$Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
|
||||||
|
|
||||||
|
$Text = str_replace('<br></li>','</li>', $Text);
|
||||||
|
|
||||||
|
$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
|
||||||
|
}
|
||||||
|
|
||||||
call_hooks('bbcode',$Text);
|
call_hooks('bbcode',$Text);
|
||||||
|
|
||||||
return $Text;
|
return $Text;
|
||||||
|
|
|
@ -71,22 +71,32 @@ class dba {
|
||||||
}
|
}
|
||||||
|
|
||||||
public function q($sql) {
|
public function q($sql) {
|
||||||
|
global $a;
|
||||||
|
|
||||||
if((! $this->db) || (! $this->connected))
|
if((! $this->db) || (! $this->connected))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
$this->error = '';
|
$this->error = '';
|
||||||
|
|
||||||
//if (get_config("system", "db_log") != "")
|
if ($a->config["system"]["db_log"] != "")
|
||||||
// @file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Start '.$sql."\n", FILE_APPEND);
|
$stamp1 = microtime(true);
|
||||||
|
|
||||||
if($this->mysqli)
|
if($this->mysqli)
|
||||||
$result = @$this->db->query($sql);
|
$result = @$this->db->query($sql);
|
||||||
else
|
else
|
||||||
$result = @mysql_query($sql,$this->db);
|
$result = @mysql_query($sql,$this->db);
|
||||||
|
|
||||||
//if (get_config("system", "db_log") != "")
|
if ($a->config["system"]["db_log"] != "") {
|
||||||
// @file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Stop '."\n", FILE_APPEND);
|
$stamp2 = microtime(true);
|
||||||
|
$duration = round($stamp2-$stamp1, 3);
|
||||||
|
if ($duration > $a->config["system"]["db_loglimit"]) {
|
||||||
|
$backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
|
||||||
|
@file_put_contents($a->config["system"]["db_log"], $duration."\t".
|
||||||
|
basename($backtrace[1]["file"])."\t".
|
||||||
|
$backtrace[1]["line"]."\t".$backtrace[2]["function"]."\t".
|
||||||
|
substr($sql, 0, 2000)."\n", FILE_APPEND);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if($this->mysqli) {
|
if($this->mysqli) {
|
||||||
if($this->db->errno)
|
if($this->db->errno)
|
||||||
|
|
|
@ -374,6 +374,29 @@ function limit_body_size($body) {
|
||||||
return $body;
|
return $body;
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
/**
 * Check whether a title merely repeats the beginning of a body text.
 *
 * Both strings are reduced to their bare text first: HTML tags are stripped
 * and every newline, carriage return, tab and space is removed. The title
 * then counts as "being the body" when
 *  - it equals the body or is a prefix of a longer body, or
 *  - it ends in "..." and the text before that ellipsis is a prefix of the body.
 *
 * @param string $title title text (may contain HTML)
 * @param string $body  body text (may contain HTML)
 * @return bool true when the title only repeats the start of the body
 */
function title_is_body($title, $body) {

	$whitespace = array("\n", "\r", "\t", " ");

	$title = str_replace($whitespace, "", trim(strip_tags($title)));
	$body = str_replace($whitespace, "", trim(strip_tags($body)));

	// Only the leading part of the body can match the title
	if (strlen($title) < strlen($body))
		$body = substr($body, 0, strlen($title));

	// Strict comparison is required here: with "==" PHP compares numeric
	// strings as numbers, so e.g. "1e2" would be treated as equal to "100".
	if (($title !== $body) and (substr($title, -3) === "...")) {
		$pos = strrpos($title, "...");
		if ($pos > 0) {
			// Drop the trailing ellipsis and compare what is in front of it
			$title = substr($title, 0, $pos);
			// Cast to string: substr() can return false instead of "" on PHP versions before 8.0
			$body = (string)substr($body, 0, $pos);
		}
	}

	return($title === $body);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -400,6 +423,11 @@ function get_atom_elements($feed,$item) {
|
||||||
$res['body'] = unxmlify($item->get_content());
|
$res['body'] = unxmlify($item->get_content());
|
||||||
$res['plink'] = unxmlify($item->get_link(0));
|
$res['plink'] = unxmlify($item->get_link(0));
|
||||||
|
|
||||||
|
// Remove the title if it is identical to the body
|
||||||
|
// This helps with auto generated titles e.g. from tumblr
|
||||||
|
if (title_is_body($res["title"], $res["body"]))
|
||||||
|
$res['title'] = "";
|
||||||
|
|
||||||
if($res['plink'])
|
if($res['plink'])
|
||||||
$base_url = implode('/', array_slice(explode('/',$res['plink']),0,3));
|
$base_url = implode('/', array_slice(explode('/',$res['plink']),0,3));
|
||||||
else
|
else
|
||||||
|
@ -758,10 +786,41 @@ function get_atom_elements($feed,$item) {
|
||||||
$res['target'] .= '</target>' . "\n";
|
$res['target'] .= '</target>' . "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is some experimental stuff. By now retweets are shown with "RT:"
|
||||||
|
// But: There is data so that the message could be shown similar to native retweets
|
||||||
|
// There is probably a better way to parse this array, but it didn't work for me.
|
||||||
|
$child = $item->feed->data["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["feed"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["entry"][0]["child"]["http://activitystrea.ms/spec/1.0/"][object][0]["child"];
|
||||||
|
if (is_array($child)) {
|
||||||
|
$message = $child["http://activitystrea.ms/spec/1.0/"]["object"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["content"][0]["data"];
|
||||||
|
$author = $child[SIMPLEPIE_NAMESPACE_ATOM_10]["author"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10];
|
||||||
|
$uri = $author["uri"][0]["data"];
|
||||||
|
$name = $author["name"][0]["data"];
|
||||||
|
$avatar = @array_shift($author["link"][2]["attribs"]);
|
||||||
|
$avatar = $avatar["href"];
|
||||||
|
|
||||||
|
if (($name != "") and ($uri != "") and ($avatar != "") and ($message != "")) {
|
||||||
|
$res["owner-name"] = $res["author-name"];
|
||||||
|
$res["owner-link"] = $res["author-link"];
|
||||||
|
$res["owner-avatar"] = $res["author-avatar"];
|
||||||
|
|
||||||
|
$res["author-name"] = $name;
|
||||||
|
$res["author-link"] = $uri;
|
||||||
|
$res["author-avatar"] = $avatar;
|
||||||
|
|
||||||
|
$res["body"] = html2bbcode($message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$arr = array('feed' => $feed, 'item' => $item, 'result' => $res);
|
$arr = array('feed' => $feed, 'item' => $item, 'result' => $res);
|
||||||
|
|
||||||
call_hooks('parse_atom', $arr);
|
call_hooks('parse_atom', $arr);
|
||||||
|
|
||||||
|
//if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) {
|
||||||
|
//if (strpos($res["body"], "RT @") !== false) {
|
||||||
|
// $debugfile = tempnam("/home/ike/log", "item-res2-");
|
||||||
|
// file_put_contents($debugfile, serialize($arr));
|
||||||
|
//}
|
||||||
|
|
||||||
return $res;
|
return $res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1748,7 +1807,6 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
|
||||||
$item_id = $item->get_id();
|
$item_id = $item->get_id();
|
||||||
$datarray = get_atom_elements($feed,$item);
|
$datarray = get_atom_elements($feed,$item);
|
||||||
|
|
||||||
|
|
||||||
if((! x($datarray,'author-name')) && ($contact['network'] != NETWORK_DFRN))
|
if((! x($datarray,'author-name')) && ($contact['network'] != NETWORK_DFRN))
|
||||||
$datarray['author-name'] = $contact['name'];
|
$datarray['author-name'] = $contact['name'];
|
||||||
if((! x($datarray,'author-link')) && ($contact['network'] != NETWORK_DFRN))
|
if((! x($datarray,'author-link')) && ($contact['network'] != NETWORK_DFRN))
|
||||||
|
@ -3665,7 +3723,6 @@ function posted_date_widget($url,$uid,$wall) {
|
||||||
return $o;
|
return $o;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function store_diaspora_retract_sig($item, $user, $baseurl) {
|
function store_diaspora_retract_sig($item, $user, $baseurl) {
|
||||||
// Note that we can't add a target_author_signature
|
// Note that we can't add a target_author_signature
|
||||||
// if the comment was deleted by a remote user. That should be ok, because if a remote user is deleting
|
// if the comment was deleted by a remote user. That should be ok, because if a remote user is deleting
|
||||||
|
|
|
@ -735,6 +735,13 @@ class Markdownify {
|
||||||
$this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']);
|
$this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ![Alt text](/path/to/img.jpg "Optional title")
|
||||||
|
if ($this->parser->tagAttributes['title'] != "")
|
||||||
|
$this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].'"'.$this->parser->tagAttributes['title'].'")', true);
|
||||||
|
else
|
||||||
|
$this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].')', true);
|
||||||
|
|
||||||
|
/*
|
||||||
# [This link][id]
|
# [This link][id]
|
||||||
$link_id = false;
|
$link_id = false;
|
||||||
if (!empty($this->stack['a'])) {
|
if (!empty($this->stack['a'])) {
|
||||||
|
@ -759,6 +766,7 @@ class Markdownify {
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->out('!['.$this->parser->tagAttributes['alt'].']['.$link_id.']', true);
|
$this->out('!['.$this->parser->tagAttributes['alt'].']['.$link_id.']', true);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* handle <code> tags
|
* handle <code> tags
|
||||||
|
|
|
@ -22,7 +22,8 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
|
||||||
}
|
}
|
||||||
|
|
||||||
@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
||||||
@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica");
|
//@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica");
|
||||||
|
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
||||||
|
|
||||||
|
|
||||||
if(intval($timeout)) {
|
if(intval($timeout)) {
|
||||||
|
@ -59,7 +60,6 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
|
||||||
$base = $s;
|
$base = $s;
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
$http_code = $curl_info['http_code'];
|
$http_code = $curl_info['http_code'];
|
||||||
|
|
||||||
// logger('fetch_url:' . $http_code . ' data: ' . $s);
|
// logger('fetch_url:' . $http_code . ' data: ' . $s);
|
||||||
$header = '';
|
$header = '';
|
||||||
|
|
||||||
|
@ -88,9 +88,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
|
||||||
$a->set_curl_code($http_code);
|
$a->set_curl_code($http_code);
|
||||||
|
|
||||||
$body = substr($s,strlen($header));
|
$body = substr($s,strlen($header));
|
||||||
|
|
||||||
$a->set_curl_headers($header);
|
$a->set_curl_headers($header);
|
||||||
|
|
||||||
@curl_close($ch);
|
@curl_close($ch);
|
||||||
return($body);
|
return($body);
|
||||||
}}
|
}}
|
||||||
|
@ -800,6 +798,9 @@ function scale_external_images($s, $include_link = true, $scale_replace = false)
|
||||||
|
|
||||||
$a = get_app();
|
$a = get_app();
|
||||||
|
|
||||||
|
// Picture addresses can contain special characters
|
||||||
|
$s = htmlspecialchars_decode($s);
|
||||||
|
|
||||||
$matches = null;
|
$matches = null;
|
||||||
$c = preg_match_all('/\[img\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER);
|
$c = preg_match_all('/\[img\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER);
|
||||||
if($c) {
|
if($c) {
|
||||||
|
|
|
@ -1562,6 +1562,7 @@ function undo_post_tagging($s) {
|
||||||
|
|
||||||
function fix_mce_lf($s) {
|
function fix_mce_lf($s) {
|
||||||
$s = str_replace("\r\n","\n",$s);
|
$s = str_replace("\r\n","\n",$s);
|
||||||
|
$s = str_replace("\n\n","\n",$s);
|
||||||
return $s;
|
return $s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,224 @@
|
||||||
<?php
|
<?php
|
||||||
|
/* To-Do
|
||||||
|
https://developers.google.com/+/plugins/snippet/
|
||||||
|
|
||||||
require_once('library/HTML5/Parser.php');
|
<meta itemprop="name" content="Toller Titel">
|
||||||
require_once('library/HTMLPurifier.auto.php');
|
<meta itemprop="description" content="Eine tolle Beschreibung">
|
||||||
|
<meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png">
|
||||||
|
|
||||||
|
<body itemscope itemtype="http://schema.org/Product">
|
||||||
|
<h1 itemprop="name">Shiny Trinket</h1>
|
||||||
|
<img itemprop="image" src="{image-url}" />
|
||||||
|
<p itemprop="description">Shiny trinkets are shiny.</p>
|
||||||
|
</body>
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!function_exists('deletenode')) {
	/**
	 * Remove all nodes matching a name from a DOM document.
	 *
	 * The name is appended to "//" to form the XPath query, so every match
	 * anywhere in the document is detached from its parent.
	 *
	 * @param DOMDocument $doc  document to modify in place
	 * @param string      $node element name to remove (e.g. 'script')
	 */
	function deletenode(&$doc, $node)
	{
		$finder = new DomXPath($doc);

		foreach ($finder->query("//".$node) as $match) {
			$match->parentNode->removeChild($match);
		}
	}
}
|
||||||
|
|
||||||
|
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * A URL that already has a scheme is returned unchanged. Otherwise scheme,
 * host and port are taken from the base URL. A protocol-relative reference
 * ("//host/path") keeps its own host and port and only borrows the scheme.
 * A path that does not start with "/" is resolved against the directory of
 * the base URL's path. Dot segments ("./", "../") are not normalised.
 *
 * @param string $url    URL to complete (absolute, protocol-relative or relative)
 * @param string $scheme absolute base URL that supplies the missing parts
 *                       (despite its name this is a full URL, not just a scheme)
 * @return string the completed URL, or $url unchanged when it is already
 *                absolute or either argument cannot be parsed
 */
function completeurl($url, $scheme) {
	$urlarr = parse_url($url);

	// Unparsable or already absolute: nothing to complete
	if (($urlarr === false) || isset($urlarr["scheme"]))
		return($url);

	$schemearr = parse_url($scheme);

	// Without scheme and host in the base there is nothing to build on
	if (($schemearr === false) || !isset($schemearr["scheme"]) || !isset($schemearr["host"]))
		return($url);

	$complete = $schemearr["scheme"]."://";

	$path = isset($urlarr["path"]) ? $urlarr["path"] : "";

	if (isset($urlarr["host"])) {
		// Protocol-relative reference: keep its own authority
		$complete .= $urlarr["host"];

		if (isset($urlarr["port"]))
			$complete .= ":".$urlarr["port"];
	} else {
		$complete .= $schemearr["host"];

		if (isset($schemearr["port"]))
			$complete .= ":".$schemearr["port"];

		// A path without a leading slash is relative to the directory of the base path
		if (($path !== "") && ($path[0] !== "/")) {
			$basepath = isset($schemearr["path"]) ? $schemearr["path"] : "/";
			$slash = strrpos($basepath, "/");
			$dir = ($slash === false) ? "/" : substr($basepath, 0, $slash + 1);
			$path = $dir.$path;
		}
	}

	$complete .= $path;

	if (isset($urlarr["query"]) && ($urlarr["query"] !== ""))
		$complete .= "?".$urlarr["query"];

	if (isset($urlarr["fragment"]) && ($urlarr["fragment"] !== ""))
		$complete .= "#".$urlarr["fragment"];

	return($complete);
}
|
||||||
|
|
||||||
|
function parseurl_getsiteinfo($url) {
|
||||||
|
$siteinfo = array();
|
||||||
|
|
||||||
|
$ch = curl_init();
|
||||||
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
|
curl_setopt($ch, CURLOPT_HEADER, 1);
|
||||||
|
curl_setopt($ch, CURLOPT_NOBODY, 0);
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
|
||||||
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
|
curl_setopt($ch,CURLOPT_USERAGENT,'Opera/9.64(Windows NT 5.1; U; de) Presto/2.1.1');
|
||||||
|
|
||||||
|
$header = curl_exec($ch);
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
|
// Fetch the first mentioned charset. Can be in body or header
|
||||||
|
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches))
|
||||||
|
$charset = trim(array_pop($matches));
|
||||||
|
else
|
||||||
|
$charset = "utf-8";
|
||||||
|
|
||||||
|
$pos = strpos($header, "\r\n\r\n");
|
||||||
|
|
||||||
|
if ($pos)
|
||||||
|
$body = trim(substr($header, $pos));
|
||||||
|
else
|
||||||
|
$body = $header;
|
||||||
|
|
||||||
|
$body = mb_convert_encoding($body, "UTF-8", $charset);
|
||||||
|
$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
|
||||||
|
|
||||||
|
$doc = new DOMDocument();
|
||||||
|
@$doc->loadHTML($body);
|
||||||
|
|
||||||
|
deletenode($doc, 'style');
|
||||||
|
deletenode($doc, 'script');
|
||||||
|
deletenode($doc, 'option');
|
||||||
|
deletenode($doc, 'h1');
|
||||||
|
deletenode($doc, 'h2');
|
||||||
|
deletenode($doc, 'h3');
|
||||||
|
deletenode($doc, 'h4');
|
||||||
|
deletenode($doc, 'h5');
|
||||||
|
deletenode($doc, 'h6');
|
||||||
|
deletenode($doc, 'ol');
|
||||||
|
deletenode($doc, 'ul');
|
||||||
|
|
||||||
|
$xpath = new DomXPath($doc);
|
||||||
|
|
||||||
|
//$list = $xpath->query("head/title");
|
||||||
|
$list = $xpath->query("//title");
|
||||||
|
foreach ($list as $node)
|
||||||
|
$siteinfo["title"] = html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8");
|
||||||
|
|
||||||
|
//$list = $xpath->query("head/meta[@name]");
|
||||||
|
$list = $xpath->query("//meta[@name]");
|
||||||
|
foreach ($list as $node) {
|
||||||
|
$attr = array();
|
||||||
|
if ($node->attributes->length)
|
||||||
|
foreach ($node->attributes as $attribute)
|
||||||
|
$attr[$attribute->name] = $attribute->value;
|
||||||
|
|
||||||
|
$attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8");
|
||||||
|
|
||||||
|
switch (strtolower($attr["name"])) {
|
||||||
|
case "fulltitle":
|
||||||
|
$siteinfo["title"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
case "description":
|
||||||
|
$siteinfo["text"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
case "dc.title":
|
||||||
|
$siteinfo["title"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
case "dc.description":
|
||||||
|
$siteinfo["text"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//$list = $xpath->query("head/meta[@property]");
|
||||||
|
$list = $xpath->query("//meta[@property]");
|
||||||
|
foreach ($list as $node) {
|
||||||
|
$attr = array();
|
||||||
|
if ($node->attributes->length)
|
||||||
|
foreach ($node->attributes as $attribute)
|
||||||
|
$attr[$attribute->name] = $attribute->value;
|
||||||
|
|
||||||
|
$attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8");
|
||||||
|
|
||||||
|
switch (strtolower($attr["property"])) {
|
||||||
|
case "og:image":
|
||||||
|
$siteinfo["image"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
case "og:title":
|
||||||
|
$siteinfo["title"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
case "og:description":
|
||||||
|
$siteinfo["text"] = $attr["content"];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($siteinfo["image"] == "") {
|
||||||
|
$list = $xpath->query("//img[@src]");
|
||||||
|
foreach ($list as $node) {
|
||||||
|
$attr = array();
|
||||||
|
if ($node->attributes->length)
|
||||||
|
foreach ($node->attributes as $attribute)
|
||||||
|
$attr[$attribute->name] = $attribute->value;
|
||||||
|
|
||||||
|
$src = completeurl($attr["src"], $url);
|
||||||
|
$photodata = getimagesize($src);
|
||||||
|
|
||||||
|
if (($photodata[0] > 150) and ($photodata[1] > 150)) {
|
||||||
|
if ($photodata[0] > 300) {
|
||||||
|
$photodata[1] = round($photodata[1] * (300 / $photodata[0]));
|
||||||
|
$photodata[0] = 300;
|
||||||
|
}
|
||||||
|
if ($photodata[1] > 300) {
|
||||||
|
$photodata[0] = round($photodata[0] * (300 / $photodata[1]));
|
||||||
|
$photodata[1] = 300;
|
||||||
|
}
|
||||||
|
$siteinfo["images"][] = array("src"=>$src,
|
||||||
|
"width"=>$photodata[0],
|
||||||
|
"height"=>$photodata[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$src = completeurl($siteinfo["image"], $url);
|
||||||
|
$photodata = getimagesize($src);
|
||||||
|
|
||||||
|
if (($photodata[0] > 10) and ($photodata[1] > 10))
|
||||||
|
$siteinfo["images"][] = array("src"=>$src,
|
||||||
|
"width"=>$photodata[0],
|
||||||
|
"height"=>$photodata[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($siteinfo["text"] == "") {
|
||||||
|
$text = "";
|
||||||
|
|
||||||
|
$list = $xpath->query("//div[@class='article']");
|
||||||
|
foreach ($list as $node)
|
||||||
|
if (strlen($node->nodeValue) > 40)
|
||||||
|
$text .= " ".trim($node->nodeValue);
|
||||||
|
|
||||||
|
if ($text == "") {
|
||||||
|
$list = $xpath->query("//div[@class='content']");
|
||||||
|
foreach ($list as $node)
|
||||||
|
if (strlen($node->nodeValue) > 40)
|
||||||
|
$text .= " ".trim($node->nodeValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no text was found then take the paragraph content
|
||||||
|
if ($text == "") {
|
||||||
|
$list = $xpath->query("//p");
|
||||||
|
foreach ($list as $node)
|
||||||
|
if (strlen($node->nodeValue) > 40)
|
||||||
|
$text .= " ".trim($node->nodeValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($text != "") {
|
||||||
|
$text = trim(str_replace(array("\n", "\r"), array(" ", " "), $text));
|
||||||
|
|
||||||
|
while (strpos($text, " "))
|
||||||
|
$text = trim(str_replace(" ", " ", $text));
|
||||||
|
|
||||||
|
$siteinfo["text"] = html_entity_decode(substr($text,0,350), ENT_QUOTES, "UTF-8").'...';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return($siteinfo);
|
||||||
|
}
|
||||||
|
|
||||||
function arr_add_hashes(&$item,$k) {
|
function arr_add_hashes(&$item,$k) {
|
||||||
$item = '#' . $item;
|
$item = '#' . $item;
|
||||||
|
@ -16,8 +233,8 @@ function parse_url_content(&$a) {
|
||||||
if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
|
if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
|
||||||
$textmode = true;
|
$textmode = true;
|
||||||
|
|
||||||
if($textmode)
|
//if($textmode)
|
||||||
$br = (($textmode) ? "\n" : '<br /?');
|
$br = (($textmode) ? "\n" : '<br />');
|
||||||
|
|
||||||
if(x($_GET,'binurl'))
|
if(x($_GET,'binurl'))
|
||||||
$url = trim(hex2bin($_GET['binurl']));
|
$url = trim(hex2bin($_GET['binurl']));
|
||||||
|
@ -40,13 +257,11 @@ function parse_url_content(&$a) {
|
||||||
|
|
||||||
logger('parse_url: ' . $url);
|
logger('parse_url: ' . $url);
|
||||||
|
|
||||||
|
|
||||||
if($textmode)
|
if($textmode)
|
||||||
$template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
|
$template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
|
||||||
else
|
else
|
||||||
$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
|
$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
|
||||||
|
|
||||||
|
|
||||||
$arr = array('url' => $url, 'text' => '');
|
$arr = array('url' => $url, 'text' => '');
|
||||||
|
|
||||||
call_hooks('parse_link', $arr);
|
call_hooks('parse_link', $arr);
|
||||||
|
@ -60,9 +275,9 @@ function parse_url_content(&$a) {
|
||||||
if($url && $title && $text) {
|
if($url && $title && $text) {
|
||||||
|
|
||||||
if($textmode)
|
if($textmode)
|
||||||
$text = $br . $br . '[quote]' . $text . '[/quote]' . $br;
|
$text = $br . '[quote]' . trim($text) . '[/quote]' . $br;
|
||||||
else
|
else
|
||||||
$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
|
$text = '<br /><blockquote>' . trim($text) . '</blockquote><br />';
|
||||||
|
|
||||||
$title = str_replace(array("\r","\n"),array('',''),$title);
|
$title = str_replace(array("\r","\n"),array('',''),$title);
|
||||||
|
|
||||||
|
@ -74,187 +289,34 @@ function parse_url_content(&$a) {
|
||||||
killme();
|
killme();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$siteinfo = parseurl_getsiteinfo($url);
|
||||||
|
|
||||||
if($url) {
|
if($siteinfo["title"] == "") {
|
||||||
$s = fetch_url($url);
|
echo print_r($siteinfo, true);
|
||||||
|
//echo sprintf($template,$url,$url,'') . $str_tags;
|
||||||
|
killme();
|
||||||
} else {
|
} else {
|
||||||
echo '';
|
$text = $siteinfo["text"];
|
||||||
killme();
|
$title = $siteinfo["title"];
|
||||||
}
|
}
|
||||||
|
|
||||||
// logger('parse_url: data: ' . $s, LOGGER_DATA);
|
$image = "";
|
||||||
|
|
||||||
if(! $s) {
|
foreach ($siteinfo["images"] as $imagedata)
|
||||||
echo sprintf($template,$url,$url,'') . $str_tags;
|
|
||||||
killme();
|
|
||||||
}
|
|
||||||
|
|
||||||
$matches = '';
|
|
||||||
$c = preg_match('/\<head(.*?)\>(.*?)\<\/head\>/ism',$s,$matches);
|
|
||||||
if($c) {
|
|
||||||
// logger('parse_url: header: ' . $matches[2], LOGGER_DATA);
|
|
||||||
try {
|
|
||||||
$domhead = HTML5_Parser::parse($matches[2]);
|
|
||||||
} catch (DOMException $e) {
|
|
||||||
logger('scrape_dfrn: parse error: ' . $e);
|
|
||||||
}
|
|
||||||
if($domhead)
|
|
||||||
logger('parsed header');
|
|
||||||
}
|
|
||||||
|
|
||||||
if(! $title) {
|
|
||||||
if(strpos($s,'<title>')) {
|
|
||||||
$title = substr($s,strpos($s,'<title>')+7,64);
|
|
||||||
if(strpos($title,'<') !== false)
|
|
||||||
$title = strip_tags(substr($title,0,strpos($title,'<')));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('Cache.DefinitionImpl', null);
|
|
||||||
$purifier = new HTMLPurifier($config);
|
|
||||||
$s = $purifier->purify($s);
|
|
||||||
|
|
||||||
// logger('purify_output: ' . $s);
|
|
||||||
|
|
||||||
try {
|
|
||||||
$dom = HTML5_Parser::parse($s);
|
|
||||||
} catch (DOMException $e) {
|
|
||||||
logger('scrape_dfrn: parse error: ' . $e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(! $dom) {
|
|
||||||
echo sprintf($template,$url,$url,'') . $str_tags;
|
|
||||||
killme();
|
|
||||||
}
|
|
||||||
|
|
||||||
$items = $dom->getElementsByTagName('title');
|
|
||||||
|
|
||||||
if($items) {
|
|
||||||
foreach($items as $item) {
|
|
||||||
$title = trim($item->textContent);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if(! $text) {
|
|
||||||
$divs = $dom->getElementsByTagName('div');
|
|
||||||
if($divs) {
|
|
||||||
foreach($divs as $div) {
|
|
||||||
$class = $div->getAttribute('class');
|
|
||||||
if($class && (stristr($class,'article') || stristr($class,'content'))) {
|
|
||||||
$items = $div->getElementsByTagName('p');
|
|
||||||
if($items) {
|
|
||||||
foreach($items as $item) {
|
|
||||||
$text = $item->textContent;
|
|
||||||
if(stristr($text,'<script')) {
|
|
||||||
$text = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$text = strip_tags($text);
|
|
||||||
if(strlen($text) < 100) {
|
|
||||||
$text = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$text = substr($text,0,250) . '...' ;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if($text)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(! $text) {
|
|
||||||
$items = $dom->getElementsByTagName('p');
|
|
||||||
if($items) {
|
|
||||||
foreach($items as $item) {
|
|
||||||
$text = $item->textContent;
|
|
||||||
if(stristr($text,'<script'))
|
|
||||||
continue;
|
|
||||||
$text = strip_tags($text);
|
|
||||||
if(strlen($text) < 100) {
|
|
||||||
$text = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$text = substr($text,0,250) . '...' ;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(! $text) {
|
|
||||||
logger('parsing meta');
|
|
||||||
$items = (isset($domhead) && is_object($domhead) ? $domhead->getElementsByTagName('meta') : null);
|
|
||||||
if($items) {
|
|
||||||
foreach($items as $item) {
|
|
||||||
$property = $item->getAttribute('property');
|
|
||||||
if($property && (stristr($property,':description'))) {
|
|
||||||
|
|
||||||
$text = $item->getAttribute('content');
|
|
||||||
if(stristr($text,'<script')) {
|
|
||||||
$text = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$text = strip_tags($text);
|
|
||||||
|
|
||||||
|
|
||||||
$text = substr($text,0,250) . '...' ;
|
|
||||||
}
|
|
||||||
if($property && (stristr($property,':image'))) {
|
|
||||||
|
|
||||||
$image = $item->getAttribute('content');
|
|
||||||
if(stristr($text,'<script')) {
|
|
||||||
$image = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$image = strip_tags($image);
|
|
||||||
|
|
||||||
$i = fetch_url($image);
|
|
||||||
if($i) {
|
|
||||||
require_once('include/Photo.php');
|
|
||||||
// guess mimetype from headers or filename
|
|
||||||
$type = guess_image_type($image,true);
|
|
||||||
|
|
||||||
$ph = new Photo($i, $type);
|
|
||||||
if($ph->is_valid()) {
|
|
||||||
if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
|
|
||||||
$ph->scaleImage(300);
|
|
||||||
$new_width = $ph->getWidth();
|
|
||||||
$new_height = $ph->getHeight();
|
|
||||||
if($textmode)
|
if($textmode)
|
||||||
$image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
|
$image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]';
|
||||||
else
|
else
|
||||||
$image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
|
$image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" />';
|
||||||
}
|
|
||||||
else {
|
|
||||||
if($textmode)
|
|
||||||
$image = $br . $br . '[img]' . $image . '[/img]';
|
|
||||||
else
|
|
||||||
$image = '<br /><br /><img src="' . $image . '" alt="photo" />';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
$image = '';
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(strlen($text)) {
|
if(strlen($text)) {
|
||||||
if($textmode)
|
if($textmode)
|
||||||
$text = $br .$br . '[quote]' . $text . '[/quote]' . $br ;
|
$text = $br.'[quote]'.trim($text).'[/quote]'.$br ;
|
||||||
else
|
else
|
||||||
$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
|
$text = '<br /><blockquote>'.trim($text).'</blockquote><br />';
|
||||||
}
|
}
|
||||||
|
|
||||||
if($image) {
|
if($image) {
|
||||||
$text = $image . $br . $text;
|
$text = $br.$br.$image.$text;
|
||||||
}
|
}
|
||||||
$title = str_replace(array("\r","\n"),array('',''),$title);
|
$title = str_replace(array("\r","\n"),array('',''),$title);
|
||||||
|
|
||||||
|
@ -262,6 +324,6 @@ function parse_url_content(&$a) {
|
||||||
|
|
||||||
logger('parse_url: returns: ' . $result);
|
logger('parse_url: returns: ' . $result);
|
||||||
|
|
||||||
echo $result;
|
echo trim($result);
|
||||||
killme();
|
killme();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue