Improved probe_url, fixed wrong network detection.

This commit is contained in:
Michael Vogel 2015-02-16 22:11:51 +01:00
parent 2dafc9eac1
commit 8b1b886797
3 changed files with 88 additions and 26 deletions

View file

@ -374,6 +374,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_APPNET; $network = NETWORK_APPNET;
} }
// Twitter is deactivated since twitter closed its old API // Twitter is deactivated since twitter closed its old API
//$twitter = ((strpos($url,'twitter.com') !== false) ? true : false); //$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
$lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false); $lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false);
@ -569,6 +570,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_DIASPORA; $network = NETWORK_DIASPORA;
elseif($has_lrdd) elseif($has_lrdd)
$network = NETWORK_OSTATUS; $network = NETWORK_OSTATUS;
if(strpos($url,'@'))
$addr = str_replace('acct:', '', $url);
$priority = 0; $priority = 0;
if($hcard && ! $vcard) { if($hcard && ! $vcard) {
@ -762,6 +767,22 @@ function probe_url($url, $mode = PROBE_NORMAL) {
if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn'))) if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn')))
$vcard['fn'] = $url; $vcard['fn'] = $url;
if (($notify != "") AND ($poll != "")) {
$baseurl = matching($notify, $poll);
$baseurl2 = matching($baseurl, $profile);
if ($baseurl2 != "")
$baseurl = $baseurl2;
}
if (($baseurl == "") AND ($notify != ""))
$baseurl = matching($profile, $notify);
if (($baseurl == "") AND ($poll != ""))
$baseurl = matching($profile, $poll);
$baseurl = rtrim($baseurl, "/");
$vcard['fn'] = notags($vcard['fn']); $vcard['fn'] = notags($vcard['fn']);
$vcard['nick'] = str_replace(' ','',notags($vcard['nick'])); $vcard['nick'] = str_replace(' ','',notags($vcard['nick']));
@ -780,14 +801,17 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$result['network'] = $network; $result['network'] = $network;
$result['alias'] = $alias; $result['alias'] = $alias;
$result['pubkey'] = $pubkey; $result['pubkey'] = $pubkey;
$result['baseurl'] = $baseurl;
logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG); logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG);
// Trying if it maybe a diaspora account // Trying if it maybe a diaspora account
if ($result['network'] == NETWORK_FEED) { //if (($result['network'] == NETWORK_FEED) OR (($result['addr'] == "") AND ($result['network'] != NETWORK_OSTATUS))) {
if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) {
require_once('include/bbcode.php'); require_once('include/bbcode.php');
$address = GetProfileUsername($url, "", true); $address = GetProfileUsername($url, "", true);
$result2 = probe_url($address, $mode); $result2 = probe_url($address, $mode);
//$result2 = probe_url($address, PROBE_DIASPORA);
if ($result2['network'] != "") if ($result2['network'] != "")
$result = $result2; $result = $result2;
} }
@ -796,3 +820,20 @@ function probe_url($url, $mode = PROBE_NORMAL) {
return $result; return $result;
} }
/**
 * Returns the longest common leading part of two strings.
 *
 * The strings are compared byte by byte from offset 0. The comparison stops
 * at the first differing byte or at the end of the shorter string.
 *
 * @param string $part1 First string
 * @param string $part2 Second string
 * @return string The shared prefix, or an empty string if there is none
 */
function matching($part1, $part2) {
	// Normalise to strings so that unset/null arguments behave like ""
	$part1 = (string)$part1;
	$part2 = (string)$part2;

	$len = min(strlen($part1), strlen($part2));

	// Stay strictly below $len: reading offset $len would compare
	// out-of-range substr() results (false before PHP 8, "" since PHP 8)
	// and only yield the right answer by accident.
	$i = 0;
	while (($i < $len) AND ($part1[$i] === $part2[$i]))
		$i++;

	// substr("", 0, 0) returns false before PHP 8, so guard the empty case
	// to always hand back a string.
	return(($i > 0) ? substr($part1, 0, $i) : "");
}

View file

@ -1376,9 +1376,6 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$current_post = $r[0]['id']; $current_post = $r[0]['id'];
logger('item_store: created item ' . $current_post); logger('item_store: created item ' . $current_post);
// Add every contact to the global contact table
poco_store($arr);
/* /*
// Is it a global copy? // Is it a global copy?
$store_gcontact = ($arr["uid"] == 0); $store_gcontact = ($arr["uid"] == 0);
@ -1511,7 +1508,7 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$deleted = tag_deliver($arr['uid'],$current_post); $deleted = tag_deliver($arr['uid'],$current_post);
// current post can be deleted if is for a communuty page and no mention are // current post can be deleted if is for a community page and no mention are
// in it. // in it.
if (!$deleted AND !$dontcache) { if (!$deleted AND !$dontcache) {
@ -1521,10 +1518,12 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post)); $r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post));
if (count($r) == 1) { if (count($r) == 1) {
call_hooks('post_remote_end', $r[0]); call_hooks('post_remote_end', $r[0]);
} else { } else
logger('item_store: new item not found in DB, id ' . $current_post); logger('item_store: new item not found in DB, id ' . $current_post);
} }
}
// Add every contact of the post to the global contact table
poco_store($arr);
create_tags_from_item($current_post); create_tags_from_item($current_post);
create_files_from_item($current_post); create_files_from_item($current_post);

View file

@ -78,9 +78,6 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
$gender = ''; $gender = '';
$generation = 0; $generation = 0;
if ($uid == 0)
$network = NETWORK_DFRN;
$name = $entry->displayName; $name = $entry->displayName;
if(isset($entry->urls)) { if(isset($entry->urls)) {
@ -126,6 +123,10 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
foreach($entry->tags as $tag) foreach($entry->tags as $tag)
$keywords = implode(", ", $tag); $keywords = implode(", ", $tag);
// If you query a Friendica server for its profiles, the network has to be Friendica
if ($uid == 0)
$network = NETWORK_DFRN;
poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid, $uid, $zcid); poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid, $uid, $zcid);
// Update the Friendica contacts. Diaspora is doing it via a message. (See include/diaspora.php) // Update the Friendica contacts. Diaspora is doing it via a message. (See include/diaspora.php)
@ -151,6 +152,8 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
function poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid = 0, $uid = 0, $zcid = 0) { function poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid = 0, $uid = 0, $zcid = 0) {
$a = get_app();
// Generation: // Generation:
// 0: No definition // 0: No definition
// 1: Profiles on this server // 1: Profiles on this server
@ -163,15 +166,24 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if ($profile_url == "") if ($profile_url == "")
return $gcid; return $gcid;
$r = q("SELECT `network` FROM `contact` WHERE `nurl` = '%s' AND `network` != '' LIMIT 1", // Don't store the statusnet connector as network
dbesc(normalise_link($profile_url)) // We can't simply set this to NETWORK_OSTATUS since the connector could have fetched posts from friendica as well
if ($network == NETWORK_STATUSNET)
$network = "";
// The global contacts should contain the original picture, not the cached one
if (($generation != 1) AND stristr(normalise_link($profile_photo), normalise_link($a->get_baseurl()."/photo/")))
$profile_photo = "";
$r = q("SELECT `network` FROM `contact` WHERE `nurl` = '%s' AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
); );
if(count($r)) if(count($r))
$network = $r[0]["network"]; $network = $r[0]["network"];
if ($network == "") { if (($network == "") OR ($network == NETWORK_OSTATUS)) {
$r = q("SELECT `network`, `url` FROM `contact` WHERE `alias` IN ('%s', '%s') AND `network` != '' LIMIT 1", $r = q("SELECT `network`, `url` FROM `contact` WHERE `alias` IN ('%s', '%s') AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc($profile_url), dbesc(normalise_link($profile_url)) dbesc($profile_url), dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
); );
if(count($r)) { if(count($r)) {
$network = $r[0]["network"]; $network = $r[0]["network"];
@ -182,15 +194,16 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
$x = q("SELECT * FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1", $x = q("SELECT * FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($profile_url)) dbesc(normalise_link($profile_url))
); );
if(count($x) AND ($network == "")) if(count($x) AND ($network == "") AND ($x[0]["network"] != NETWORK_STATUSNET))
$network = $x[0]["network"]; $network = $x[0]["network"];
if (($network == "") OR ($name == "") OR ($profile_photo == "")) { if (($network == "") OR ($name == "") OR ($profile_photo == "")) {
require_once("include/Scrape.php"); require_once("include/Scrape.php");
$data = probe_url($profile_url, PROBE_DIASPORA); $data = probe_url($profile_url);
$network = $data["network"]; $network = $data["network"];
$name = $data["name"]; $name = $data["name"];
$profile_url = $data["url"];
$profile_photo = $data["photo"]; $profile_photo = $data["photo"];
} }
@ -204,7 +217,7 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if (($name == "") OR ($profile_photo == "")) if (($name == "") OR ($profile_photo == ""))
return $gcid; return $gcid;
if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA, NETWORK_STATUSNET))) if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA)))
return $gcid; return $gcid;
logger("profile-check generation: ".$generation." Network: ".$network." URL: ".$profile_url." name: ".$name." avatar: ".$profile_photo, LOGGER_DEBUG); logger("profile-check generation: ".$generation." Network: ".$network." URL: ".$profile_url." name: ".$name." avatar: ".$profile_photo, LOGGER_DEBUG);
@ -333,11 +346,11 @@ function sub_poco_from_share($share, $created, $cid, $uid) {
function poco_store($item) { function poco_store($item) {
// Isn't it public? // Isn't it public?
if (!$item['private']) if ($item['private'])
return; return;
// Or is it from a network where we don't store the global contacts? // Or is it from a network where we don't store the global contacts?
if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, ""))) if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, NETWORK_STATUSNET, "")))
return; return;
// Is it a global copy? // Is it a global copy?
@ -355,25 +368,34 @@ function poco_store($item) {
// "3" means: We don't know this contact directly (Maybe a reshared item) // "3" means: We don't know this contact directly (Maybe a reshared item)
$generation = 3; $generation = 3;
$network = ""; $network = "";
$profile_url = $item["author-link"];
// Is it a user from our server? // Is it a user from our server?
$q = q("SELECT `id` FROM `contact` WHERE `self` AND `nurl` = '%s' LIMIT 1", $q = q("SELECT `id` FROM `contact` WHERE `self` AND `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($item["author-link"]))); dbesc(normalise_link($item["author-link"])));
if (count($q)) { if (count($q)) {
logger("Our user (generation 1): ".$item["author-link"], LOGGER_DEBUG);
$generation = 1; $generation = 1;
$network = NETWORK_DFRN; $network = NETWORK_DFRN;
} else { // Is it a contact from a user on our server? } else { // Is it a contact from a user on our server?
$q = q("SELECT `network` FROM `contact` WHERE `uid` != 0 AND `network` != '' $q = q("SELECT `network`, `url` FROM `contact` WHERE `uid` != 0 AND `network` != ''
AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) LIMIT 1", AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) AND `network` != '%s' LIMIT 1",
dbesc(normalise_link($item["author-link"])), dbesc(normalise_link($item["author-link"])),
dbesc(normalise_link($item["author-link"])), dbesc(normalise_link($item["author-link"])),
dbesc($item["author-link"])); dbesc($item["author-link"]),
dbesc(NETWORK_STATUSNET));
if (count($q)) { if (count($q)) {
$generation = 2; $generation = 2;
$network = $q[0]["network"]; $network = $q[0]["network"];
$profile_url = $q[0]["url"];
logger("Known contact (generation 2): ".$profile_url, LOGGER_DEBUG);
} }
} }
poco_check($item["author-link"], $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);
if ($generation == 3)
logger("Unknown contact (generation 3): ".$item["author-link"], LOGGER_DEBUG);
poco_check($profile_url, $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);
// Maybe its a body with a shared item? Then extract a global contact from it. // Maybe its a body with a shared item? Then extract a global contact from it.
poco_contact_from_body($item["body"], $item["received"], $item["contact-id"], $item["uid"]); poco_contact_from_body($item["body"], $item["received"], $item["contact-id"], $item["uid"]);