Merge pull request #14151 from annando/local-link

Probing for Zot improved and Pumpio removed
This commit is contained in:
Hypolite Petovan 2024-05-12 21:03:20 -04:00 committed by GitHub
commit c604477cac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 83 additions and 271 deletions

View file

@ -184,6 +184,12 @@ class Media
{ {
if (Network::isLocalLink($media['url'])) { if (Network::isLocalLink($media['url'])) {
$media = self::fetchLocalData($media); $media = self::fetchLocalData($media);
if (preg_match('|.*?/search\?(.+)|', $media['url'], $matches)) {
return $media;
}
if (empty($media['mimetype']) || empty($media['size'])) {
Logger::debug('Unknown local link', ['url' => $media['url']]);
}
} }
// Fetch the mimetype or size if missing. // Fetch the mimetype or size if missing.
@ -391,7 +397,7 @@ class Media
*/ */
private static function fetchLocalData(array $media): array private static function fetchLocalData(array $media): array
{ {
if (preg_match('|.*?/attach/(\d+)|', $media['url'] ?? '', $matches)) { if (preg_match('|.*?/attach/(\d+)|', $media['url'], $matches)) {
$attachment = Attach::selectFirst(['filename', 'filetype', 'filesize'], ['id' => $matches[1]]); $attachment = Attach::selectFirst(['filename', 'filetype', 'filesize'], ['id' => $matches[1]]);
if (!empty($attachment)) { if (!empty($attachment)) {
$media['name'] = $attachment['filename']; $media['name'] = $attachment['filename'];
@ -401,7 +407,7 @@ class Media
return $media; return $media;
} }
if (!preg_match('|.*?/photo/(.*[a-fA-F0-9])\-(.*[0-9])\..*[\w]|', $media['url'] ?? '', $matches)) { if (!preg_match('|.*?/photo/(.*[a-fA-F0-9])\-(.*[0-9])\..*[\w]|', $media['url'], $matches)) {
return $media; return $media;
} }
$photo = Photo::selectFirst([], ['resource-id' => $matches[1], 'scale' => $matches[2]]); $photo = Photo::selectFirst([], ['resource-id' => $matches[1], 'scale' => $matches[2]]);

View file

@ -112,12 +112,14 @@ class Probe
*/ */
private static function rearrangeData(array $data): array private static function rearrangeData(array $data): array
{ {
$fields = ['name', 'given_name', 'family_name', 'nick', 'guid', 'url', 'addr', 'alias', $fields = [
'photo', 'photo_medium', 'photo_small', 'header', 'name', 'given_name', 'family_name', 'nick', 'guid', 'url', 'addr', 'alias',
'account-type', 'community', 'keywords', 'location', 'about', 'xmpp', 'matrix', 'photo', 'photo_medium', 'photo_small', 'header',
'hide', 'batch', 'notify', 'poll', 'request', 'confirm', 'subscribe', 'poco', 'account-type', 'community', 'keywords', 'location', 'about', 'xmpp', 'matrix',
'following', 'followers', 'inbox', 'outbox', 'sharedinbox', 'hide', 'batch', 'notify', 'poll', 'request', 'confirm', 'subscribe', 'poco',
'priority', 'network', 'pubkey', 'manually-approve', 'baseurl', 'gsid']; 'following', 'followers', 'inbox', 'outbox', 'sharedinbox',
'priority', 'network', 'pubkey', 'manually-approve', 'baseurl', 'gsid'
];
$numeric_fields = ['gsid', 'account-type']; $numeric_fields = ['gsid', 'account-type'];
$boolean_fields = ['hide', 'manually-approve']; $boolean_fields = ['hide', 'manually-approve'];
@ -149,7 +151,7 @@ class Probe
} }
$newdata['networks'] = []; $newdata['networks'] = [];
foreach ([Protocol::DIASPORA, Protocol::OSTATUS] as $network) { foreach ([Protocol::DIASPORA] as $network) {
if (!empty($data['networks'][$network])) { if (!empty($data['networks'][$network])) {
$data['networks'][$network]['subscribe'] = $newdata['subscribe'] ?? ''; $data['networks'][$network]['subscribe'] = $newdata['subscribe'] ?? '';
if (empty($data['networks'][$network]['baseurl'])) { if (empty($data['networks'][$network]['baseurl'])) {
@ -256,7 +258,7 @@ class Probe
$xml = $curlResult->getBodyString(); $xml = $curlResult->getBodyString();
$xrd = XML::parseString($xml, true); $xrd = XML::parseString($xml, true);
$host_url = 'http://'.$host; $host_url = 'http://' . $host;
} }
if (!is_object($xrd)) { if (!is_object($xrd)) {
Logger::info('No xrd object found', ['host' => $host]); Logger::info('No xrd object found', ['host' => $host]);
@ -765,14 +767,9 @@ class Probe
} }
if ((!$result && ($network == '')) || ($network == Protocol::OSTATUS)) { if ((!$result && ($network == '')) || ($network == Protocol::OSTATUS)) {
$result = self::ostatus($webfinger); $result = self::ostatus($webfinger);
} else {
$result['networks'][Protocol::OSTATUS] = self::ostatus($webfinger);
} }
if (in_array($network, ['', Protocol::ZOT])) { if (in_array($network, ['', Protocol::ZOT])) {
$result = self::zot($webfinger, $result, $baseurl); $result = self::zot($webfinger, $result);
}
if ((!$result && ($network == '')) || ($network == Protocol::PUMPIO)) {
$result = self::pumpio($webfinger, $addr, $baseurl);
} }
if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) { if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) {
$result = self::feed($uri); $result = self::feed($uri);
@ -812,18 +809,19 @@ class Probe
* *
* @param array $webfinger Webfinger data * @param array $webfinger Webfinger data
* @param array $data previously probed data * @param array $data previously probed data
* @param string $baseUrl Base URL
* *
* @return array Zot data * @return array Zot data
* @throws HTTPException\InternalServerErrorException * @throws HTTPException\InternalServerErrorException
*/ */
private static function zot(array $webfinger, array $data, string $baseurl): array private static function zot(array $webfinger, array $data): array
{ {
if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) { if (empty($webfinger['aliases']) || !is_array($webfinger['aliases'])) {
foreach ($webfinger['aliases'] as $alias) { return $data;
if (substr($alias, 0, 5) == 'acct:') { }
$data['addr'] = substr($alias, 5);
} foreach ($webfinger['aliases'] as $alias) {
if (substr($alias, 0, 5) == 'acct:') {
$data['addr'] = substr($alias, 5);
} }
} }
@ -831,30 +829,22 @@ class Probe
$data['addr'] = substr($webfinger['subject'], 5); $data['addr'] = substr($webfinger['subject'], 5);
} }
$zot_url = '';
foreach ($webfinger['links'] as $link) { foreach ($webfinger['links'] as $link) {
if (($link['rel'] == 'http://purl.org/zot/protocol') && !empty($link['href'])) { if (($link['rel'] == 'http://purl.org/zot/protocol/6.0') && !empty($link['href'])) {
$zot_url = $link['href']; $zot_url = $link['href'];
} }
} }
if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) {
$condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']];
if (!DBA::exists('gserver', $condition)) {
return $data;
}
$zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr'];
}
if (empty($zot_url)) { if (empty($zot_url)) {
return $data; return $data;
} }
$data = self::pollZot($zot_url, $data); $data = self::pollZot($zot_url, $data);
if (!empty($data['url']) && !empty($webfinger['aliases']) && is_array($webfinger['aliases'])) { if (!empty($data['url'])) {
foreach ($webfinger['aliases'] as $alias) { foreach ($webfinger['aliases'] as $alias) {
if (!strstr($alias, '@') && Strings::normaliseLink($alias) != Strings::normaliseLink($data['url'])) { if (Network::isValidHttpUrl($alias) && Strings::normaliseLink($alias) != Strings::normaliseLink($data['url'])) {
$data['alias'] = $alias; $data['alias'] = $alias;
} }
} }
@ -863,9 +853,9 @@ class Probe
return $data; return $data;
} }
public static function pollZot(string $url, array $data): array private static function pollZot(string $url, array $data): array
{ {
$curlResult = DI::httpClient()->get($url, HttpClientAccept::JSON, [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]); $curlResult = DI::httpClient()->get($url, 'application/x-zot+json', [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]);
if ($curlResult->isTimeout()) { if ($curlResult->isTimeout()) {
return $data; return $data;
} }
@ -880,36 +870,41 @@ class Probe
} }
if (empty($data['network'])) { if (empty($data['network'])) {
if (!empty($json['protocols']) && in_array('zot', $json['protocols'])) { if (!empty($json['protocols']) && in_array('zot6', $json['protocols'])) {
$data['network'] = Protocol::ZOT;
} elseif (!isset($json['protocols'])) {
$data['network'] = Protocol::ZOT; $data['network'] = Protocol::ZOT;
} }
} }
if (!empty($json['guid']) && empty($data['guid'])) { if (!empty($json['public_key'])) {
$data['guid'] = $json['guid']; $data['pubkey'] = $json['public_key'];
}
if (!empty($json['key']) && empty($data['pubkey'])) {
$data['pubkey'] = $json['key'];
} }
if (!empty($json['name'])) { if (!empty($json['name'])) {
$data['name'] = $json['name']; $data['name'] = $json['name'];
} }
if (!empty($json['photo'])) { if (!empty($json['username'])) {
$data['photo'] = $json['photo']; $data['nick'] = $json['username'];
if (!empty($json['photo_updated'])) { }
$data['photo'] .= '?rev=' . urlencode($json['photo_updated']); if (!empty($json['photo']) && !empty($json['photo']['url'])) {
$data['photo'] = $json['photo']['url'];
}
if (!empty($json['locations'])) {
foreach ($json['locations'] as $location) {
if ($location['deleted'] || (parse_url($url, PHP_URL_HOST) != $location['host'])) {
continue;
}
if (!empty($location['address'])) {
$data['addr'] = $location['address'];
}
if (!empty($location['id_url'])) {
$data['url'] = $location['id_url'];
}
if (!empty($location['callback'])) {
$data['confirm'] = $location['callback'];
}
} }
} }
if (!empty($json['address'])) { if (!empty($json['primary_location']) && !empty($json['primary_location']['connections_url'])) {
$data['addr'] = $json['address']; $data['poco'] = $json['primary_location']['connections_url'];
}
if (!empty($json['url'])) {
$data['url'] = $json['url'];
}
if (!empty($json['connections_url'])) {
$data['poco'] = $json['connections_url'];
} }
if (isset($json['searchable'])) { if (isset($json['searchable'])) {
$data['hide'] = !$json['searchable']; $data['hide'] = !$json['searchable'];
@ -917,6 +912,8 @@ class Probe
if (!empty($json['public_forum'])) { if (!empty($json['public_forum'])) {
$data['community'] = $json['public_forum']; $data['community'] = $json['public_forum'];
$data['account-type'] = User::PAGE_FLAGS_COMMUNITY; $data['account-type'] = User::PAGE_FLAGS_COMMUNITY;
} elseif ($json['channel_type'] == 'normal') {
$data['account-type'] = User::PAGE_FLAGS_NORMAL;
} }
if (!empty($json['profile'])) { if (!empty($json['profile'])) {
@ -958,7 +955,7 @@ class Probe
* @return array webfinger data * @return array webfinger data
* @throws HTTPException\InternalServerErrorException * @throws HTTPException\InternalServerErrorException
*/ */
public static function webfinger(string $url, string $type): array private static function webfinger(string $url, string $type): array
{ {
try { try {
$curlResult = DI::httpClient()->get( $curlResult = DI::httpClient()->get(
@ -1133,86 +1130,6 @@ class Probe
return $data; return $data;
} }
/**
 * Count how many of the mandatory DFRN fields are missing from a data set
 *
 * @param array $data DFRN data
 *
 * @return int Number of missing fields, 0 when all of them are present
 */
public static function validDfrn(array $data): int
{
    $required = ['key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify', 'dfrn-poll'];

    $missing = 0;
    foreach ($required as $field) {
        // isset() on purpose: a field that exists but holds null counts as missing
        if (!isset($data[$field])) {
            $missing++;
        }
    }

    return $missing;
}
/**
 * Collect the DFRN related data of a profile page
 *
 * The "noscrape" endpoint is asked first. The hCard is only polled when that
 * answer lacks one of the needed fields, and a regular probe of the link is
 * only done when the address or the nickname is still unknown afterwards.
 *
 * @param string $profile_link Link to the profile page
 * @return array profile data
 * @throws HTTPException\InternalServerErrorException
 * @throws \ImagickException
 */
public static function profile(string $profile_link): array
{
    Logger::info('Check profile', ['link' => $profile_link]);

    // Fetch data via noscrape - this is faster
    $noscrape_url = str_replace(['/hcard/', '/profile/'], '/noscrape/', $profile_link);
    $data         = self::pollNoscrape($noscrape_url, []);

    // isset() with several arguments is only true when every single one is set
    $complete = isset(
        $data['notify'],
        $data['confirm'],
        $data['request'],
        $data['poll'],
        $data['name'],
        $data['photo']
    );
    if (!$complete) {
        $data = self::pollHcard($profile_link, $data, true);
    }

    if (empty($data['addr']) || empty($data['nick'])) {
        $probe_data = self::uri($profile_link);

        // Keep the values we already have, take the probed ones only as a fallback
        foreach (['addr', 'nick'] as $field) {
            $data[$field] = !empty($data[$field]) ? $data[$field] : $probe_data[$field];
        }
    }

    $result = [
        'addr'         => $data['addr'],
        'nick'         => $data['nick'],
        'dfrn-request' => $data['request'] ?? null,
        'dfrn-confirm' => $data['confirm'] ?? null,
        'dfrn-notify'  => $data['notify'] ?? null,
        'dfrn-poll'    => $data['poll'] ?? null,
        'photo'        => $data['photo'] ?? null,
        'fn'           => $data['name'] ?? null,
        'key'          => $data['pubkey'] ?? null,
    ];

    Logger::debug('Result', ['link' => $profile_link, 'data' => $result]);

    return $result;
}
/** /**
* Check for DFRN contact * Check for DFRN contact
* *
@ -1276,7 +1193,8 @@ class Probe
$noscrape_url = str_replace('/hcard/', '/noscrape/', $hcard_url); $noscrape_url = str_replace('/hcard/', '/noscrape/', $hcard_url);
$data = self::pollNoscrape($noscrape_url, $data); $data = self::pollNoscrape($noscrape_url, $data);
if (isset($data['notify']) if (
isset($data['notify'])
&& isset($data['confirm']) && isset($data['confirm'])
&& isset($data['request']) && isset($data['request'])
&& isset($data['poll']) && isset($data['poll'])
@ -1485,7 +1403,7 @@ class Probe
if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) { if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
foreach ($webfinger['aliases'] as $alias) { foreach ($webfinger['aliases'] as $alias) {
if (Strings::normaliseLink($alias) != Strings::normaliseLink($data['url']) && ! strstr($alias, '@')) { if (Strings::normaliseLink($alias) != Strings::normaliseLink($data['url']) && !strstr($alias, '@')) {
$data['alias'] = $alias; $data['alias'] = $alias;
} elseif (substr($alias, 0, 5) == 'acct:') { } elseif (substr($alias, 0, 5) == 'acct:') {
$data['addr'] = substr($alias, 5); $data['addr'] = substr($alias, 5);
@ -1504,7 +1422,8 @@ class Probe
return []; return [];
} }
if (!empty($data['url']) if (
!empty($data['url'])
&& !empty($data['guid']) && !empty($data['guid'])
&& !empty($data['baseurl']) && !empty($data['baseurl'])
&& !empty($data['pubkey']) && !empty($data['pubkey'])
@ -1549,7 +1468,8 @@ class Probe
} }
} }
if (!empty($webfinger['subject']) && strstr($webfinger['subject'], '@') if (
!empty($webfinger['subject']) && strstr($webfinger['subject'], '@')
&& !strstr(Strings::normaliseLink($webfinger['subject']), 'http://') && !strstr(Strings::normaliseLink($webfinger['subject']), 'http://')
) { ) {
$data['addr'] = str_replace('acct:', '', $webfinger['subject']); $data['addr'] = str_replace('acct:', '', $webfinger['subject']);
@ -1590,13 +1510,13 @@ class Probe
try { try {
$data['pubkey'] = Salmon::magicKeyToPem($pubkey); $data['pubkey'] = Salmon::magicKeyToPem($pubkey);
} catch (\Throwable $e) { } catch (\Throwable $e) {
} }
} }
} }
} }
if (isset($data['notify']) && isset($data['pubkey']) if (
isset($data['notify']) && isset($data['pubkey'])
&& isset($data['poll']) && isset($data['poll'])
&& isset($data['url']) && isset($data['url'])
) { ) {
@ -1654,124 +1574,6 @@ class Probe
return $data; return $data;
} }
/**
 * Fetch data from a pump.io profile page
 *
 * @param string $profile_link Link to the profile page
 * @param string $baseurl      Base URL that is handed to Network::addBasePath() together with the avatar source
 *
 * @return array Profile data, empty when the page could not be fetched or parsed
 */
private static function pumpioProfileData(string $profile_link, string $baseurl): array
{
    $curlResult = DI::httpClient()->get($profile_link, HttpClientAccept::HTML, [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]);
    if (!$curlResult->isSuccess() || empty($curlResult->getBodyString())) {
        return [];
    }

    $doc = new DOMDocument();
    if (!@$doc->loadHTML($curlResult->getBodyString())) {
        return [];
    }

    $xpath = new DomXPath($doc);

    $data = [];

    // item(0) is null when the query matches nothing, so the node is checked before it is read
    $name_node    = $xpath->query("//span[contains(@class, 'p-name')]")->item(0);
    $data['name'] = $name_node ? $name_node->nodeValue : '';

    if ($data['name'] == '') {
        // This is ugly - but pump.io doesn't seem to know a better way for it
        $header_node  = $xpath->query("//h1[@class='media-header']")->item(0);
        $data['name'] = $header_node ? trim((string)$header_node->nodeValue) : '';

        // Only the first line of the header is used as the name
        $pos = strpos($data['name'], chr(10));
        if ($pos) {
            $data['name'] = trim(substr($data['name'], 0, $pos));
        }
    }

    $data['location'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'p-locality')]");

    if ($data['location'] == '') {
        $data['location'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'location')]");
    }

    $data['about'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'p-note')]");

    if ($data['about'] == '') {
        $data['about'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'summary')]");
    }

    $avatar = $xpath->query("//img[contains(@class, 'u-photo')]")->item(0);
    if (!$avatar) {
        $avatar = $xpath->query("//img[@class='img-rounded media-object']")->item(0);
    }

    if ($avatar) {
        foreach ($avatar->attributes as $attribute) {
            if (($attribute->name == 'src') && !empty($attribute->value)) {
                $data['photo'] = Network::addBasePath($attribute->value, $baseurl);
            }
        }
    }

    return $data;
}
/**
 * Check for pump.io contact
 *
 * @param array  $webfinger Webfinger data
 * @param string $addr      When not empty, this string is stripped from the fetched profile name
 * @param string $baseurl   Base URL that is passed on to pumpioProfileData()
 *
 * @return array pump.io data, empty when the webfinger data lacks a needed link
 *               or the profile page could not be read
 */
private static function pumpio(array $webfinger, string $addr, string $baseurl): array
{
    // Without any links there is nothing to detect - and array_reverse() must not get a non-array
    if (empty($webfinger['links']) || !is_array($webfinger['links'])) {
        return [];
    }

    $data = [];

    // The array is reversed to take into account the order of preference for same-rel links
    // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
    foreach (array_reverse($webfinger['links']) as $link) {
        // "rel" and "href" are read defensively, the same way "type" is
        $rel  = $link['rel'] ?? '';
        $href = $link['href'] ?? '';

        // Every link type below needs a target
        if ($href == '') {
            continue;
        }

        if (($rel == 'http://webfinger.net/rel/profile-page') && (($link['type'] ?? '') == 'text/html')) {
            $data['url'] = $href;
        } elseif ($rel == 'activity-inbox') {
            $data['notify'] = $href;
        } elseif ($rel == 'activity-outbox') {
            $data['poll'] = $href;
        } elseif ($rel == 'dialback') {
            $data['dialback'] = $href;
        }
    }

    if (!isset($data['poll'], $data['notify'], $data['dialback'], $data['url'])) {
        return [];
    }

    // by now we use these fields only for the network type detection
    // So we unset all data that isn't used at the moment
    unset($data['dialback']);

    $data['network'] = Protocol::PUMPIO;

    $profile_data = self::pumpioProfileData($data['url'], $baseurl);
    if (!$profile_data) {
        return [];
    }

    $data = array_merge($data, $profile_data);

    if (($addr != '') && ($data['name'] != '')) {
        $name = trim(str_replace($addr, '', $data['name']));
        if ($name != '') {
            $data['name'] = $name;
        }
    }

    return $data;
}
/** /**
* Checks HTML page for RSS feed link * Checks HTML page for RSS feed link
* *
@ -1881,7 +1683,7 @@ class Probe
{ {
try { try {
$curlResult = DI::httpClient()->get($url, HttpClientAccept::FEED_XML, [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]); $curlResult = DI::httpClient()->get($url, HttpClientAccept::FEED_XML, [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]);
} catch(\Throwable $e) { } catch (\Throwable $e) {
DI::logger()->info('Error requesting feed URL', ['url' => $url, 'exception' => $e]); DI::logger()->info('Error requesting feed URL', ['url' => $url, 'exception' => $e]);
return []; return [];
} }
@ -2055,7 +1857,7 @@ class Probe
$query = isset($parts['query']) ? '?' . $parts['query'] : ''; $query = isset($parts['query']) ? '?' . $parts['query'] : '';
$fragment = isset($parts['fragment']) ? '#' . $parts['fragment'] : ''; $fragment = isset($parts['fragment']) ? '#' . $parts['fragment'] : '';
$fixed = $scheme.$host.$port.$path.$query.$fragment; $fixed = $scheme . $host . $port . $path . $query . $fragment;
Logger::debug('Avatar fixed', ['base' => $base, 'avatar' => $avatar, 'fixed' => $fixed]); Logger::debug('Avatar fixed', ['base' => $base, 'avatar' => $avatar, 'fixed' => $fixed]);
@ -2108,8 +1910,10 @@ class Probe
} }
// Check the 'noscrape' endpoint when it is a Friendica server // Check the 'noscrape' endpoint when it is a Friendica server
$gserver = DBA::selectFirst('gserver', ['noscrape'], ["`nurl` = ? AND `noscrape` != ''", $gserver = DBA::selectFirst('gserver', ['noscrape'], [
Strings::normaliseLink($data['baseurl'])]); "`nurl` = ? AND `noscrape` != ''",
Strings::normaliseLink($data['baseurl'])
]);
if (!DBA::isResult($gserver)) { if (!DBA::isResult($gserver)) {
return ''; return '';
} }
@ -2207,7 +2011,7 @@ class Probe
foreach ($entries as $entry) { foreach ($entries as $entry) {
$published_item = $xpath->query('atom:published/text()', $entry)->item(0); $published_item = $xpath->query('atom:published/text()', $entry)->item(0);
$updated_item = $xpath->query('atom:updated/text()' , $entry)->item(0); $updated_item = $xpath->query('atom:updated/text()', $entry)->item(0);
$published = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null; $published = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null;
$updated = !empty($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : null; $updated = !empty($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : null;

View file

@ -386,10 +386,12 @@ class Feed
$orig_plink = $item['plink']; $orig_plink = $item['plink'];
try { if (!$dryRun) {
$item['plink'] = DI::httpClient()->finalUrl($item['plink']); try {
} catch (TransferException $exception) { $item['plink'] = DI::httpClient()->finalUrl($item['plink']);
Logger::notice('Item URL couldn\'t get expanded', ['url' => $item['plink'], 'exception' => $exception]); } catch (TransferException $exception) {
Logger::notice('Item URL couldn\'t get expanded', ['url' => $item['plink'], 'exception' => $exception]);
}
} }
if (empty($item['title'])) { if (empty($item['title'])) {