Improved server detection

This commit is contained in:
Michael 2022-07-10 13:01:47 +00:00
parent cc75eb5d18
commit 85c7bacb00
3 changed files with 87 additions and 48 deletions

View file

@ -186,11 +186,12 @@ class GServer
return self::check($server, $network, $force);
}
public static function getNextUpdateDate(bool $success, string $created = '', string $last_contact = '')
public static function getNextUpdateDate(bool $success, string $created = '', string $last_contact = '', bool $undetected = false)
{
// On successful contact process check again next week
// After a successful contact, check again in a week if the system was detected.
// If the system could not be detected, it may be a static website or a very old system, so check again in a month.
if ($success) {
return DateTimeFormat::utc('now +7 day');
return DateTimeFormat::utc($undetected ? 'now +1 month' : 'now +7 day');
}
$now = strtotime(DateTimeFormat::utcNow());
@ -331,6 +332,11 @@ class GServer
// Remove URL content that is not supposed to exist for a server url
$url = rtrim(self::cleanURL($url), '/');
if (empty($url)) {
Logger::notice('Empty URL.');
return false;
}
if (!Network::isUrlValid($url)) {
self::setFailure($url);
return false;
@ -352,6 +358,11 @@ class GServer
return false;
}
if (empty($finalurl)) {
Logger::notice('Empty redirected URL.', ['url' => $url]);
return false;
}
// We only follow redirects when the path stays the same or the target url has no path.
// Some systems redirect their landing page to a single account page; this check handles that case.
if (((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH))) ||
@ -367,10 +378,10 @@ class GServer
(parse_url($url, PHP_URL_SCHEME) != parse_url($finalurl, PHP_URL_SCHEME))) {
if (!Network::isUrlValid($finalurl)) {
self::setFailure($finalurl);
return false;
}
} else {
$url = $finalurl;
}
}
$in_webroot = empty(parse_url($url, PHP_URL_PATH));
@ -410,11 +421,11 @@ class GServer
if ($curlResult->isSuccess()) {
$json = json_decode($curlResult->getBody(), true);
if (!empty($json)) {
if (!empty($json) && is_array($json)) {
$data = self::fetchDataFromSystemActor($json, $serverdata);
$serverdata = $data['server'];
$systemactor = $data['actor'];
if (!$html_fetched && ($serverdata['detection-method'] == self::DETECT_AP_ACTOR)) {
if (!$html_fetched && !in_array($serverdata['detection-method'], [self::DETECT_SYSTEM_ACTOR, self::DETECT_AP_COLLECTION])) {
$curlResult = DI::httpClient()->get($url, HttpClientAccept::HTML);
}
} elseif (!$html_fetched && (strlen($curlResult->getBody()) < 1000)) {
@ -447,9 +458,8 @@ class GServer
}
if ($validHostMeta) {
if ($serverdata['detection-method'] == self::DETECT_MANUAL) {
if (in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER, self::DETECT_BODY])) {
$serverdata['detection-method'] = self::DETECT_HOST_META;
$serverdata['platform'] = '';
}
if (($serverdata['network'] == Protocol::PHANTOM) || in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) {
@ -476,6 +486,8 @@ class GServer
$serverdata = self::detectGNUSocial($url, $serverdata);
}
}
} elseif (in_array($serverdata['platform'], ['friendica', 'friendika']) && in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) {
$serverdata = self::detectFriendica($url, $serverdata);
}
if (($serverdata['network'] == Protocol::PHANTOM) || in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) {
@ -507,7 +519,8 @@ class GServer
// A new server does not have a gserver entry yet.
// However, "detectNetworkViaContacts" may update a contact,
// which in turn can call this function again.
if (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]) && self::getID($url, true)) {
if (self::getID($url, true) && (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]) ||
in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER, self::DETECT_BODY, self::DETECT_HOST_META]))) {
$serverdata = self::detectNetworkViaContacts($url, $serverdata);
}
@ -535,7 +548,7 @@ class GServer
$serverdata['registered-users'] = 0;
}
$serverdata['next_contact'] = self::getNextUpdateDate(true);
$serverdata['next_contact'] = self::getNextUpdateDate(true, '', '', in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]));
$serverdata['last_contact'] = DateTimeFormat::utcNow();
$serverdata['failed'] = false;
@ -1222,7 +1235,7 @@ class GServer
return ['server' => $serverdata, 'actor' => ''];
}
$actor = JsonLD::compact($data);
$actor = JsonLD::compact($data, false);
if (in_array(JsonLD::fetchElement($actor, '@type'), ActivityPub\Receiver::ACCOUNT_TYPES)) {
$serverdata['network'] = Protocol::ACTIVITYPUB;
$serverdata['site_name'] = JsonLD::fetchElement($actor, 'as:name', '@value');
@ -1842,16 +1855,17 @@ class GServer
}
$platforms = array_merge($ap_platforms, $dfrn_platforms, $zap_platforms, $platforms);
$valid_platforms = array_values($platforms);
$doc = new DOMDocument();
@$doc->loadHTML($curlResult->getBody());
$xpath = new DOMXPath($doc);
$assigned = false;
// We can only detect honk via some HTML element on their page
if ($xpath->query('//div[@id="honksonpage"]')->count() == 1) {
$serverdata['platform'] = 'honk';
$serverdata['network'] = Protocol::ACTIVITYPUB;
$assigned = true;
}
$title = trim(XML::getFirstNodeValue($xpath, '//head/title/text()'));
@ -1884,27 +1898,23 @@ class GServer
if (in_array($attr['name'], ['application-name', 'al:android:app_name', 'al:ios:app_name',
'twitter:app:name:googleplay', 'twitter:app:name:iphone', 'twitter:app:name:ipad', 'generator'])) {
$platform = str_replace(array_keys($platforms), array_values($platforms), $attr['content']);
$platform = strtolower(str_replace('/', ' ', $platform));
$version_part = explode(' ', $platform);
if (count($version_part) >= 2) {
if (in_array($version_part[0], array_values($dfrn_platforms))) {
$platform = str_ireplace(array_keys($platforms), array_values($platforms), $attr['content']);
$platform = str_replace('/', ' ', $platform);
$platform_parts = explode(' ', $platform);
if ((count($platform_parts) >= 2) && in_array(strtolower($platform_parts[0]), array_values($platforms))) {
$platform = $platform_parts[0];
$serverdata['version'] = $platform_parts[1];
}
if (in_array($platform, array_values($dfrn_platforms))) {
$serverdata['network'] = Protocol::DFRN;
} elseif (in_array($version_part[0], array_values($ap_platforms))) {
} elseif (in_array($platform, array_values($ap_platforms))) {
$serverdata['network'] = Protocol::ACTIVITYPUB;
} elseif (in_array($version_part[0], array_values($zap_platforms))) {
} elseif (in_array($platform, array_values($zap_platforms))) {
$serverdata['network'] = Protocol::ZOT;
}
if (in_array(strtolower($version_part[0]), $valid_platforms)) {
$platform = strtolower($version_part[0]);
$serverdata['version'] = $version_part[1];
}
}
if (in_array($platform, array_values($platforms))) {
$serverdata['platform'] = $platform;
} elseif (empty($serverdata['platform'])) {
print_r($attr);
$assigned = true;
}
}
}
@ -1939,8 +1949,7 @@ class GServer
if (in_array($attr['property'], ['og:platform', 'generator'])) {
if (in_array($attr['content'], array_keys($platforms))) {
$serverdata['platform'] = $platforms[$attr['content']];
} else {
print_r($attr);
$assigned = true;
}
if (in_array($attr['content'], array_keys($ap_platforms))) {
@ -1951,7 +1960,33 @@ class GServer
}
}
if (in_array($serverdata['platform'], $valid_platforms) && ($serverdata['detection-method'] == self::DETECT_MANUAL)) {
$list = $xpath->query('//link[@rel="me"]');
foreach ($list as $node) {
foreach ($node->attributes as $attribute) {
if (parse_url(trim($attribute->value), PHP_URL_HOST) == 'micro.blog') {
$serverdata['version'] = trim($serverdata['platform'] . ' ' . $serverdata['version']);
$serverdata['platform'] = 'microblog';
$serverdata['network'] = Protocol::ACTIVITYPUB;
$assigned = true;
}
}
}
if ($serverdata['platform'] != 'microblog') {
$list = $xpath->query('//link[@rel="micropub"]');
foreach ($list as $node) {
foreach ($node->attributes as $attribute) {
if (trim($attribute->value) == 'https://micro.blog/micropub') {
$serverdata['version'] = trim($serverdata['platform'] . ' ' . $serverdata['version']);
$serverdata['platform'] = 'microblog';
$serverdata['network'] = Protocol::ACTIVITYPUB;
$assigned = true;
}
}
}
}
if ($assigned && in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER])) {
$serverdata['detection-method'] = self::DETECT_BODY;
}

View file

@ -46,10 +46,11 @@ class Federation extends BaseAdmin
'gnusocial' => ['name' => 'GNU Social/Statusnet', 'color' => '#a22430'], // dark red from the logo
'gotosocial' => ['name' => 'GoToSocial', 'color' => '#df8958'], // Some color from their mascot
'hometown' => ['name' => 'Hometown', 'color' => '#1f70c1'], // Color from the Patreon page
'honk' => ['name' => 'Honk', 'color' => '##0d0d0d'], // Background color from the page
'hubzilla' => ['name' => 'Hubzilla/Red Matrix', 'color' => '#43488a'], // blue from the logo
'hugo' => ['name' => 'Hugo', 'color' => '#0a1922'], // Color from the homepage background
'lemmy' => ['name' => 'Lemmy', 'color' => '#00c853'], // Green from the page
'mastodon' => ['name' => 'Mastodon', 'color' => '#1a9df9'], // blue from the Mastodon logo
'microblog' => ['name' => 'Microblog', 'color' => '#fdb52b'], // Color from the page
'misskey' => ['name' => 'Misskey', 'color' => '#ccfefd'], // Font color of the homepage
'mobilizon' => ['name' => 'Mobilizon', 'color' => '#ffd599'], // Background color of parts of the homepage
'nextcloud' => ['name' => 'Nextcloud', 'color' => '#1cafff'], // Logo color
@ -85,7 +86,8 @@ class Federation extends BaseAdmin
SUM(IFNULL(`active-month-users`, `active-week-users`)) AS `month`,
SUM(IFNULL(`active-halfyear-users`, `active-week-users`)) AS `halfyear`, `platform`,
ANY_VALUE(`network`) AS `network`, MAX(`version`) AS `version`
FROM `gserver` WHERE NOT `failed` AND `detection-method` != ? AND NOT `network` IN (?, ?) GROUP BY `platform`", GServer::DETECT_MANUAL, Protocol::PHANTOM, Protocol::FEED);
FROM `gserver` WHERE NOT `failed` AND `platform` != ? AND `detection-method` != ? AND NOT `network` IN (?, ?) GROUP BY `platform`",
'', GServer::DETECT_MANUAL, Protocol::PHANTOM, Protocol::FEED);
while ($gserver = DBA::fetch($gservers)) {
$total += $gserver['total'];
$users += $gserver['users'];
@ -102,7 +104,7 @@ class Federation extends BaseAdmin
if (in_array($gserver['platform'], ['Red Matrix', 'redmatrix', 'red'])) {
$version['version'] = 'Red ' . $version['version'];
} elseif (in_array($gserver['platform'], ['osada', 'mistpark', 'roadhouse', 'zap'])) {
} elseif (in_array($gserver['platform'], ['osada', 'mistpark', 'roadhouse', 'zap', 'macgirvin', 'mkultra'])) {
$version['version'] = $gserver['platform'] . ' ' . $version['version'];
} elseif (in_array($gserver['platform'], ['activityrelay', 'pub-relay', 'selective-relay', 'aoderelay'])) {
$version['version'] = $gserver['platform'] . '-' . $version['version'];
@ -118,7 +120,7 @@ class Federation extends BaseAdmin
$platform = 'friendica';
} elseif (in_array($platform, ['red matrix', 'redmatrix', 'red'])) {
$platform = 'hubzilla';
} elseif (in_array($platform, ['mistpark', 'osada', 'roadhouse', 'zap'])) {
} elseif (in_array($platform, ['osada', 'mistpark', 'roadhouse', 'zap', 'macgirvin', 'mkultra'])) {
$platform = 'mistpark';
} elseif(stristr($platform, 'pleroma')) {
$platform = 'pleroma';

View file

@ -25,7 +25,7 @@ $platforms = [
'BaseKit' => 'basekit',
'BBEdit' => 'bbedit',
'Big Cartel' => 'big-cartel',
'blogger' => 'blogger',
'Blogger' => 'blogger',
'Bloom' => 'bloom',
'Bludit' => 'bludit',
'BunnyPress' => 'bunnypress',
@ -48,25 +48,31 @@ $platforms = [
'filerun' => 'filerun',
'FlatPress' => 'flatpress',
'Gatsby' => 'gatsby',
'Ghost' => 'ghost',
'gitweb' => 'gitweb',
'gnusocial' => 'gnusocial',
'Government Site Builder' => 'government-site-builder',
'GravCMS' => 'gravcms',
'grocy' => 'grocy',
'Gruta' => 'gruta',
'hakyll' => 'hakyll',
'HedgeDoc - Collaborative markdown notes' => 'hedgedoc',
'helloworld' => 'helloworld',
'Hello, world. https://github.com/mimecuvalo/helloworld' => 'helloworld',
'Hexo' => 'hexo',
'honk' => 'honk',
'Hugo' => 'hugo',
'ian' => 'ian',
'InterRed' => 'interred',
'Ikiwiki' => 'ikiwiki',
'Jekyll' => 'jekyll',
'Joomla!' => 'joomla',
'KeyHelp' => 'keyhelp',
'Known https://withknown.com' => 'known',
'KONTEXT-CMS (c) WARENFORM [www.warenform.net]' => 'kontext-cms',
'ktistec' => 'ktistec',
'lemoncurry' => 'lemoncurry',
'LibreOffice' => 'libreoffice',
'Magazine News Byte' => 'magazine-news-byte',
'Magnet' => 'magnet',
'mastodon' => 'mastodon',
'Mattermost' => 'mattermost',
'MediaWiki' => 'mediawiki',
@ -84,7 +90,6 @@ $platforms = [
'Org mode' => 'org-mode',
'Org-mode' => 'org-mode',
'Org Mode' => 'org-mode',
'orig4' => 'orig4',
'Osclass' => 'osclass',
'pamphlets/vinyl-press' => 'pamphlets',
'peertube' => 'peertube',
@ -100,6 +105,7 @@ $platforms = [
'Sedo' => 'sedo',
'sitebaker' => 'sitebaker',
'SitePad' => 'sitepad',
'SMAR' => 'smar',
'SPIP' => 'spip',
'STUDIO' => 'studio',
'Synology - Synology DiskStation' => 'synology',
@ -116,20 +122,16 @@ $platforms = [
'Webflow' => 'webflow',
'WikkaWiki' => 'wikkawiki',
'Wix.com' => 'wix.com',
'WordPress' => 'wordpress',
'WordPress.com' => 'wordpress',
'WordPress' => 'wordpress',
'Write.as' => 'write.as',
'XAG/CMS' => 'xagcms',
'Zim' => 'zim',
];
$ap_platforms = [
'honk' => 'honk',
'PeerTube' => 'peertube',
'Hugo' => 'hugo',
'lemoncurry' => 'lemoncurry',
'Ghost' => 'ghost',
'Jekyll' => 'jekyll',
'Known https://withknown.com' => 'known',
];
$dfrn_platforms = [