Merge pull request #11812 from annando/ap-performance

Hopefully fixes loops during message processing
This commit is contained in:
Hypolite Petovan 2022-08-03 03:00:50 -04:00 committed by GitHub
commit 196a1de7f2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 167 additions and 91 deletions

View file

@ -2449,7 +2449,7 @@ class Contact
$new_pubkey = $ret['pubkey'] ?? ''; $new_pubkey = $ret['pubkey'] ?? '';
if ($uid == 0) { if ($uid == 0 && DI::config()->get('system', 'fetch_featured_posts')) {
if ($ret['network'] == Protocol::ACTIVITYPUB) { if ($ret['network'] == Protocol::ACTIVITYPUB) {
$apcontact = APContact::getByURL($ret['url'], false); $apcontact = APContact::getByURL($ret['url'], false);
if (!empty($apcontact['featured'])) { if (!empty($apcontact['featured'])) {

View file

@ -265,12 +265,15 @@ class Processor
/** /**
* Prepares data for a message * Prepares data for a message
* *
* @param array $activity Activity array * @param array $activity Activity array
* @param bool $fetch_parents
*
* @return array Internal item * @return array Internal item
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @throws \ImagickException * @throws \ImagickException
*/ */
public static function createItem(array $activity): array public static function createItem(array $activity, bool $fetch_parents = true): array
{ {
$item = []; $item = [];
$item['verb'] = Activity::POST; $item['verb'] = Activity::POST;
@ -305,59 +308,14 @@ class Processor
return []; return [];
} }
if (empty($activity['directmessage']) && ($activity['id'] != $activity['reply-to-id']) && !Post::exists(['uri' => $activity['reply-to-id']])) { if ($fetch_parents && empty($activity['directmessage']) && ($activity['id'] != $activity['reply-to-id']) && !Post::exists(['uri' => $activity['reply-to-id']])) {
if (self::hasJustBeenFetched($activity['reply-to-id'])) { $result = self::fetchParent($activity);
Logger::notice('We just have tried to fetch this activity. We don\'t try it again.', ['parent' => $activity['reply-to-id']]); if (!empty($result)) {
$fetch_by_worker = false;
if (empty($conversation)) {
return [];
}
} else {
$recursion_depth = $activity['recursion-depth'] ?? 0;
Logger::notice('Parent not found. Try to refetch it.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
if ($recursion_depth < DI::config()->get('system', 'max_recursion_depth')) {
$result = self::fetchMissingActivity($activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
$fetch_by_worker = empty($result);
if (empty($result) && self::isActivityGone($activity['reply-to-id'])) {
if (!empty($activity['entry-id'])) {
Queue::deleteById($activity['entry-id']);
}
if (empty($conversation)) {
return [];
}
}
} else {
Logger::notice('Recursion level is too high.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
$fetch_by_worker = true;
}
}
if ($fetch_by_worker && Queue::hasWorker($activity)) {
Logger::notice('There is already a worker task to fetch the post.', ['id' => $activity['id'], 'parent' => $activity['reply-to-id']]);
$fetch_by_worker = false;
if (empty($conversation)) {
return [];
}
}
if ($fetch_by_worker && DI::config()->get('system', 'fetch_by_worker')) {
Logger::notice('Fetching is done by worker.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
$activity['recursion-depth'] = 0;
if (!Fetch::hasWorker($activity['reply-to-id'])) {
Fetch::add($activity['reply-to-id']);
$wid = Worker::add(PRIORITY_HIGH, 'FetchMissingActivity', $activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
Fetch::setWorkerId($activity['reply-to-id'], $wid);
Queue::setWorkerId($activity, $wid);
} else {
Logger::debug('Activity will already be fetched via a worker.', ['url' => $activity['reply-to-id']]);
}
if (empty($conversation)) {
return [];
}
} elseif (!empty($result)) {
if (($item['thr-parent'] != $result) && Post::exists(['uri' => $result])) { if (($item['thr-parent'] != $result) && Post::exists(['uri' => $result])) {
$item['thr-parent'] = $result; $item['thr-parent'] = $result;
} }
} elseif (empty($conversation)) {
return [];
} }
} }
@ -482,6 +440,77 @@ class Processor
return $item; return $item;
} }
/**
* Fetch and process parent posts for the given activity
*
* @param array $activity
*
* @return string
*/
private static function fetchParent(array $activity): string
{
if (self::hasJustBeenFetched($activity['reply-to-id'])) {
Logger::notice('We just have tried to fetch this activity. We don\'t try it again.', ['parent' => $activity['reply-to-id']]);
return '';
}
$recursion_depth = $activity['recursion-depth'] ?? 0;
if ($recursion_depth < DI::config()->get('system', 'max_recursion_depth')) {
Logger::notice('Parent not found. Try to refetch it.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
$result = self::fetchMissingActivity($activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
if (empty($result) && self::isActivityGone($activity['reply-to-id'])) {
Logger::notice('The activity is gone, the queue entry will be deleted', ['parent' => $activity['reply-to-id']]);
if (!empty($activity['entry-id'])) {
Queue::deleteById($activity['entry-id']);
}
return '';
} elseif (!empty($result)) {
$exists = Post::exists(['uri' => [$result, $activity['reply-to-id']]]);
if ($exists) {
Logger::notice('The activity has been fetched and created.', ['parent' => $result]);
return $result;
} elseif (DI::config()->get('system', 'fetch_by_worker') || DI::config()->get('system', 'decoupled_receiver')) {
Logger::notice('The activity has been fetched and will hopefully be created later.', ['parent' => $result]);
} else {
Logger::notice('The activity exists but has not been created, the queue entry will be deleted.', ['parent' => $result]);
if (!empty($activity['entry-id'])) {
Queue::deleteById($activity['entry-id']);
}
}
return '';
}
if (empty($result) && !DI::config()->get('system', 'fetch_by_worker')) {
return '';
}
} elseif (self::isActivityGone($activity['reply-to-id'])) {
Logger::notice('The activity is gone. We will not spawn a worker. The queue entry will be deleted', ['parent' => $activity['reply-to-id']]);
if (!empty($activity['entry-id'])) {
Queue::deleteById($activity['entry-id']);
}
return '';
} else {
Logger::notice('Recursion level is too high.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
}
if (Queue::hasWorker($activity['worker-id'] ?? 0)) {
Logger::notice('There is already a worker task to fetch the post.', ['id' => $activity['id'], 'parent' => $activity['reply-to-id']]);
return '';
}
if (!Fetch::hasWorker($activity['reply-to-id'])) {
Logger::notice('Fetching is done by worker.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
Fetch::add($activity['reply-to-id']);
$activity['recursion-depth'] = 0;
$wid = Worker::add(PRIORITY_HIGH, 'FetchMissingActivity', $activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
Fetch::setWorkerId($activity['reply-to-id'], $wid);
} else {
Logger::debug('Activity will already be fetched via a worker.', ['url' => $activity['reply-to-id']]);
}
return '';
}
/** /**
* Check if a given activity has recently been fetched * Check if a given activity has recently been fetched
* *
@ -1021,8 +1050,8 @@ class Processor
Queue::remove($activity); Queue::remove($activity);
if ($success && Queue::hasChildren($item['uri'])) { if ($success && Queue::hasChildren($item['uri']) && Post::exists(['uri' => $item['uri']])) {
Worker::add(PRIORITY_HIGH, 'ProcessReplyByUri', $item['uri']); Queue::processReplyByUri($item['uri']);
} }
// Store send a follow request for every reshare - but only when the item had been stored // Store send a follow request for every reshare - but only when the item had been stored
@ -1366,9 +1395,13 @@ class Processor
return ''; return '';
} }
ActivityPub\Receiver::processActivity($ldactivity, json_encode($activity), $uid, true, false, $signer); if (($completion == Receiver::COMPLETION_RELAY) && Queue::exists($url, 'as:Create')) {
Logger::notice('Activity has already been queued.', ['url' => $url, 'object' => $activity['id']]);
Logger::notice('Activity had been fetched and processed.', ['url' => $url, 'object' => $activity['id']]); } elseif (ActivityPub\Receiver::processActivity($ldactivity, json_encode($activity), $uid, true, false, $signer, '', $completion)) {
Logger::notice('Activity had been fetched and processed.', ['url' => $url, 'entry' => $child['entry-id'] ?? 0, 'completion' => $completion, 'object' => $activity['id']]);
} else {
Logger::notice('Activity had been fetched and will be processed later.', ['url' => $url, 'entry' => $child['entry-id'] ?? 0, 'completion' => $completion, 'object' => $activity['id']]);
}
return $activity['id']; return $activity['id'];
} }

View file

@ -25,6 +25,7 @@ use Friendica\Core\Logger;
use Friendica\Database\Database; use Friendica\Database\Database;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat; use Friendica\Util\DateTimeFormat;
use Friendica\Util\JsonLD; use Friendica\Util\JsonLD;
@ -84,6 +85,18 @@ class Queue
return $activity; return $activity;
} }
/**
* Checks if an entry for a given url and type already exists
*
* @param string $url
* @param string $type
* @return boolean
*/
public static function exists(string $url, string $type): bool
{
return DBA::exists('inbox-entry', ['type' => $type, 'object-id' => $url]);
}
/** /**
* Remove activity from the queue * Remove activity from the queue
* *
@ -132,46 +145,60 @@ class Queue
/** /**
* Set the worker id for the queue entry * Set the worker id for the queue entry
* *
* @param array $activity * @param int $entry_id
* @param int $wid * @param int $wid
* @return void * @return void
*/ */
public static function setWorkerId(array $activity, int $wid) public static function setWorkerId(int $entry_id, int $wid)
{ {
if (empty($activity['entry-id']) || empty($wid)) { if (empty($entry_id) || empty($wid)) {
return; return;
} }
DBA::update('inbox-entry', ['wid' => $wid], ['id' => $activity['entry-id']]); DBA::update('inbox-entry', ['wid' => $wid], ['id' => $entry_id]);
} }
/** /**
* Check if there is an assigned worker task * Check if there is an assigned worker task
* *
* @param array $activity * @param int $wid
*
* @return bool * @return bool
*/ */
public static function hasWorker(array $activity = []): bool public static function hasWorker(int $wid): bool
{ {
if (empty($activity['worker-id'])) { if (empty($wid)) {
return false; return false;
} }
return DBA::exists('workerqueue', ['id' => $activity['worker-id'], 'done' => false]); return DBA::exists('workerqueue', ['id' => $wid, 'done' => false]);
} }
/** /**
* Process the activity with the given id * Process the activity with the given id
* *
* @param integer $id * @param integer $id
* @param bool $fetch_parents
* *
* @return bool * @return bool
*/ */
public static function process(int $id): bool public static function process(int $id, bool $fetch_parents = true): bool
{ {
$entry = DBA::selectFirst('inbox-entry', [], ['id' => $id]); $entry = DBA::selectFirst('inbox-entry', [], ['id' => $id]);
if (empty($entry)) { if (empty($entry)) {
return false; return false;
} }
if (!empty($entry['wid'])) {
$worker = DI::app()->getQueue();
$wid = $worker['id'] ?? 0;
if ($entry['wid'] != $wid) {
$workerqueue = DBA::selectFirst('workerqueue', ['pid'], ['id' => $entry['wid'], 'done' => false]);
if (!empty($workerqueue['pid']) && posix_kill($workerqueue['pid'], 0)) {
Logger::notice('Entry is already processed via another process.', ['current' => $wid, 'processor' => $entry['wid']]);
return false;
}
}
}
Logger::debug('Processing queue entry', ['id' => $entry['id'], 'type' => $entry['type'], 'object-type' => $entry['object-type'], 'uri' => $entry['object-id'], 'in-reply-to' => $entry['in-reply-to-id']]); Logger::debug('Processing queue entry', ['id' => $entry['id'], 'type' => $entry['type'], 'object-type' => $entry['object-type'], 'uri' => $entry['object-id'], 'in-reply-to' => $entry['in-reply-to-id']]);
$activity = json_decode($entry['activity'], true); $activity = json_decode($entry['activity'], true);
@ -190,7 +217,7 @@ class Queue
} }
DBA::close($receivers); DBA::close($receivers);
if (!Receiver::routeActivities($activity, $type, $push)) { if (!Receiver::routeActivities($activity, $type, $push, $fetch_parents)) {
self::remove($activity); self::remove($activity);
} }
@ -206,12 +233,16 @@ class Queue
{ {
$entries = DBA::select('inbox-entry', ['id', 'type', 'object-type', 'object-id', 'in-reply-to-id'], ["`trust` AND `wid` IS NULL"], ['order' => ['id' => true]]); $entries = DBA::select('inbox-entry', ['id', 'type', 'object-type', 'object-id', 'in-reply-to-id'], ["`trust` AND `wid` IS NULL"], ['order' => ['id' => true]]);
while ($entry = DBA::fetch($entries)) { while ($entry = DBA::fetch($entries)) {
// Don't process entries of items that are answer to non existing posts
if (!empty($entry['in-reply-to-id']) && !Post::exists(['uri' => $entry['in-reply-to-id']])) {
continue;
}
// We don't need to process entries that depend on already existing entries. // We don't need to process entries that depend on already existing entries.
if (!empty($entry['in-reply-to-id']) && DBA::exists('inbox-entry', ["`id` != ? AND `object-id` = ?", $entry['id'], $entry['in-reply-to-id']])) { if (!empty($entry['in-reply-to-id']) && DBA::exists('inbox-entry', ["`id` != ? AND `object-id` = ?", $entry['id'], $entry['in-reply-to-id']])) {
continue; continue;
} }
Logger::debug('Process leftover entry', $entry); Logger::debug('Process leftover entry', $entry);
self::process($entry['id']); self::process($entry['id'], false);
} }
DBA::close($entries); DBA::close($entries);
} }
@ -247,7 +278,7 @@ class Queue
$entries = DBA::select('inbox-entry', ['id'], ["`in-reply-to-id` = ? AND `object-id` != ?", $uri, $uri]); $entries = DBA::select('inbox-entry', ['id'], ["`in-reply-to-id` = ? AND `object-id` != ?", $uri, $uri]);
while ($entry = DBA::fetch($entries)) { while ($entry = DBA::fetch($entries)) {
$count += 1; $count += 1;
self::process($entry['id']); self::process($entry['id'], false);
} }
DBA::close($entries); DBA::close($entries);
return $count; return $count;
@ -314,6 +345,5 @@ class Queue
} }
} }
DBA::close($entries); DBA::close($entries);
} }
} }

View file

@ -507,26 +507,29 @@ class Receiver
* @param boolean $trust_source Do we trust the source? * @param boolean $trust_source Do we trust the source?
* @param boolean $push Message had been pushed to our system * @param boolean $push Message had been pushed to our system
* @param array $signer The signer of the post * @param array $signer The signer of the post
*
* @return bool
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @throws \ImagickException * @throws \ImagickException
*/ */
public static function processActivity(array $activity, string $body = '', int $uid = null, bool $trust_source = false, bool $push = false, array $signer = [], string $http_signer = '') public static function processActivity(array $activity, string $body = '', int $uid = null, bool $trust_source = false, bool $push = false, array $signer = [], string $http_signer = '', int $completion = Receiver::COMPLETION_AUTO): bool
{ {
$type = JsonLD::fetchElement($activity, '@type'); $type = JsonLD::fetchElement($activity, '@type');
if (!$type) { if (!$type) {
Logger::info('Empty type', ['activity' => $activity]); Logger::info('Empty type', ['activity' => $activity]);
return; return true;
} }
if (!JsonLD::fetchElement($activity, 'as:object', '@id')) { if (!JsonLD::fetchElement($activity, 'as:object', '@id')) {
Logger::info('Empty object', ['activity' => $activity]); Logger::info('Empty object', ['activity' => $activity]);
return; return true;
} }
$actor = JsonLD::fetchElement($activity, 'as:actor', '@id'); $actor = JsonLD::fetchElement($activity, 'as:actor', '@id');
if (empty($actor)) { if (empty($actor)) {
Logger::info('Empty actor', ['activity' => $activity]); Logger::info('Empty actor', ['activity' => $activity]);
return; return true;
} }
if (is_array($activity['as:object'])) { if (is_array($activity['as:object'])) {
@ -548,7 +551,7 @@ class Receiver
$object_data = self::prepareObjectData($activity, $uid, $push, $trust_source); $object_data = self::prepareObjectData($activity, $uid, $push, $trust_source);
if (empty($object_data)) { if (empty($object_data)) {
Logger::info('No object data found', ['activity' => $activity]); Logger::info('No object data found', ['activity' => $activity]);
return; return true;
} }
// Lemmy is announcing activities. // Lemmy is announcing activities.
@ -583,21 +586,27 @@ class Receiver
$object_data['object_activity'] = $activity; $object_data['object_activity'] = $activity;
} }
if (($type == 'as:Create') && Queue::exists($object_data['object_id'], $type)) {
Logger::info('The activity is already added.', ['id' => $object_data['object_id']]);
return true;
}
if (DI::config()->get('system', 'decoupled_receiver') && ($trust_source || DI::config()->get('debug', 'ap_inbox_store_untrusted'))) { if (DI::config()->get('system', 'decoupled_receiver') && ($trust_source || DI::config()->get('debug', 'ap_inbox_store_untrusted'))) {
$object_data = Queue::add($object_data, $type, $uid, $http_signer, $push, $trust_source); $object_data = Queue::add($object_data, $type, $uid, $http_signer, $push, $trust_source);
} }
if (!$trust_source) { if (!$trust_source) {
Logger::info('Activity trust could not be achieved.', ['id' => $object_data['object_id'], 'type' => $type, 'signer' => $signer, 'actor' => $actor, 'attributedTo' => $attributed_to]); Logger::info('Activity trust could not be achieved.', ['id' => $object_data['object_id'], 'type' => $type, 'signer' => $signer, 'actor' => $actor, 'attributedTo' => $attributed_to]);
return; return true;
} }
if (!empty($object_data['entry-id']) && DI::config()->get('system', 'decoupled_receiver') && ($push || ($activity['completion-mode'] == self::COMPLETION_RELAY))) { if (!empty($object_data['entry-id']) && DI::config()->get('system', 'decoupled_receiver') && ($push || ($completion == self::COMPLETION_RELAY))) {
// We delay by 5 seconds to allow to accumulate all receivers // We delay by 5 seconds to allow to accumulate all receivers
$delayed = date(DateTimeFormat::MYSQL, time() + 5); $delayed = date(DateTimeFormat::MYSQL, time() + 5);
Logger::debug('Initiate processing', ['id' => $object_data['entry-id'], 'uri' => $object_data['object_id']]); Logger::debug('Initiate processing', ['id' => $object_data['entry-id'], 'uri' => $object_data['object_id']]);
Worker::add(['priority' => PRIORITY_HIGH, 'delayed' => $delayed], 'ProcessQueue', $object_data['entry-id']); $wid = Worker::add(['priority' => PRIORITY_HIGH, 'delayed' => $delayed], 'ProcessQueue', $object_data['entry-id']);
return; Queue::setWorkerId($object_data['entry-id'], $wid);
return false;
} }
if (!empty($activity['recursion-depth'])) { if (!empty($activity['recursion-depth'])) {
@ -612,25 +621,27 @@ class Receiver
self::storeUnhandledActivity(true, $type, $object_data, $activity, $body, $uid, $trust_source, $push, $signer); self::storeUnhandledActivity(true, $type, $object_data, $activity, $body, $uid, $trust_source, $push, $signer);
Queue::remove($object_data); Queue::remove($object_data);
} }
return true;
} }
/** /**
* Route activities * Route activities
* *
* @param array $object_data * @param array $object_data
* @param string $type * @param string $type
* @param boolean $push * @param bool $push
* @param bool $fetch_parents
* *
* @return boolean Could the activity be routed? * @return boolean Could the activity be routed?
*/ */
public static function routeActivities(array $object_data, string $type, bool $push): bool public static function routeActivities(array $object_data, string $type, bool $push, bool $fetch_parents = true): bool
{ {
$activity = $object_data['object_activity'] ?? []; $activity = $object_data['object_activity'] ?? [];
switch ($type) { switch ($type) {
case 'as:Create': case 'as:Create':
if (in_array($object_data['object_type'], self::CONTENT_TYPES)) { if (in_array($object_data['object_type'], self::CONTENT_TYPES)) {
$item = ActivityPub\Processor::createItem($object_data); $item = ActivityPub\Processor::createItem($object_data, $fetch_parents);
ActivityPub\Processor::postItem($object_data, $item); ActivityPub\Processor::postItem($object_data, $item);
} elseif (in_array($object_data['object_type'], ['pt:CacheFile'])) { } elseif (in_array($object_data['object_type'], ['pt:CacheFile'])) {
// Unhandled Peertube activity // Unhandled Peertube activity
@ -642,7 +653,7 @@ class Receiver
case 'as:Invite': case 'as:Invite':
if (in_array($object_data['object_type'], ['as:Event'])) { if (in_array($object_data['object_type'], ['as:Event'])) {
$item = ActivityPub\Processor::createItem($object_data); $item = ActivityPub\Processor::createItem($object_data, $fetch_parents);
ActivityPub\Processor::postItem($object_data, $item); ActivityPub\Processor::postItem($object_data, $item);
} else { } else {
return false; return false;
@ -668,7 +679,7 @@ class Receiver
$object_data['thread-completion'] = Contact::getIdForURL($actor); $object_data['thread-completion'] = Contact::getIdForURL($actor);
$object_data['completion-mode'] = self::COMPLETION_ANNOUCE; $object_data['completion-mode'] = self::COMPLETION_ANNOUCE;
$item = ActivityPub\Processor::createItem($object_data); $item = ActivityPub\Processor::createItem($object_data, $fetch_parents);
if (empty($item)) { if (empty($item)) {
return false; return false;
} }

View file

@ -93,9 +93,7 @@ class Cron
Queue::clear(); Queue::clear();
// Process all unprocessed entries // Process all unprocessed entries
if (DI::config()->get('system', 'decoupled_receiver')) { Queue::processAll();
Queue::processAll();
}
// Search for new contacts in the directory // Search for new contacts in the directory
if (DI::config()->get('system', 'synchronize_directory')) { if (DI::config()->get('system', 'synchronize_directory')) {

View file

@ -286,6 +286,10 @@ return [
// Fetch missing posts via a background process // Fetch missing posts via a background process
'fetch_by_worker' => false, 'fetch_by_worker' => false,
// fetch_featured_posts (Boolean)
// Fetch featured posts from all contacts
'fetch_featured_posts' => false,
// free_crawls (Integer) // free_crawls (Integer)
// Number of "free" searches when system => permit_crawling is enabled. // Number of "free" searches when system => permit_crawling is enabled.
'free_crawls' => 10, 'free_crawls' => 10,