Remove unused contacts

This commit is contained in:
Michael 2024-08-17 21:58:34 +00:00
parent 4ac29047dc
commit 6f7d8d4c09
3 changed files with 55 additions and 32 deletions

View file

@ -928,7 +928,7 @@ class Transmitter
*
* @param array $item
* @param boolean $blindcopy
* @return void
* @return array
*/
public static function getReceiversForUriId(int $uri_id, bool $blindcopy): array
{

View file

@ -24,8 +24,10 @@ namespace Friendica\Worker;
use Friendica\Contact\Avatar;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\Database\DBStructure;
use Friendica\DI;
use Friendica\Model\Contact;
use Friendica\Model\Photo;
use Friendica\Util\DateTimeFormat;
@ -37,28 +39,53 @@ class RemoveUnusedContacts
{
public static function execute()
{
$condition = ["`id` != ? AND `uid` = ? AND NOT `self` AND NOT `nurl` IN (SELECT `nurl` FROM `contact` WHERE `uid` != ?)
AND (NOT `network` IN (?, ?, ?, ?, ?, ?) OR (`archive` AND `success_update` < ?))
AND NOT `id` IN (SELECT `author-id` FROM `post-user` WHERE `author-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `owner-id` FROM `post-user` WHERE `owner-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `causer-id` FROM `post-user` WHERE `causer-id` IS NOT NULL AND `causer-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `cid` FROM `post-tag` WHERE `cid` = `contact`.`id`)
AND NOT `id` IN (SELECT `contact-id` FROM `post-user` WHERE `contact-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `cid` FROM `user-contact` WHERE `cid` = `contact`.`id`)
AND NOT `id` IN (SELECT `cid` FROM `event` WHERE `cid` = `contact`.`id`)
AND NOT `id` IN (SELECT `cid` FROM `group` WHERE `cid` = `contact`.`id`)
AND NOT `id` IN (SELECT `cid` FROM `delivery-queue` WHERE `cid` = `contact`.`id`)
AND NOT `id` IN (SELECT `author-id` FROM `mail` WHERE `author-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `contact-id` FROM `mail` WHERE `contact-id` = `contact`.`id`)
AND NOT `id` IN (SELECT `contact-id` FROM `group_member` WHERE `contact-id` = `contact`.`id`)
AND `created` < ?",
0, 0, 0, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Protocol::ACTIVITYPUB, DateTimeFormat::utc('now - 365 days'), DateTimeFormat::utc('now - 30 days')];
$loop = 0;
while (self::removeContacts(++$loop)) {
Logger::info('In removal', ['loop' => $loop]);
}
$total = DBA::count('contact', $condition);
Logger::notice('Starting removal', ['total' => $total]);
Logger::notice('Remove apcontact entries with no related contact');
DBA::delete('apcontact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]);
Logger::notice('Removed apcontact entries with no related contact', ['count' => DBA::affectedRows()]);
Logger::notice('Remove diaspora-contact entries with no related contact');
DBA::delete('diaspora-contact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]);
Logger::notice('Removed diaspora-contact entries with no related contact', ['count' => DBA::affectedRows()]);
}
public static function removeContacts(int $loop): bool
{
Logger::notice('Starting removal', ['loop' => $loop]);
$condition = [
"`id` != ? AND `uid` = ? AND NOT `self` AND NOT `uri-id` IN (SELECT `uri-id` FROM `contact` WHERE `uid` != ?)
AND NOT EXISTS(SELECT `author-id` FROM `post-user` WHERE `author-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `owner-id` FROM `post-user` WHERE `owner-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `causer-id` FROM `post-user` WHERE `causer-id` IS NOT NULL AND `causer-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `cid` FROM `post-tag` WHERE `cid` = `contact`.`id`)
AND NOT EXISTS(SELECT `contact-id` FROM `post-user` WHERE `contact-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `cid` FROM `user-contact` WHERE `cid` = `contact`.`id`)
AND NOT EXISTS(SELECT `cid` FROM `event` WHERE `cid` = `contact`.`id`)
AND NOT EXISTS(SELECT `cid` FROM `group` WHERE `cid` = `contact`.`id`)
AND NOT EXISTS(SELECT `author-id` FROM `mail` WHERE `author-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `contact-id` FROM `mail` WHERE `contact-id` = `contact`.`id`)
AND NOT EXISTS(SELECT `contact-id` FROM `group_member` WHERE `contact-id` = `contact`.`id`)
AND `created` < ?", 0, 0, 0, DateTimeFormat::utc('now - 7 days')
];
if (!DI::config()->get('remove_all_unused_contacts')) {
$condition2 = [
"(NOT `network` IN (?, ?, ?, ?, ?, ?) OR `archive`)",
Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Protocol::ACTIVITYPUB
];
$condition = DBA::mergeConditions($condition2, $condition);
}
$contacts = DBA::select('contact', ['id', 'uid', 'photo', 'thumb', 'micro'], $condition, ['limit' => 1000]);
$count = 0;
$contacts = DBA::select('contact', ['id', 'uid', 'photo', 'thumb', 'micro'], $condition);
while ($contact = DBA::fetch($contacts)) {
++$count;
Photo::delete(['uid' => $contact['uid'], 'contact-id' => $contact['id']]);
Avatar::deleteCache($contact);
@ -87,19 +114,9 @@ class RemoveUnusedContacts
DBA::delete('post-thread-user', ['causer-id' => $contact['id']]);
Contact::deleteById($contact['id']);
if ((++$count % 1000) == 0) {
Logger::info('In removal', ['count' => $count, 'total' => $total]);
}
}
DBA::close($contacts);
Logger::notice('Removal done', ['count' => $count, 'total' => $total]);
Logger::notice('Remove apcontact entries with no related contact');
DBA::delete('apcontact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]);
Logger::notice('Removed apcontact entries with no related contact', ['count' => DBA::affectedRows()]);
Logger::notice('Remove diaspora-contact entries with no related contact');
DBA::delete('diaspora-contact', ["`uri-id` NOT IN (SELECT `uri-id` FROM `contact`) AND `updated` < ?", DateTimeFormat::utc('now - 30 days')]);
Logger::notice('Removed diaspora-contact entries with no related contact', ['count' => DBA::affectedRows()]);
Logger::notice('Removal done', ['count' => $count]);
return ($count == 1000 && Worker::isInMaintenanceWindow());
}
}

View file

@ -514,6 +514,12 @@ return [
// Redistribute incoming activities via ActivityPub
'redistribute_activities' => true,
// remove_all_unused_contacts (Boolean)
// Remove all unused contacts.
// Per default only archived contacts are removed from federated services.
// Unused contacts from connector networks will be removed in any case.
'remove_all_unused_contacts' => false,
// session_handler (database|cache|native)
// Whether to use Cache to store session data or to use PHP native session storage.
'session_handler' => 'database',