friendica-github/src/Worker/ExpirePosts.php

324 lines
12 KiB
PHP
Raw Normal View History

<?php
2024-08-24 15:27:00 +02:00
// Copyright (C) 2010-2024, the Friendica project
// SPDX-FileCopyrightText: 2010-2024 the Friendica project
//
// SPDX-License-Identifier: AGPL-3.0-or-later
namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Core\Worker;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\Database\DBStructure;
use Friendica\DI;
2024-08-24 08:37:56 +00:00
use Friendica\Model\Attach;
2021-02-08 07:48:36 +00:00
use Friendica\Model\Item;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
class ExpirePosts
{
/**
 * Expire posts and remove unused item-uri entries
 *
 * Runs all cleanup steps while holding the optimize lock and afterwards
 * adds the "Expire" and "NodeInfo" worker tasks with low priority.
 * Nothing is done when the lock cannot be acquired.
 *
 * @return void
 */
public static function execute()
{
	if (!DBA::acquireOptimizeLock()) {
		Logger::warning('Lock could not be acquired');
		return;
	}

	try {
		self::deleteExpiredOriginPosts();
		self::deleteOrphanedEntries();
		self::deleteUnusedItemUri();
		self::deleteExpiredExternalPosts();

		if (DI::config()->get('system', 'add_missing_posts')) {
			self::addMissingEntries();
		}

		self::deleteUnusedAttachments();
	} finally {
		// Release the lock even when one of the cleanup steps throws,
		// otherwise later runs could never acquire it again.
		DBA::releaseOptimizeLock();
	}

	// Set the expiry for origin posts
	Worker::add(Worker::PRIORITY_LOW, 'Expire');

	// update nodeinfo data after everything is cleaned up
	Worker::add(Worker::PRIORITY_LOW, 'NodeInfo');
}
/**
 * Delete expired origin posts and orphaned post related table entries
 *
 * Selects deleted top-level "post-user" rows whose "edited" date is older
 * than 60 days and removes the matching "post-user" and "post-origin" data
 * for the whole thread. The work is done in passes of at most 1000 rows;
 * the loop stops as soon as a pass did not delete anything.
 *
 * @return void
 */
private static function deleteExpiredOriginPosts()
{
	Logger::notice('Delete expired posts');

	// physically remove anything that has been deleted for more than two months
	$condition = ["`gravity` = ? AND `deleted` AND `edited` < ?", Item::GRAVITY_PARENT, DateTimeFormat::utc('now - 60 days')];

	$pass = 0;
	do {
		++$pass;
		$rows = DBA::select('post-user', ['uri-id', 'uid'], $condition, ['limit' => 1000]);

		$affected_count = 0;
		while ($row = Post::fetch($rows)) {
			Logger::info('Delete expired item', ['pass' => $pass, 'uri-id' => $row['uri-id']]);

			// The selected row is a thread starter, so its uri-id is used as the parent of the thread
			$thread = ['parent-uri-id' => $row['uri-id'], 'uid' => $row['uid']];

			Post\User::delete($thread);
			$affected_count += DBA::affectedRows();

			Post\Origin::delete($thread);
			$affected_count += DBA::affectedRows();
		}
		DBA::close($rows);
		DBA::commit();

		Logger::notice('Delete expired posts - done', ['pass' => $pass, 'rows' => $affected_count]);
	} while ($affected_count);
}
/**
 * Delete orphaned entries in the post related tables
 *
 * For every listed table all rows are removed whose "uri-id" has no
 * counterpart in "post-user". The legacy "item" table is skipped when it
 * does not exist.
 *
 * @return void
 */
private static function deleteOrphanedEntries()
{
	Logger::notice('Delete orphaned entries');

	// "post-user" is the leading table. So we delete every entry that isn't found there
	$tables = ['item', 'post', 'post-content', 'post-thread', 'post-thread-user'];

	foreach ($tables as $table) {
		if (($table === 'item') && !DBStructure::existsTable('item')) {
			continue;
		}

		Logger::notice('Start collecting orphaned entries', ['table' => $table]);
		$uris = DBA::select($table, ['uri-id'], ["NOT `uri-id` IN (SELECT `uri-id` FROM `post-user`)"]);

		$affected_count = 0;
		Logger::notice('Deleting orphaned entries - start', ['table' => $table]);

		// The result is consumed in chunks (third toArray() argument: 100) and each chunk is deleted at once
		while ($rows = DBA::toArray($uris, false, 100)) {
			$ids = array_column($rows, 'uri-id');
			DBA::delete($table, ['uri-id' => $ids]);
			$affected_count += DBA::affectedRows();
		}
		DBA::close($uris);
		DBA::commit();

		Logger::notice('Orphaned entries deleted', ['table' => $table, 'rows' => $affected_count]);
	}

	Logger::notice('Delete orphaned entries - done');
}
/**
 * Add missing entries in some post related tables
 *
 * Recreates rows in "post", "post-thread" and "post-thread-user" from the
 * data that is stored in "post-user".
 *
 * @return void
 */
private static function addMissingEntries()
{
	Logger::notice('Adding missing entries');

	self::insertMissingRows(
		'post',
		["`uri-id` not in (select `uri-id` from `post`)"],
		false
	);

	self::insertMissingRows(
		'post-thread',
		["`gravity` = ? AND `uri-id` not in (select `uri-id` from `post-thread`)", Item::GRAVITY_PARENT],
		true
	);

	self::insertMissingRows(
		'post-thread-user',
		["`gravity` = ? AND `id` not in (select `post-user-id` from `post-thread-user`)", Item::GRAVITY_PARENT],
		true
	);
}

/**
 * Copy the "post-user" rows that match the condition into the given table
 *
 * @param string $table            Table that receives the missing rows
 * @param array  $condition        Condition that selects the "post-user" rows to copy
 * @param bool   $set_thread_dates Set "commented" and "changed" to the value of "created" before inserting
 *
 * @return void
 */
private static function insertMissingRows($table, array $condition, $set_thread_dates)
{
	$rows = 0;

	$userposts = DBA::select('post-user', [], $condition);
	while ($fields = DBA::fetch($userposts)) {
		// Reduce the "post-user" row to the fields of the target table
		$post_fields = DI::dbaDefinition()->truncateFieldsForTable($table, $fields);

		if ($set_thread_dates) {
			$post_fields['commented'] = $post_fields['changed'] = $post_fields['created'];
		}

		DBA::insert($table, $post_fields, Database::INSERT_IGNORE);
		$rows++;
	}
	DBA::close($userposts);

	if ($rows > 0) {
		Logger::notice('Added ' . $table . ' entries', ['rows' => $rows]);
	} else {
		Logger::notice('No ' . $table . ' entries added');
	}
}
/**
 * Delete unused item-uri entries
 *
 * Removes "item-uri" rows below a reference id that are not referenced by
 * any of the tables listed in the condition. Rows are deleted in passes of
 * at most 1000 ids until a pass did not delete anything.
 *
 * @return void
 */
private static function deleteUnusedItemUri()
{
	// We have to avoid deleting newly created "item-uri" entries.
	// So we fetch a post that had been stored yesterday and only delete older ones.
	$item = Post::selectFirstThread(
		['uri-id'],
		["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - 1 day')],
		['order' => ['received' => true]]
	);

	if (empty($item['uri-id'])) {
		Logger::warning('No item with uri-id found - we better quit here');
		return;
	}

	Logger::notice('Start collecting orphaned URI-ID', ['last-id' => $item['uri-id']]);

	$condition = [
		"`id` < ?
		AND NOT EXISTS(SELECT `uri-id` FROM `post-user` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `parent-uri-id` FROM `post-user` WHERE `parent-uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `thr-parent-id` FROM `post-user` WHERE `thr-parent-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `external-id` FROM `post-user` WHERE `external-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `replies-id` FROM `post-user` WHERE `replies-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `context-id` FROM `post-thread` WHERE `context-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `conversation-id` FROM `post-thread` WHERE `conversation-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `mail` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `event` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `user-contact` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `contact` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `apcontact` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `diaspora-contact` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `inbox-status` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `post-delivery` WHERE `uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `uri-id` FROM `post-delivery` WHERE `inbox-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `parent-uri-id` FROM `mail` WHERE `parent-uri-id` = `item-uri`.`id`)
		AND NOT EXISTS(SELECT `thr-parent-id` FROM `mail` WHERE `thr-parent-id` = `item-uri`.`id`)",
		$item['uri-id'],
	];

	$pass = 0;
	do {
		++$pass;
		$uris = DBA::select('item-uri', ['id'], $condition, ['limit' => 1000]);

		Logger::notice('Start deleting orphaned URI-ID', ['pass' => $pass, 'last-id' => $item['uri-id']]);

		$affected_count = 0;
		while ($rows = DBA::toArray($uris, false, 100)) {
			$ids = array_column($rows, 'id');
			DBA::delete('item-uri', ['id' => $ids]);
			$affected_count += DBA::affectedRows();
			Logger::info('Deleted', ['pass' => $pass, 'rows' => $affected_count]);
		}
		DBA::close($uris);
		DBA::commit();

		Logger::notice('Orphaned URI-ID entries removed', ['pass' => $pass, 'rows' => $affected_count]);
	} while ($affected_count);
}
/**
 * Delete old external post entries
 *
 * Two groups are removed by deleting their "item-uri" rows:
 * - threads older than "dbclean-expire-days" that are not excluded by the
 *   condition below (mentions, starred or wall posts, categories,
 *   collections, media, notifying contacts, origin posts, events,
 *   personal notes, content with a resource id)
 * - public (uid 0) thread starters older than "dbclean-expire-unclaimed"
 *   days that no user has a copy of and that had no recent public activity
 *
 * Nothing is done when "dbclean-expire-limit" is empty.
 *
 * @return void
 */
private static function deleteExpiredExternalPosts()
{
	$expire_days           = DI::config()->get('system', 'dbclean-expire-days');
	$expire_days_unclaimed = DI::config()->get('system', 'dbclean-expire-unclaimed');
	if (empty($expire_days_unclaimed)) {
		$expire_days_unclaimed = $expire_days;
	}

	// The configured value is only checked for being set; the batches below use a fixed size of 1000
	$limit = DI::config()->get('system', 'dbclean-expire-limit');
	if (empty($limit)) {
		return;
	}

	if (!empty($expire_days)) {
		Logger::notice('Start collecting expired threads', ['expiry_days' => $expire_days]);

		$condition = [
			"`received` < ?
			AND NOT `uri-id` IN (SELECT `uri-id` FROM `post-thread-user`
				WHERE (`mention` OR `starred` OR `wall`) AND `uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `uri-id` FROM `post-category`
				WHERE `uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `uri-id` FROM `post-collection`
				WHERE `uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `uri-id` FROM `post-media`
				WHERE `uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `post-user` INNER JOIN `contact` ON `contact`.`id` = `contact-id` AND `notify_new_posts`
				WHERE `parent-uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `post-user`
				WHERE (`origin` OR `event-id` != 0 OR `post-type` = ?) AND `parent-uri-id` = `post-thread`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `uri-id` FROM `post-content`
				WHERE `resource-id` != 0 AND `uri-id` = `post-thread`.`uri-id`)",
			DateTimeFormat::utc('now - ' . (int)$expire_days . ' days'),
			Item::PT_PERSONAL_NOTE,
		];

		$pass = 0;
		do {
			++$pass;
			$uris = DBA::select('post-thread', ['uri-id'], $condition, ['limit' => 1000]);

			Logger::notice('Start deleting expired threads', ['pass' => $pass]);

			$affected_count = 0;
			while ($rows = DBA::toArray($uris, false, 100)) {
				$ids = array_column($rows, 'uri-id');
				DBA::delete('item-uri', ['id' => $ids]);
				$affected_count += DBA::affectedRows();
			}
			DBA::close($uris);
			DBA::commit();

			Logger::notice('Deleted expired threads', ['pass' => $pass, 'rows' => $affected_count]);
		} while ($affected_count);
	}

	if (!empty($expire_days_unclaimed)) {
		Logger::notice('Start collecting unclaimed public items', ['expiry_days' => $expire_days_unclaimed]);

		// Computed once so that both placeholders are bound to exactly the same point in time
		$unclaimed_before = DateTimeFormat::utc('now - ' . (int)$expire_days_unclaimed . ' days');

		$condition = [
			"`gravity` = ? AND `uid` = ? AND `received` < ?
			AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `post-user` AS `i` WHERE `i`.`uid` != ?
				AND `i`.`parent-uri-id` = `post-user`.`uri-id`)
			AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `post-user` AS `i` WHERE `i`.`uid` = ?
				AND `i`.`parent-uri-id` = `post-user`.`uri-id` AND `i`.`received` > ?)",
			Item::GRAVITY_PARENT,
			0,
			$unclaimed_before,
			0,
			0,
			$unclaimed_before,
		];

		$pass = 0;
		do {
			++$pass;
			$uris = DBA::select('post-user', ['uri-id'], $condition, ['limit' => 1000]);

			Logger::notice('Start deleting unclaimed public items', ['pass' => $pass]);

			$affected_count = 0;
			while ($rows = DBA::toArray($uris, false, 100)) {
				$ids = array_column($rows, 'uri-id');
				DBA::delete('item-uri', ['id' => $ids]);
				$affected_count += DBA::affectedRows();
			}
			DBA::close($uris);
			DBA::commit();

			Logger::notice('Deleted unclaimed public items', ['pass' => $pass, 'rows' => $affected_count]);
		} while ($affected_count);
	}
}
2024-08-24 08:37:56 +00:00
/**
 * Delete media attachments (excluding photos) that aren't linked to any post
 *
 * Every "attach" row whose id is not referenced by "post-media"."attach-id"
 * is removed via Attach::delete().
 *
 * @return void
 */
private static function deleteUnusedAttachments()
{
	// A correlated NOT EXISTS is used instead of "NOT IN (subquery)": the latter
	// matches no row at all as soon as the subquery returns a single NULL.
	// NOTE(review): the nullability of `post-media`.`attach-id` is not visible here - confirm against the schema.
	$postmedia = DBA::select(
		'attach',
		['id'],
		["NOT EXISTS(SELECT `attach-id` FROM `post-media` WHERE `attach-id` = `attach`.`id`)"]
	);

	while ($media = DBA::fetch($postmedia)) {
		Attach::delete(['id' => $media['id']]);
	}

	// Free the result set like the other cleanup steps do
	DBA::close($postmedia);
}
}