Unify searchindex table with engagement table

This commit is contained in:
Michael 2024-02-01 23:08:53 +00:00
parent 8ddc71188f
commit fc22a3e83f
11 changed files with 79 additions and 48 deletions

View file

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 2024.03-dev (Yellow Archangel)
-- DB_UPDATE_VERSION 1549
-- DB_UPDATE_VERSION 1550
-- ------------------------------------------
@ -1346,7 +1346,7 @@ CREATE TABLE IF NOT EXISTS `post-engagement` (
`owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner',
`contact-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Person, organisation, news, community, relay',
`media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio',
`language` varchar(128) COMMENT 'Language information about this post',
`iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format',
`searchtext` mediumtext COMMENT 'Simplified text for the full text search',
`size` int unsigned COMMENT 'Body size',
`created` datetime COMMENT '',
@ -1467,14 +1467,19 @@ CREATE TABLE IF NOT EXISTS `post-question-option` (
--
-- One row per indexed post, keyed by `uri-id`.
-- `searchtext` carries the FULLTEXT index used for searching; `restricted` is a flag column (see its column comment).
-- Rows are removed automatically (ON DELETE CASCADE) when the referenced `item-uri` or `contact` row is deleted.
CREATE TABLE IF NOT EXISTS `post-searchindex` (
`uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri',
`network` char(4) COMMENT '',
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted',
`owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner',
`media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio',
`iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format',
`searchtext` mediumtext COMMENT 'Simplified text for the full text search',
`size` int unsigned COMMENT 'Body size',
`created` datetime COMMENT '',
`restricted` boolean NOT NULL DEFAULT '0' COMMENT 'If true, this post is either unlisted or not from a federated network',
PRIMARY KEY(`uri-id`),
INDEX `owner-id` (`owner-id`),
INDEX `created` (`created`),
FULLTEXT INDEX `searchtext` (`searchtext`),
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE,
FOREIGN KEY (`owner-id`) REFERENCES `contact` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts';
--

View file

@ -12,7 +12,7 @@ Fields
| owner-id | Item owner | int unsigned | NO | | 0 | |
| contact-type | Person, organisation, news, community, relay | tinyint | NO | | 0 | |
| media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | |
| language | Language information about this post | varchar(128) | YES | | NULL | |
| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | |
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
| size | Body size | int unsigned | YES | | NULL | |
| created | | datetime | YES | | NULL | |

View file

@ -6,13 +6,16 @@ Content for all posts
Fields
------
| Field | Description | Type | Null | Key | Default | Extra |
| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- |
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
| network | | char(4) | YES | | NULL | |
| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | |
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
| created | | datetime | YES | | NULL | |
| Field | Description | Type | Null | Key | Default | Extra |
| ---------- | --------------------------------------------------------------------- | ------------ | ---- | --- | ------- | ----- |
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
| owner-id | Item owner | int unsigned | NO | | 0 | |
| media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | |
| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | |
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
| size | Body size | int unsigned | YES | | NULL | |
| created | | datetime | YES | | NULL | |
| restricted | If true, this post is either unlisted or not from a federated network | boolean | NO | | 0 | |
Indexes
------------
@ -20,6 +23,7 @@ Indexes
| Name | Fields |
| ---------- | -------------------- |
| PRIMARY | uri-id |
| owner-id | owner-id |
| created | created |
| searchtext | FULLTEXT, searchtext |
@ -29,5 +33,6 @@ Foreign Keys
| Field | Target Table | Target Field |
|-------|--------------|--------------|
| uri-id | [item-uri](help/database/db_item-uri) | id |
| owner-id | [contact](help/database/db_contact) | id |
Return to [database documentation](help/database)

View file

@ -52,7 +52,7 @@ class PostUpdate
// Needed for the helper function to read from the legacy term table
const OBJECT_TYPE_POST = 1;
const VERSION = 1547;
const VERSION = 1550;
/**
* Calls the post update functions
@ -128,7 +128,7 @@ class PostUpdate
if (!self::update1544()) {
return false;
}
if (!self::update1547()) {
if (!self::update1550()) {
return false;
}
return true;
@ -1369,14 +1369,24 @@ class PostUpdate
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @throws \ImagickException
*/
private static function update1547()
private static function update1550()
{
// Was the script completed?
if (DI::keyValue()->get('post_update_version') >= 1547) {
if (DI::keyValue()->get('post_update_version') >= 1550) {
return true;
}
$id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0);
$engagements = DBA::select('post-engagement', ['uri-id'], ["`iso-639-1` IS NULL"], ['order' => ['uri-id' => true], 'limit' => 1000]);
while ($engagement = DBA::fetch($engagements)) {
$item = Post::selectFirst([], ['uri-id' => $engagement['uri-id']]);
if (empty($item)) {
continue;
}
Post\Engagement::storeFromItem($item);
}
DBA::close($engagements);
$id = (int)(DI::keyValue()->get('post_update_version_1550_id') ?? 0);
if ($id == 0) {
$post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]);
$id = (int)($post['uri-id'] ?? 0);
@ -1393,7 +1403,7 @@ class PostUpdate
DBA::mergeConditions($condition, ["`created` > ?", $limit]);
}
$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);
$posts = Post::selectPosts(['uri-id', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);
if (DBA::errorNo() != 0) {
Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
@ -1402,17 +1412,17 @@ class PostUpdate
while ($post = Post::fetch($posts)) {
$id = $post['uri-id'];
Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
Post\SearchIndex::insert($post['uri-id'], $post['created'], true);
++$rows;
}
DBA::close($posts);
DI::keyValue()->set('post_update_version_1547_id', $id);
DI::keyValue()->set('post_update_version_1550_id', $id);
Logger::info('Processed', ['rows' => $rows, 'last' => $id]);
if ($rows <= 100) {
DI::keyValue()->set('post_update_version', 1547);
DI::keyValue()->set('post_update_version', 1550);
Logger::info('Done');
return true;
}

View file

@ -1450,7 +1450,7 @@ class Item
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['created']);
} elseif ($posted_item['verb'] == Activity::ANNOUNCE) {
Post\SearchIndex::update($posted_item['thr-parent-id']);
}

View file

@ -110,9 +110,9 @@ class Content
{
$search = Post\Engagement::escapeKeywords($search);
if ($uid != 0) {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid];
} else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search];
}
if (!empty($last_uriid)) {
@ -139,9 +139,9 @@ class Content
{
// Counts the `post-searchindex` rows matching the escaped keywords.
$search = Post\Engagement::escapeKeywords($search);
if ($uid != 0) {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
// With a user: restricted posts only count when that user has a `post-user` row for them.
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid];
} else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
// Without a user: only unrestricted posts count. The identifier quoting must be balanced,
// an unterminated backtick makes the whole statement unparsable.
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search];
}
return DBA::count('post-searchindex', $condition);
}

View file

@ -22,6 +22,7 @@
namespace Friendica\Model\Post;
use Friendica\Content\Text\BBCode;
use Friendica\Core\L10n;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Database\DBA;
@ -93,9 +94,9 @@ class Engagement
}
$searchtext = self::getSearchTextForItem($parent);
$language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE;
if (!$store) {
$language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? '') : '';
$store = DI::userDefinedChannel()->match($searchtext, $language);
$store = DI::userDefinedChannel()->match($searchtext, $language);
}
$engagement = [
@ -103,7 +104,7 @@ class Engagement
'owner-id' => $parent['owner-id'],
'contact-type' => $parent['contact-contact-type'],
'media-type' => $mediatype,
'language' => $parent['language'],
'iso-639-1' => $language,
'searchtext' => $searchtext,
'size' => self::getContentSize($parent),
'created' => $parent['created'],
@ -130,7 +131,7 @@ class Engagement
return ($ret && !$exists) ? $engagement['uri-id'] : 0;
}
private static function getContentSize(array $item): int
public static function getContentSize(array $item): int
{
$body = ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
$body = BBCode::removeAttachment($body);
@ -315,7 +316,7 @@ class Engagement
return $text;
}
private static function getMediaType(int $uri_id): int
public static function getMediaType(int $uri_id): int
{
$media = Post\Media::getByURIId($uri_id);
$type = 0;

View file

@ -21,10 +21,13 @@
namespace Friendica\Model\Post;
use Friendica\Core\L10n;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Item;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
@ -34,24 +37,27 @@ class SearchIndex
* Insert a post-searchindex entry
*
* @param int $uri_id
* @param string $network
* @param int $private
* @param string $created
* @param bool $refresh
*/
public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
public static function insert(int $uri_id, string $created, bool $refresh = false)
{
// Posts created before the limit returned by searchAgeDateLimit() are not indexed.
$limit = self::searchAgeDateLimit();
if (!empty($limit) && (strtotime($created) < strtotime($limit))) {
return;
}
$item = Post::selectFirstPost(['created', 'owner-id', 'private', 'language', 'network', 'title', 'content-warning', 'body'], ['uri-id' => $uri_id]);
// The post may not be found; without this guard the array accesses below would operate
// on a non-array and Engagement::getContentSize() would receive an invalid argument.
if (empty($item)) {
Logger::info('Post not found, search index entry skipped', ['uri-id' => $uri_id]);
return;
}
// The first key of the decoded language JSON is used as the language code.
// Missing, malformed or empty language data falls back to the "undetermined" code.
$languages = !empty($item['language']) ? json_decode($item['language'], true) : null;
$language = (is_array($languages) ? array_key_first($languages) : null) ?? L10n::UNDETERMINED_LANGUAGE;
$search = [
'uri-id' => $uri_id,
'network' => $network,
'private' => $private,
'created' => $created,
'owner-id' => $item['owner-id'],
'media-type' => Engagement::getMediaType($uri_id),
'iso-639-1' => $language,
'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh),
'size' => Engagement::getContentSize($item),
'created' => $item['created'],
// Anything that is not a public post from a federated network is flagged as restricted.
'restricted' => !in_array($item['network'], Protocol::FEDERATED) || ($item['private'] != Item::PUBLIC),
];
// INSERT_UPDATE: an already existing row for this uri-id is updated instead of failing on the primary key.
return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE);
}

View file

@ -154,7 +154,7 @@ class Search extends BaseApi
$table = 'tag-search-view';
} else {
$q = Post\Engagement::escapeKeywords($q);
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid];
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $q, $uid];
$table = 'post-searchindex';
}

View file

@ -324,7 +324,7 @@ class Timeline extends BaseModule
} elseif ($this->selectedTab == ChannelEntity::AUDIO) {
$condition = ["`media-type` & ?", 4];
} elseif ($this->selectedTab == ChannelEntity::LANGUAGE) {
$condition = ["JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?", User::getLanguageCode($uid)];
$condition = ["`iso-639-1` = ?", User::getLanguageCode($uid)];
} elseif (is_numeric($this->selectedTab)) {
$condition = $this->getUserChannelConditions($this->selectedTab, $uid);
}
@ -450,7 +450,7 @@ class Timeline extends BaseModule
$conditions = [];
$languages = $languages ?: User::getWantedLanguages($uid);
foreach ($languages as $language) {
$conditions[] = "JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?";
$conditions[] = "`iso-639-1` = ?";
$condition[] = $language;
}
if (!empty($conditions)) {

View file

@ -56,7 +56,7 @@ use Friendica\Database\DBA;
// This file is required several times during the test in DbaDefinition which justifies this condition
if (!defined('DB_UPDATE_VERSION')) {
define('DB_UPDATE_VERSION', 1549);
define('DB_UPDATE_VERSION', 1550);
}
return [
@ -1245,7 +1245,7 @@ return [
"post-activity" => [
"comment" => "Original remote activity",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"activity" => ["type" => "mediumtext", "comment" => "Original activity"],
"received" => ["type" => "datetime", "comment" => ""],
],
@ -1256,7 +1256,7 @@ return [
"post-category" => [
"comment" => "post relation to categories",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"uid" => ["type" => "mediumint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["user" => "uid"], "comment" => "User id"],
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""],
"tid" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["tag" => "id", "on delete" => "restrict"], "comment" => ""],
@ -1363,11 +1363,11 @@ return [
"post-engagement" => [
"comment" => "Engagement data per post",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"],
"contact-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Person, organisation, news, community, relay"],
"media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"],
"language" => ["type" => "varchar(128)", "comment" => "Language information about this post"],
"iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"],
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
"size" => ["type" => "int unsigned", "comment" => "Body size"],
"created" => ["type" => "datetime", "comment" => ""],
@ -1486,13 +1486,17 @@ return [
"comment" => "Content for all posts",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"network" => ["type" => "char(4)", "comment" => ""],
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
"owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"],
"media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"],
"iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"],
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
"size" => ["type" => "int unsigned", "comment" => "Body size"],
"created" => ["type" => "datetime", "comment" => ""],
"restricted" => ["type" => "boolean", "not null" => "1", "default" => "0", "comment" => "If true, this post is either unlisted or not from a federated network"],
],
"indexes" => [
"PRIMARY" => ["uri-id"],
"owner-id" => ["owner-id"],
"created" => ["created"],
"searchtext" => ["FULLTEXT", "searchtext"],
]