mirror of
https://github.com/friendica/friendica
synced 2024-11-17 22:23:41 +00:00
New table "post-searchindex"
This commit is contained in:
parent
75b37fe376
commit
ee9a68e40c
8 changed files with 97 additions and 51 deletions
|
@ -156,7 +156,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
|
|||
return true;
|
||||
}
|
||||
|
||||
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($searchtext)]) !== false;
|
||||
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($searchtext)]) !== false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -310,15 +310,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
|
|||
|
||||
private function inFulltext(string $fullTextSearch): bool
|
||||
{
|
||||
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($fullTextSearch)]);
|
||||
}
|
||||
|
||||
private function escapeKeywords(string $fullTextSearch): string
|
||||
{
|
||||
foreach (Engagement::KEYWORDS as $keyword) {
|
||||
$fullTextSearch = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $fullTextSearch);
|
||||
}
|
||||
return $fullTextSearch;
|
||||
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]);
|
||||
}
|
||||
|
||||
private function getUserCondition()
|
||||
|
|
|
@ -255,12 +255,15 @@ class BBCode
|
|||
// Removes attachments
|
||||
$text = self::removeAttachment($text);
|
||||
|
||||
// Add images because of possible alt texts
|
||||
// Add text from attached media
|
||||
if (!empty($uri_id)) {
|
||||
$text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]);
|
||||
|
||||
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $media) {
|
||||
$text .= ' ' . $media['name'] . ' ' . $media['description'];
|
||||
foreach (Post\Media::getByURIId($uri_id) as $media) {
|
||||
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
|
||||
$text .= ' ' . $media['description'];
|
||||
}
|
||||
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY]) && !empty($media['name']) && (stripos($text, $media['name']) === false)) {
|
||||
$text .= ' ' . $media['name'];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ use Friendica\Core\Protocol;
|
|||
use Friendica\Core\Renderer;
|
||||
use Friendica\Core\System;
|
||||
use Friendica\Core\Worker;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Post\Category;
|
||||
|
@ -243,6 +244,11 @@ class Item
|
|||
$content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']);
|
||||
|
||||
Post\Content::update($item['uri-id'], $content_fields);
|
||||
|
||||
$searchtext = Post\Engagement::getSearchTextForUriId($item['uri-id'], true);
|
||||
DBA::update('post-engagement', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]);
|
||||
DBA::update('post-searchindex', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]);
|
||||
|
||||
}
|
||||
|
||||
if (!empty($fields['file'])) {
|
||||
|
@ -1443,6 +1449,16 @@ class Item
|
|||
}
|
||||
|
||||
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
|
||||
|
||||
if (in_array($item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
|
||||
$search = [
|
||||
'uri-id' => $posted_item['uri-id'],
|
||||
'network' => $posted_item['network'],
|
||||
'private' => $posted_item['private'],
|
||||
'searchtext' => Post\Engagement::getSearchTextForUriId($posted_item['uri-id']),
|
||||
];
|
||||
DBA::insert('post-searchindex', $search, Database::INSERT_IGNORE);
|
||||
}
|
||||
|
||||
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
|
||||
self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']);
|
||||
|
|
|
@ -22,11 +22,10 @@
|
|||
namespace Friendica\Model\Post;
|
||||
|
||||
use \BadMethodCallException;
|
||||
use Friendica\Core\Protocol;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\Database\DBStructure;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Item;
|
||||
use Friendica\Model\Post;
|
||||
|
||||
class Content
|
||||
|
@ -109,9 +108,12 @@ class Content
|
|||
*/
|
||||
public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0)
|
||||
{
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$search = Post\Engagement::escapeKeywords($search);
|
||||
if ($uid != 0) {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
|
||||
} else {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
|
||||
}
|
||||
|
||||
if (!empty($last_uriid)) {
|
||||
$condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]);
|
||||
|
@ -122,7 +124,7 @@ class Content
|
|||
'limit' => [$start, $limit]
|
||||
];
|
||||
|
||||
$tags = Post::select(['uri-id'], $condition, $params);
|
||||
$tags = DBA::select('post-searchindex', ['uri-id'], $condition, $params);
|
||||
|
||||
$uriids = [];
|
||||
while ($tag = DBA::fetch($tags)) {
|
||||
|
@ -135,9 +137,12 @@ class Content
|
|||
|
||||
public static function countBySearch(string $search, int $uid = 0)
|
||||
{
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
return Post::count($condition);
|
||||
$search = Post\Engagement::escapeKeywords($search);
|
||||
if ($uid != 0) {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
|
||||
} else {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
|
||||
}
|
||||
return DBA::count('post-searchindex', $condition);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -146,7 +146,7 @@ class Engagement
|
|||
'owner-contact-type' => $author['contact-type'],
|
||||
'owner-nick' => $author['nick'],
|
||||
'owner-addr' => $author['addr'],
|
||||
'author-gsid' => $author['gsid'],
|
||||
'owner-gsid' => $author['gsid'],
|
||||
];
|
||||
|
||||
foreach ($receivers as $receiver) {
|
||||
|
@ -158,6 +158,21 @@ class Engagement
|
|||
return self::getSearchText($item, $receivers, $tags);
|
||||
}
|
||||
|
||||
/**
 * Returns the search text for the post with the given uri-id.
 *
 * Unless a refresh is requested, a non-empty `searchtext` value already stored in the
 * `post-engagement` table is returned unchanged. In every other case the text is rebuilt
 * from the post's fields via getSearchTextForItem().
 *
 * @param int  $uri_id  uri-id of the post
 * @param bool $refresh When true, the stored `post-engagement` value is ignored and the text is always rebuilt
 *
 * @return string The search text, or an empty string when no post could be loaded for the uri-id
 */
public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string
{
	if (!$refresh) {
		$engagement = DBA::selectFirst('post-engagement', ['searchtext'], ['uri-id' => $uri_id]);
		if (!empty($engagement['searchtext'])) {
			return $engagement['searchtext'];
		}
	}

	$post = Post::selectFirstPost(
		[
			'uri-id', 'network', 'title', 'content-warning', 'body', 'private',
			'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
			'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid',
		],
		['uri-id' => $uri_id]
	);

	// NOTE(review): selectFirstPost() presumably yields a non-array (false/empty) when no row
	// matches - confirm against Post. getSearchTextForItem() is declared with an array parameter,
	// so passing such a result on would raise a TypeError instead of returning a string.
	if (empty($post['uri-id'])) {
		return '';
	}

	return self::getSearchTextForItem($post);
}
|
||||
|
||||
private static function getSearchTextForItem(array $item): string
|
||||
{
|
||||
$receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url');
|
||||
|
@ -167,24 +182,24 @@ class Engagement
|
|||
|
||||
private static function getSearchText(array $item, array $receivers, array $tags): string
|
||||
{
|
||||
$body = '[nosmile]network:' . $item['network'];
|
||||
$body = '[nosmile]network_' . $item['network'];
|
||||
|
||||
if (!empty($item['author-gsid'])) {
|
||||
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]);
|
||||
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
|
||||
if (!empty($platform)) {
|
||||
$body .= ' platform:' . $platform;
|
||||
$body .= ' platform_' . $platform;
|
||||
}
|
||||
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
}
|
||||
|
||||
if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) {
|
||||
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]);
|
||||
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
|
||||
if (!empty($platform) && !strpos($body, 'platform:' . $platform)) {
|
||||
$body .= ' platform:' . $platform;
|
||||
if (!empty($platform) && !strpos($body, 'platform_' . $platform)) {
|
||||
$body .= ' platform_' . $platform;
|
||||
}
|
||||
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
}
|
||||
|
||||
switch ($item['private']) {
|
||||
|
@ -212,16 +227,16 @@ class Engagement
|
|||
}
|
||||
|
||||
if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) {
|
||||
$body .= ' group:' . $item['author-nick'] . ' group:' . $item['author-addr'];
|
||||
$body .= ' group_' . $item['author-nick'] . ' group_' . $item['author-addr'];
|
||||
} elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' from:' . $item['author-nick'] . ' from:' . $item['author-addr'];
|
||||
$body .= ' from_' . $item['author-nick'] . ' from_' . $item['author-addr'];
|
||||
}
|
||||
|
||||
if ($item['author-id'] != $item['owner-id']) {
|
||||
if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) {
|
||||
$body .= ' group:' . $item['owner-nick'] . ' group:' . $item['owner-addr'];
|
||||
$body .= ' group_' . $item['owner-nick'] . ' group_' . $item['owner-addr'];
|
||||
} elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' from:' . $item['owner-nick'] . ' from:' . $item['owner-addr'];
|
||||
$body .= ' from_' . $item['owner-nick'] . ' from_' . $item['owner-addr'];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,15 +246,15 @@ class Engagement
|
|||
continue;
|
||||
}
|
||||
|
||||
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group:' . $contact['addr'])) {
|
||||
$body .= ' group:' . $contact['nick'] . ' group:' . $contact['addr'];
|
||||
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group_' . $contact['addr'])) {
|
||||
$body .= ' group_' . $contact['nick'] . ' group_' . $contact['addr'];
|
||||
} elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' to:' . $contact['nick'] . ' to:' . $contact['addr'];
|
||||
$body .= ' to_' . $contact['nick'] . ' to_' . $contact['addr'];
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
$body .= ' tag:' . $tag;
|
||||
$body .= ' tag_' . $tag;
|
||||
}
|
||||
|
||||
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
|
||||
|
@ -293,4 +308,12 @@ class Engagement
|
|||
|
||||
return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour');
|
||||
}
|
||||
|
||||
/**
 * Rewrites `keyword:value` terms of a full text search string into `keyword_value`.
 *
 * For every entry of self::KEYWORDS, each occurrence of the keyword followed by a colon,
 * one arbitrary character and at least one further character out of word characters,
 * `*`, `@`, `.` or `-` has its colon replaced by an underscore. All other text is kept.
 *
 * NOTE(review): getSearchText() in this class writes its prefixed terms with an underscore
 * (`network_`, `platform_`, `server_`, `group_`, `from_`, `to_`, `tag_`); this rewrite
 * presumably exists so that user supplied `keyword:value` terms match them - confirm
 * against the KEYWORDS constant, which is not visible here.
 *
 * @param string $fullTextSearch Search string as entered
 *
 * @return string Search string with the keyword terms rewritten
 */
public static function escapeKeywords(string $fullTextSearch): string
{
	foreach (self::KEYWORDS as $keyword) {
		// preg_replace() returns null on a PCRE error; fall back to the unchanged text
		// so the declared string return type is always honoured.
		$fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '$1_$2', $fullTextSearch) ?? $fullTextSearch;
	}

	return $fullTextSearch;
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@ namespace Friendica\Module\Api\Mastodon;
|
|||
|
||||
use Friendica\Core\Logger;
|
||||
use Friendica\Core\Protocol;
|
||||
use Friendica\Core\System;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Contact;
|
||||
|
@ -154,10 +153,9 @@ class Search extends BaseApi
|
|||
substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$table = 'tag-search-view';
|
||||
} else {
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $q), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$table = 'post-user-view';
|
||||
$q = Post\Engagement::escapeKeywords($q);
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid];
|
||||
$table = 'post-searchindex';
|
||||
}
|
||||
|
||||
if (!empty($max_id)) {
|
||||
|
|
|
@ -398,11 +398,7 @@ class Timeline extends BaseModule
|
|||
}
|
||||
|
||||
if (!empty($channel->fullTextSearch)) {
|
||||
$search = $channel->fullTextSearch;
|
||||
foreach (Engagement::KEYWORDS as $keyword) {
|
||||
$search = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $search);
|
||||
}
|
||||
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $search]);
|
||||
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]);
|
||||
}
|
||||
|
||||
if (!empty($channel->includeTags)) {
|
||||
|
|
|
@ -56,7 +56,7 @@ use Friendica\Database\DBA;
|
|||
|
||||
// This file is required several times during the test in DbaDefinition which justifies this condition
|
||||
if (!defined('DB_UPDATE_VERSION')) {
|
||||
define('DB_UPDATE_VERSION', 1546);
|
||||
define('DB_UPDATE_VERSION', 1547);
|
||||
}
|
||||
|
||||
return [
|
||||
|
@ -1480,6 +1480,19 @@ return [
|
|||
"PRIMARY" => ["uri-id", "id"],
|
||||
]
|
||||
],
|
||||
"post-searchindex" => [
|
||||
"comment" => "Content for all posts",
|
||||
"fields" => [
|
||||
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
|
||||
"network" => ["type" => "char(4)", "comment" => ""],
|
||||
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
|
||||
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
|
||||
],
|
||||
"indexes" => [
|
||||
"PRIMARY" => ["uri-id"],
|
||||
"searchtext" => ["FULLTEXT", "searchtext"],
|
||||
]
|
||||
],
|
||||
"post-tag" => [
|
||||
"comment" => "post relation to tags",
|
||||
"fields" => [
|
||||
|
|
Loading…
Reference in a new issue