Move getTags to BBCode

move getTags function to BBCode class and update calls.
This commit is contained in:
Adam Magness 2018-11-09 13:24:19 -05:00
parent e4354a0d7b
commit b5a97c1abe
5 changed files with 78 additions and 78 deletions

View file

@ -348,7 +348,7 @@ function item_post(App $a) {
$str_tags = '';
$inform = '';
$tags = Strings::getTags($body);
$tags = BBCode::getTags($body);
// Add a tag if the parent contact is from ActivityPub or OStatus (This will notify them)
if ($parent && in_array($thr_parent_contact['network'], [Protocol::OSTATUS, Protocol::ACTIVITYPUB])) {

View file

@ -525,7 +525,7 @@ function photos_post(App $a)
}
$taginfo = [];
$tags = Strings::getTags($rawtags);
$tags = BBCode::getTags($rawtags);
if (count($tags)) {
foreach ($tags as $tag) {

View file

@ -1911,4 +1911,78 @@ class BBCode extends BaseObject
return $text;
}
/**
* @brief Pull out all #hashtags and @person tags from $string.
*
* We also get @person@domain.com - which would make
* the regex quite complicated as tags can also
* end a sentence. So we'll run through our results
* and strip the period from any tags which end with one.
* Returns array of tags found, or empty array.
*
* @param string $string Post content
*
* @return array List of tag and person names
*/
public static function getTags($string)
{
$ret = [];
// Convert hashtag links to hashtags
$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
// ignore anything in a code block
$string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);
// Force line feeds at bbtags
$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
// ignore anything in a bbtag
$string = preg_replace('/\[(.*?)\]/sm', '', $string);
// Match full names against @tags including the space between first and last
// We will look these up afterward to see if they are full names or not recognisable.
if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
foreach ($matches[1] as $match) {
if (strstr($match, ']')) {
// we might be inside a bbcode color tag - leave it alone
continue;
}
if (substr($match, -1, 1) === '.') {
$ret[] = substr($match, 0, -1);
} else {
$ret[] = $match;
}
}
}
// Otherwise pull out single word tags. These can be @nickname, @first_last
// and #hash tags.
if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
foreach ($matches[1] as $match) {
if (strstr($match, ']')) {
// we might be inside a bbcode color tag - leave it alone
continue;
}
if (substr($match, -1, 1) === '.') {
$match = substr($match,0,-1);
}
// ignore strictly numeric tags like #1
if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
continue;
}
// try not to catch url fragments
if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) {
continue;
}
$ret[] = $match;
}
}
return $ret;
}
}

View file

@ -2403,7 +2403,7 @@ class Item extends BaseObject
public static function setHashtags(&$item)
{
$tags = Strings::getTags($item["body"]);
$tags = BBCode::getTags($item["body"]);
// No hashtags?
if (!count($tags)) {

View file

@ -279,80 +279,6 @@ class Strings
return base64_decode(strtr($s, '-_', '+/'));
}
/**
* @brief Pull out all #hashtags and @person tags from $string.
*
* We also get @person@domain.com - which would make
* the regex quite complicated as tags can also
* end a sentence. So we'll run through our results
* and strip the period from any tags which end with one.
* Returns array of tags found, or empty array.
*
* @param string $string Post content
*
* @return array List of tag and person names
*/
public static function getTags($string)
{
$ret = [];
// Convert hashtag links to hashtags
$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
// ignore anything in a code block
$string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);
// Force line feeds at bbtags
$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
// ignore anything in a bbtag
$string = preg_replace('/\[(.*?)\]/sm', '', $string);
// Match full names against @tags including the space between first and last
// We will look these up afterward to see if they are full names or not recognisable.
if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
foreach ($matches[1] as $match) {
if (strstr($match, ']')) {
// we might be inside a bbcode color tag - leave it alone
continue;
}
if (substr($match, -1, 1) === '.') {
$ret[] = substr($match, 0, -1);
} else {
$ret[] = $match;
}
}
}
// Otherwise pull out single word tags. These can be @nickname, @first_last
// and #hash tags.
if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
foreach ($matches[1] as $match) {
if (strstr($match, ']')) {
// we might be inside a bbcode color tag - leave it alone
continue;
}
if (substr($match, -1, 1) === '.') {
$match = substr($match,0,-1);
}
// ignore strictly numeric tags like #1
if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
continue;
}
// try not to catch url fragments
if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) {
continue;
}
$ret[] = $match;
}
}
return $ret;
}
/**
* @brief Normalize url
*