README updated, code restructured

This commit is contained in:
Michael 2019-11-25 22:23:19 +00:00
parent 0465de62dc
commit 9bc1f9cee6
2 changed files with 60 additions and 128 deletions

View file

@ -1,97 +1,28 @@
NSFW Discourse connector
===================
"Not safe for work" The Discourse connector detects incoming mails from Discourse and
improves them by fetching the content and user data via API.
Scans the message content for the string 'nsfw' Prerequisites
(case insensitive) and if found replaces the content -------------
with a "click to open/close" link, default is closed. The user has to configure the mail interface so that the user's mails
can be fetched via Friendica. Then the user has to activate the
mailing list mode in Discourse.
If you click on the 'Not safe for work' addon under The mailing list mode in Discourse knows two different options:
/settings/addon a text field appears, where you can 1. Get all posts - including your own. This will create duplicates
extend the list of search terms. The terms must be if you post via Friendica.
separated by commas. 2. Don't get your own posts. Then you will miss all your posts
that you made directly on Discourse. Since you cannot create
a new post via this connector (only comments are possible)
this is not a good choice either.
It is also possible to enter profile URLs as values. Known problems
This is quite useful for the case, that you perhaps --------------
don't want to see postings by person_A, but person_B - You can't create starting posts
is one of your contacts and person_B used to reshare - Either you don't get your own posts you made directly on Discourse
postings by person_A. or you do get duplicates for every post you made via Friendica.
- Non public categories are currently only working via some workaround
You can also make use of regular expressions. without the API, which most likely will cause some content problems.
They also have to be separated by commas and the - Links to Discourse profiles in the posts are invalid.
regex itself has to be enclosed with slashes:
... nsfw, /<REGEX>/, politics,...
---------------
A few examples:
---------------
1)
Let's say you don't want to see postings which contain
the term 'fake news'
The term could appear in several ways:
fakenews, fake news, fake_news, fake-news, f@ke news,
f4ke news, f4k3 n3ws, and so on and so on and so on.
You could write every possible version of it as single
item into your NSFW-filter list, but this can also be
done with a single regex, which matches all of them:
/f[@4a]k[3e][-_ ]n[3e]w[sz]/
2)
Another use case could be, that you are simply not
interested in postings about christmas.
/christmas(?:[-_ ]?(?:tree|time|eve|pudding))?/
ATTENTION:
It is absolutely important, that you use grouping
parentheses instead of capturing parentheses!!
Grouping parentheses are:
(?: )
If you use capturing parentheses, which are
( )
it will produce errors and the regex won't work and
at least your targets will not get collapsed.
3)
Another possibility is the usage of a so called
'lookbehind' construct. I'll give an example followed
by a description:
/(?<!the )\badvent\b/
The \b is a word boundary, which matches the beginning
and the end of a word. The simple pattern of 'advent'
would match advent itself, but also adventure.
This can be prevented by
/\badvent\b/
The first part of the regex above
(?<!the )
is a negative lookbehind. It makes \badvent\b only
match, if there is no 'the ' before \badvent\b or in
words:
It looks for 'advent', but doesn't match 'the advent'.
For more information, take a look at the PCRE regex
dialect.

View file

@ -26,17 +26,15 @@ Use Friendica\Util\DateTimeFormat;
function discourse_install() function discourse_install()
{ {
Hook::register('email_getmessage', __FILE__, 'discourse_email_getmessage'); Hook::register('email_getmessage', __FILE__, 'discourse_email_getmessage');
Hook::register('email_getmessage_end', __FILE__, 'discourse_email_getmessage_end'); Hook::register('addon_settings', __FILE__, 'discourse_settings');
Hook::register('addon_settings', __FILE__, 'discourse_addon_settings'); Hook::register('addon_settings_post', __FILE__, 'discourse_settings_post');
Hook::register('addon_settings_post', __FILE__, 'discourse_addon_settings_post');
} }
function discourse_uninstall() function discourse_uninstall()
{ {
Hook::unregister('email_getmessage', __FILE__, 'discourse_email_getmessage'); Hook::unregister('email_getmessage', __FILE__, 'discourse_email_getmessage');
Hook::unregister('email_getmessage_end', __FILE__, 'discourse_email_getmessage_end'); Hook::unregister('connector_settings', __FILE__, 'discourse_settings');
Hook::unregister('addon_settings', __FILE__, 'discourse_addon_settings'); Hook::unregister('connector_settings_post', __FILE__, 'discourse_settings_post');
Hook::unregister('addon_settings_post', __FILE__, 'discourse_addon_settings_post');
} }
function discourse_addon_settings(App $a, &$s) function discourse_addon_settings(App $a, &$s)
@ -51,28 +49,35 @@ function discourse_email_getmessage(App $a, &$message)
{ {
// Logger::info('Got raw message', $message); // Logger::info('Got raw message', $message);
/* if (preg_match('=topic/(.*)/(.*)@(.*)=', $message['item']['uri'], $matches)) { // We do assume that all Discourse servers are running with SSL
Logger::info('Got post data', ['topic' => $matches[1], 'post' => $matches[2], 'host' => $matches[3]]); if (preg_match('=topic/(.*\d)/(.*\d)@(.*)=', $message['item']['uri'], $matches) &&
if (discourse_fetch_post_from_api($message, $matches[2], $matches[3])) { discourse_fetch_post_from_api($message, $matches[2], $matches[3])) {
Logger::info('Fetched comment via API', ['host' => $matches[3], 'topic' => $matches[1], 'post' => $matches[2]]);
return; return;
} }
if (preg_match('=topic/(.*\d)@(.*)=', $message['item']['uri'], $matches) &&
discourse_fetch_topic_from_api($message, 'https://' . $matches[2], $matches[1], 1)) {
discourse_fetch_post_from_api($message, $matches[2], $matches[3]);
Logger::info('Fetched starting post via API', ['host' => $matches[2], 'topic' => $matches[1]]);
return;
} }
*/
// Search in the text part for the link to the discourse entry and the text body // Search in the text part for the link to the discourse entry and the text body
// The text body is used as alternative, if the fetched HTML isn't working
if (!empty($message['text'])) { if (!empty($message['text'])) {
$message = discourse_get_text($message); $message = discourse_get_text($message);
} }
if (!empty($message['item']['plink'])) { if (empty($message['item']['plink']) || !preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) {
if (preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) { Logger::info('This is no Discourse post');
}
if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) { if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) {
Logger::info('Fetched post from via API', ['host' => $matches[1], 'topic' => $matches[2], 'id' => $matches[3]]);
return; return;
} }
}
} Logger::info('Fallback mode');
Logger::info('Stop');
die('Test');
// Search in the HTML part for the discourse entry and the author profile // Search in the HTML part for the discourse entry and the author profile
if (!empty($message['html'])) { if (!empty($message['html'])) {
$message = discourse_get_html($message); $message = discourse_get_html($message);
@ -98,8 +103,7 @@ function discourse_fetch_post($host, $topic, $pid)
$posts = $data['post_stream']['posts']; $posts = $data['post_stream']['posts'];
foreach($posts as $post) { foreach($posts as $post) {
if ($post['post_number'] != $pid) { if ($post['post_number'] != $pid) {
// Test /// @todo Possibly fetch missing posts here
discourse_get_user($post, $host);
continue; continue;
} }
Logger::info('Got post data from topic', $post); Logger::info('Got post data from topic', $post);
@ -146,9 +150,11 @@ function discourse_get_user($post, $hostaddr)
{ {
$host = parse_url($hostaddr, PHP_URL_HOST); $host = parse_url($hostaddr, PHP_URL_HOST);
// Currently unused contact fields:
// - display_username
// - user_id
$contact = []; $contact = [];
// display_username
// user_id
$contact['uid'] = 0; $contact['uid'] = 0;
$contact['network'] = Protocol::DISCOURSE; $contact['network'] = Protocol::DISCOURSE;
$contact['name'] = $contact['nick'] = $post['username']; $contact['name'] = $contact['nick'] = $post['username'];
@ -198,7 +204,7 @@ function discourse_process_post($message, $post, $hostaddr)
if ($post['post_number'] == 1) { if ($post['post_number'] == 1) {
$message['item']['parent-uri'] = $message['item']['uri'] = 'topic/' . $post['topic_id'] . '@' . $host; $message['item']['parent-uri'] = $message['item']['uri'] = 'topic/' . $post['topic_id'] . '@' . $host;
// To-Do: Thread information /// @ToDo Fetch thread information
} else { } else {
$message['item']['uri'] = 'topic/' . $post['topic_id'] . '/' . $post['id'] . '@' . $host; $message['item']['uri'] = 'topic/' . $post['topic_id'] . '/' . $post['id'] . '@' . $host;
unset($message['item']['title']); unset($message['item']['title']);
@ -232,7 +238,7 @@ function discourse_get_html($message)
Logger::info('Found html body', ['html' => $message['html']]); Logger::info('Found html body', ['html' => $message['html']]);
$profile = discourse_get_profile($xpath); $profile = discourse_get_profile($xpath);
if (!empty($profile)) { if (!empty($profile['url'])) {
Logger::info('Found profile', $profile); Logger::info('Found profile', $profile);
$message['item']['author-id'] = Contact::getIdForURL($profile['url'], 0, true, $profile); $message['item']['author-id'] = Contact::getIdForURL($profile['url'], 0, true, $profile);
$message['item']['author-link'] = $profile['url']; $message['item']['author-link'] = $profile['url'];
@ -301,8 +307,3 @@ function discourse_get_profile($xpath)
} }
return $profile; return $profile;
} }
function discourse_email_getmessage_end(App $a, &$message)
{
// Logger::info('Got converted message', $message);
}