Discourse addon created

2019-11-23 23:43:59 +00:00 · 2019-11-23 23:43:59 +00:00 · da663db1cb
commit da663db1cb
parent 43503abc15
2 changed files with 321 additions and 0 deletions
--- a/discourse/README
+++ b/discourse/README
@ -0,0 +1,97 @@
 NSFW
 "Not safe for work"
 Scans the message content for the string 'nsfw' 
 (case insensitive) and if found replaces the content
 with a "click to open/close" link, default is closed.
 If you click on the 'Not safe for work' addon under
 /settings/addon a text field appears, where you can
 extend the list of search terms. The terms must be
 separated by commas.
 It is also possible to enter profile URLs as values.
 This is quite useful for the case, that you perhaps
 don't want to see postings by person_A, but person_B
 is one of your contacts and person_B used to reshare
 postings by person_A.
 You can also make use of regular expressions.
 They also have to be separated by commas and the
 regex itself has to be enclosed with slashes:
 	... nsfw, /<REGEX>/, politics,...
 ---------------
 A few examples:
 ---------------
 1)
 Let's say you don't want to see postings which contain
 the term 'fake news'
 The term could appear in several ways:
 fakenews, fake news, fake_news, fake-news, f@ke news,
 f4ke news, f4k3 n3ws, and so on and so on and so on.
 You could write every possible version of it as single
 item into your NSFW-filter list, but this can also be
 done with a single regex, which matches all of them:
 	/f[@4a]k[3e][-_ ]n[3e]w[sz]/
 2)
 Another use case could be, that you are simply not
 interested in postings about christmas.
 	/christmas(?:[-_ ]?(?:tree|time|eve|pudding))?/
 ATTENTION:
 It is absolutely important, that you use grouping
 parentheses instead of capturing parentheses!!
 Grouping parentheses are:
 	(?: )
 If you use capturing parentheses, which are
 	( )
 it will produce errors and the regex won't work and
 at least your targets will not get collapsed.
 3)
 Another possibility is the usage of a so-called
 'lookbehind' construct. I'll give an example followed
 by a description:
 	/(?<!the )\badvent\b/
 The \b is a word boundary, which matches the beginning
 and the end of a word. The simple pattern of 'advent'
 would match advent itself, but also adventure.
 This can be prevented by
 	/\badvent\b/
 The first part of the regex above
 	(?<!the )
 is a negative lookbehind. It makes \badvent\b only
 match, if there is no 'the ' before \badvent\b or in
 words:
 It looks for 'advent', but doesn't match 'the advent'.
 For more information, take a look at the PCRE regex
 dialect.
--- a/discourse/discourse.php
+++ b/discourse/discourse.php
@ -0,0 +1,224 @@
 <?php
 /**
 * Name: Discourse Mail Connector
 * Description: Improves mails from Discourse in mailing list mode
 * Version: 0.1
 * Author: Michael Vogel <http://pirati.ca/profile/heluecht>
 *
 */
 //use DOMDocument;
 //use DOMXPath;
 use Friendica\App;
 use Friendica\Core\Hook;
 use Friendica\Core\L10n;
 use Friendica\Core\Logger;
 use Friendica\Core\PConfig;
 use Friendica\Util\XML;
 use Friendica\Content\Text\Markdown;
 use Friendica\Util\Network;
 Use Friendica\Util\DateTimeFormat;
 /**
  * Registers all hook handlers provided by this addon.
  *
  * Each hook name is mapped to the function in this file that handles it.
  */
 function discourse_install()
 {
 	$handlers = [
 		'email_getmessage'     => 'discourse_email_getmessage',
 		'email_getmessage_end' => 'discourse_email_getmessage_end',
 		'addon_settings'       => 'discourse_addon_settings',
 		'addon_settings_post'  => 'discourse_addon_settings_post',
 	];
 	foreach ($handlers as $hook => $callback) {
 		Hook::register($hook, __FILE__, $callback);
 	}
 }
 /**
  * Removes all hook handlers that discourse_install() registered.
  */
 function discourse_uninstall()
 {
 	$handlers = [
 		'email_getmessage'     => 'discourse_email_getmessage',
 		'email_getmessage_end' => 'discourse_email_getmessage_end',
 		'addon_settings'       => 'discourse_addon_settings',
 		'addon_settings_post'  => 'discourse_addon_settings_post',
 	];
 	foreach ($handlers as $hook => $callback) {
 		Hook::unregister($hook, __FILE__, $callback);
 	}
 }
 /**
  * Handler for the 'addon_settings' hook (registered in discourse_install()).
  *
  * Currently a placeholder: the body is empty, so $s is left untouched.
  *
  * @param App    $a
  * @param string $s Passed by reference; presumably the settings output to
  *                  append to - TODO confirm against the hook caller
  */
 function discourse_addon_settings(App $a, &$s)
 {
 }
 /**
  * Handler for the 'addon_settings_post' hook (registered in discourse_install()).
  *
  * Currently a placeholder: the body is empty and nothing is stored.
  *
  * @param App $a
  */
 function discourse_addon_settings_post(App $a)
 {
 }
 /**
  * Handler for the 'email_getmessage' hook: replaces the mail content with
  * the content of the corresponding Discourse post where possible.
  *
  * The sources are tried in this order, stopping at the first success:
  *  1. The item URI has the form "topic/<topic>/<post>@<host>": the post is
  *     fetched via discourse_fetch_post_from_api().
  *  2. The text part is parsed for the body and the permalink; the permalink
  *     is then used to fetch the post via discourse_fetch_topic_from_api().
  *  3. The HTML part is parsed via discourse_get_html().
  *
  * @param App   $a
  * @param array $message Message data, modified in place. Keys read here:
  *                       'text', 'html' and 'item' ('uri', 'parent-uri', 'plink')
  */
 function discourse_email_getmessage(App $a, &$message)
 {
 	// Remove the title on comments, they don't serve any purpose there
 	if ($message['item']['parent-uri'] != $message['item']['uri']) {
 		unset($message['item']['title']);
 	}
 	if (preg_match('=topic/(.*)/(.*)@(.*)=', $message['item']['uri'], $matches)) {
 		Logger::info('Got post data', ['topic' => $matches[1], 'post' => $matches[2], 'host' => $matches[3]]);
 		if (discourse_fetch_post_from_api($message, $matches[2], $matches[3])) {
 			return;
 		}
 	}
 	// Search in the text part for the link to the discourse entry and the text body
 	// The text body is used as alternative, if the fetched HTML isn't working
 	if (!empty($message['text'])) {
 		discourse_get_text($message);
 	}
 	if (!empty($message['item']['plink'])) {
 		// Capture groups: 1 = base URL including the scheme, 2 = topic id, 3 = post number
 		if (preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) {
 			// Each capture group goes to its own parameter (host, topic, post number)
 			if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) {
 				return;
 			}
 		}
 	}
 	// Search in the HTML part for the discourse entry and the author profile
 	if (!empty($message['html'])) {
 		discourse_get_html($message);
 	}
 }
 /**
  * Fetches a single post of a topic from the Discourse API and applies it to
  * the message.
  *
  * @param array  $message Message data, modified in place on success
  * @param string $host    Base URL of the Discourse instance including the scheme
  *                        (it is used as the URL prefix without modification)
  * @param string $topic   Topic id
  * @param string $pid     Post number inside the topic (compared to 'post_number')
  *
  * @return bool true when the post was found and processed, false otherwise
  */
 function discourse_fetch_topic_from_api(&$message, $host, $topic, $pid)
 {
 	// NOTE(review): the Discourse API documentation names this parameter
 	// "post_ids[]" and expects post ids, not post numbers - confirm.
 	$url = $host . '/t/' . $topic . '/posts.json?posts_ids[]=' . $pid;
 	$curlResult = Network::curl($url);
 	if (!$curlResult->isSuccess()) {
 		return false;
 	}
 	$data = json_decode($curlResult->getBody(), true);
 	// Invalid JSON or an unexpected payload must not reach the loop below
 	if (!is_array($data) || empty($data['post_stream']['posts']) || !is_array($data['post_stream']['posts'])) {
 		return false;
 	}
 	foreach ($data['post_stream']['posts'] as $post) {
 		if (!isset($post['post_number']) || ($post['post_number'] != $pid)) {
 			continue;
 		}
 		Logger::info('Got post data from topic', $post);
 		discourse_process_post($message, $post);
 		return true;
 	}
 	return false;
 }
 /**
  * Fetches a single post by its id from the Discourse API and applies it to
  * the message.
  *
  * @param array  $message Message data, modified in place on success
  * @param string $post    Post id
  * @param string $host    Host name of the Discourse instance (without scheme,
  *                        "https://" is prepended)
  *
  * @return bool true when usable post data was fetched and processed, false otherwise
  */
 function discourse_fetch_post_from_api(&$message, $post, $host)
 {
 	$url = "https://" . $host . '/posts/' . $post . '.json';
 	$curlResult = Network::curl($url);
 	if (!$curlResult->isSuccess()) {
 		return false;
 	}
 	$data = json_decode($curlResult->getBody(), true);
 	// Reject invalid JSON, scalar payloads and payloads without the rendered
 	// post body, so that the caller can fall back to the mail content
 	if (!is_array($data) || !isset($data['cooked'])) {
 		return false;
 	}
 	discourse_process_post($message, $data);
 	Logger::info('Got API data', $message);
 	return true;
 }
 /**
  * Copies the relevant fields of a decoded Discourse post into the message.
  *
  * Only fields that are present in $post are applied, so an incomplete API
  * payload does not overwrite existing message data with null.
  *
  * @param array $message Message data, modified in place: 'html', 'text' and
  *                       ['item']['created'] are written
  * @param array $post    Decoded post data; keys read: 'cooked', 'raw', 'created_at'
  */
 function discourse_process_post(&$message, $post)
 {
 	// TODO: thread information (for 'post_number' == 1) and user information
 	// ('username', 'name') are not processed yet.
 	if (isset($post['cooked'])) {
 		$message['html'] = $post['cooked'];
 	}
 	if (isset($post['raw'])) {
 		$message['text'] = $post['raw'];
 	}
 	if (!empty($post['created_at'])) {
 		$message['item']['created'] = DateTimeFormat::utc($post['created_at']);
 	}
 }
 /**
  * Extracts the post body from the HTML part of the mail.
  *
  * The body is taken from the first 'div' preceding an 'hr' element. When no
  * such element exists, $message['html'] is left unchanged. The author
  * profile is looked up as well, but currently only logged.
  *
  * @param array $message Message data; 'html' is read and, when a body was
  *                       found, replaced by the extracted fragment
  */
 function discourse_get_html(&$message)
 {
 	$doc = new DOMDocument();
 	$doc->preserveWhiteSpace = false;
 	$html = mb_convert_encoding($message['html'], 'HTML-ENTITIES', "UTF-8");
 	// The @ suppresses the libxml warnings raised while parsing
 	@$doc->loadHTML($html, LIBXML_HTML_NODEFDTD);
 	$xpath = new DomXPath($doc);
 	// Fetch the first 'div' before the 'hr' -hopefully this fits for all systems
 	$result = $xpath->query("//hr//preceding::div[1]");
 	$node = ($result !== false) ? $result->item(0) : null;
 	if ($node !== null) {
 		// importNode() needs a real node, so this part only runs on a match
 		$doc2 = new DOMDocument();
 		$div = $doc2->importNode($node, true);
 		$doc2->appendChild($div);
 		$message['html'] = $doc2->saveHTML();
 		Logger::info('Found html body', ['html' => $message['html']]);
 	} else {
 		Logger::info('No html body found');
 	}
 	$profile = discourse_get_profile($xpath);
 	if (!empty($profile)) {
 		Logger::info('Found profile', $profile);
 /*
 		$message['item']['author-avatar'] = $contact['avatar'];
 		$message['item']['author-link'] = $profile['link'];
 		$message['item']['author-name'] = $profile['name'];
 */
 	}
 }
 /**
  * Splits the text part of the mail at the "---" separator line.
  *
  * The part before the separator becomes the message text (converted from
  * Markdown to BBCode); the part from the separator on is searched for the
  * first link in parentheses, which is stored as the item's plink.
  *
  * @param array $message Message data; 'text' is read and possibly replaced,
  *                       ['item']['plink'] is set when a link was found
  */
 function discourse_get_text(&$message)
 {
 	$content = str_replace("\r", '', $message['text']);
 	$separator = strpos($content, "\n---\n");
 	// A missing separator and a separator at offset 0 are treated alike
 	if (!($separator > 0)) {
 		Logger::info('No separator found', ['text' => $content]);
 		return;
 	}
 	$body = trim(substr($content, 0, $separator));
 	$message['text'] = $body;
 	Logger::info('Found text body', ['text' => $message['text']]);
 	$message['text'] = Markdown::toBBCode($body);
 	$footer = substr($content, $separator);
 	if (!preg_match('=\((http.*?)\)=', $footer, $link)) {
 		return;
 	}
 	$message['item']['plink'] = $link[1];
 	Logger::info('Found plink', ['plink' => $message['item']['plink']]);
 }
 /**
  * Searches the parsed HTML mail for the author's avatar, name and profile link.
  *
  * The avatar is the first 'img' following a 'td' that has non-empty 'src',
  * 'title', 'width' and 'height' attributes with equal width and height; its
  * title is used as the name. The profile link is the first 'a' following a
  * 'td' that has text content and whose 'href' contains "/<name>".
  *
  * @param DOMXPath $xpath XPath object of the parsed mail
  *
  * @return array Empty when no avatar was found, otherwise 'avatar' and 'name'
  *               plus 'link' when a matching link exists
  */
 function discourse_get_profile($xpath)
 {
 	$profile = [];
 	$list = $xpath->query("//td//following::img");
 	foreach ($list as $node) {
 		$attr = [];
 		foreach ($node->attributes as $attribute) {
 			$attr[$attribute->name] = $attribute->value;
 		}
 		if (!empty($attr['src']) && !empty($attr['title'])
 			&& !empty($attr['width']) && !empty($attr['height'])
 			&& ($attr['width'] == $attr['height'])) {
 			$profile = ['avatar' => $attr['src'], 'name' => $attr['title']];
 			break;
 		}
 	}
 	// Without a name there is nothing to match the links against; searching
 	// for a bare "/" would accept nearly every link
 	if (empty($profile['name'])) {
 		return $profile;
 	}
 	$needle = '/' . $profile['name'];
 	$list = $xpath->query("//td//following::a");
 	foreach ($list as $node) {
 		if (!empty(trim($node->textContent)) && $node->attributes->length) {
 			$attr = [];
 			foreach ($node->attributes as $attribute) {
 				$attr[$attribute->name] = $attribute->value;
 			}
 			// Compare against false: a match at offset 0 is a valid match, too
 			if (!empty($attr['href']) && (strpos($attr['href'], $needle) !== false)) {
 				$profile['link'] = $attr['href'];
 				break;
 			}
 		}
 	}
 	return $profile;
 }
 /**
  * Handler for the 'email_getmessage_end' hook (registered in discourse_install()).
  *
  * Currently a placeholder: the only statement is a disabled debug log call,
  * so $message is left untouched.
  *
  * @param App   $a
  * @param array $message Message data, passed by reference
  */
 function discourse_email_getmessage_end(App $a, &$message)
 {
 //	Logger::info('Got converted message', $message);
 }