From aae57a69030d582c1214219cd4e6966bfff66712 Mon Sep 17 00:00:00 2001 From: Michael <heluecht@pirati.ca> Date: Wed, 27 Nov 2024 12:51:32 +0000 Subject: [PATCH] Jetstream daemon to to receive atproto messages --- bin/jetstream.php | 184 ++++++ src/DI.php | 29 + src/Protocol/ATProtocol.php | 427 ++++++++++++ src/Protocol/ATProtocol/Actor.php | 203 ++++++ src/Protocol/ATProtocol/Jetstream.php | 404 ++++++++++++ src/Protocol/ATProtocol/Processor.php | 904 ++++++++++++++++++++++++++ static/defaults.config.php | 9 + 7 files changed, 2160 insertions(+) create mode 100755 bin/jetstream.php create mode 100644 src/Protocol/ATProtocol.php create mode 100755 src/Protocol/ATProtocol/Actor.php create mode 100755 src/Protocol/ATProtocol/Jetstream.php create mode 100755 src/Protocol/ATProtocol/Processor.php diff --git a/bin/jetstream.php b/bin/jetstream.php new file mode 100755 index 0000000000..c4075ff652 --- /dev/null +++ b/bin/jetstream.php @@ -0,0 +1,184 @@ +#!/usr/bin/env php +<?php +/** + * Copyright (C) 2010-2024, the Friendica project + * SPDX-FileCopyrightText: 2010-2024 the Friendica project + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + */ + +use Dice\Dice; +use Friendica\Core\Addon; +use Friendica\Core\Hook; +use Friendica\Core\Logger; +use Friendica\Database\DBA; +use Friendica\DI; +use Psr\Log\LoggerInterface; +use Friendica\Protocol\ATProtocol\Jetstream; + +if (php_sapi_name() !== 'cli') { + header($_SERVER["SERVER_PROTOCOL"] . ' 403 Forbidden'); + exit(); +} + +// Ensure that Jetstream.php is executed from the base path of the installation +if (!file_exists('index.php') && (sizeof((array)$_SERVER['argv']) != 0)) { + $directory = dirname($_SERVER['argv'][0]); + + if (substr($directory, 0, 1) != '/') { + $directory = $_SERVER['PWD'] . '/' . $directory; + } + $directory = realpath($directory . '/..'); + + chdir($directory); +} + +require dirname(__DIR__) . '/vendor/autoload.php'; + +$dice = (new Dice())->addRules(include __DIR__ . '/../static/dependencies.config.php'); +/** @var \Friendica\Core\Addon\Capability\ICanLoadAddons $addonLoader */ +$addonLoader = $dice->create(\Friendica\Core\Addon\Capability\ICanLoadAddons::class); +$dice = $dice->addRules($addonLoader->getActiveAddonConfig('dependencies')); +$dice = $dice->addRule(LoggerInterface::class, ['constructParams' => [Logger\Capability\LogChannel::DAEMON]]); + +DI::init($dice); +\Friendica\Core\Logger\Handler\ErrorHandler::register($dice->create(\Psr\Log\LoggerInterface::class)); +Addon::loadAddons(); +Hook::loadHooks(); +DI::config()->reload(); + +if (DI::mode()->isInstall()) { + die("Friendica isn't properly installed yet.\n"); +} + +if (empty(DI::config()->get('jetstream', 'pidfile'))) { + die(<<<TXT +Please set jetstream.pidfile in config/local.config.php. For example: + + 'jetstream' => [ + 'pidfile' => '/path/to/jetstream.pid', + ], +TXT); +} + +if (!Addon::isEnabled('bluesky')) { + die("Bluesky has to be enabled.\n"); +} + +$pidfile = DI::config()->get('jetstream', 'pidfile'); + +if (in_array('start', (array)$_SERVER['argv'])) { + $mode = 'start'; +} + +if (in_array('stop', (array)$_SERVER['argv'])) { + $mode = 'stop'; +} + +if (in_array('status', (array)$_SERVER['argv'])) { + $mode = 'status'; +} + +if (!isset($mode)) { + die("Please use either 'start', 'stop' or 'status'.\n"); +} + +// Get options +$shortopts = 'f'; +$longopts = ['foreground']; +$options = getopt($shortopts, $longopts); + +$foreground = array_key_exists('f', $options) || array_key_exists('foreground', $options); + +if (empty($_SERVER['argv'][0])) { + die("Unexpected script behaviour. This message should never occur.\n"); +} + +$pid = null; + +if (is_readable($pidfile)) { + $pid = intval(file_get_contents($pidfile)); +} + +if (empty($pid) && in_array($mode, ['stop', 'status'])) { + die("Pidfile wasn't found. Is jetstream running?\n"); +} + +if ($mode == 'status') { + if (posix_kill($pid, 0)) { + die("Jetstream process $pid is running.\n"); + } + + unlink($pidfile); + + die("Jetstream process $pid isn't running.\n"); +} + +if ($mode == 'stop') { + posix_kill($pid, SIGTERM); + + unlink($pidfile); + + Logger::notice('Jetstream process was killed', ['pid' => $pid]); + + die("Jetstream process $pid was killed.\n"); +} + +if (!empty($pid) && posix_kill($pid, 0)) { + die("Jetstream process $pid is already running.\n"); +} + +Logger::notice('Starting jetstream daemon.', ['pid' => $pid]); + +if (!$foreground) { + echo "Starting jetstream daemon.\n"; + + DBA::disconnect(); + + // Fork a daemon process + $pid = pcntl_fork(); + if ($pid == -1) { + echo "Daemon couldn't be forked.\n"; + Logger::warning('Could not fork daemon'); + exit(1); + } elseif ($pid) { + // The parent process continues here + if (!file_put_contents($pidfile, $pid)) { + echo "Pid file wasn't written.\n"; + Logger::warning('Could not store pid file'); + posix_kill($pid, SIGTERM); + exit(1); + } + echo 'Child process started with pid ' . $pid . ".\n"; + Logger::notice('Child process started', ['pid' => $pid]); + exit(0); + } + + // We now are in the child process + register_shutdown_function('shutdown'); + + // Make the child the main process, detach it from the terminal + if (posix_setsid() < 0) { + return; + } + + // Closing all existing connections with the outside + fclose(STDIN); + + // And now connect the database again + DBA::connect(); +} + +// Just to be sure that this script really runs endlessly +set_time_limit(0); + +// Now running as a daemon. +$jetstream = $dice->create(Jetstream::class); +$jetstream->listen(); + +function shutdown() +{ + posix_kill(posix_getpid(), SIGTERM); + posix_kill(posix_getpid(), SIGHUP); +} diff --git a/src/DI.php b/src/DI.php index 46010ce62b..9e1185b5d8 100644 --- a/src/DI.php +++ b/src/DI.php @@ -13,6 +13,7 @@ use Friendica\Core\Logger\Util\LoggerSettingsCheck; use Friendica\Core\Session\Capability\IHandleSessions; use Friendica\Core\Session\Capability\IHandleUserSessions; use Friendica\Navigation\SystemMessages; +use Friendica\Protocol\ATProtocol; use Psr\Log\LoggerInterface; /** @@ -143,6 +144,34 @@ abstract class DI return self::$dice->create(App\Router::class); } + // + // "AtProtocol" namespace instances + // + + /** + * @return AtProtocol + */ + public static function atProtocol() + { + return self::$dice->create(ATProtocol::class); + } + + /** + * @return AtProtocol\Arguments + */ + public static function atpActor() + { + return self::$dice->create(ATProtocol\Actor::class); + } + + /** + * @return AtProtocol\Processor + */ + public static function atpProcessor() + { + return self::$dice->create(ATProtocol\Processor::class); + } + // // "Content" namespace instances // diff --git a/src/Protocol/ATProtocol.php b/src/Protocol/ATProtocol.php new file mode 100644 index 0000000000..139453224f --- /dev/null +++ b/src/Protocol/ATProtocol.php @@ -0,0 +1,427 @@ +<?php + +// Copyright (C) 2010-2024, the Friendica project +// SPDX-FileCopyrightText: 2010-2024 the Friendica project +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +namespace Friendica\Protocol; + +use DOMDocument; +use DOMXPath; +use Friendica\Core\Config\Capability\IManageConfigValues; +use Friendica\Core\PConfig\Capability\IManagePersonalConfigValues; +use Friendica\Core\Protocol; +use Friendica\Database\Database; +use Friendica\Model\Item; +use Friendica\Model\User; +use Friendica\Network\HTTPClient\Capability\ICanSendHttpRequests; +use Friendica\Network\HTTPClient\Client\HttpClientAccept; +use Friendica\Network\HTTPClient\Client\HttpClientOptions; +use Friendica\Network\HTTPClient\Client\HttpClientRequest; +use Friendica\Util\DateTimeFormat; +use Psr\Log\LoggerInterface; +use stdClass; + +/** + * Base class for the ATProtocol + * @see https://atproto.com/ + */ +final class ATProtocol +{ + const STATUS_UNKNOWN = 0; + const STATUS_TOKEN_OK = 1; + const STATUS_SUCCESS = 2; + const STATUS_API_FAIL = 10; + const STATUS_DID_FAIL = 11; + const STATUS_PDS_FAIL = 12; + const STATUS_TOKEN_FAIL = 13; + + const APPVIEW_API = 'https://public.api.bsky.app'; // Path to the public Bluesky AppView API. + const DIRECTORY = 'https://plc.directory'; // Path to the directory server service to fetch the PDS of a given DID + const WEB = 'https://bsky.app'; // Path to the web interface with the user profile and posts + const HOSTNAME = 'bsky.social'; // Host name to be added to the handle if incomplete + + /** @var LoggerInterface */ + private $logger; + + /** @var Database */ + private $db; + + /** @var \Friendica\Core\Config\Capability\IManageConfigValues */ + private $config; + + /** @var IManagePersonalConfigValue */ + private $pConfig; + + /** @var ICanSendHttpRequests */ + private $httpClient; + + public function __construct(LoggerInterface $logger, Database $database, IManageConfigValues $config, IManagePersonalConfigValues $pConfig, ICanSendHttpRequests $httpClient) + { + $this->logger = $logger; + $this->db = $database; + $this->config = $config; + $this->pConfig = $pConfig; + $this->httpClient = $httpClient; + } + + public function getUids(): array + { + $uids = []; + $abandon_days = intval($this->config->get('system', 'account_abandon_days')); + if ($abandon_days < 1) { + $abandon_days = 0; + } + + $abandon_limit = date(DateTimeFormat::MYSQL, time() - $abandon_days * 86400); + + $pconfigs = $this->db->selectToArray('pconfig', [], ["`cat` = ? AND `k` = ? AND `v`", 'bluesky', 'import']); + foreach ($pconfigs as $pconfig) { + if (empty($this->getUserDid($pconfig['uid']))) { + continue; + } + + if ($abandon_days != 0) { + if (!$this->db->exists('user', ["`uid` = ? AND `login_date` >= ?", $pconfig['uid'], $abandon_limit])) { + continue; + } + } + $uids[] = $pconfig['uid']; + } + return $uids; + } + + public function XRPCGet(string $url, array $parameters = [], int $uid = 0): ?stdClass + { + if (!empty($parameters)) { + $url .= '?' . http_build_query($parameters); + } + + if ($uid == 0) { + return $this->get(ATProtocol::APPVIEW_API . '/xrpc/' . $url); + } + + $pds = $this->getUserPds($uid); + if (empty($pds)) { + return null; + } + + $headers = ['Authorization' => ['Bearer ' . $this->getUserToken($uid)]]; + + $languages = User::getWantedLanguages($uid); + if (!empty($languages)) { + $headers['Accept-Language'] = implode(',', $languages); + } + + $data = $this->get($pds . '/xrpc/' . $url, [HttpClientOptions::HEADERS => $headers]); + $this->pConfig->set($uid, 'bluesky', 'status', is_null($data) ? self::STATUS_API_FAIL : self::STATUS_SUCCESS); + return $data; + } + + public function get(string $url, array $opts = []): ?stdClass + { + try { + $curlResult = $this->httpClient->get($url, HttpClientAccept::JSON, $opts); + } catch (\Exception $e) { + $this->logger->notice('Exception on get', ['url' => $url, 'exception' => $e]); + return null; + } + + $data = json_decode($curlResult->getBodyString()); + if (!$curlResult->isSuccess()) { + $this->logger->notice('API Error', ['url' => $url, 'code' => $curlResult->getReturnCode(), 'error' => $data ?: $curlResult->getBodyString()]); + if (!$data) { + return null; + } + $data->code = $curlResult->getReturnCode(); + } + + Item::incrementInbound(Protocol::BLUESKY); + return $data; + } + + public function XRPCPost(int $uid, string $url, $parameters): ?stdClass + { + $data = $this->post($uid, '/xrpc/' . $url, json_encode($parameters), ['Content-type' => 'application/json', 'Authorization' => ['Bearer ' . $this->getUserToken($uid)]]); + return $data; + } + + private function post(int $uid, string $url, string $params, array $headers): ?stdClass + { + $pds = $this->getUserPds($uid); + if (empty($pds)) { + return null; + } + + try { + $curlResult = $this->httpClient->post($pds . $url, $params, $headers); + } catch (\Exception $e) { + $this->logger->notice('Exception on post', ['exception' => $e]); + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_API_FAIL); + return null; + } + + $data = json_decode($curlResult->getBodyString()); + if (!$curlResult->isSuccess()) { + $this->logger->notice('API Error', ['url' => $url, 'code' => $curlResult->getReturnCode(), 'error' => $data ?: $curlResult->getBodyString()]); + if (!$data) { + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_API_FAIL); + return null; + } + $data->code = $curlResult->getReturnCode(); + } + + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_SUCCESS); + Item::incrementOutbound(Protocol::BLUESKY); + return $data; + } + + private function getUserPds(int $uid): ?string + { + if ($uid == 0) { + return self::APPVIEW_API; + } + + $pds = $this->pConfig->get($uid, 'bluesky', 'pds'); + if (!empty($pds)) { + return $pds; + } + + $did = $this->getUserDid($uid); + if (empty($did)) { + return null; + } + + $pds = $this->getPdsOfDid($did); + if (empty($pds)) { + return null; + } + + $this->pConfig->set($uid, 'bluesky', 'pds', $pds); + return $pds; + } + + public function getUserDid(int $uid, bool $refresh = false): ?string + { + if (!$this->pConfig->get($uid, 'bluesky', 'post')) { + return null; + } + + if (!$refresh) { + $did = $this->pConfig->get($uid, 'bluesky', 'did'); + if (!empty($did)) { + return $did; + } + } + + $handle = $this->pConfig->get($uid, 'bluesky', 'handle'); + if (empty($handle)) { + return null; + } + + $did = $this->getDid($handle); + if (empty($did)) { + return null; + } + + $this->logger->debug('Got DID for user', ['uid' => $uid, 'handle' => $handle, 'did' => $did]); + $this->pConfig->set($uid, 'bluesky', 'did', $did); + return $did; + } + + private function getDid(string $handle): string + { + if ($handle == '') { + return ''; + } + + if (strpos($handle, '.') === false) { + $handle .= '.' . self::HOSTNAME; + } + + // At first we use the AppView API which *should* cover all cases. + $data = $this->get(self::APPVIEW_API . '/xrpc/com.atproto.identity.resolveHandle?handle=' . urlencode($handle)); + if (!empty($data) && !empty($data->did)) { + $this->logger->debug('Got DID by system PDS call', ['handle' => $handle, 'did' => $data->did]); + return $data->did; + } + + // Then we query the DNS, which is used for third party handles (DNS should be faster than wellknown) + $did = $this->getDidByDns($handle); + if ($did != '') { + $this->logger->debug('Got DID by DNS', ['handle' => $handle, 'did' => $did]); + return $did; + } + + // Then we query wellknown, which should mostly cover the rest. + $did = $this->getDidByWellknown($handle); + if ($did != '') { + $this->logger->debug('Got DID by wellknown', ['handle' => $handle, 'did' => $did]); + return $did; + } + + $this->logger->notice('No DID detected', ['handle' => $handle]); + return ''; + } + + public function getDidByProfile(string $url): string + { + if (preg_match('#^' . self::WEB . '/profile/(.+)#', $url, $matches)) { + $did = $this->getDid($matches[1]); + if (!empty($did)) { + return $did; + } + } + try { + $curlResult = $this->httpClient->get($url, HttpClientAccept::HTML, [HttpClientOptions::REQUEST => HttpClientRequest::CONTACTINFO]); + } catch (\Throwable $th) { + return ''; + } + if (!$curlResult->isSuccess()) { + return ''; + } + $profile = $curlResult->getBodyString(); + if (empty($profile)) { + return ''; + } + + $doc = new DOMDocument(); + try { + @$doc->loadHTML($profile); + } catch (\Throwable $th) { + return ''; + } + $xpath = new DOMXPath($doc); + $list = $xpath->query('//p[@id]'); + foreach ($list as $node) { + foreach ($node->attributes as $attribute) { + if ($attribute->name == 'id') { + $ids[$attribute->value] = $node->textContent; + } + } + } + + if (empty($ids['bsky_handle']) || empty($ids['bsky_did'])) { + return ''; + } + + if (!$this->isValidDid($ids['bsky_did'], $ids['bsky_handle'])) { + $this->logger->notice('Invalid DID', ['handle' => $ids['bsky_handle'], 'did' => $ids['bsky_did']]); + return ''; + } + + return $ids['bsky_did']; + } + + private function getDidByWellknown(string $handle): string + { + $curlResult = $this->httpClient->get('http://' . $handle . '/.well-known/atproto-did'); + if ($curlResult->isSuccess() && substr($curlResult->getBodyString(), 0, 4) == 'did:') { + $did = $curlResult->getBodyString(); + if (!$this->isValidDid($did, $handle)) { + $this->logger->notice('Invalid DID', ['handle' => $handle, 'did' => $did]); + return ''; + } + return $did; + } + return ''; + } + + private function getDidByDns(string $handle): string + { + $records = @dns_get_record('_atproto.' . $handle . '.', DNS_TXT); + if (empty($records)) { + return ''; + } + foreach ($records as $record) { + if (!empty($record['txt']) && substr($record['txt'], 0, 4) == 'did=') { + $did = substr($record['txt'], 4); + if (!$this->isValidDid($did, $handle)) { + $this->logger->notice('Invalid DID', ['handle' => $handle, 'did' => $did]); + return ''; + } + return $did; + } + } + return ''; + } + + private function getPdsOfDid(string $did): ?string + { + $data = $this->get(self::DIRECTORY . '/' . $did); + if (empty($data) || empty($data->service)) { + return null; + } + + foreach ($data->service as $service) { + if (($service->id == '#atproto_pds') && ($service->type == 'AtprotoPersonalDataServer') && !empty($service->serviceEndpoint)) { + return $service->serviceEndpoint; + } + } + + return null; + } + + private function isValidDid(string $did, string $handle): bool + { + $data = $this->get(self::DIRECTORY . '/' . $did); + if (empty($data) || empty($data->alsoKnownAs)) { + return false; + } + + return in_array('at://' . $handle, $data->alsoKnownAs); + } + + private function getUserToken(int $uid): string + { + $token = $this->pConfig->get($uid, 'bluesky', 'access_token'); + $created = $this->pConfig->get($uid, 'bluesky', 'token_created'); + if (empty($token)) { + return ''; + } + + if ($created + 300 < time()) { + return $this->refreshUserToken($uid); + } + return $token; + } + + private function refreshUserToken(int $uid): string + { + $token = $this->pConfig->get($uid, 'bluesky', 'refresh_token'); + + $data = $this->post($uid, '/xrpc/com.atproto.server.refreshSession', '', ['Authorization' => ['Bearer ' . $token]]); + if (empty($data) || empty($data->accessJwt)) { + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_TOKEN_FAIL); + return ''; + } + + $this->logger->debug('Refreshed token', ['return' => $data]); + $this->pConfig->set($uid, 'bluesky', 'access_token', $data->accessJwt); + $this->pConfig->set($uid, 'bluesky', 'refresh_token', $data->refreshJwt); + $this->pConfig->set($uid, 'bluesky', 'token_created', time()); + return $data->accessJwt; + } + + public function createUserToken(int $uid, string $password): string + { + $did = $this->getUserDid($uid); + if (empty($did)) { + return ''; + } + + $data = $this->post($uid, '/xrpc/com.atproto.server.createSession', json_encode(['identifier' => $did, 'password' => $password]), ['Content-type' => 'application/json']); + if (empty($data) || empty($data->accessJwt)) { + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_TOKEN_FAIL); + return ''; + } + + $this->logger->debug('Created token', ['return' => $data]); + $this->pConfig->set($uid, 'bluesky', 'access_token', $data->accessJwt); + $this->pConfig->set($uid, 'bluesky', 'refresh_token', $data->refreshJwt); + $this->pConfig->set($uid, 'bluesky', 'token_created', time()); + $this->pConfig->set($uid, 'bluesky', 'status', self::STATUS_TOKEN_OK); + return $data->accessJwt; + } +} diff --git a/src/Protocol/ATProtocol/Actor.php b/src/Protocol/ATProtocol/Actor.php new file mode 100755 index 0000000000..e3fdd7e185 --- /dev/null +++ b/src/Protocol/ATProtocol/Actor.php @@ -0,0 +1,203 @@ +#!/usr/bin/env php +<?php +/** + * Copyright (C) 2010-2024, the Friendica project + * SPDX-FileCopyrightText: 2010-2024 the Friendica project + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + */ + +namespace Friendica\Protocol\ATProtocol; + +use Friendica\Content\Text\HTML; +use Friendica\Core\Protocol; +use Friendica\Model\Contact; +use Friendica\Model\GServer; +use Friendica\Protocol\ATProtocol; +use Friendica\Util\DateTimeFormat; +use Psr\Log\LoggerInterface; + +/** + * Class to handle AT Protocol actors + */ +class Actor +{ + /** @var LoggerInterface */ + private $logger; + + /** @var ATProtocol */ + private $atprotocol; + + public function __construct(LoggerInterface $logger, ATProtocol $atprotocol) + { + $this->logger = $logger; + $this->atprotocol = $atprotocol; + } + + public function syncContacts(int $uid) + { + $this->logger->info('Sync contacts for user - start', ['uid' => $uid]); + $contacts = Contact::selectToArray(['id', 'url', 'rel'], ['uid' => $uid, 'network' => Protocol::BLUESKY, 'rel' => [Contact::FRIEND, Contact::SHARING, Contact::FOLLOWER]]); + + $follows = []; + $cursor = ''; + $profiles = []; + + do { + $parameters = [ + 'actor' => $this->atprotocol->getUserDid($uid), + 'limit' => 100, + 'cursor' => $cursor + ]; + + $data = $this->atprotocol->XRPCGet('app.bsky.graph.getFollows', $parameters); + + foreach ($data->follows ?? [] as $follow) { + $profiles[$follow->did] = $follow; + $follows[$follow->did] = Contact::SHARING; + } + $cursor = $data->cursor ?? ''; + } while (!empty($data->follows) && !empty($data->cursor)); + + $cursor = ''; + + do { + $parameters = [ + 'actor' => $this->atprotocol->getUserDid($uid), + 'limit' => 100, + 'cursor' => $cursor + ]; + + $data = $this->atprotocol->XRPCGet('app.bsky.graph.getFollowers', $parameters); + + foreach ($data->followers ?? [] as $follow) { + $profiles[$follow->did] = $follow; + $follows[$follow->did] = ($follows[$follow->did] ?? 0) | Contact::FOLLOWER; + } + $cursor = $data->cursor ?? ''; + } while (!empty($data->followers) && !empty($data->cursor)); + + foreach ($contacts as $contact) { + if (empty($follows[$contact['url']])) { + Contact::update(['rel' => Contact::NOTHING], ['id' => $contact['id']]); + } + } + + foreach ($follows as $did => $rel) { + $contact = $this->getContactByDID($did, $uid, $uid); + if (($contact['rel'] != $rel) && ($contact['uid'] != 0)) { + Contact::update(['rel' => $rel], ['id' => $contact['id']]); + } + } + $this->logger->info('Sync contacts for user - done', ['uid' => $uid]); + } + + public function updateContactByDID(string $did) + { + $profile = $this->atprotocol->XRPCGet('app.bsky.actor.getProfile', ['actor' => $did]); + if (empty($profile) || empty($profile->did)) { + return; + } + + $nick = $profile->handle ?? $profile->did; + $name = $profile->displayName ?? $nick; + + $fields = [ + 'alias' => ATProtocol::WEB . '/profile/' . $nick, + 'name' => $name ?: $nick, + 'nick' => $nick, + 'addr' => $nick, + 'updated' => DateTimeFormat::utcNow(DateTimeFormat::MYSQL), + ]; + + if (!empty($profile->description)) { + $fields['about'] = HTML::toBBCode($profile->description); + } + + if (!empty($profile->banner)) { + $fields['header'] = $profile->banner; + } + + $directory = $this->atprotocol->get(ATProtocol::DIRECTORY . '/' . $profile->did); + if (!empty($directory)) { + foreach ($directory->service as $service) { + if (($service->id == '#atproto_pds') && ($service->type == 'AtprotoPersonalDataServer') && !empty($service->serviceEndpoint)) { + $fields['baseurl'] = $service->serviceEndpoint; + } + } + + if (!empty($fields['baseurl'])) { + GServer::check($fields['baseurl'], Protocol::BLUESKY); + $fields['gsid'] = GServer::getID($fields['baseurl'], true); + } + + foreach ($directory->verificationMethod as $method) { + if (!empty($method->publicKeyMultibase)) { + $fields['pubkey'] = $method->publicKeyMultibase; + } + } + } + + /* + @todo Add this part when the function will be callable with a uid + if (!empty($profile->viewer)) { + if (!empty($profile->viewer->following) && !empty($profile->viewer->followedBy)) { + $fields['rel'] = Contact::FRIEND; + } elseif (!empty($profile->viewer->following) && empty($profile->viewer->followedBy)) { + $fields['rel'] = Contact::SHARING; + } elseif (empty($profile->viewer->following) && !empty($profile->viewer->followedBy)) { + $fields['rel'] = Contact::FOLLOWER; + } else { + $fields['rel'] = Contact::NOTHING; + } + } + */ + + if (!empty($profile->avatar)) { + $contact = Contact::selectFirst(['id', 'avatar'], ['network' => Protocol::BLUESKY, 'nurl' => $did, 'uid' => 0]); + if (!empty($contact['id']) && ($contact['avatar'] != $profile->avatar)) { + Contact::updateAvatar($contact['id'], $profile->avatar); + } + } + + $this->logger->notice('Update profile', ['did' => $profile->did, 'fields' => $fields]); + + Contact::update($fields, ['nurl' => $profile->did, 'network' => Protocol::BLUESKY]); + } + + public function getContactByDID(string $did, int $uid, int $contact_uid): array + { + $contact = Contact::selectFirst([], ['network' => Protocol::BLUESKY, 'nurl' => $did, 'uid' => [$contact_uid, $uid]], ['order' => ['uid' => true]]); + + if (!empty($contact)) { + return $contact; + } + + if (empty($contact)) { + $fields = [ + 'uid' => $contact_uid, + 'network' => Protocol::BLUESKY, + 'priority' => 1, + 'writable' => true, + 'blocked' => false, + 'readonly' => false, + 'pending' => false, + 'url' => $did, + 'nurl' => $did, + 'alias' => ATProtocol::WEB . '/profile/' . $did, + 'name' => $did, + 'nick' => $did, + 'addr' => $did, + 'rel' => Contact::NOTHING, + ]; + $cid = Contact::insert($fields); + } else { + $cid = $contact['id']; + } + + $this->updateContactByDID($did); + + return Contact::getById($cid); + } +} diff --git a/src/Protocol/ATProtocol/Jetstream.php b/src/Protocol/ATProtocol/Jetstream.php new file mode 100755 index 0000000000..c1b866dac8 --- /dev/null +++ b/src/Protocol/ATProtocol/Jetstream.php @@ -0,0 +1,404 @@ +#!/usr/bin/env php +<?php +/** + * Copyright (C) 2010-2024, the Friendica project + * SPDX-FileCopyrightText: 2010-2024 the Friendica project + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + */ + +namespace Friendica\Protocol\ATProtocol; + +use Friendica\Core\Config\Capability\IManageConfigValues; +use Friendica\Core\KeyValueStorage\Capability\IManageKeyValuePairs; +use Friendica\Core\Protocol; +use Friendica\Core\System; +use Friendica\Model\Contact; +use Friendica\Model\Item; +use Friendica\Protocol\ATProtocol; +use Friendica\Util\DateTimeFormat; +use Psr\Log\LoggerInterface; +use stdClass; + +/** + * Class to handle the Bluesky Jetstream firehose + * + * Existing collections: + * app.bsky.feed.like, app.bsky.graph.follow, app.bsky.feed.repost, app.bsky.feed.post, app.bsky.graph.block, + * app.bsky.actor.profile, app.bsky.graph.listitem, app.bsky.graph.list, app.bsky.graph.listblock, app.bsky.feed.generator, + * app.bsky.feed.threadgate, app.bsky.graph.starterpack, app.bsky.feed.postgate, chat.bsky.actor.declaration, + * app.bsky.actor.domain, industries.geesawra.webpages + * + * Available servers: + * jetstream1.us-east.bsky.network, jetstream2.us-east.bsky.network, jetstream1.us-west.bsky.network, jetstream2.us-west.bsky.network + * + * @see https://github.com/bluesky-social/jetstream + * @todo Support more collections, support full firehose + */ +class Jetstream +{ + private $uids = []; + private $self = []; + private $capped = false; + + /** @var LoggerInterface */ + private $logger; + + /** @var \Friendica\Core\Config\Capability\IManageConfigValues */ + private $config; + + /** @var IManageKeyValuePairs */ + private $keyValue; + + /** @var ATProtocol */ + private $atprotocol; + + /** @var Actor */ + private $actor; + + /** @var Processor */ + private $processor; + + /** @var \WebSocket\Client */ + private $client; + + public function __construct(LoggerInterface $logger, IManageConfigValues $config, IManageKeyValuePairs $keyValue, ATProtocol $atprotocol, Actor $actor, Processor $processor) + { + $this->logger = $logger; + $this->config = $config; + $this->keyValue = $keyValue; + $this->atprotocol = $atprotocol; + $this->actor = $actor; + $this->processor = $processor; + } + + // ***************************************** + // * Listener + // ***************************************** + public function listen() + { + $timeout = 300; + $timeout_limit = 10; + $timestamp = $this->keyValue->get('jetstream_timestamp') ?? 0; + $cursor = ''; + while (true) { + if ($timestamp) { + $cursor = '&cursor=' . $timestamp; + $this->logger->notice('Start with cursor', ['cursor' => $cursor]); + } + + $this->syncContacts(); + try { + // @todo make the path configurable + $this->client = new \WebSocket\Client('wss://jetstream1.us-west.bsky.network/subscribe?requireHello=true' . $cursor); + $this->client->setTimeout($timeout); + } catch (\WebSocket\ConnectionException $e) { + $this->logger->error('Error while trying to establish the connection', ['code' => $e->getCode(), 'message' => $e->getMessage()]); + echo "Connection wasn't established.\n"; + exit(1); + } + $this->setOptions(); + $last_timeout = time(); + while (true) { + try { + $message = $this->client->receive(); + $data = json_decode($message); + if (is_object($data)) { + $timestamp = $data->time_us; + $this->route($data); + $this->keyValue->set('jetstream_timestamp', $timestamp); + } else { + $this->logger->warning('Unexpected return value', ['data' => $data]); + break; + } + } catch (\WebSocket\ConnectionException $e) { + if ($e->getCode() == 1024) { + $timeout_duration = time() - $last_timeout; + if ($timeout_duration < $timeout_limit) { + $this->logger->notice('Timeout - connection lost', ['duration' => $timeout_duration, 'timestamp' => $timestamp, 'code' => $e->getCode(), 'message' => $e->getMessage()]); + break; + } + $this->logger->notice('Timeout', ['duration' => $timeout_duration, 'timestamp' => $timestamp, 'code' => $e->getCode(), 'message' => $e->getMessage()]); + } else { + $this->logger->error('Error', ['code' => $e->getCode(), 'message' => $e->getMessage()]); + break; + } + } + $last_timeout = time(); + } + try { + $this->client->close(); + } catch (\WebSocket\ConnectionException $e) { + $this->logger->error('Error while trying to close the connection', ['code' => $e->getCode(), 'message' => $e->getMessage()]); + } + } + } + + private function syncContacts() + { + $active_uids = $this->atprotocol->getUids(); + if (empty($active_uids)) { + return; + } + + foreach ($active_uids as $uid) { + $this->actor->syncContacts($uid); + } + } + + private function setOptions() + { + $active_uids = $this->atprotocol->getUids(); + if (empty($active_uids)) { + return; + } + + $contacts = Contact::selectToArray(['uid', 'url'], ['uid' => $active_uids, 'network' => Protocol::BLUESKY, 'rel' => [Contact::FRIEND, Contact::SHARING]]); + + $self = []; + foreach ($active_uids as $uid) { + $did = $this->atprotocol->getUserDid($uid); + $contacts[] = ['uid' => $uid, 'url' => $did]; + $self[$did] = $uid; + } + $this->self = $self; + + $uids = []; + foreach ($contacts as $contact) { + $uids[$contact['url']][] = $contact['uid']; + } + $this->uids = $uids; + + $did_limit = $this->config->get('jetstream', 'did_limit'); + + $dids = array_keys($uids); + if (count($dids) > $did_limit) { + $contacts = Contact::selectToArray(['url'], ['uid' => $active_uids, 'network' => Protocol::BLUESKY, 'rel' => [Contact::FRIEND, Contact::SHARING]], ['order' => ['last-item' => true]]); + $dids = $this->addDids($contacts, $uids, $did_limit, array_keys($self)); + } + + if (count($dids) < $did_limit) { + $contacts = Contact::selectToArray(['url'], ['uid' => $active_uids, 'network' => Protocol::BLUESKY, 'rel' => Contact::FOLLOWER], ['order' => ['last-item' => true]]); + $dids = $this->addDids($contacts, $uids, $did_limit, $dids); + } + + if (!$this->capped && count($dids) < $did_limit) { + $contacts = Contact::selectToArray(['url'], ['uid' => 0, 'network' => Protocol::BLUESKY], ['order' => ['last-item' => true], 'limit' => $did_limit]); + $dids = $this->addDids($contacts, $uids, $did_limit, $dids); + } + + $this->logger->debug('Selected DIDs', ['uids' => $active_uids, 'count' => count($dids), 'capped' => $this->capped]); + $update = [ + 'type' => 'options_update', + 'payload' => [ + 'wantedCollections' => ['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like', 'app.bsky.graph.block', 'app.bsky.actor.profile', 'app.bsky.graph.follow'], + 'wantedDids' => $dids, + 'maxMessageSizeBytes' => 1000000 + ] + ]; + try { + $this->client->send(json_encode($update)); + } catch (\WebSocket\ConnectionException $e) { + $this->logger->error('Error while trying to send options.', ['code' => $e->getCode(), 'message' => $e->getMessage()]); + } + } + + private function addDids(array $contacts, array $uids, int $did_limit, array $dids): array + { + foreach ($contacts as $contact) { + if (in_array($contact['url'], $uids)) { + continue; + } + $dids[] = $contact['url']; + if (count($dids) >= $did_limit) { + break; + } + } + return $dids; + } + + private function route(stdClass $data) + { + Item::incrementInbound(Protocol::BLUESKY); + + switch ($data->kind) { + case 'account': + if (!empty($data->identity->did)) { + $this->processor->processAccount($data); + } + break; + + case 'identity': + $this->processor->processIdentity($data); + break; + + case 'commit': + $this->routeCommits($data); + break; + } + } + + private function routeCommits(stdClass $data) + { + $drift = max(0, round(time() - $data->time_us / 1000000)); + if ($drift > 60 && !$this->capped) { + $this->capped = true; + $this->setOptions(); + $this->logger->notice('Drift is too high, dids will be capped'); + } elseif ($drift == 0 && $this->capped) { + $this->capped = false; + $this->setOptions(); + $this->logger->notice('Drift is low enough, dids will be uncapped'); + } + + $this->logger->notice('Received commit', ['time' => date(DateTimeFormat::ATOM, $data->time_us / 1000000), 'drift' => $drift, 'capped' => $this->capped, 'did' => $data->did, 'operation' => $data->commit->operation, 'collection' => $data->commit->collection, 'timestamp' => $data->time_us]); + $timestamp = microtime(true); + + switch ($data->commit->collection) { + case 'app.bsky.feed.post': + $this->routePost($data, $drift); + break; + + case 'app.bsky.feed.repost': + $this->routeRepost($data, $drift); + break; + + case 'app.bsky.feed.like': + $this->routeLike($data); + break; + + case 'app.bsky.graph.block': + $this->processor->performBlocks($data, $this->self[$data->did] ?? 0); + break; + + case 'app.bsky.actor.profile': + $this->routeProfile($data); + break; + + case 'app.bsky.graph.follow': + $this->routeFollow($data); + break; + + case 'app.bsky.feed.generator': + case 'app.bsky.feed.postgate': + case 'app.bsky.feed.threadgate': + case 'app.bsky.graph.list': + case 'app.bsky.graph.listblock': + case 'app.bsky.graph.listitem': + case 'app.bsky.graph.starterpack': + // Ignore these collections, since we can't really process them + break; + + default: + $this->storeCommitMessage($data); + break; + } + if (microtime(true) - $timestamp > 2) { + $this->logger->notice('Commit processed', ['duration' => round(microtime(true) - $timestamp, 3), 'time' => date(DateTimeFormat::ATOM, $data->time_us / 1000000), 'did' => $data->did, 'operation' => $data->commit->operation, 'collection' => $data->commit->collection]); + } + } + + private function routePost(stdClass $data, int $drift) + { + switch ($data->commit->operation) { + case 'delete': + $this->processor->deleteRecord($data); + break; + + case 'create': + $this->processor->createPost($data, $this->uids[$data->did] ?? [0], ($drift > 30)); + break; + + default: + $this->storeCommitMessage($data); + break; + } + } + + private function routeRepost(stdClass $data, int $drift) + { + switch ($data->commit->operation) { + case 'delete': + $this->processor->deleteRecord($data); + break; + + case 'create': + $this->processor->createRepost($data, $this->uids[$data->did] ?? [0], ($drift > 30)); + break; + + default: + $this->storeCommitMessage($data); + break; + } + } + + private function routeLike(stdClass $data) + { + switch ($data->commit->operation) { + case 'delete': + $this->processor->deleteRecord($data); + break; + + case 'create': + $this->processor->createLike($data); + break; + + default: + $this->storeCommitMessage($data); + break; + } + } + + private function routeProfile(stdClass $data) + { + switch ($data->commit->operation) { + case 'delete': + $this->storeCommitMessage($data); + break; + + case 'create': + $this->actor->updateContactByDID($data->did); + break; + + case 'update': + $this->actor->updateContactByDID($data->did); + break; + + default: + $this->storeCommitMessage($data); + break; + } + } + + private function routeFollow(stdClass $data) + { + switch ($data->commit->operation) { + case 'delete': + if ($this->processor->deleteFollow($data, $this->self)) { + $this->syncContacts(); + $this->setOptions(); + } + break; + + case 'create': + if ($this->processor->createFollow($data, $this->self)) { + $this->syncContacts(); + $this->setOptions(); + } + break; + + default: + $this->storeCommitMessage($data); + break; + } + } + + private function storeCommitMessage(stdClass $data) + { + if ($this->config->get('debug', 'jetstream_log')) { + $tempfile = tempnam(System::getTempPath(), 'at-proto.commit.' . $data->commit->collection . '.' . $data->commit->operation . '-'); + file_put_contents($tempfile, json_encode($data, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)); + } + } +} diff --git a/src/Protocol/ATProtocol/Processor.php b/src/Protocol/ATProtocol/Processor.php new file mode 100755 index 0000000000..9416ef7a2f --- /dev/null +++ b/src/Protocol/ATProtocol/Processor.php @@ -0,0 +1,904 @@ +#!/usr/bin/env php +<?php +/** + * Copyright (C) 2010-2024, the Friendica project + * SPDX-FileCopyrightText: 2010-2024 the Friendica project + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + */ + +namespace Friendica\Protocol\ATProtocol; + +use Friendica\App\BaseURL; +use Friendica\Core\Protocol; +use Friendica\Database\Database; +use Friendica\Model\Contact; +use Friendica\Model\Conversation; +use Friendica\Model\Item; +use Friendica\Model\ItemURI; +use Friendica\Model\Post; +use Friendica\Model\Tag; +use Friendica\Protocol\Activity; +use Friendica\Protocol\ATProtocol; +use Friendica\Util\DateTimeFormat; +use Friendica\Util\Strings; +use Psr\Log\LoggerInterface; +use stdClass; + +/** + * Class to process AT protocol messages + */ +class Processor +{ + /** @var Database */ + private $db; + + /** @var LoggerInterface */ + private $logger; + + /** @var BaseURL */ + private $baseURL; + + /** @var ATProtocol */ + private $atprotocol; + + /** @var Actor */ + private $actor; + + public function __construct(Database $database, LoggerInterface $logger, BaseURL $baseURL, ATProtocol $atprotocol, Actor $actor) + { + $this->db = $database; + $this->logger = $logger; + $this->baseURL = $baseURL; + $this->atprotocol = $atprotocol; + $this->actor = $actor; + } + + public function processAccount(stdClass $data) + { + $fields = [ + 'archive' => !$data->account->active, + 'failed' => !$data->account->active, + 'updated' => DateTimeFormat::utc($data->account->time, DateTimeFormat::MYSQL) + ]; + + $this->logger->notice('Process account', ['did' => $data->identity->did, 'fields' => $fields]); + + Contact::update($fields, ['nurl' => $data->identity->did, 'network' => Protocol::BLUESKY]); + } + + public function processIdentity(stdClass $data) + { + $fields = [ + 'alias' => ATProtocol::WEB . '/profile/' . $data->identity->handle, + 'nick' => $data->identity->handle, + 'addr' => $data->identity->handle, + 'updated' => DateTimeFormat::utc($data->identity->time, DateTimeFormat::MYSQL), + ]; + + $this->logger->notice('Process identity', ['did' => $data->identity->did, 'fields' => $fields]); + + Contact::update($fields, ['nurl' => $data->identity->did, 'network' => Protocol::BLUESKY]); + } + + public function performBlocks(stdClass $data, int $uid) + { + if (!$uid) { + $this->logger->info('Not a block from a local user'); + return; + } + + if (empty($data->commit->record->subject)) { + $this->logger->info('No subject in data', ['data' => $data]); + return; + } + + $contact = Contact::selectFirst(['id'], ['nurl' => $data->commit->record->subject, 'uid' => 0]); + if (empty($contact['id'])) { + $this->logger->info('Contact not found', ['did' => $data->commit->record->subject]); + return; + } + + // @todo unblock doesn't provide a subject. We will only arrive here, wenn the operation is "create". + Contact\User::setBlocked($contact['id'], $uid, ($data->commit->operation == 'create'), true); + $this->logger->info('Contact blocked', ['id' => $contact['id'], 'did' => $data->commit->record->subject, 'uid' => $uid]); + } + + public function deleteRecord(stdClass $data) + { + $uri = 'at://' . $data->did . '/' . $data->commit->collection . '/' . $data->commit->rkey; + $itemuri = $this->db->selectFirst('item-uri', ['id'], ["`uri` LIKE ?", $uri . ':%']); + if (empty($itemuri['id'])) { + $this->logger->info('URI not found', ['url' => $uri]); + return; + } + + $condition = ['uri-id' => $itemuri['id'], 'author-link' => $data->did, 'network' => Protocol::BLUESKY]; + if (!Post::exists($condition)) { + $this->logger->info('Record not found', $condition); + return; + } + Item::markForDeletion($condition); + $this->logger->info('Record deleted', $condition); + } + + public function createPost(stdClass $data, array $uids, bool $dont_fetch) + { + if (!empty($data->commit->record->reply)) { + $root = $this->getUri($data->commit->record->reply->root); + $parent = $this->getUri($data->commit->record->reply->parent); + $uids = $this->getPostUids($root); + if (!$uids) { + $this->logger->debug('Comment is not imported since the root post is not found.', ['root' => $root, 'parent' => $parent]); + return; + } + if ($dont_fetch && !$this->getPostUids($parent)) { + $this->logger->debug('Comment is not imported since the parent post is not found.', ['root' => $root, 'parent' => $parent]); + return; + } + } + + foreach ($uids as $uid) { + $item = []; + $item = $this->getHeaderFromJetstream($data, $uid); + if (empty($item)) { + continue; + } + + if (!empty($root)) { + $item['parent-uri'] = $root; + $item['thr-parent'] = $this->fetchMissingPost($parent, $uid, Item::PR_FETCHED, $item['contact-id'], 0, $parent, false, Conversation::PARCEL_JETSTREAM); + $item['gravity'] = Item::GRAVITY_COMMENT; + } else { + $item['gravity'] = Item::GRAVITY_PARENT; + } + + $item['body'] = $this->parseFacets($data->commit->record, $item['uri-id']); + $item['transmitted-languages'] = $data->commit->record->langs ?? []; + + if (!empty($data->commit->record->embed)) { + if (empty($post)) { + $uri = 'at://' . $data->did . '/' . $data->commit->collection . '/' . $data->commit->rkey; + $post = $this->atprotocol->XRPCGet('app.bsky.feed.getPostThread', ['uri' => $uri]); + if (empty($post->thread->post->embed)) { + $this->logger->notice('Post was not fetched', ['uri' => $uri, 'post' => $post]); + return; + } + } + $item = $this->addMedia($post->thread->post->embed, $item, 0, 0, 0); + } + + $id = Item::insert($item); + + if ($id) { + $this->logger->info('Post inserted', ['id' => $id, 'guid' => $item['guid']]); + } elseif (Post::exists(['uid' => $uid, 'uri-id' => $item['uri-id']])) { + $this->logger->warning('Post was found', ['guid' => $item['guid'], 'uri' => $item['uri']]); + } else { + $this->logger->warning('Post was not inserted', ['guid' => $item['guid'], 'uri' => $item['uri']]); + } + } + } + + public function createRepost(stdClass $data, array $uids, bool $dont_fetch) + { + if ($dont_fetch && !$this->getPostUids($this->getUri($data->commit->record->subject))) { + $this->logger->debug('Repost is not imported since the subject is not found.', ['subject' => $this->getUri($data->commit->record->subject)]); + return; + } + + foreach ($uids as $uid) { + $item = $this->getHeaderFromJetstream($data, $uid); + if (empty($item)) { + continue; + } + + $item['gravity'] = Item::GRAVITY_ACTIVITY; + $item['body'] = $item['verb'] = Activity::ANNOUNCE; + $item['thr-parent'] = $this->getUri($data->commit->record->subject); + $item['thr-parent'] = $this->fetchMissingPost($item['thr-parent'], 0, Item::PR_FETCHED, $item['contact-id'], 0, $item['thr-parent'], false, Conversation::PARCEL_JETSTREAM); + + $id = Item::insert($item); + + if ($id) { + $this->logger->info('Repost inserted', ['id' => $id]); + } elseif (Post::exists(['uid' => $uid, 'uri-id' => $item['uri-id']])) { + $this->logger->warning('Repost was found', ['uri' => $item['uri']]); + } else { + $this->logger->warning('Repost was not inserted', ['uri' => $item['uri']]); + } + } + } + + public function createLike(stdClass $data) + { + $uids = $this->getPostUids($this->getUri($data->commit->record->subject)); + if (!$uids) { + $this->logger->debug('Like is not imported since the subject is not found.', ['subject' => $this->getUri($data->commit->record->subject)]); + return; + } + foreach ($uids as $uid) { + $item = $this->getHeaderFromJetstream($data, $uid); + if (empty($item)) { + continue; + } + + $item['gravity'] = Item::GRAVITY_ACTIVITY; + $item['body'] = $item['verb'] = Activity::LIKE; + $item['thr-parent'] = $this->getPostUri($this->getUri($data->commit->record->subject), $uid); + + $id = Item::insert($item); + + if ($id) { + $this->logger->info('Like inserted', ['id' => $id]); + } elseif (Post::exists(['uid' => $uid, 'uri-id' => $item['uri-id']])) { + $this->logger->warning('Like was found', ['uri' => $item['uri']]); + } else { + $this->logger->warning('Like was not inserted', ['uri' => $item['uri']]); + } + } + } + + public function deleteFollow(stdClass $data, array $self): bool + { + return !empty($self[$data->did]); + } + + public function createFollow(stdClass $data, array $self): bool + { + if (!empty($self[$data->did])) { + $uid = $self[$data->did]; + $target = $data->commit->record->subject; + $rel = Contact::SHARING; + $this->logger->debug('Follow by a local user', ['uid' => $uid, 'following' => $target]); + } elseif (!empty($self[$data->commit->record->subject])) { + $uid = $self[$data->commit->record->subject]; + $target = $data->did; + $rel = Contact::FOLLOWER; + $this->logger->debug('New follower for a local user', ['uid' => $uid, 'follower' => $target]); + } else { + $this->logger->debug('No local part', ['did' => $data->did, 'target' => $data->commit->record->subject]); + return false; + } + $contact = $this->actor->getContactByDID($target, $uid, $uid); + if (empty($contact)) { + $this->logger->notice('Contact not found', ['uid' => $uid, 'target' => $target]); + return false; + } + Contact::update(['rel' => $rel | $contact['rel']], ['id' => $contact['id']]); + return true; + } + + private function processPost(stdClass $post, int $uid, int $post_reason, int $causer, int $level, int $protocol): int + { + $uri = $this->getUri($post); + + if ($uri_id = $this->fetchUriId($uri, $uid)) { + return $uri_id; + } + + if (empty($post->record)) { + $this->logger->debug('Invalid post', ['uri' => $uri]); + return 0; + } + + $this->logger->debug('Importing post', ['uid' => $uid, 'indexedAt' => $post->indexedAt, 'uri' => $post->uri, 'cid' => $post->cid, 'root' => $post->record->reply->root ?? '']); + + $item = $this->getHeaderFromPost($post, $uri, $uid, $protocol); + if (empty($item)) { + return 0; + } + $item = $this->getContent($item, $post->record, $uri, $uid, $level); + if (empty($item)) { + return 0; + } + + if (!empty($post->embed)) { + $item = $this->addMedia($post->embed, $item, $uid, $level); + } + + $item['restrictions'] = $this->getRestrictionsForUser($post, $item, $post_reason); + + if (empty($item['post-reason'])) { + $item['post-reason'] = $post_reason; + } + + if ($causer != 0) { + $item['causer-id'] = $causer; + } + + $id = Item::insert($item); + + if ($id) { + $this->logger->info('Fetched post inserted', ['id' => $id, 'guid' => $item['guid']]); + } elseif (Post::exists(['uid' => $uid, 'uri-id' => $item['uri-id']])) { + $this->logger->warning('Fetched post was found', ['guid' => $item['guid'], 'uri' => $item['uri']]); + } else { + $this->logger->warning('Fetched post was not inserted', ['guid' => $item['guid'], 'uri' => $item['uri']]); + } + + return $this->fetchUriId($uri, $uid); + } + + private function getHeaderFromJetstream(stdClass $data, int $uid, int $protocol = Conversation::PARCEL_JETSTREAM): array + { + $contact = $this->actor->getContactByDID($data->did, $uid, 0); + if (empty($contact)) { + $this->logger->info('Contact not found for user', ['did' => $data->did, 'uid' => $uid]); + return []; + } + + $item = [ + 'network' => Protocol::BLUESKY, + 'protocol' => $protocol, + 'uid' => $uid, + 'wall' => false, + 'uri' => 'at://' . $data->did . '/' . $data->commit->collection . '/' . $data->commit->rkey . ':' . $data->commit->cid, + 'guid' => $data->commit->cid, + 'created' => DateTimeFormat::utc($data->commit->record->createdAt, DateTimeFormat::MYSQL), + 'private' => Item::UNLISTED, + 'verb' => Activity::POST, + 'contact-id' => $contact['id'], + 'author-name' => $contact['name'], + 'author-link' => $contact['url'], + 'author-avatar' => $contact['avatar'], + 'owner-name' => $contact['name'], + 'owner-link' => $contact['url'], + 'owner-avatar' => $contact['avatar'], + 'plink' => $contact['alias'] . '/post/' . $data->commit->rkey, + 'source' => json_encode($data), + ]; + + if ($this->postExists($item['uri'], [$uid])) { + $this->logger->info('Post already exists for user', ['uri' => $item['uri'], 'uid' => $uid]); + return []; + } + + $account = Contact::selectFirstAccountUser(['pid'], ['id' => $contact['id']]); + $item['owner-id'] = $item['author-id'] = $account['pid']; + $item['uri-id'] = ItemURI::getIdByURI($item['uri']); + + if (in_array($contact['rel'], [Contact::SHARING, Contact::FRIEND])) { + $item['post-reason'] = Item::PR_FOLLOWER; + } + + if (!empty($data->commit->record->labels)) { + foreach ($data->commit->record->labels as $label) { + // Only flag posts as sensitive based on labels that had been provided by the author. + // When "ver" is set to "1" it was flagged by some automated process. + if (empty($label->ver)) { + $item['sensitive'] = true; + $item['content-warning'] = $label->val ?? ''; + $this->logger->debug('Sensitive content', ['uri-id' => $item['uri-id'], 'label' => $label]); + } + } + } + + return $item; + } + + private function getHeaderFromPost(stdClass $post, string $uri, int $uid, int $protocol): array + { + $parts = $this->getUriParts($uri); + if (empty($post->author) || empty($post->cid) || empty($parts->rkey)) { + return []; + } + $contact = $this->actor->getContactByDID($post->author->did, $uid, 0); + if (empty($contact)) { + $this->logger->info('Contact not found for user', ['did' => $post->author->did, 'uid' => $uid]); + return []; + } + + $item = [ + 'network' => Protocol::BLUESKY, + 'protocol' => $protocol, + 'uid' => $uid, + 'wall' => false, + 'uri' => $uri, + 'guid' => $post->cid, + 'received' => DateTimeFormat::utc($post->indexedAt, DateTimeFormat::MYSQL), + 'private' => Item::UNLISTED, + 'verb' => Activity::POST, + 'contact-id' => $contact['id'], + 'author-name' => $contact['name'], + 'author-link' => $contact['url'], + 'author-avatar' => $contact['avatar'], + 'owner-name' => $contact['name'], + 'owner-link' => $contact['url'], + 'owner-avatar' => $contact['avatar'], + 'plink' => $contact['alias'] . '/post/' . $parts->rkey, + 'source' => json_encode($post), + ]; + + if ($this->postExists($item['uri'], [$uid])) { + $this->logger->info('Post already exists for user', ['uri' => $item['uri'], 'uid' => $uid]); + return []; + } + + $account = Contact::selectFirstAccountUser(['pid'], ['id' => $contact['id']]); + + $item['owner-id'] = $item['author-id'] = $account['pid']; + $item['uri-id'] = ItemURI::getIdByURI($uri); + + if (in_array($contact['rel'], [Contact::SHARING, Contact::FRIEND])) { + $item['post-reason'] = Item::PR_FOLLOWER; + } + + if (!empty($post->labels)) { + foreach ($post->labels as $label) { + // Only flag posts as sensitive based on labels that had been provided by the author. + // When "ver" is set to "1" it was flagged by some automated process. + if (empty($label->ver)) { + $item['sensitive'] = true; + $item['content-warning'] = $label->val ?? ''; + $this->logger->debug('Sensitive content', ['uri-id' => $item['uri-id'], 'label' => $label]); + } + } + } + + return $item; + } + + private function getContent(array $item, stdClass $record, string $uri, int $uid, int $level): array + { + if (empty($item)) { + return []; + } + + if (!empty($record->reply)) { + $item['parent-uri'] = $this->getUri($record->reply->root); + if ($item['parent-uri'] != $uri) { + $item['parent-uri'] = $this->getPostUri($item['parent-uri'], $uid); + if (empty($item['parent-uri'])) { + $this->logger->notice('Parent-uri not found', ['uri' => $this->getUri($record->reply->root)]); + return []; + } + } + + $item['thr-parent'] = $this->getUri($record->reply->parent); + if (!in_array($item['thr-parent'], [$uri, $item['parent-uri']])) { + $item['thr-parent'] = $this->getPostUri($item['thr-parent'], $uid) ?: $item['thr-parent']; + } + } + + $item['body'] = $this->parseFacets($record, $item['uri-id']); + $item['created'] = DateTimeFormat::utc($record->createdAt, DateTimeFormat::MYSQL); + $item['transmitted-languages'] = $record->langs ?? []; + + return $item; + } + + private function parseFacets(stdClass $record, int $uri_id): string + { + $text = $record->text ?? ''; + + if (empty($record->facets)) { + return $text; + } + + $facets = []; + foreach ($record->facets as $facet) { + $facets[$facet->index->byteStart] = $facet; + } + krsort($facets); + + foreach ($facets as $facet) { + $prefix = substr($text, 0, $facet->index->byteStart); + $linktext = substr($text, $facet->index->byteStart, $facet->index->byteEnd - $facet->index->byteStart); + $suffix = substr($text, $facet->index->byteEnd); + + $url = ''; + $type = '$type'; + foreach ($facet->features as $feature) { + + switch ($feature->$type) { + case 'app.bsky.richtext.facet#link': + $url = $feature->uri; + break; + + case 'app.bsky.richtext.facet#mention': + $contact = Contact::getByURL($feature->did, null, ['id']); + if (!empty($contact['id'])) { + $url = $this->baseURL . '/contact/' . $contact['id']; + if (substr($linktext, 0, 1) == '@') { + $prefix .= '@'; + $linktext = substr($linktext, 1); + } + } + break; + + case 'app.bsky.richtext.facet#tag': + Tag::store($uri_id, Tag::HASHTAG, $feature->tag); + $url = $this->baseURL . '/search?tag=' . urlencode($feature->tag); + $linktext = '#' . $feature->tag; + break; + + default: + $this->logger->notice('Unhandled feature type', ['type' => $feature->$type, 'feature' => $feature, 'record' => $record]); + break; + } + } + if (!empty($url)) { + $text = $prefix . '[url=' . $url . ']' . $linktext . '[/url]' . $suffix; + } + } + return $text; + } + + private function addMedia(stdClass $embed, array $item, int $level): array + { + $type = '$type'; + switch ($embed->$type) { + case 'app.bsky.embed.images#view': + foreach ($embed->images as $image) { + $media = [ + 'uri-id' => $item['uri-id'], + 'type' => Post\Media::IMAGE, + 'url' => $image->fullsize, + 'preview' => $image->thumb, + 'description' => $image->alt, + ]; + Post\Media::insert($media); + } + break; + + case 'app.bsky.embed.video#view': + $media = [ + 'uri-id' => $item['uri-id'], + 'type' => Post\Media::HLS, + 'url' => $embed->playlist, + 'preview' => $embed->thumbnail, + 'description' => $embed->alt ?? '', + 'height' => $embed->aspectRatio->height ?? null, + 'width' => $embed->aspectRatio->width ?? null, + ]; + Post\Media::insert($media); + break; + + case 'app.bsky.embed.external#view': + $media = [ + 'uri-id' => $item['uri-id'], + 'type' => Post\Media::HTML, + 'url' => $embed->external->uri, + 'name' => $embed->external->title, + 'description' => $embed->external->description, + ]; + Post\Media::insert($media); + break; + + case 'app.bsky.embed.record#view': + $original_uri = $uri = $this->getUri($embed->record); + $type = '$type'; + if (!empty($embed->record->record->$type)) { + $embed_type = $embed->record->record->$type; + if ($embed_type == 'app.bsky.graph.starterpack') { + $this->addStarterpack($item, $embed->record); + break; + } + } + $fetched_uri = $this->getPostUri($uri, $item['uid']); + if (!$fetched_uri) { + $uri = $this->fetchMissingPost($uri, 0, Item::PR_FETCHED, $item['contact-id'], $level, $uri); + } else { + $uri = $fetched_uri; + } + if ($uri) { + $shared = Post::selectFirst(['uri-id'], ['uri' => $uri, 'uid' => [$item['uid'], 0]]); + $uri_id = $shared['uri-id'] ?? 0; + } + if (!empty($uri_id)) { + $item['quote-uri-id'] = $uri_id; + } else { + $this->logger->debug('Quoted post could not be fetched', ['original-uri' => $original_uri, 'uri' => $uri]); + } + break; + + case 'app.bsky.embed.recordWithMedia#view': + $this->addMedia($embed->media, $item, $level); + $original_uri = $uri = $this->getUri($embed->record->record); + $uri = $this->fetchMissingPost($uri, 0, Item::PR_FETCHED, $item['contact-id'], $level, $uri); + if ($uri) { + $shared = Post::selectFirst(['uri-id'], ['uri' => $uri, 'uid' => [$item['uid'], 0]]); + $uri_id = $shared['uri-id'] ?? 0; + } + if (!empty($uri_id)) { + $item['quote-uri-id'] = $uri_id; + } else { + $this->logger->debug('Quoted post could not be fetched', ['original-uri' => $original_uri, 'uri' => $uri]); + } + break; + + default: + $this->logger->notice('Unhandled embed type', ['uri-id' => $item['uri-id'], 'type' => $embed->$type, 'embed' => $embed]); + break; + } + return $item; + } + + private function addStarterpack(array $item, stdClass $record) + { + $this->logger->debug('Received starterpack', ['uri-id' => $item['uri-id'], 'guid' => $item['guid'], 'uri' => $record->uri]); + if (!preg_match('#^at://(.+)/app.bsky.graph.starterpack/(.+)#', $record->uri, $matches)) { + return; + } + + $media = [ + 'uri-id' => $item['uri-id'], + 'type' => Post\Media::HTML, + 'url' => 'https://bsky.app/starter-pack/' . $matches[1] . '/' . $matches[2], + 'name' => $record->record->name, + 'description' => $record->record->description ?? '', + ]; + + Post\Media::insert($media); + + $fields = [ + 'name' => $record->record->name, + 'description' => $record->record->description ?? '', + ]; + Post\Media::update($fields, ['uri-id' => $media['uri-id'], 'url' => $media['url']]); + } + + private function getRestrictionsForUser(stdClass $post, array $item, int $post_reason): ?int + { + if (!empty($post->viewer->replyDisabled)) { + return Item::CANT_REPLY; + } + + if (empty($post->threadgate)) { + return null; + } + + if (!isset($post->threadgate->record->allow)) { + return null; + } + + if ($item['uid'] == 0) { + return Item::CANT_REPLY; + } + + $restrict = true; + $type = '$type'; + + foreach ($post->threadgate->record->allow as $allow) { + switch ($allow->$type) { + case 'app.bsky.feed.threadgate#followingRule': + // Only followers can reply. + if (Contact::isFollower($item['author-id'], $item['uid'])) { + $restrict = false; + } + break; + + case 'app.bsky.feed.threadgate#mentionRule': + // Only mentioned accounts can reply. + if ($post_reason == Item::PR_TO) { + $restrict = false; + } + break; + + case 'app.bsky.feed.threadgate#listRule': + // Only accounts in the provided list can reply. We don't support this at the moment. + break; + } + } + + return $restrict ? Item::CANT_REPLY : null; + } + + private function fetchMissingPost(string $uri, int $uid, int $post_reason, int $causer, int $level, string $fallback = '', bool $always_fetch = false, int $Protocol = Conversation::PARCEL_JETSTREAM): string + { + $timestamp = microtime(true); + $stamp = Strings::getRandomHex(30); + $this->logger->debug('Fetch missing post', ['uri' => $uri, 'stamp' => $stamp]); + + $fetched_uri = $this->getPostUri($uri, $uid); + if (!$always_fetch && !empty($fetched_uri)) { + return $fetched_uri; + } + + if (++$level > 100) { + $this->logger->info('Recursion level too deep', ['level' => $level, 'uid' => $uid, 'uri' => $uri, 'fallback' => $fallback]); + // When the level is too deep we will fallback to the parent uri. + // Allthough the threading won't be correct, we at least had stored all posts and won't try again + return $fallback; + } + + $class = $this->getUriClass($uri); + if (empty($class)) { + return $fallback; + } + + $fetch_uri = $class->uri; + + $this->logger->debug('Fetch missing post', ['level' => $level, 'uid' => $uid, 'uri' => $uri]); + $data = $this->atprotocol->XRPCGet('app.bsky.feed.getPostThread', ['uri' => $fetch_uri]); + if (empty($data) || empty($data->thread)) { + $this->logger->info('Thread was not fetched', ['level' => $level, 'uid' => $uid, 'uri' => $uri, 'fallback' => $fallback]); + if (microtime(true) - $timestamp > 2) { + $this->logger->debug('Not fetched', ['duration' => round(microtime(true) - $timestamp, 3), 'uri' => $uri, 'stamp' => $stamp]); + } + return $fallback; + } + + $this->logger->debug('Reply count', ['level' => $level, 'uid' => $uid, 'uri' => $uri]); + + if ($causer != 0) { + $causer = Contact::getPublicContactId($causer, $uid); + } + + if (!empty($data->thread->parent)) { + $parents = $this->fetchParents($data->thread->parent, $uid); + + if (!empty($parents)) { + if ($data->thread->post->record->reply->root->uri != $parents[0]->uri) { + $parent_uri = $this->getUri($data->thread->post->record->reply->root); + $this->fetchMissingPost($parent_uri, $uid, $post_reason, $causer, $level, $data->thread->post->record->reply->root->uri, false, $Protocol); + } + } + + foreach ($parents as $parent) { + $uri_id = $this->processPost($parent, $uid, Item::PR_FETCHED, $causer, $level, $Protocol); + $this->logger->debug('Parent created', ['uri-id' => $uri_id]); + } + } + + $uri = $this->processThread($data->thread, $uid, $post_reason, $causer, $level, $Protocol); + if (microtime(true) - $timestamp > 2) { + $this->logger->debug('Fetched and processed post', ['duration' => round(microtime(true) - $timestamp, 3), 'uri' => $uri, 'stamp' => $stamp]); + } + return $uri; + } + + private function fetchParents(stdClass $parent, int $uid, array $parents = []): array + { + if (!empty($parent->parent)) { + $parents = $this->fetchParents($parent->parent, $uid, $parents); + } + + if (!empty($parent->post) && empty($this->getPostUri($this->getUri($parent->post), $uid))) { + $parents[] = $parent->post; + } + + return $parents; + } + + private function processThread(stdClass $thread, int $uid, int $post_reason, int $causer, int $level, int $protocol): string + { + if (empty($thread->post)) { + $this->logger->info('Invalid post', ['post' => $thread]); + return ''; + } + $uri = $this->getUri($thread->post); + + $fetched_uri = $this->getPostUri($uri, $uid); + if (empty($fetched_uri)) { + $uri_id = $this->processPost($thread->post, $uid, $post_reason, $causer, $level, $protocol); + if ($uri_id) { + $this->logger->debug('Post has been processed and stored', ['uri-id' => $uri_id, 'uri' => $uri]); + return $uri; + } else { + $this->logger->info('Post has not not been stored', ['uri' => $uri]); + return ''; + } + } else { + $this->logger->debug('Post exists', ['uri' => $uri]); + $uri = $fetched_uri; + } + + foreach ($thread->replies ?? [] as $reply) { + $reply_uri = $this->processThread($reply, $uid, Item::PR_FETCHED, $causer, $level, $protocol); + $this->logger->debug('Reply has been processed', ['uri' => $uri, 'reply' => $reply_uri]); + } + + return $uri; + } + + private function getUriParts(string $uri): ?stdClass + { + $class = $this->getUriClass($uri); + if (empty($class)) { + return null; + } + + $parts = explode('/', substr($class->uri, 5)); + + $class = new stdClass(); + + $class->repo = $parts[0]; + $class->collection = $parts[1]; + $class->rkey = $parts[2]; + + return $class; + } + + private function getUriClass(string $uri): ?stdClass + { + if (empty($uri)) { + return null; + } + + $elements = explode(':', $uri); + if (empty($elements) || ($elements[0] != 'at')) { + $post = Post::selectFirstPost(['extid'], ['uri' => $uri]); + return $this->getUriClass($post['extid'] ?? ''); + } + + $class = new stdClass(); + + $class->cid = array_pop($elements); + $class->uri = implode(':', $elements); + + if ((substr_count($class->uri, '/') == 2) && (substr_count($class->cid, '/') == 2)) { + $class->uri .= ':' . $class->cid; + $class->cid = ''; + } + + return $class; + } + + private function fetchUriId(string $uri, int $uid): string + { + $reply = Post::selectFirst(['uri-id'], ['uri' => $uri, 'uid' => [$uid, 0]]); + if (!empty($reply['uri-id'])) { + $this->logger->debug('Post exists', ['uri' => $uri]); + return $reply['uri-id']; + } + $reply = Post::selectFirst(['uri-id'], ['extid' => $uri, 'uid' => [$uid, 0]]); + if (!empty($reply['uri-id'])) { + $this->logger->debug('Post with extid exists', ['uri' => $uri]); + return $reply['uri-id']; + } + return 0; + } + + private function getPostUids(string $uri): array + { + $uids = []; + $posts = Post::select(['uid'], ['uri' => $uri]); + while ($post = Post::fetch($posts)) { + $uids[] = $post['uid']; + } + $this->db->close($posts); + + $posts = Post::select(['uid'], ['extid' => $uri]); + while ($post = Post::fetch($posts)) { + $uids[] = $post['uid']; + } + $this->db->close($posts); + return array_unique($uids); + } + + private function postExists(string $uri, array $uids): bool + { + if (Post::exists(['uri' => $uri, 'uid' => $uids])) { + return true; + } + + return Post::exists(['extid' => $uri, 'uid' => $uids]); + } + + private function getUri(stdClass $post): string + { + if (empty($post->cid)) { + $this->logger->info('Invalid URI', ['post' => $post]); + return ''; + } + return $post->uri . ':' . $post->cid; + } + + private function getPostUri(string $uri, int $uid): string + { + if (Post::exists(['uri' => $uri, 'uid' => [$uid, 0]])) { + $this->logger->debug('Post exists', ['uri' => $uri]); + return $uri; + } + + $reply = Post::selectFirst(['uri'], ['extid' => $uri, 'uid' => [$uid, 0]]); + if (!empty($reply['uri'])) { + $this->logger->debug('Post with extid exists', ['uri' => $uri]); + return $reply['uri']; + } + return ''; + } +} diff --git a/static/defaults.config.php b/static/defaults.config.php index b74ac5aa84..d821ff1813 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -730,4 +730,13 @@ return [ // Wether the blocklist is publicly listed under /about (or in any later API) 'public' => true, ], + 'jetstream' => [ + // pidfile (Path) + // Jetstream pid file path. For example: pidfile = /path/to/jetstream.pid + 'pidfile' => '', + // did_limit (Integer) + // Maximum number of DIDs that are filtered in Jetstream. The maximum number is 10,000, + // The higher the number, the more likely the system won't be able to process the posts on time. + 'did_limit' => 1000, + ], ];