diff --git a/bin/exec_error.dart b/bin/exec_error.dart
index 9c08b3b..6b1af0a 100644
--- a/bin/exec_error.dart
+++ b/bin/exec_error.dart
@@ -7,5 +7,7 @@ class ExecError {
 
 enum ErrorType {
   authentication,
+  /// Failure raised locally while issuing a request or handling its response.
+  localError,
   missingEndpoint,
 }
diff --git a/bin/extensions.dart b/bin/extensions.dart
new file mode 100644
index 0000000..dc5fbf7
--- /dev/null
+++ b/bin/extensions.dart
@@ -0,0 +1,18 @@
+/// Element-wise equality for lists, which `==` does not provide in Dart.
+extension ListEqualityTest<T> on List<T> {
+  /// Whether [list2] has the same length as this list and every pair of
+  /// elements at the same index compares equal with `==`.
+  bool equals(List<T> list2) {
+    if (length != list2.length) {
+      return false;
+    }
+
+    for (var i = 0; i < length; i++) {
+      if (this[i] != list2[i]) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+}
diff --git a/bin/friendica_archiver.dart b/bin/friendica_archiver.dart
index 404b2a9..9015ea3 100644
--- a/bin/friendica_archiver.dart
+++ b/bin/friendica_archiver.dart
@@ -1,48 +1,20 @@
 import 'dart:io';
 
 import 'package:args/args.dart';
+import 'package:path/path.dart' as p;
 
 import 'friendica_client.dart';
+import 'image_archiver.dart';
 import 'json_printer.dart';
+import 'models.dart';
 
 const defaultRequestDelayMilliseconds = 5000;
-const defaultMaxPosts = 1;
+const defaultMaxPostsQuery = 10;
 const defaultReadComments = false;
 const defaultReadImages = false;
 
 void main(List<String> arguments) async {
-  final argParser = ArgParser()
-    ..addOption('archive-folder',
-        abbr: 'a',
-        help:
-            'Specifies the local folder all data files pulled from the server will be stored',
-        mandatory: true)
-    ..addOption('username',
-        abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
-    ..addOption('server-name',
-        abbr: 's',
-        help:
-            'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com',
-        mandatory: true)
-    ..addOption('delay',
-        abbr: 'd',
-        help:
-            'Delay in milliseconds between requests to try not to stress the server (thousands of API calls can be made)',
-        defaultsTo: '$defaultRequestDelayMilliseconds')
-    ..addOption('max-post-requests',
-        abbr: 'm',
-        help: 'The maximum number of times to query for posts',
-        defaultsTo: '$defaultMaxPosts')
-    ..addFlag('read-comments',
-        abbr: 'c',
-        help:
-            'Whether to read comments on posts (defaults to $defaultReadComments)',
-        defaultsTo: defaultReadComments)
-    ..addFlag('download-images',
-        abbr: 'i',
-        help:
-            'Whether to download images from posts when those images are stored on the server (not links to other sites) (defaults to $defaultReadImages)',
-        defaultsTo: defaultReadComments);
+  final argParser = _buildArgs();
 
   late ArgResults settings;
   try {
@@ -53,10 +25,8 @@ void main(List<String> arguments) async {
     return;
   }
 
   stdout.write('Enter Password: ');
-  _setEcho(false);
   final password = stdin.readLineSync() ?? '';
-  _setEcho(true);
   print('');
 
   final username = settings['username'];
@@ -64,25 +34,89 @@ void main(List<String> arguments) async {
       username: username,
       password: password,
       serverName: settings['server-name']);
 
-  final timelineResult = await client.getTimeline(username, 1, 20);
-  timelineResult.match(
-      onSuccess: (posts) {
-        posts.forEach(print);
-        File('/tmp/test.json').writeAsStringSync(PrettyJsonEncoder()
-            .convert(posts.map((p) => p.originalJson).toList()));
-      },
-      onError: (error) => print('Error getting posts: $error'));
-  print("Done processing API requests");
+  final baseDirectory = Directory(settings['archive-folder']);
+  final imageArchive = ImageArchiver(client, baseDirectory);
+  final maxQueries = int.parse(settings['max-post-requests']);
+  final queryDelayMillis = int.parse(settings['delay']);
+  final sleepDuration = Duration(milliseconds: queryDelayMillis);
+  const itemsPerPage = 20;
+  final allEntries = <FriendicaEntry>[];
+  final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
+  final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
+
+  // Start at page 1, as the single-page request this loop replaces did.
+  for (var page = 1; page <= maxQueries; page++) {
+    final timelineResult =
+        await client.getTimeline(username, page, itemsPerPage);
+    if (timelineResult.isFailure) {
+      print('Error getting entries: ${timelineResult.error}');
+      // Keep the delay on failures too so a struggling server is not hammered.
+      await Future.delayed(sleepDuration);
+      continue;
+    }
+    final entries = timelineResult.value;
+    print('# Post/Comments returned for Page $page: ${entries.length}');
+    allEntries.addAll(entries);
+    for (final entry in entries) {
+      final imageEntryResults = await imageArchive.addEntryImages(entry);
+      if (entry.images.isNotEmpty) {
+        print(
+            '${imageEntryResults.length} new images of ${entry.images.length} in entry retrieved');
+      }
+    }
+
+    // Write after every page, including the final partial one, so nothing
+    // fetched is lost when the loop ends or the run is interrupted.
+    final postsJson = allEntries.map((e) => e.originalJson).toList();
+    File(postsJsonFile)
+        .writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
+    print("Posts written to JSON file: $postsJsonFile");
+    File(imageArchiveJsonFilePath)
+        .writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
+    print('Images directory saved to: $imageArchiveJsonFilePath');
+
+    if (entries.length != itemsPerPage) {
+      print(
+          'Returned less than a full page, assuming at end of timeline and quitting');
+      break;
+    }
+    print("Sleeping for $queryDelayMillis milliseconds before next query");
+    await Future.delayed(sleepDuration);
+  }
+  client.close();
 }
 
-// Seems in IntelliJ and release build mode setting echo fails
-void _setEcho(bool value) {
-  try {
-    stdin.echoMode = value;
-    // ignore: empty_catches
-  } catch (e) {
-    print('');
-    print('Error toggling echo to $value, so will stay current value...');
-  }
-}
+/// Builds the parser for every command line option the archiver accepts.
+ArgParser _buildArgs() => ArgParser()
+  ..addOption('archive-folder',
+      abbr: 'a',
+      help:
+          'Specifies the local folder all data files pulled from the server will be stored',
+      mandatory: true)
+  ..addOption('username',
+      abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
+  ..addOption('server-name',
+      abbr: 's',
+      help:
+          'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com")',
+      mandatory: true)
+  ..addOption('delay',
+      abbr: 'd',
+      help:
+          'Delay in milliseconds between requests to try not to stress the server (thousands of API calls can be made)',
+      defaultsTo: '$defaultRequestDelayMilliseconds')
+  ..addOption('max-post-requests',
+      abbr: 'm',
+      help: 'The maximum number of times to query for posts',
+      defaultsTo: '$defaultMaxPostsQuery')
+  ..addFlag('read-comments',
+      abbr: 'c',
+      help:
+          'Whether to read comments on posts (defaults to $defaultReadComments)',
+      defaultsTo: defaultReadComments)
+  ..addFlag('download-images',
+      abbr: 'i',
+      help:
+          'Whether to download images from posts when those images are stored on the server (not links to other sites) (defaults to $defaultReadImages)',
+      defaultsTo: defaultReadImages);
diff --git a/bin/friendica_client.dart b/bin/friendica_client.dart
index 955923c..c974b2b 100644
--- a/bin/friendica_client.dart
+++ b/bin/friendica_client.dart
@@ -3,13 +3,14 @@ import 'dart:io';
 
 import 'package:result_monad/result_monad.dart';
 
+import 'exec_error.dart';
 import 'models.dart';
 
 class FriendicaClient {
   final String username;
   final String password;
   final String serverName;
   final _client = HttpClient();
   late final String _authHeader;
 
   FriendicaClient(
@@ -21,23 +22,75 @@ class FriendicaClient {
     _authHeader = "Basic $encodedAuthString";
   }
 
-  FutureResult<List<FriendicaEntry>, String> getTimeline(
+  /// Closes the shared HTTP client; call once no more requests will be made.
+  void close() => _client.close();
+
+  /// Fetches page [page] (of [count] entries each) of [userId]'s timeline.
+  FutureResult<List<FriendicaEntry>, ExecError> getTimeline(
       String userId, int page, int count) async {
     final request = Uri.parse(
-        'https://$serverName/api/statuses/user_timelineuser_id=$userId&count=$count&page=$page');
-    return (await _getApiRequest(request)).mapValue((postsJson) =>
-        postsJson.map((postJson) => FriendicaEntry.fromJson(postJson)).toList());
+        'https://$serverName/api/statuses/user_timeline?user_id=$userId&count=$count&page=$page');
+    return (await _getApiRequest(request)).mapValue((postsJson) => postsJson
+        .map((postJson) => FriendicaEntry.fromJson(postJson))
+        .toList());
   }
 
-  FutureResult<List<dynamic>, String> _getApiRequest(Uri url) async {
-    // TODO Error mode against: bad server URL, bad auth, bad path, empty response
-    final request = await _client.getUrl(url);
-    request.headers.add('authorization', _authHeader);
-    request.headers.contentType =
-        ContentType('application', 'json', charset: 'utf-8');
-    final response = await request.close();
-    final body = await response.transform(utf8.decoder).join('');
-    File('/tmp/response.json').writeAsStringSync(body);
-    final bodyJson = jsonDecode(body) as List<dynamic>;
-    return Result.ok(bodyJson);
+  /// Sends an authenticated GET to [url] and returns the raw response.
+  ///
+  /// Exceptions thrown while connecting or sending are returned as an
+  /// [ErrorType.localError]; the HTTP status is left for callers to check.
+  FutureResult<HttpClientResponse, ExecError> getUrl(Uri url) async {
+    try {
+      final request = await _client.getUrl(url);
+      request.headers.add('authorization', _authHeader);
+      final response = await request.close();
+      return Result.ok(response);
+    } catch (e) {
+      return Result.error(
+          ExecError(type: ErrorType.localError, message: e.toString()));
+    }
+  }
+
+  /// Fetches [url] and decodes the response body as a JSON array.
+  ///
+  /// Transport failures, non-200 statuses and bodies that are not a JSON
+  /// array are returned as an [ExecError] rather than thrown.
+  FutureResult<List<dynamic>, ExecError> _getApiRequest(Uri url) async {
+    final responseResult = await getUrl(url);
+    if (responseResult.isFailure) {
+      return Result.error(responseResult.error);
+    }
+    final response = responseResult.value;
+    try {
+      final body = await response.transform(utf8.decoder).join('');
+      if (response.statusCode != HttpStatus.ok) {
+        return Result.error(ExecError(
+            type: _errorTypeForStatus(response.statusCode),
+            message: 'HTTP ${response.statusCode} from $url'));
+      }
+      final bodyJson = jsonDecode(body);
+      if (bodyJson is! List) {
+        return Result.error(ExecError(
+            type: ErrorType.localError,
+            message: 'Expected a JSON array from $url'));
+      }
+      return Result.ok(bodyJson);
+    } on Exception catch (e) {
+      // A connection dropped mid-body, malformed UTF-8 or invalid JSON.
+      return Result.error(
+          ExecError(type: ErrorType.localError, message: '$e'));
+    }
+  }
+
+  /// Maps an HTTP status code onto the closest matching [ErrorType].
+  ErrorType _errorTypeForStatus(int statusCode) {
+    switch (statusCode) {
+      case HttpStatus.unauthorized:
+      case HttpStatus.forbidden:
+        return ErrorType.authentication;
+      case HttpStatus.notFound:
+        return ErrorType.missingEndpoint;
+      default:
+        return ErrorType.localError;
+    }
   }
diff --git a/bin/image_archiver.dart b/bin/image_archiver.dart
new file mode 100644
index 0000000..a15e220
--- /dev/null
+++ b/bin/image_archiver.dart
@@ -0,0 +1,114 @@
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:path/path.dart' as p;
+import 'package:uuid/uuid.dart';
+
+import 'extensions.dart';
+import 'friendica_client.dart';
+import 'models.dart';
+
+/// Downloads the images referenced by timeline entries into a local folder
+/// and records which URL was stored under which file name.
+class ImageArchiver {
+  final FriendicaClient client;
+  final _images = <String, ImageEntry>{};
+  final Directory imageDirectory;
+
+  /// An unmodifiable snapshot of every image archived so far.
+  List<ImageEntry> get images => List.unmodifiable(_images.values);
+
+  /// Creates the `images` folder below [baseDirectory] when it is missing.
+  ImageArchiver(this.client, Directory baseDirectory)
+      : imageDirectory = Directory(p.join(baseDirectory.path, 'images')) {
+    imageDirectory.createSync(recursive: true);
+  }
+
+  /// Downloads every image of [entry] that has not been archived yet.
+  ///
+  /// Returns only the entries created by this call. URLs that are already
+  /// archived, cannot be parsed or fail to download are skipped.
+  Future<List<ImageEntry>> addEntryImages(FriendicaEntry entry) async {
+    final imageEntries = <ImageEntry>[];
+    for (final imageUrl in entry.images) {
+      if (_images.containsKey(imageUrl)) {
+        continue;
+      }
+      final url = Uri.tryParse(imageUrl);
+      if (url == null) {
+        print('Skipping malformed image URL: $imageUrl');
+        continue;
+      }
+      final imageResponse = await client.getUrl(url);
+      if (imageResponse.isFailure) {
+        print(imageResponse.error);
+        continue;
+      }
+
+      final response = imageResponse.value;
+      if (response.statusCode != HttpStatus.ok) {
+        print(
+            'Error response attempting to retrieve image $imageUrl: ${response.statusCode}');
+        // Drain the unread body so the connection can be reused or closed.
+        await response.drain<void>();
+        continue;
+      }
+
+      final contents = <int>[];
+      await for (final data in response) {
+        contents.addAll(data);
+      }
+      final extension = calculateExtensions(contents);
+      final filename = Uuid().v4().replaceAll('-', '') + extension;
+      final filePath = p.join(imageDirectory.path, filename);
+      await File(filePath).writeAsBytes(contents);
+      final newEntry = ImageEntry(
+          postId: entry.id.toString(),
+          localFilename: filename,
+          url: imageUrl);
+      _images[imageUrl] = newEntry;
+      imageEntries.add(newEntry);
+    }
+
+    return imageEntries;
+  }
+
+  /// Guesses a file extension, leading dot included, from the magic number
+  /// at the start of [imageBytes].
+  ///
+  /// Returns an empty string when the format is not recognised or when
+  /// [imageBytes] is too short to hold a signature.
+  String calculateExtensions(List<int> imageBytes) {
+    // Using table from https://www.sparkhound.com/blog/detect-image-file-types-through-byte-arrays
+    final bmp = ascii.encode("BM").toList(); // BMP
+    final gif = ascii.encode("GIF").toList(); // GIF
+    const png = [137, 80, 78, 71]; // PNG
+    const tiff = [73, 73, 42]; // TIFF
+    const tiff2 = [77, 77, 42]; // TIFF
+    // Every JPEG starts FF D8 FF; the fourth byte varies (JFIF, Exif, ...).
+    const jpeg = [255, 216, 255];
+
+    // Length-checked prefix test: sublist() throws on short input.
+    bool startsWith(List<int> signature) =>
+        imageBytes.length >= signature.length &&
+        imageBytes.sublist(0, signature.length).equals(signature);
+
+    if (startsWith(jpeg)) {
+      return '.jpg';
+    }
+    if (startsWith(png)) {
+      return '.png';
+    }
+    if (startsWith(gif)) {
+      return '.gif';
+    }
+    if (startsWith(tiff) || startsWith(tiff2)) {
+      return '.tif';
+    }
+    if (startsWith(bmp)) {
+      return '.bmp';
+    }
+
+    return '';
+  }
+}
diff --git a/bin/models.dart b/bin/models.dart
index 09959fa..280d59b 100644
--- a/bin/models.dart
+++ b/bin/models.dart
@@ -46,3 +46,31 @@ class FriendicaEntry {
         .toList();
   }
 }
+
+/// Records where a single downloaded image came from and where it is stored.
+class ImageEntry {
+  /// Id of the timeline entry the image was found in.
+  final String postId;
+
+  /// File name of the downloaded copy inside the image archive folder.
+  final String localFilename;
+
+  /// URL the image was downloaded from.
+  final String url;
+
+  const ImageEntry(
+      {required this.postId, required this.localFilename, required this.url});
+
+  /// Reads an entry from [json]; missing keys become empty strings.
+  ImageEntry.fromJson(Map<String, dynamic> json)
+      : postId = json['postId'] ?? '',
+        localFilename = json['localFilename'] ?? '',
+        url = json['url'] ?? '';
+
+  /// The JSON form understood by [ImageEntry.fromJson].
+  Map<String, dynamic> toJson() => {
+        'postId': postId,
+        'localFilename': localFilename,
+        'url': url,
+      };
+}
diff --git a/pubspec.lock b/pubspec.lock
index 0404e4b..78f3475 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -8,6 +8,20 @@ packages:
       url: "https://pub.dartlang.org"
     source: hosted
     version: "2.3.0"
+  collection:
+    dependency: transitive
+    description:
+      name: collection
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "1.15.0"
+  crypto:
+    dependency: transitive
+    description:
+      name: crypto
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "3.0.1"
   lints:
     dependency: "direct dev"
     description:
@@ -15,6 +29,20 @@ packages:
       url: "https://pub.dartlang.org"
     source: hosted
     version: "1.0.1"
+  logging:
+    dependency: "direct main"
+    description:
+      name: logging
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "1.0.2"
+  path:
+    dependency: "direct main"
+    description:
+      name: path
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "1.8.1"
   result_monad:
     dependency: "direct main"
     description:
@@ -22,5 +50,19 @@ packages:
       url: "https://pub.dartlang.org"
     source: hosted
     version: "1.0.2"
+  typed_data:
+    dependency: transitive
+    description:
+      name: typed_data
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "1.3.0"
+  uuid:
+    dependency: "direct main"
+    description:
+      name: uuid
+      url: "https://pub.dartlang.org"
+    source: hosted
+    version: "3.0.5"
 sdks:
   dart: ">=2.15.1 <3.0.0"
diff --git a/pubspec.yaml b/pubspec.yaml
index cc70bf4..6ae26f0 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -8,7 +8,10 @@ environment:
 
 dependencies:
   args: ^2.3.0
+  logging: ^1.0.2
+  path: ^1.8.1
   result_monad: ^1.0.2
+  uuid: ^3.0.5
 
 dev_dependencies:
   lints: ^1.0.0