// fediverse-archiving-tools/friendica_archiver/bin/friendica_archiver.dart
//
// 177 lines
// 6.4 KiB
// Dart

import 'dart:convert';
import 'dart:io';
import 'package:args/args.dart';
import 'package:path/path.dart' as p;
import 'friendica_client.dart';
import 'image_archiver.dart';
import 'json_printer.dart';
import 'models.dart';
// Fallback values for the command line options declared in `_buildArgs`.

/// Default for `--resume-page`: the first page requested from the server.
const defaultStartPage = 1;

/// Default for `--items-per-page`: how many items are requested per page.
const defaultItemsPerPage = 20;

/// Default for `--max-post-requests`: the cap on timeline queries in one run.
const defaultMaxPostsQuery = 1000000000;

/// Default for `--delay`: milliseconds to wait between timeline requests.
const defaultRequestDelayMilliseconds = 5000;

/// Default for `--download-images`.
const defaultDownloadImages = true;
/// The command line settings for one archiver run, read and validated once.
class _ArchiverSettings {
  /// Reads every option out of [results].
  ///
  /// Throws a [FormatException] when a numeric option is not an integer. Any
  /// error raised by [results] itself while reading an option (for example for
  /// a missing mandatory option) propagates to the caller.
  _ArchiverSettings.fromArgResults(ArgResults results)
      : username = results['username'] as String,
        serverName = results['server-name'] as String,
        archiveFolder = results['archive-folder'] as String,
        maxQueries = _intOption(results, 'max-post-requests'),
        queryDelayMillis = _intOption(results, 'delay'),
        itemsPerPage = _intOption(results, 'items-per-page'),
        firstPage = _intOption(results, 'resume-page'),
        downloadImages = results['download-images'] as bool;

  final String username;
  final String serverName;
  final String archiveFolder;
  final int maxQueries;
  final int queryDelayMillis;
  final int itemsPerPage;
  final int firstPage;
  final bool downloadImages;
}

/// Reads the option [name] from [results] and parses it as a base-10 integer.
///
/// Throws a [FormatException] that names the option when the supplied text is
/// not a valid integer, so the user sees which argument was wrong.
int _intOption(ArgResults results, String name) {
  final raw = results[name] as String;
  final value = int.tryParse(raw);
  if (value == null) {
    throw FormatException('Option "$name" must be an integer, got "$raw"');
  }
  return value;
}

/// Prints [message] followed by the usage text of [argParser] and marks the
/// process as failed.
void _reportArgumentError(ArgParser argParser, Object? message) {
  print("Error with arguments: $message");
  print(argParser.usage);
  // 64 is the conventional "command line usage error" exit status.
  exitCode = 64;
}

/// Prompts for the password on stdout and reads one line from stdin.
///
/// Echo is switched off while typing when stdin is attached to a terminal and
/// is always restored afterwards. Returns an empty string when stdin is closed.
String _readPassword() {
  stdout.write('Password: ');
  if (!stdin.hasTerminal) {
    // Piped input: there is no echo to disable.
    return stdin.readLineSync() ?? '';
  }
  final previousEchoMode = stdin.echoMode;
  stdin.echoMode = false;
  try {
    return stdin.readLineSync() ?? '';
  } finally {
    stdin.echoMode = previousEchoMode;
  }
}

/// Archives a Friendica timeline (and optionally its images) to local files.
///
/// Parses [arguments], asks for the password, loads `postsAndComments.json`
/// and `images.json` from the archive folder when they exist, then pages
/// through the timeline, rewriting both JSON files after every successful
/// page. Stops when the server returns an empty page or the maximum number of
/// queries is reached.
Future<void> main(List<String> arguments) async {
  final argParser = _buildArgs();
  late final _ArchiverSettings settings;
  try {
    // Every option is read here, before the password prompt, so that a bad or
    // missing argument is reported as a usage error instead of crashing later.
    settings = _ArchiverSettings.fromArgResults(argParser.parse(arguments));
  } on ArgParserException catch (e) {
    _reportArgumentError(argParser, e.message);
    return;
  } on FormatException catch (e) {
    _reportArgumentError(argParser, e.message);
    return;
  } on ArgumentError catch (e) {
    // Presumably what ArgResults raises when a mandatory option is missing --
    // confirm against the package:args version in use.
    _reportArgumentError(argParser, e.message);
    return;
  }

  final password = _readPassword();
  // The newline typed by the user is not echoed while echo is off.
  print('');

  final username = settings.username;
  final client = FriendicaClient(
      username: username,
      password: password,
      serverName: settings.serverName);
  // Make sure the target folder exists before anything is written into it.
  final baseDirectory = Directory(settings.archiveFolder)
    ..createSync(recursive: true);
  final imageArchive = ImageArchiver(client, baseDirectory);
  final maxQueries = settings.maxQueries;
  final queryDelayMillis = settings.queryDelayMillis;
  final sleepDuration = Duration(milliseconds: queryDelayMillis);
  final itemsPerPage = settings.itemsPerPage;
  // NOTE(review): the help text of `resume-page` says that 0 "resets from
  // scratch", but nothing here special-cases 0; existing data is always loaded.
  final firstPage = settings.firstPage;
  final downloadImages = settings.downloadImages;
  final allEntries = <int, FriendicaEntry>{};
  final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
  final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
  print(
      "Max number of queries will be $maxQueries with $itemsPerPage items per page");

  final postsFile = File(postsJsonFile);
  if (postsFile.existsSync()) {
    try {
      final oldEntriesJson =
          jsonDecode(postsFile.readAsStringSync()) as List<dynamic>;
      // Parse everything before touching the map, so a malformed file cannot
      // leave a partially loaded state behind.
      final oldEntries =
          oldEntriesJson.map((j) => FriendicaEntry.fromJson(j)).toList();
      for (final entry in oldEntries) {
        allEntries[entry.id] = entry;
      }
      print('Loading ${oldEntries.length} post/comment entries from disk');
    } catch (e) {
      print(
          'Error loading old entries, will be starting from scratch file: $e');
    }
  } else {
    print(
        'Entries file did not exist at location therefore assuming starting from scratch: $postsJsonFile');
  }

  final imageArchiveFile = File(imageArchiveJsonFilePath);
  if (imageArchiveFile.existsSync()) {
    // Same best-effort handling as for the posts file above.
    try {
      final oldEntriesJson =
          jsonDecode(imageArchiveFile.readAsStringSync()) as List<dynamic>;
      final oldEntries =
          oldEntriesJson.map((j) => ImageEntry.fromJson(j)).toList();
      for (final entry in oldEntries) {
        final alreadyHadEntry = imageArchive.addDirectEntries(entry);
        if (alreadyHadEntry) {
          print("Image cache already had entry for: ${entry.url}");
        }
      }
      print('Loading ${oldEntries.length} image entries from disk');
    } catch (e) {
      print(
          'Error loading old image entries, will be starting from scratch file: $e');
    }
  } else {
    print(
        'Image archive file did not exist at location so assuming starting from scratch: $imageArchiveJsonFilePath');
  }

  print("Loading data from server");
  final maxPage = firstPage + maxQueries;
  for (var page = firstPage; page < maxPage; page++) {
    print("Querying for posts/comments for $page");
    final timelineResult =
        await client.getTimeline(username, page, itemsPerPage);
    if (timelineResult.isFailure) {
      print('Error getting entries: ${timelineResult.error}');
      // Keep the delay on failures too; otherwise an unreachable server would
      // be queried in a tight loop for every remaining page.
      print("Sleeping for $queryDelayMillis milliseconds before next query");
      await Future<void>.delayed(sleepDuration);
      continue;
    }

    final entries = timelineResult.value;
    print('# Post/Comments returned for Page $page: ${entries.length}');
    for (final entry in entries) {
      if (allEntries.containsKey(entry.id)) {
        print('Replacing existing entry for ${entry.url}');
      }
      allEntries[entry.id] = entry;
      if (downloadImages) {
        final imageEntryResults = await imageArchive.addEntryImages(entry);
        if (entry.images.isNotEmpty) {
          print(
              '${imageEntryResults.length} new images of ${entry.images.length} in entry retrieved');
        }
      }
    }

    // Yes we are rewriting the entire file every time to preserve the results
    // over time.
    final postsJson = allEntries.values.map((e) => e.originalJson).toList();
    postsFile.writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
    print("Posts written to JSON file: $postsJsonFile");
    if (downloadImages) {
      imageArchiveFile
          .writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
      print('Images directory saved to: $imageArchiveJsonFilePath');
    }

    if (entries.isEmpty) {
      print('Returned no results for this page, assuming at end of timeline');
      break;
    }
    print("Sleeping for $queryDelayMillis milliseconds before next query");
    await Future<void>.delayed(sleepDuration);
  }
  print('Done');
}
/// Builds the parser describing every command line option of the archiver.
///
/// `archive-folder`, `username` and `server-name` are declared mandatory; the
/// remaining options fall back to the `default*` constants of this file.
/// Numeric options are declared as string options, so their values still have
/// to be converted to integers by the caller.
ArgParser _buildArgs() => ArgParser()
  ..addOption('archive-folder',
      abbr: 'a',
      help: 'Specifies the local folder all data files pulled from the server '
          'will be stored',
      mandatory: true)
  ..addOption('username',
      abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
  ..addOption('server-name',
      abbr: 's',
      // The closing quote and parenthesis were missing from this help text.
      help: 'The server name for your instance. (e.g. if the URL in your browser '
          'is "https://friendica.com/" then this would be "friendica.com")',
      mandatory: true)
  // NOTE(review): the help text below promises that page 0 "resets from
  // scratch"; `main` as visible in this file does not special-case 0 --
  // confirm the intended behavior.
  ..addOption('resume-page',
      abbr: 'r',
      help: 'The page to restart the downloading process. Will try to read in '
          'existing posts and image archive data and start download from '
          'there. If set to 0 it resets from scratch.',
      defaultsTo: '$defaultStartPage')
  ..addOption('delay',
      abbr: 'd',
      help: 'Delay in milliseconds between requests to try not to stress the '
          'server (thousands of API calls can be made)',
      defaultsTo: '$defaultRequestDelayMilliseconds')
  ..addOption('max-post-requests',
      abbr: 'm',
      help: 'The maximum number of times to query for posts',
      defaultsTo: '$defaultMaxPostsQuery')
  ..addOption('items-per-page',
      abbr: 'p',
      help: 'The requested number of items per page',
      allowed: ['1', '5', '10', '20', '50', '100'],
      defaultsTo: '$defaultItemsPerPage')
  ..addFlag('download-images',
      abbr: 'i',
      help: 'Whether to download images from posts when those images are stored '
          'on the server (not links to other sites) '
          '(defaults to $defaultDownloadImages)',
      defaultsTo: defaultDownloadImages);