// Mirror of https://gitlab.com/mysocialportal/fediverse-archiving-tools.git
// (synced 2024-10-18 08:53:31 +00:00; 177 lines, 6.4 KiB, Dart).
import 'dart:convert';
|
|
import 'dart:io';
|
|
|
|
import 'package:args/args.dart';
|
|
import 'package:path/path.dart' as p;
|
|
|
|
import 'friendica_client.dart';
|
|
import 'image_archiver.dart';
|
|
import 'json_printer.dart';
|
|
import 'models.dart';
|
|
|
|
/// Default value of the `delay` option: pause between requests, in
/// milliseconds.
const int defaultRequestDelayMilliseconds = 5000;

/// Default value of the `max-post-requests` option: upper bound on the number
/// of timeline queries made in one run.
const int defaultMaxPostsQuery = 1000000000;

/// Default value of the `items-per-page` option.
const int defaultItemsPerPage = 20;

/// Default value of the `download-images` flag.
const bool defaultDownloadImages = true;

/// Default value of the `resume-page` option: the first timeline page queried.
const int defaultStartPage = 1;
|
|
|
|
/// Command-line entry point: archives a Friendica account to a local folder.
///
/// Parses [arguments] with the parser built by [_buildArgs], prompts for the
/// account password on stdin, reloads any `postsAndComments.json` and
/// `images.json` already present in the archive folder, and then pages through
/// the user's timeline. After every successfully fetched page the posts file
/// (and, when image download is enabled, the image index) is rewritten in full
/// so that an interrupted run keeps what it has collected so far.
///
/// Unusable options are reported together with the usage text *before* the
/// password prompt; in that case the function returns without contacting the
/// server.
Future<void> main(List<String> arguments) async {
  final argParser = _buildArgs();

  final ArgResults settings;
  try {
    settings = argParser.parse(arguments);
  } on ArgParserException catch (e) {
    print("Error with arguments: ${e.message}");
    print(argParser.usage);
    return;
  }

  // Missing mandatory options and non-numeric values would otherwise only
  // surface as uncaught errors after the user has already typed a password.
  final settingsProblem = _findSettingsProblem(settings);
  if (settingsProblem != null) {
    print("Error with arguments: $settingsProblem");
    print(argParser.usage);
    return;
  }

  final password = _readPassword();
  print('');

  final username = settings['username'] as String;
  final downloadImages = settings['download-images'] as bool;
  final client = FriendicaClient(
      username: username,
      password: password,
      serverName: settings['server-name'] as String);
  final baseDirectory = Directory(settings['archive-folder'] as String);
  final imageArchive = ImageArchiver(client, baseDirectory);
  final maxQueries = int.parse(settings['max-post-requests'] as String);
  final queryDelayMillis = int.parse(settings['delay'] as String);
  final sleepDuration = Duration(milliseconds: queryDelayMillis);
  final itemsPerPage = int.parse(settings['items-per-page'] as String);
  final firstPage = int.parse(settings['resume-page'] as String);
  final allEntries = <int, FriendicaEntry>{};
  final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
  final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');

  print(
      "Max number of queries will be $maxQueries with $itemsPerPage items per page");

  // Seed the in-memory map with whatever a previous run saved. A file that
  // cannot be read or decoded is reported and otherwise ignored.
  if (File(postsJsonFile).existsSync()) {
    try {
      final oldEntriesJson =
          jsonDecode(File(postsJsonFile).readAsStringSync()) as List<dynamic>;
      final oldEntries = oldEntriesJson.map((j) => FriendicaEntry.fromJson(j));
      for (final entry in oldEntries) {
        allEntries[entry.id] = entry;
      }
      print('Loading ${oldEntries.length} post/comment entries from disk');
    } catch (e) {
      print(
          'Error loading old entries, will be starting from scratch file: $e');
    }
  } else {
    print(
        'Entries file did not exist at location therefore assuming starting from scratch: $postsJsonFile');
  }

  // Seed the image archive the same way. Unlike the posts file, a failure
  // while reading this one is not caught and ends the run.
  if (File(imageArchiveJsonFilePath).existsSync()) {
    final oldEntriesJson =
        jsonDecode(File(imageArchiveJsonFilePath).readAsStringSync())
            as List<dynamic>;
    final oldEntries = oldEntriesJson.map((j) => ImageEntry.fromJson(j));
    for (final entry in oldEntries) {
      final alreadyHadEntry = imageArchive.addDirectEntries(entry);
      if (alreadyHadEntry) {
        print("Image cache already had entry for: ${entry.url}");
      }
    }
    print('Loading ${oldEntries.length} image entries from disk');
  } else {
    print(
        'Image archive file did not exist at location so assuming starting from scratch: $imageArchiveJsonFilePath');
  }

  print("Loading data from server");
  final maxPage = firstPage + maxQueries;
  for (var page = firstPage; page < maxPage; page++) {
    print("Querying for posts/comments for $page");
    final timelineResult =
        await client.getTimeline(username, page, itemsPerPage);
    if (timelineResult.isFailure) {
      print('Error getting entries: ${timelineResult.error}');
      // Honour the request delay on failures too; otherwise a persistent
      // error (bad credentials, server down) turns into back-to-back retries.
      print("Sleeping for $queryDelayMillis milliseconds before next query");
      await Future<void>.delayed(sleepDuration);
      continue;
    }

    final entries = timelineResult.value;
    print('# Post/Comments returned for Page $page: ${entries.length}');
    for (final entry in entries) {
      if (allEntries.containsKey(entry.id)) {
        print('Replacing existing entry for ${entry.url}');
      }
      allEntries[entry.id] = entry;
      if (downloadImages) {
        final imageEntryResults = await imageArchive.addEntryImages(entry);
        if (entry.images.isNotEmpty) {
          print(
              '${imageEntryResults.length} new images of ${entry.images.length} in entry retrieved');
        }
      }
    }

    // Yes we are rewriting the entire file every time to preserve the results
    // over time.
    final postsJson = allEntries.values.map((e) => e.originalJson).toList();
    File(postsJsonFile)
        .writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
    print("Posts written to JSON file: $postsJsonFile");

    if (downloadImages) {
      File(imageArchiveJsonFilePath)
          .writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
      print('Images directory saved to: $imageArchiveJsonFilePath');
    }

    if (entries.isEmpty) {
      print('Returned no results for this page, assuming at end of timeline');
      break;
    }

    // Announce the pause only when one is actually about to happen, and wait
    // without blocking the isolate.
    print("Sleeping for $queryDelayMillis milliseconds before next query");
    await Future<void>.delayed(sleepDuration);
  }

  print('Done');
}

/// Returns a message describing the first unusable option in [settings], or
/// `null` when every option that [main] reads is present and well-formed.
String? _findSettingsProblem(ArgResults settings) {
  const mandatoryOptions = ['archive-folder', 'username', 'server-name'];
  for (final name in mandatoryOptions) {
    if (!settings.wasParsed(name)) {
      return 'Option $name is mandatory.';
    }
  }

  // These options all have defaults, so their value is always a string.
  const integerOptions = [
    'max-post-requests',
    'delay',
    'items-per-page',
    'resume-page',
  ];
  for (final name in integerOptions) {
    final raw = settings[name] as String;
    if (int.tryParse(raw) == null) {
      return 'Option $name must be a whole number, but was "$raw".';
    }
  }
  return null;
}

/// Prompts for the account password on stdout and reads one line from stdin.
///
/// When stdin is attached to a terminal, echo is switched off while the
/// password is typed and restored afterwards. Returns an empty string when
/// stdin is already at end of input.
String _readPassword() {
  stdout.write('Password: ');
  // Echo mode can only be changed on a real terminal; piped input is read
  // as-is.
  if (!stdin.hasTerminal) {
    return stdin.readLineSync() ?? '';
  }

  final previousEchoMode = stdin.echoMode;
  stdin.echoMode = false;
  try {
    return stdin.readLineSync() ?? '';
  } finally {
    stdin.echoMode = previousEchoMode;
  }
}
|
|
|
|
/// Builds the command-line parser for the archiver.
///
/// `archive-folder`, `username` and `server-name` are declared mandatory. The
/// numeric options (`resume-page`, `delay`, `max-post-requests`,
/// `items-per-page`) are string options whose defaults come from the
/// `default*` constants, and `download-images` is a boolean flag.
ArgParser _buildArgs() => ArgParser()
  ..addOption('archive-folder',
      abbr: 'a',
      help:
          'Specifies the local folder all data files pulled from the server will be stored',
      mandatory: true)
  ..addOption('username',
      abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
  ..addOption('server-name',
      abbr: 's',
      // The example's closing quote and parenthesis were missing.
      help:
          'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com")',
      mandatory: true)
  // NOTE(review): the help text below says a value of 0 "resets from scratch",
  // but the visible main() only uses this value as the first page number and
  // still reloads existing files - confirm the intended behaviour.
  ..addOption('resume-page',
      abbr: 'r',
      help:
          'The page to restart the downloading process. Will try to read in existing posts and image archive data and start download from there. If set to 0 it resets from scratch.',
      defaultsTo: '$defaultStartPage')
  ..addOption('delay',
      abbr: 'd',
      help:
          'Delay in milliseconds between requests to try not to stress the server (thousands of API calls can be made)',
      defaultsTo: '$defaultRequestDelayMilliseconds')
  ..addOption('max-post-requests',
      abbr: 'm',
      help: 'The maximum number of times to query for posts',
      defaultsTo: '$defaultMaxPostsQuery')
  ..addOption('items-per-page',
      abbr: 'p',
      help: 'The requested number of items per page',
      allowed: ['1', '5', '10', '20', '50', '100'],
      defaultsTo: '$defaultItemsPerPage')
  ..addFlag('download-images',
      abbr: 'i',
      help:
          'Whether to download images from posts when those images are stored on the server (not links to other sites) (defaults to $defaultDownloadImages)',
      defaultsTo: defaultDownloadImages);
|