Mirror of https://gitlab.com/mysocialportal/fediverse-archiving-tools.git (synced 2024-10-18 08:53:31 +00:00)

Commit d9793be893 (parent 299638ea74)

Add default start page and concept of restarting.

2 changed files with 71 additions and 15 deletions

@@ -1,3 +1,4 @@
+import 'dart:convert';
 import 'dart:io';
 
 import 'package:args/args.dart';

@@ -12,6 +13,7 @@ const defaultRequestDelayMilliseconds = 5000;
 const defaultMaxPostsQuery = 1000000000;
 const defaultItemsPerPage = 20;
 const defaultDownloadImages = true;
+const defaultStartPage = 0;
 
 void main(List<String> arguments) async {
   final argParser = _buildArgs();

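The args package stores every option value as a string, so the new defaultStartPage constant can only act as a default by being interpolated into defaultsTo and parsed back out with int.parse, which is what main does with settings['resume-page'] in the next hunk. A minimal, self-contained sketch of that round trip (only the option name and abbreviation are taken from this commit; the rest is illustrative):

    import 'package:args/args.dart';

    const defaultStartPage = 0;

    void main(List<String> arguments) {
      // Option values are strings, so the int default goes in via interpolation...
      final parser = ArgParser()
        ..addOption('resume-page', abbr: 'r', defaultsTo: '$defaultStartPage');
      final settings = parser.parse(arguments);
      // ...and comes back out via int.parse.
      final firstPage = int.parse(settings['resume-page']);
      print('Resuming from page $firstPage');
    }
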
@@ -39,12 +41,57 @@ void main(List<String> arguments) async {
   final queryDelayMillis = int.parse(settings['delay']);
   final sleepDuration = Duration(milliseconds: queryDelayMillis);
   final itemsPerPage = int.parse(settings['items-per-page']);
-  final allEntries = <FriendicaEntry>[];
+  final firstPage = int.parse(settings['resume-page']);
+  final allEntries = <int, FriendicaEntry>{};
+  final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
+  final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
+
   print(
       "Max number of queries will be $maxQueries with $itemsPerPage items per page");
 
-  for (var page = 0; page < maxQueries; page++) {
+  if (firstPage != 0) {
+    print(
+        "Starting page is not zero therefore attempting to load image and post/comment archives from disk");
+    if (File(postsJsonFile).existsSync()) {
+      try {
+        final oldEntriesJson =
+            jsonDecode(File(postsJsonFile).readAsStringSync()) as List<dynamic>;
+        final oldEntries =
+            oldEntriesJson.map((j) => FriendicaEntry.fromJson(j));
+        for (final entry in oldEntries) {
+          allEntries[entry.id] = entry;
+        }
+        print('Loading ${oldEntries.length} post/comment entries from disk');
+      } catch (e) {
+        print(
+            'Error loading old entries, will be starting from scratch file: $e');
+      }
+    } else {
+      print(
+          'Entries file did not exist at location therefore assuming starting from scratch: $postsJsonFile');
+    }
+
+    if (File(imageArchiveJsonFilePath).existsSync()) {
+      final oldEntriesJson =
+          jsonDecode(File(imageArchiveJsonFilePath).readAsStringSync())
+              as List<dynamic>;
+      final oldEntries = oldEntriesJson.map((j) => ImageEntry.fromJson(j));
+      for (final entry in oldEntries) {
+        final alreadyHadEntry = imageArchive.addDirectEntries(entry);
+        if (alreadyHadEntry) {
+          print("Image cache already had entry for: ${entry.url}");
+        }
+      }
+      print('Loading ${oldEntries.length} image entries from disk');
+    } else {
+      print(
+          'Image archive file did not exist at location so assuming starting from scratch: $imageArchiveJsonFilePath');
+    }
+  }
+
+  print("Loading data from server");
+  for (var page = firstPage; page < maxQueries; page++) {
     print("Querying for posts/comments for $page");
     final timelineResult =
         await client.getTimeline(username, page, itemsPerPage);
     if (timelineResult.isFailure) {

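The restart block above re-hydrates the in-memory archive from the previous run's JSON dump before touching the network, and keying the map by entry.id means any page that gets re-fetched after resuming simply overwrites its earlier copies instead of duplicating them. A simplified sketch of that load step, with Entry standing in for this repo's FriendicaEntry and an integer id assumed:

    import 'dart:convert';
    import 'dart:io';

    class Entry {
      final int id;
      final Map<String, dynamic> originalJson;
      Entry(this.id, this.originalJson);
      factory Entry.fromJson(Map<String, dynamic> j) => Entry(j['id'] as int, j);
    }

    Map<int, Entry> loadSavedEntries(String path) {
      final file = File(path);
      if (!file.existsSync()) return {}; // no archive yet, start from scratch
      final decoded = jsonDecode(file.readAsStringSync()) as List<dynamic>;
      final entries = <int, Entry>{};
      for (final j in decoded) {
        final entry = Entry.fromJson(j as Map<String, dynamic>);
        entries[entry.id] = entry; // last write wins, so re-loads are harmless
      }
      return entries;
    }
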
@@ -53,9 +100,9 @@ void main(List<String> arguments) async {
     }
     final entries = timelineResult.value;
     print('# Post/Comments returned for Page $page: ${entries.length}');
-    allEntries.addAll(entries);
-    if (settings['download-images']) {
-      for (final entry in entries) {
+    for (final entry in entries) {
+      allEntries[entry.id] = entry;
+      if (settings['download-images']) {
        final imageEntryResults = await imageArchive.addEntryImages(entry);
        if (entry.images.isNotEmpty) {
          print(

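Replacing the List with a Map keyed by id in this hunk is what makes restarting safe: the resume page is downloaded again in full, so some entries arrive a second time, and keyed assignment absorbs the repeats where List.addAll would append them. A tiny illustration of the difference:

    void main() {
      final pageOne = ['post-1', 'post-2'];
      final pageOneAgain = ['post-1', 'post-2']; // same page, re-fetched on resume

      final asList = <String>[]
        ..addAll(pageOne)
        ..addAll(pageOneAgain);
      final asMap = {for (final id in [...pageOne, ...pageOneAgain]) id: id};

      print(asList.length); // 4: duplicates kept
      print(asMap.length); // 2: duplicates collapsed
    }
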
@@ -64,30 +111,28 @@ void main(List<String> arguments) async {
       }
     }
 
-    if (entries.length != itemsPerPage) {
-      print(
-          'Returned less than a full page, assuming at end of timeline and quiting');
-      break;
-    }
-    print("Sleeping for $queryDelayMillis milliseconds before next query");
-    final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
-    final postsJson = allEntries.map((e) => e.originalJson).toList();
 
+    // Yes we are rewriting the entire file every time to preserve the results
+    // over time.
+    final postsJson = allEntries.values.map((e) => e.originalJson).toList();
     File(postsJsonFile)
         .writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
     print("Posts written to JSON file: $postsJsonFile");
 
     if (settings['download-images']) {
-      final imageArchiveJsonFilePath =
-          p.join(baseDirectory.path, 'images.json');
       File(imageArchiveJsonFilePath)
           .writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
       print('Images directory saved to: $imageArchiveJsonFilePath');
     }
 
-    sleep(sleepDuration);
+    if (entries.length != itemsPerPage) {
+      print(
+          'Returned less than a full page, assuming at end of timeline and quiting');
+      break;
+    } else {
+      sleep(sleepDuration);
+    }
   }
 
   return;

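As the new comment in the hunk says, the whole posts file is rewritten on every page on purpose: it is a checkpoint, so a crash or Ctrl-C costs at most the page in flight, and --resume-page can pick up from the last page that finished. A sketch of the pattern, using the SDK's JsonEncoder.withIndent where this repo has its own PrettyJsonEncoder helper:

    import 'dart:convert';
    import 'dart:io';

    /// Rewrites [path] with every entry collected so far; wasteful per page,
    /// but it means the archive on disk is always complete up to the last
    /// finished page.
    void checkpoint(String path, Map<int, Map<String, dynamic>> entriesById) {
      final json = entriesById.values.toList();
      File(path)
          .writeAsStringSync(const JsonEncoder.withIndent('  ').convert(json));
    }
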
@@ -106,6 +151,11 @@ ArgParser _buildArgs() => ArgParser()
       help:
           'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com',
       mandatory: true)
+  ..addOption('resume-page',
+      abbr: 'r',
+      help:
+          'The page to restart the downloading process. Will try to read in existing posts and image archive data and start download from there. If set to 0 it resets from scratch.',
+      defaultsTo: '$defaultStartPage')
   ..addOption('delay',
       abbr: 'd',
       help:

@@ -21,6 +21,12 @@ class ImageArchiver {
     imageDirectory.createSync(recursive: true);
   }
 
+  bool addDirectEntries(ImageEntry entry) {
+    final alreadyExists = _images.containsKey(entry.url);
+    _images[entry.url] = entry;
+    return alreadyExists;
+  }
+
   Future<List<ImageEntry>> addEntryImages(FriendicaEntry entry) async {
     final imageEntries = <ImageEntry>[];
     for (final imageUrl in entry.images) {

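addDirectEntries is a keyed upsert whose return value reports whether the archive already held that URL; the restart path in the first file uses it to warn about duplicate image-cache entries while still keeping the newest one. A stand-in sketch of the same semantics (ImageEntry and the private _images map belong to this repo; here a plain map of strings):

    class UrlStore {
      final _entries = <String, String>{};

      /// Stores [payload] under [url]; returns true if the key already existed.
      bool add(String url, String payload) {
        final alreadyExists = _entries.containsKey(url);
        _entries[url] = payload; // the newer entry wins either way
        return alreadyExists;
      }
    }

    void main() {
      final store = UrlStore();
      print(store.add('https://example.com/a.jpg', 'v1')); // false: first insert
      print(store.add('https://example.com/a.jpg', 'v2')); // true: already present
    }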