Add default start page and concept of restarting.

This commit is contained in:
Hank Grabowski 2022-01-15 00:17:38 -05:00
parent 299638ea74
commit d9793be893
2 changed files with 71 additions and 15 deletions

View file

@ -1,3 +1,4 @@
import 'dart:convert';
import 'dart:io';
import 'package:args/args.dart';
@ -12,6 +13,7 @@ const defaultRequestDelayMilliseconds = 5000;
const defaultMaxPostsQuery = 1000000000;
const defaultItemsPerPage = 20;
const defaultDownloadImages = true;
const defaultStartPage = 0;
void main(List<String> arguments) async {
final argParser = _buildArgs();
@ -39,12 +41,57 @@ void main(List<String> arguments) async {
final queryDelayMillis = int.parse(settings['delay']);
final sleepDuration = Duration(milliseconds: queryDelayMillis);
final itemsPerPage = int.parse(settings['items-per-page']);
final allEntries = <FriendicaEntry>[];
final firstPage = int.parse(settings['resume-page']);
final allEntries = <int, FriendicaEntry>{};
final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
print(
"Max number of queries will be $maxQueries with $itemsPerPage items per page");
for (var page = 0; page < maxQueries; page++) {
if (firstPage != 0) {
print(
"Starting page is not zero therefore attempting to load image and post/comment archives from disk");
if (File(postsJsonFile).existsSync()) {
try {
final oldEntriesJson =
jsonDecode(File(postsJsonFile).readAsStringSync()) as List<dynamic>;
final oldEntries =
oldEntriesJson.map((j) => FriendicaEntry.fromJson(j));
for (final entry in oldEntries) {
allEntries[entry.id] = entry;
}
print('Loading ${oldEntries.length} post/comment entries from disk');
} catch (e) {
print(
'Error loading old entries, will be starting from scratch file: $e');
}
} else {
print(
'Entries file did not exist at location therefore assuming starting from scratch: $postsJsonFile');
}
if (File(imageArchiveJsonFilePath).existsSync()) {
final oldEntriesJson =
jsonDecode(File(imageArchiveJsonFilePath).readAsStringSync())
as List<dynamic>;
final oldEntries = oldEntriesJson.map((j) => ImageEntry.fromJson(j));
for (final entry in oldEntries) {
final alreadyHadEntry = imageArchive.addDirectEntries(entry);
if (alreadyHadEntry) {
print("Image cache already had entry for: ${entry.url}");
}
}
print('Loading ${oldEntries.length} image entries from disk');
} else {
print(
'Image archive file did not exist at location so assuming starting from scratch: $imageArchiveJsonFilePath');
}
}
print("Loading data from server");
for (var page = firstPage; page < maxQueries; page++) {
print("Querying for posts/comments for $page");
final timelineResult =
await client.getTimeline(username, page, itemsPerPage);
if (timelineResult.isFailure) {
@ -53,9 +100,9 @@ void main(List<String> arguments) async {
}
final entries = timelineResult.value;
print('# Post/Comments returned for Page $page: ${entries.length}');
allEntries.addAll(entries);
if (settings['download-images']) {
for (final entry in entries) {
for (final entry in entries) {
allEntries[entry.id] = entry;
if (settings['download-images']) {
final imageEntryResults = await imageArchive.addEntryImages(entry);
if (entry.images.isNotEmpty) {
print(
@ -64,30 +111,28 @@ void main(List<String> arguments) async {
}
}
if (entries.length != itemsPerPage) {
print(
'Returned less than a full page, assuming at end of timeline and quiting');
break;
}
print("Sleeping for $queryDelayMillis milliseconds before next query");
final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
final postsJson = allEntries.map((e) => e.originalJson).toList();
// Yes we are rewriting the entire file every time to preserve the results
// over time.
final postsJson = allEntries.values.map((e) => e.originalJson).toList();
File(postsJsonFile)
.writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
print("Posts written to JSON file: $postsJsonFile");
if (settings['download-images']) {
final imageArchiveJsonFilePath =
p.join(baseDirectory.path, 'images.json');
File(imageArchiveJsonFilePath)
.writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
print('Images directory saved to: $imageArchiveJsonFilePath');
}
sleep(sleepDuration);
if (entries.length != itemsPerPage) {
print(
'Returned less than a full page, assuming at end of timeline and quiting');
break;
} else {
sleep(sleepDuration);
}
}
return;
@ -106,6 +151,11 @@ ArgParser _buildArgs() => ArgParser()
help:
'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com',
mandatory: true)
..addOption('resume-page',
abbr: 'r',
help:
'The page to restart the downloading process. Will try to read in existing posts and image archive data and start download from there. If set to 0 it resets from scratch.',
defaultsTo: '$defaultStartPage')
..addOption('delay',
abbr: 'd',
help:

View file

@ -21,6 +21,12 @@ class ImageArchiver {
imageDirectory.createSync(recursive: true);
}
/// Stores [entry] in the in-memory image index under its URL, replacing
/// any previous entry recorded for that URL.
///
/// Returns `true` when an entry with the same URL was already present
/// before this call (i.e. the caller is re-adding a known image).
bool addDirectEntries(ImageEntry entry) {
  final key = entry.url;
  final hadPrevious = _images.containsKey(key);
  _images[key] = entry;
  return hadPrevious;
}
Future<List<ImageEntry>> addEntryImages(FriendicaEntry entry) async {
final imageEntries = <ImageEntry>[];
for (final imageUrl in entry.images) {