Add image importing and looping over pages

This commit is contained in:
Hank Grabowski 2022-01-14 20:07:58 -05:00
parent 0302fa2cd0
commit 1cd010ea02
8 changed files with 281 additions and 67 deletions

View file

@ -7,5 +7,6 @@ class ExecError {
/// Categories attached to an [ExecError] through its `type` argument.
enum ErrorType {
// NOTE(review): presumably meant for rejected credentials; no use of this
// value is visible in this change - confirm against callers.
authentication,
// Used by FriendicaClient.getUrl when issuing a request throws an exception.
localError,
// NOTE(review): added in this change; presumably for a server path that does
// not exist - no use is visible here, confirm the intended meaning.
missingEndpoint,
}

15
bin/extensions.dart Normal file
View file

@ -0,0 +1,15 @@
/// Adds an element-by-element comparison to [List].
extension ListEqualityTest<T> on List<T> {
  /// Whether [list2] has the same length as this list and every position
  /// holds elements that compare equal with `==`.
  bool equals(List<T> list2) {
    // Lists of different sizes can never match.
    if (list2.length != length) return false;

    var index = 0;
    while (index < length) {
      // The first differing position settles the answer.
      if (this[index] != list2[index]) return false;
      index += 1;
    }
    return true;
  }
}

View file

@ -1,48 +1,20 @@
import 'dart:io';
import 'package:args/args.dart';
import 'package:path/path.dart' as p;
import 'friendica_client.dart';
import 'image_archiver.dart';
import 'json_printer.dart';
import 'models.dart';
const defaultRequestDelayMilliseconds = 5000;
const defaultMaxPosts = 1;
const defaultMaxPostsQuery = 10;
const defaultReadComments = false;
const defaultReadImages = false;
void main(List<String> arguments) async {
final argParser = ArgParser()
..addOption('archive-folder',
abbr: 'a',
help:
'Specifies the local folder all data files pulled from the server will be stored',
mandatory: true)
..addOption('username',
abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
..addOption('server-name',
abbr: 's',
help:
'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com',
mandatory: true)
..addOption('delay',
abbr: 'd',
help:
'Delay in milliseconds between requests to try not to stress the server (thousands of API calls can be made)',
defaultsTo: '$defaultRequestDelayMilliseconds')
..addOption('max-post-requests',
abbr: 'm',
help: 'The maximum number of times to query for posts',
defaultsTo: '$defaultMaxPosts')
..addFlag('read-comments',
abbr: 'c',
help:
'Whether to read comments on posts (defaults to $defaultReadComments)',
defaultsTo: defaultReadComments)
..addFlag('download-images',
abbr: 'i',
help:
'Whether to download images from posts when those images are stored on the server (not links to other sites) (defaults to $defaultReadImages)',
defaultsTo: defaultReadComments);
final argParser = _buildArgs();
late ArgResults settings;
try {
@ -53,10 +25,7 @@ void main(List<String> arguments) async {
return;
}
stdout.write('Enter Password: ');
_setEcho(false);
final password = stdin.readLineSync() ?? '';
_setEcho(true);
print('');
final username = settings['username'];
@ -64,25 +33,81 @@ void main(List<String> arguments) async {
username: username,
password: password,
serverName: settings['server-name']);
final timelineResult = await client.getTimeline(username, 1, 20);
timelineResult.match(
onSuccess: (posts) {
posts.forEach(print);
File('/tmp/test.json').writeAsStringSync(PrettyJsonEncoder()
.convert(posts.map((p) => p.originalJson).toList()));
},
onError: (error) => print('Error getting posts: $error'));
print("Done processing API requests");
final baseDirectory = Directory(settings['archive-folder']);
final imageArchive = ImageArchiver(client, baseDirectory);
final maxQueries = int.parse(settings['max-post-requests']);
final queryDelayMillis = int.parse(settings['delay']);
final sleepDuration = Duration(milliseconds: queryDelayMillis);
final itemsPerPage = 20;
final allEntries = <FriendicaEntry>[];
for (var page = 0; page < maxQueries; page++) {
final timelineResult =
await client.getTimeline(username, page, itemsPerPage);
if (timelineResult.isFailure) {
print('Error getting entries: ${timelineResult.error}');
continue;
}
final entries = timelineResult.value;
print('# Post/Comments returned for Page $page: ${entries.length}');
allEntries.addAll(entries);
for (final entry in entries) {
final imageEntryResults = await imageArchive.addEntryImages(entry);
if (entry.images.isNotEmpty) {
print(
'${imageEntryResults.length} new images of ${entry.images.length} in entry retrieved');
}
}
if (entries.length != itemsPerPage) {
print(
'Returned less than a full page, assuming at end of timeline and quiting');
break;
}
print("Sleeping for $queryDelayMillis milliseconds before next query");
final postsJsonFile = p.join(baseDirectory.path, 'postsAndComments.json');
final postsJson = allEntries.map((e) => e.originalJson).toList();
File(postsJsonFile)
.writeAsStringSync(PrettyJsonEncoder().convert(postsJson));
print("Posts written to JSON file: $postsJsonFile");
final imageArchiveJsonFilePath = p.join(baseDirectory.path, 'images.json');
File(imageArchiveJsonFilePath)
.writeAsStringSync(PrettyJsonEncoder().convert(imageArchive.images));
print('Images directory saved to: $imageArchiveJsonFilePath');
sleep(sleepDuration);
}
return;
}
/// Turns terminal echo for stdin on or off, on a best-effort basis.
///
/// Setting the echo mode has been seen to fail when running under IntelliJ
/// and in release builds; in that case the failure is reported on stdout and
/// the echo mode is left at whatever it currently is.
void _setEcho(bool value) {
  try {
    stdin.echoMode = value;
  } catch (_) {
    final notice =
        'Error toggling echo to $value, so will stay current value...';
    // The empty line terminates the prompt line before the notice is shown.
    ['', notice].forEach(print);
  }
}
/// Builds the parser for the archiver's command-line options.
///
/// `archive-folder`, `username` and `server-name` are mandatory options.
/// `delay` and `max-post-requests` are options whose defaults come from
/// [defaultRequestDelayMilliseconds] and [defaultMaxPostsQuery].
/// `read-comments` and `download-images` are flags whose defaults come from
/// [defaultReadComments] and [defaultReadImages] respectively.
ArgParser _buildArgs() => ArgParser()
  ..addOption('archive-folder',
      abbr: 'a',
      help:
          'Specifies the local folder all data files pulled from the server will be stored',
      mandatory: true)
  ..addOption('username',
      abbr: 'u', help: 'Username on your Friendica instance', mandatory: true)
  ..addOption('server-name',
      abbr: 's',
      // The example used to end with an unterminated quote and parenthesis.
      help:
          'The server name for your instance. (e.g. if the URL in your browser is "https://friendica.com/" then this would be "friendica.com")',
      mandatory: true)
  ..addOption('delay',
      abbr: 'd',
      help:
          'Delay in milliseconds between requests to try not to stress the server (thousands of API calls can be made)',
      defaultsTo: '$defaultRequestDelayMilliseconds')
  ..addOption('max-post-requests',
      abbr: 'm',
      help: 'The maximum number of times to query for posts',
      defaultsTo: '$defaultMaxPostsQuery')
  ..addFlag('read-comments',
      abbr: 'c',
      help:
          'Whether to read comments on posts (defaults to $defaultReadComments)',
      defaultsTo: defaultReadComments)
  ..addFlag('download-images',
      abbr: 'i',
      help:
          'Whether to download images from posts when those images are stored on the server (not links to other sites) (defaults to $defaultReadImages)',
      // Must track the image default advertised in the help text, not the
      // comments default.
      defaultsTo: defaultReadImages);

View file

@ -3,13 +3,13 @@ import 'dart:io';
import 'package:result_monad/result_monad.dart';
import 'exec_error.dart';
import 'models.dart';
class FriendicaClient {
final String username;
final String password;
final String serverName;
final _client = HttpClient();
late final String _authHeader;
FriendicaClient(
@ -21,23 +21,34 @@ class FriendicaClient {
_authHeader = "Basic $encodedAuthString";
}
/// Requests one page of the user timeline for [userId] from the server.
///
/// [page] and [count] are passed through as the `page` and `count` query
/// parameters. On success the decoded JSON items are converted with
/// [FriendicaEntry.fromJson]; a failed request is returned unchanged as an
/// [ExecError].
FutureResult<List<FriendicaEntry>, ExecError> getTimeline(
    String userId, int page, int count) async {
  // The '?' is required: without it the parameters become part of the path
  // and the request targets an endpoint that does not exist.
  final request = Uri.parse(
      'https://$serverName/api/statuses/user_timeline?user_id=${Uri.encodeQueryComponent(userId)}&count=$count&page=$page');
  return (await _getApiRequest(request)).mapValue((postsJson) => postsJson
      .map((postJson) => FriendicaEntry.fromJson(postJson))
      .toList());
}
FutureResult<List<dynamic>, String> _getApiRequest(Uri url) async {
/// Sends a GET request for [url] and returns the response with its body
/// still unread.
///
/// The Basic `authorization` header is attached only when [url] points at
/// the configured server, so credentials are never handed to other hosts
/// (for example externally hosted images). Any exception raised while
/// preparing or sending the request is returned as an [ExecError] of type
/// [ErrorType.localError].
FutureResult<HttpClientResponse, ExecError> getUrl(Uri url) async {
  try {
    final serverHost = Uri.parse('https://$serverName').host;
    // Reuse the shared client rather than creating one per call: a fresh
    // HttpClient that is never closed leaks its connections.
    final request = await _client.getUrl(url);
    if (url.host == serverHost) {
      request.headers.add('authorization', _authHeader);
    }
    final response = await request.close();
    return Result.ok(response);
  } catch (e) {
    return Result.error(
        ExecError(type: ErrorType.localError, message: e.toString()));
  }
}
/// Fetches [url] through [getUrl] and decodes the body as a JSON array.
///
/// Returns an [ExecError] instead of throwing when:
/// * the request itself fails (the error from [getUrl] is passed on),
/// * the server answers 401/403 ([ErrorType.authentication]),
/// * the server answers 404 ([ErrorType.missingEndpoint]),
/// * the server answers any other non-200 status, the body cannot be read
///   or parsed, or the JSON is not a list ([ErrorType.localError]).
FutureResult<List<dynamic>, ExecError> _getApiRequest(Uri url) async {
  final responseResult = await getUrl(url);
  if (responseResult.isFailure) {
    return Result.error(responseResult.error);
  }

  final response = responseResult.value;
  try {
    // Always read the body, even for error statuses, so the connection is
    // released back to the client.
    final body = await response.transform(utf8.decoder).join('');
    final status = response.statusCode;

    if (status == HttpStatus.unauthorized || status == HttpStatus.forbidden) {
      return Result.error(ExecError(
          type: ErrorType.authentication,
          message: 'Server rejected the credentials for $url (HTTP $status)'));
    }
    if (status == HttpStatus.notFound) {
      return Result.error(ExecError(
          type: ErrorType.missingEndpoint,
          message: 'Endpoint not found: $url (HTTP $status)'));
    }
    if (status != HttpStatus.ok) {
      return Result.error(ExecError(
          type: ErrorType.localError,
          message: 'Unexpected HTTP $status from $url'));
    }

    // An empty or malformed body raises FormatException, handled below.
    final bodyJson = jsonDecode(body);
    if (bodyJson is! List<dynamic>) {
      return Result.error(ExecError(
          type: ErrorType.localError,
          message: 'Expected a JSON array from $url'));
    }
    return Result.ok(bodyJson);
  } on Exception catch (e) {
    return Result.error(
        ExecError(type: ErrorType.localError, message: e.toString()));
  }
}

97
bin/image_archiver.dart Normal file
View file

@ -0,0 +1,97 @@
import 'dart:convert';
import 'dart:io';
import 'package:path/path.dart' as p;
import 'package:uuid/uuid.dart';
import 'extensions.dart';
import 'friendica_client.dart';
import 'models.dart';
/// Downloads the images referenced by timeline entries into a local folder
/// and remembers which URL was stored under which file name.
class ImageArchiver {
  /// Client used to request each image URL.
  final FriendicaClient client;

  /// Archived images keyed by the URL they were downloaded from.
  final _images = <String, ImageEntry>{};

  /// Folder the image files are written to: `images` under the base
  /// directory given to the constructor.
  final Directory imageDirectory;

  /// Unmodifiable snapshot of every image archived so far.
  List<ImageEntry> get images => List.unmodifiable(_images.values);

  /// Creates the archiver and makes sure the `images` folder exists under
  /// [baseDirectory].
  ImageArchiver(this.client, Directory baseDirectory)
      : imageDirectory = Directory(p.join(baseDirectory.path, 'images')) {
    imageDirectory.createSync(recursive: true);
  }

  /// Downloads every image of [entry] that has not been archived yet.
  ///
  /// Returns only the entries created by this call. URLs that were already
  /// archived are skipped. Invalid URLs, failed requests, non-200 responses
  /// and I/O errors are reported on stdout and skipped, so one bad image
  /// does not stop the rest of the archive run.
  Future<List<ImageEntry>> addEntryImages(FriendicaEntry entry) async {
    final imageEntries = <ImageEntry>[];
    for (final imageUrl in entry.images) {
      if (_images.containsKey(imageUrl)) {
        continue;
      }

      final url = Uri.tryParse(imageUrl);
      if (url == null) {
        print('Skipping image with invalid URL: $imageUrl');
        continue;
      }

      final imageResponse = await client.getUrl(url);
      if (imageResponse.isFailure) {
        print(imageResponse.error);
        continue;
      }

      final response = imageResponse.value;
      try {
        if (response.statusCode != HttpStatus.ok) {
          print(
              'Error response attempting to retrieve image $imageUrl: ${response.statusCode}');
          // Discard the unread body so the connection can be released.
          await response.drain<void>();
          continue;
        }

        final buffer = BytesBuilder(copy: false);
        await for (final chunk in response) {
          buffer.add(chunk);
        }
        final contents = buffer.takeBytes();

        final extension = calculateExtensions(contents);
        final filename = '${Uuid().v4().replaceAll('-', '')}$extension';
        await File(p.join(imageDirectory.path, filename))
            .writeAsBytes(contents);

        final newEntry = ImageEntry(
            postId: entry.id.toString(),
            localFilename: filename,
            url: imageUrl);
        _images[imageUrl] = newEntry;
        imageEntries.add(newEntry);
      } on IOException catch (e) {
        print('Error downloading image $imageUrl: $e');
      }
    }
    return imageEntries;
  }

  /// Guesses a file extension (including the leading dot) from the first
  /// bytes of [imageBytes].
  ///
  /// Recognises JPEG, PNG, GIF, TIFF and BMP signatures and returns an empty
  /// string for anything else, including inputs shorter than a signature.
  String calculateExtensions(List<int> imageBytes) {
    // Using table from https://www.sparkhound.com/blog/detect-image-file-types-through-byte-arrays
    final bmp = ascii.encode('BM').toList(); // BMP
    final gif = ascii.encode('GIF').toList(); // GIF
    const png = <int>[137, 80, 78, 71]; // PNG
    const tiff = <int>[73, 73, 42]; // TIFF, little endian
    const tiff2 = <int>[77, 77, 42]; // TIFF, big endian
    // Start-of-image marker plus the 0xFF that opens the next segment. This
    // covers every JPEG flavour, not only the 0xE0/0xE1 variants.
    const jpeg = <int>[255, 216, 255];

    // Length is checked first so short or empty bodies never hit sublist
    // with an out-of-range end.
    bool startsWith(List<int> signature) =>
        imageBytes.length >= signature.length &&
        imageBytes.sublist(0, signature.length).equals(signature);

    if (startsWith(jpeg)) {
      return '.jpg';
    }
    if (startsWith(png)) {
      return '.png';
    }
    if (startsWith(gif)) {
      return '.gif';
    }
    if (startsWith(tiff) || startsWith(tiff2)) {
      return '.tif';
    }
    if (startsWith(bmp)) {
      return '.bmp';
    }
    return '';
  }
}

View file

@ -46,3 +46,23 @@ class FriendicaEntry {
.toList();
}
}
/// Links a locally stored image file to the post and URL it came from.
class ImageEntry {
  /// Identifier of the post the image belongs to, as a string.
  final String postId;

  /// Name of the file the image was saved under.
  final String localFilename;

  /// Address the image was downloaded from.
  final String url;

  ImageEntry(
      {required this.postId, required this.localFilename, required this.url});

  /// Restores an entry from the map produced by [toJson]; keys that are
  /// absent or null become empty strings.
  ImageEntry.fromJson(Map<String, dynamic> json)
      : this(
            postId: json['postId'] ?? '',
            localFilename: json['localFilename'] ?? '',
            url: json['url'] ?? '');

  /// Returns the three fields as a JSON-encodable map.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'postId': postId,
      'localFilename': localFilename,
      'url': url,
    };
  }
}

View file

@ -8,6 +8,20 @@ packages:
url: "https://pub.dartlang.org"
source: hosted
version: "2.3.0"
collection:
dependency: transitive
description:
name: collection
url: "https://pub.dartlang.org"
source: hosted
version: "1.15.0"
crypto:
dependency: transitive
description:
name: crypto
url: "https://pub.dartlang.org"
source: hosted
version: "3.0.1"
lints:
dependency: "direct dev"
description:
@ -15,6 +29,20 @@ packages:
url: "https://pub.dartlang.org"
source: hosted
version: "1.0.1"
logging:
dependency: "direct main"
description:
name: logging
url: "https://pub.dartlang.org"
source: hosted
version: "1.0.2"
path:
dependency: "direct main"
description:
name: path
url: "https://pub.dartlang.org"
source: hosted
version: "1.8.1"
result_monad:
dependency: "direct main"
description:
@ -22,5 +50,19 @@ packages:
url: "https://pub.dartlang.org"
source: hosted
version: "1.0.2"
typed_data:
dependency: transitive
description:
name: typed_data
url: "https://pub.dartlang.org"
source: hosted
version: "1.3.0"
uuid:
dependency: "direct main"
description:
name: uuid
url: "https://pub.dartlang.org"
source: hosted
version: "3.0.5"
sdks:
dart: ">=2.15.1 <3.0.0"

View file

@ -8,7 +8,10 @@ environment:
dependencies:
args: ^2.3.0
logging: ^1.0.2
path: ^1.8.1
result_monad: ^1.0.2
uuid: ^3.0.5
dev_dependencies:
lints: ^1.0.0