From 1f89a2c0616b46792886af25fdaa3b6b6b10c519 Mon Sep 17 00:00:00 2001
From: Drew Noakes
Date: Mon, 5 Feb 2024 13:04:02 +1100
Subject: [PATCH] Port UnknownTagHandler

This utility reports the most common unknown tags in a series of images.
Using this we can find which common tags we do not yet understand, in
order to prioritise investments in expanding support.
---
 .../DeconstructionExtensions.cs               | 17 +++++++
 .../Program.cs                                | 15 +++---
 .../UnknownTagHandler.cs                      | 48 +++++++++++++++++++
 3 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 MetadataExtractor.Tools.FileProcessor/DeconstructionExtensions.cs
 create mode 100644 MetadataExtractor.Tools.FileProcessor/UnknownTagHandler.cs

diff --git a/MetadataExtractor.Tools.FileProcessor/DeconstructionExtensions.cs b/MetadataExtractor.Tools.FileProcessor/DeconstructionExtensions.cs
new file mode 100644
index 000000000..284422b9f
--- /dev/null
+++ b/MetadataExtractor.Tools.FileProcessor/DeconstructionExtensions.cs
@@ -0,0 +1,17 @@
+// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information.
+
+#if NETFRAMEWORK
+
+namespace System.Collections.Generic;
+
+internal static class DeconstructionExtensions
+{
+    [DebuggerStepThrough]
+    public static void Deconstruct<TKey, TValue>(this KeyValuePair<TKey, TValue> pair, out TKey key, out TValue value)
+    {
+        key = pair.Key;
+        value = pair.Value;
+    }
+}
+
+#endif
diff --git a/MetadataExtractor.Tools.FileProcessor/Program.cs b/MetadataExtractor.Tools.FileProcessor/Program.cs
index d7929abfc..3a6fef2bb 100644
--- a/MetadataExtractor.Tools.FileProcessor/Program.cs
+++ b/MetadataExtractor.Tools.FileProcessor/Program.cs
@@ -14,8 +14,6 @@
 
 namespace MetadataExtractor.Tools.FileProcessor
 {
-    // TODO port UnknownTagHandler
-
     internal static class Program
     {
         private static int Main(string[] args)
@@ -191,11 +189,11 @@ private static int ProcessRecursively(string[] args)
                     // If "--markdown" is specified, write a summary table in markdown format
                     fileHandler = new MarkdownTableOutputHandler();
                 }
-//                else if (arg == "--unknown")
-//                {
-//                    // If "--unknown" is specified, write CSV tallying unknown tag counts
-//                    fileHandler = new UnknownTagHandler();
-//                }
+                else if (arg == "--unknown")
+                {
+                    // If "--unknown" is specified, write CSV tallying unknown tag counts
+                    fileHandler = new UnknownTagHandler();
+                }
                 else if (arg == "--log-file")
                 {
                     if (i == args.Length - 1)
@@ -224,8 +222,7 @@ private static int ProcessRecursively(string[] args)
                 return 1;
             }
 
-            if (fileHandler is null)
-                fileHandler = new BasicFileHandler();
+            fileHandler ??= new BasicFileHandler();
 
             var stopwatch = Stopwatch.StartNew();
 
diff --git a/MetadataExtractor.Tools.FileProcessor/UnknownTagHandler.cs b/MetadataExtractor.Tools.FileProcessor/UnknownTagHandler.cs
new file mode 100644
index 000000000..bbfeca6e9
--- /dev/null
+++ b/MetadataExtractor.Tools.FileProcessor/UnknownTagHandler.cs
@@ -0,0 +1,48 @@
+// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information.
+
+namespace MetadataExtractor.Tools.FileProcessor;
+
+/// <summary>
+/// Keeps track of unknown tags.
+/// </summary>
+internal sealed class UnknownTagHandler : FileHandlerBase
+{
+    private readonly record struct Key(string DirectoryName, int TagType);
+
+    private readonly Dictionary<Key, int> _occurrenceByKey = [];
+
+    public override void OnExtractionSuccess(string filePath, IList<Directory> directories, string relativePath, TextWriter log, long streamPosition)
+    {
+        base.OnExtractionSuccess(filePath, directories, relativePath, log, streamPosition);
+
+        foreach (Directory directory in directories)
+        {
+            foreach (Tag tag in directory.Tags)
+            {
+                // Only interested in unknown tags (those without names)
+                if (tag.HasName)
+                    continue;
+
+                Key key = new(directory.Name, tag.Type);
+
+                _occurrenceByKey.TryGetValue(key, out int count);
+                _occurrenceByKey[key] = count + 1;
+            }
+        }
+    }
+
+    public override void OnScanCompleted(TextWriter log)
+    {
+        base.OnScanCompleted(log);
+
+        var results = _occurrenceByKey
+            .OrderByDescending(pair => pair.Value)
+            .ThenBy(pair => pair.Key.DirectoryName)
+            .ThenBy(pair => pair.Key.TagType);
+
+        foreach ((Key key, int count) in results)
+        {
+            log.WriteLine($"{key.DirectoryName}, 0x{key.TagType:X4}, {count}");
+        }
+    }
+}