Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Code Quality / Tidying #390

Merged
merged 16 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MetadataExtractor/DirectoryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ public static bool TryGetRational(this Directory directory, int tagType, out Rat
{
var bytes = directory.GetByteArray(tagType);
return bytes is null ? null
: encoding.GetString(bytes, 0, bytes.Length);
: encoding.GetString(bytes);
}

public static StringValue GetStringValue(this Directory directory, int tagType)
Expand Down
11 changes: 5 additions & 6 deletions MetadataExtractor/Formats/Avi/AviRiffHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,12 @@ public AviRiffHandler(List<Directory> directories)
_ => false
};

public bool ShouldAcceptList(string fourCc) => fourCc switch
public bool ShouldAcceptList(ReadOnlySpan<byte> fourCc)
{
"hdrl" => true,
"strl" => true,
"AVI " => true,
_ => false
};
return fourCc.SequenceEqual("hdrl"u8)
|| fourCc.SequenceEqual("strl"u8)
|| fourCc.SequenceEqual("AVI "u8);
}

public void ProcessChunk(string fourCc, byte[] payload)
{
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Exif/ExifDescriptorBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ public abstract class ExifDescriptorBase<T>(T directory)
try
{
// Decode the Unicode string and trim the Unicode zero "\0" from the end.
return Encoding.Unicode.GetString(bytes, 0, bytes.Length).TrimEnd('\0');
return Encoding.Unicode.GetString(bytes).TrimEnd('\0');
}
catch
{
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Exif/ExifReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public sealed class ExifReader : JpegSegmentWithPreambleMetadataReader
{
public static ReadOnlySpan<byte> JpegSegmentPreamble => "Exif\x0\x0"u8;

public static bool StartsWithJpegExifPreamble(byte[] bytes) => bytes.AsSpan().StartsWith(JpegSegmentPreamble);
public static bool StartsWithJpegExifPreamble(ReadOnlySpan<byte> bytes) => bytes.StartsWith(JpegSegmentPreamble);

public static int JpegSegmentPreambleLength => JpegSegmentPreamble.Length;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,14 @@ public class LeicaType5MakernoteDescriptor(LeicaType5MakernoteDirectory director
if (Directory.GetObject(LeicaType5MakernoteDirectory.TagExposureMode) is not byte[] values || values.Length < 4)
return null;

var join = $"{values[0]} {values[1]} {values[2]} {values[3]}";
var ret = join switch
var ret = (values[0], values[1], values[2], values[3]) switch
{
"0 0 0 0" => "Program AE",
"1 0 0 0" => "Aperture-priority AE",
"1 1 0 0" => "Aperture-priority AE (1)",
"2 0 0 0" => "Shutter speed priority AE", // guess
"3 0 0 0" => "Manual",
_ => "Unknown (" + join + ")",
(0, 0, 0, 0) => "Program AE",
(1, 0, 0, 0) => "Aperture-priority AE",
(1, 1, 0, 0) => "Aperture-priority AE (1)",
(2, 0, 0, 0) => "Shutter speed priority AE", // guess
(3, 0, 0, 0) => "Manual",
_ => $"Unknown ({values[0]} {values[1]} {values[2]} {values[3]})"
};
return ret;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -651,15 +651,15 @@ public sealed class OlympusCameraSettingsMakernoteDescriptor(OlympusCameraSettin
if (Directory.GetObject(OlympusCameraSettingsMakernoteDirectory.TagGradation) is not short[] values || values.Length < 3)
return null;

var join = $"{values[0]} {values[1]} {values[2]}";
var ret = join switch
var ret = (values[0], values[1], values[3]) switch
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, this should have been a 2 instead of a 3. I'll push a fix shortly.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{
"0 0 0" => "n/a",
"-1 -1 1" => "Low Key",
"0 -1 1" => "Normal",
"1 -1 1" => "High Key",
_ => "Unknown (" + join + ")",
(0, 0, 0) => "n/a",
(-1, -1, 1) => "Low Key",
(0, -1, 1) => "Normal",
(1, -1, 1) => "High Key",
_ => $"Unknown ({values[0]} {values[1]} {values[2]})"
};

if (values.Length > 3)
{
if (values[3] == 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public sealed class OlympusFocusInfoMakernoteDescriptor(OlympusFocusInfoMakernot
OlympusFocusInfoMakernoteDirectory.TagMacroLed => GetMacroLedDescription(),
OlympusFocusInfoMakernoteDirectory.TagSensorTemperature => GetSensorTemperatureDescription(),
OlympusFocusInfoMakernoteDirectory.TagImageStabilization => GetImageStabilizationDescription(),
_ => base.GetDescription(tagType),
_ => base.GetDescription(tagType)
};
}

Expand Down Expand Up @@ -77,13 +77,11 @@ public sealed class OlympusFocusInfoMakernoteDescriptor(OlympusFocusInfoMakernot
if (Directory.GetObject(OlympusFocusInfoMakernoteDirectory.TagExternalFlash) is not ushort[] values || values.Length < 2)
return null;

var join = $"{values[0]} {values[1]}";

return join switch
return (values[0], values[1]) switch
{
"0 0" => "Off",
"1 0" => "On",
_ => "Unknown (" + join + ")",
(0, 0) => "Off",
(1, 0) => "On",
_ => $"Unknown ({values[0]} {values[1]})"
};
}

Expand All @@ -109,15 +107,13 @@ public sealed class OlympusFocusInfoMakernoteDescriptor(OlympusFocusInfoMakernot
if (values.Length == 0)
return null;

var join = $"{values[0]}" + (values.Length > 1 ? $"{ values[1]}" : "");

return join switch
return (values[0], values.Length > 1 ? values[1] : -1) switch
{
"0" => "Off",
"1" => "On",
"0 0" => "Off",
"1 0" => "On",
_ => "Unknown (" + join + ")",
(0, -1) => "Off",
(1, -1) => "On",
(0, 0) => "Off",
(1, 0) => "On",
_ => $"Unknown ({string.Join(" ", values)})"
drewnoakes marked this conversation as resolved.
Show resolved Hide resolved
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,24 +130,23 @@ public sealed class OlympusImageProcessingMakernoteDescriptor(OlympusImageProces
if (Directory.GetObject(OlympusImageProcessingMakernoteDirectory.TagAspectRatio) is not byte[] values || values.Length < 2)
return null;

var join = $"{values[0]} {values[1]}";
var ret = join switch
var ret = (values[0], values[1]) switch
{
"1 1" => "4:3",
"1 4" => "1:1",
"2 1" => "3:2 (RAW)",
"2 2" => "3:2",
"3 1" => "16:9 (RAW)",
"3 3" => "16:9",
"4 1" => "1:1 (RAW)",
"4 4" => "6:6",
"5 5" => "5:4",
"6 6" => "7:6",
"7 7" => "6:5",
"8 8" => "7:5",
"9 1" => "3:4 (RAW)",
"9 9" => "3:4",
_ => "Unknown (" + join + ")",
(1, 1) => "4:3",
(1, 4) => "1:1",
(2, 1) => "3:2 (RAW)",
(2, 2) => "3:2",
(3, 1) => "16:9 (RAW)",
(3, 3) => "16:9",
(4, 1) => "1:1 (RAW)",
(4, 4) => "6:6",
(5, 5) => "5:4",
(6, 6) => "7:6",
(7, 7) => "6:5",
(8, 8) => "7:5",
(9, 1) => "3:4 (RAW)",
(9, 9) => "3:4",
_ => $"Unknown ({values[0]} {values[1]})"
};
return ret;
}
Expand All @@ -157,14 +156,12 @@ public sealed class OlympusImageProcessingMakernoteDescriptor(OlympusImageProces
if (Directory.GetObject(OlympusImageProcessingMakernoteDirectory.TagKeystoneCompensation) is not byte[] values || values.Length < 2)
return null;

var join = $"{values[0]} {values[1]}";
var ret = join switch
return (values[0], values[1]) switch
{
"0 0" => "Off",
"0 1" => "On",
_ => "Unknown (" + join + ")",
(0, 0) => "Off",
(0, 1) => "On",
_ => $"Unknown ({values[0]} {values[1]})"
};
return ret;
}

public string? GetKeystoneDirectionDescription()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ public sealed class OlympusMakernoteDescriptor(OlympusMakernoteDirectory directo
if (Directory.GetObject(OlympusMakernoteDirectory.TagColourMatrix) is not short[] values)
return null;

return string.Join(" ", values.Select(b => b.ToString()).ToArray());
return string.Join(" ", values);
}

public string? GetWbModeDescription()
Expand Down Expand Up @@ -574,7 +574,7 @@ public sealed class OlympusMakernoteDescriptor(OlympusMakernoteDirectory directo
if (bytes is null)
return null;

return Encoding.UTF8.GetString(bytes, 0, bytes.Length);
return Encoding.UTF8.GetString(bytes);
}

public string? GetOneTouchWbDescription()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public sealed class OlympusRawInfoMakernoteDescriptor(OlympusRawInfoMakernoteDir
if (Directory.GetObject(OlympusRawInfoMakernoteDirectory.TagColorMatrix2) is not short[] values)
return null;

return string.Join(" ", values.Select(b => b.ToString()).ToArray());
return string.Join(" ", values);
}

public string? GetYCbCrCoefficientsDescription()
Expand All @@ -46,7 +46,7 @@ public sealed class OlympusRawInfoMakernoteDescriptor(OlympusRawInfoMakernoteDir
ret[i] = new Rational(values[2 * i], values[2 * i + 1]);
}

return string.Join(" ", ret.Select(r => r.ToDecimal().ToString()).ToArray());
return string.Join(" ", ret.Select(r => r.ToDecimal()));
}

public string? GetOlympusLightSourceDescription()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ public sealed class PanasonicMakernoteDescriptor(PanasonicMakernoteDirectory dir
if (bytes is null)
return null;

return string.Join(".", bytes.Select(b => b.ToString()).ToArray());
return string.Join(".", bytes);
}

public string? GetIntelligentDRangeDescription()
Expand Down
35 changes: 7 additions & 28 deletions MetadataExtractor/Formats/Gif/GifReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,35 +190,14 @@ private static GifHeaderDirectory ReadGifHeader(SequentialReader reader)
var blockSizeBytes = reader.GetByte();
var blockStartPos = reader.Position;

Directory? directory;
switch (extensionLabel)
Directory? directory = extensionLabel switch
{
case 0x01:
{
directory = ReadPlainTextBlock(reader, blockSizeBytes);
break;
}
case 0xf9:
{
directory = ReadControlBlock(reader);
break;
}
case 0xfe:
{
directory = ReadCommentBlock(reader, blockSizeBytes);
break;
}
case 0xff:
{
directory = ReadApplicationExtensionBlock(reader, blockSizeBytes);
break;
}
default:
{
directory = new ErrorDirectory($"Unsupported GIF extension block with type 0x{extensionLabel:X2}.");
break;
}
}
0x01 => ReadPlainTextBlock(reader, blockSizeBytes),
0xf9 => ReadControlBlock(reader),
0xfe => ReadCommentBlock(reader, blockSizeBytes),
0xff => ReadApplicationExtensionBlock(reader, blockSizeBytes),
_ => new ErrorDirectory($"Unsupported GIF extension block with type 0x{extensionLabel:X2}.")
};

var skipCount = blockStartPos + blockSizeBytes - reader.Position;
if (skipCount > 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class HeicImagePropertyDescriptor(HeicImagePropertiesDirectory directory)
return Directory.GetString(tagType) + " degrees";
case HeicImagePropertiesDirectory.TagPixelDepths:
var o = Directory.GetObject(HeicImagePropertiesDirectory.TagPixelDepths);
return o is null ? null : string.Join(" ", ((byte[])o).Select(i => i.ToString()).ToArray());
return o is byte[] bytes ? string.Join(" ", bytes) : null;
case HeicImagePropertiesDirectory.TagColorFormat:
return TypeStringConverter.ToTypeString(Directory.GetUInt32(HeicImagePropertiesDirectory.TagColorFormat));
case HeicImagePropertiesDirectory.TagColorPrimaries:
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Heif/HeifMetadataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void ParseQuickTimeTest()
{
dir.Set(
QuickTimeFileTypeDirectory.TagCompatibleBrands,
string.Join(", ", ftype.CompatibleBrandStrings.ToArray()));
string.Join(", ", ftype.CompatibleBrandStrings));
}

directories.Add(dir);
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Icc/IccReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ public static string GetStringFromUInt32(uint d)
unchecked((byte)d)
};

return Encoding.UTF8.GetString(b, 0, b.Length);
return Encoding.UTF8.GetString(b);
}
}
}
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Iptc/IptcReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ private static void ProcessTag(SequentialReader reader, Directory directory, int
if (charset is null)
{
// Unable to determine the charset, so fall through and treat tag as a regular string
charset = Encoding.UTF8.GetString(bytes, 0, bytes.Length);
charset = Encoding.UTF8.GetString(bytes);
}
directory.Set(tagIdentifier, charset);
return;
Expand Down
13 changes: 11 additions & 2 deletions MetadataExtractor/Formats/Iptc/Iso2022Converter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information.

using System.Buffers;

namespace MetadataExtractor.Formats.Iptc
{
public static class Iso2022Converter
Expand Down Expand Up @@ -89,17 +91,24 @@ public static class Iso2022Converter

foreach (var encoding in encodings)
{
char[] charBuffer = ArrayPool<char>.Shared.Rent(encoding.GetMaxCharCount(bytes.Length));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. There are some other places we could use ArrayPool too actually, like in the reader classes. I'll investigate that separately for 2.9.0, unless you'd like to.

Copy link
Collaborator Author

@iamcarbon iamcarbon Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are also some wins to be had from introducing a few new specialized readers that can operate directly over ReadOnlySpan<byte>.

One callout would be replacing SequentialByteArrayReader with an optimized ref struct {LittleEndian/BigEndian}BufferReader(ReadOnlySpan<byte> buffer) -- and spanifying the outer method.

This would allow us to operate directly over a span, and eliminate the reader allocation.

There's another big win eliminating all the temporary array allocations when we make (int tagName, params string[] descriptions) calls.

Copy link
Owner

@drewnoakes drewnoakes Jan 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some more use of ArrayPool in #392.

There's definitely room for improvement in the reader classes. Check out the PRs from @kwhopper for some more ideas.

My vague idea here is to build on the new span types and @kwhopper's investigations, and actually avoid doing as much parsing work during the extraction phase. Many directories we store could actually be backed by a byte[] (or Memory<byte>) that could be inspected when enumerating through tags. This would be quite a big change, and requires some research before it could be pursued.

There's another big win eliminating all the temporary array allocations when we make (int tagName, params string[] descriptions) calls.

This sounds promising. We'd need to verify that the compiler doesn't allocate an array behind the scenes.

EDIT: It seems to do the right thing on modern .NET: https://sharplab.io/#v2:EYLgtghglgdgPgAQEwEYCwAoBBmABAlANnyVwGFcBvTXW/PA4hAFlwFkUAKAJQFMIAJgHkYAGwCeAZQAOEGAB4CABgB8+FEoDOAShp1qGOrgC+e2mfrqmrNkk67D+i0Y6cA2gCIAgh4A0uDwAhPwCyDwBdbQBuC1MMYyA===

...but not on .NET Framework: https://sharplab.io/#v2:EYLgHgbALAPgAgJgIwFgBQcDMACOSK4LYDC2A3utlbjngXFNgLJIAUASgKYCGAJgPIA7ADYBPAMoAHboIA8eAAwA+XEgUBnAJSVqFNNWwBfHVRM1V9RkwStt+3WYMtWAbQBEAQTcAabG4BCPn7EbgC6mgDcZsZohkA==

This is a compiler feature, so we'd need to test netstandard2.1 csc output, which sharplab doesn't support afaik.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's definitely room for improvement in the reader classes. Check out the PRs from @kwhopper for some more ideas.

Thanks. RandomAccessStream+ReaderInfo is an experiment somewhat similar to this Span conversion. It goes a bit further by abstracting away all the buffering (RandomAccessStream) and "span-ifying" (ReaderInfo with byte arrays) entirely; callers then only have to worry about one kind of reader. Side effects are the ability to know your exact physical offset at any time, and support for streamed content.

If you can reach those same goals in this process, that's a great addition and should allow new things in the future. I can also check through the code in those old PRs to see if they could use Spans like you're doing here, if that has some value.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kwhopper the recent activity has been small incremental improvements. I see your PR as the kind of thing we want for 3.0. It'll be a lot of work to integrate throughout the code though, so we should noodle out the details with sketches and discussion before starting the work of integration. We only want to do that integration work once.

A direction I think would be good is to divide the parsing into two stages:

  1. Reading the file in coarse chunks. E.g. for JPEG, this could be just pulling out and labelling the segments we need. These would be allocated in contiguous chunks of memory, with no further processing. I think this phase could mostly be done sequentially. The chunks would remember their offsets relative to the start of the file too, along with whatever metadata is needed for later steps.
  2. As the consumer walks through the metadata, we process the chunks of data to produce the tags.

Currently we do both 1 and 2 during the read phase. I'm thinking that, with this, we'd just do step 1 during that phase, and step 2 during the enumeration. This will mean a lot less work and fewer allocations during the first phase, and when that work's done during the second phase, any allocations would be shorter-lived and therefore more likely to be GC'd quickly in gen0. It'd also allow consumers to skip decoding bits they don't actually care about.

I'm hoping to write this up a bit more comprehensively and would really appreciate your input.


try
{
var s = encoding.GetString(bytes, 0, bytes.Length);
if (s.IndexOf((char)65533) != -1)
int charCount = encoding.GetChars(bytes, 0, bytes.Length, charBuffer, 0);

if (charBuffer.AsSpan(0, charCount).IndexOf((char)65533) != -1)
continue;
return encoding;
}
catch
{
// fall through...
}
finally
{
ArrayPool<char>.Shared.Return(charBuffer);
}
}

// No encodings succeeded. Return null.
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Iso14496/TypeStringConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public static uint ToTypeId(string typeString)
(((typeString[0] & 0xFF) << 24) |
((typeString[1] & 0xFF) << 16) |
((typeString[2] & 0xFF) << 8) |
( typeString[3] & 0xFF));
(typeString[3] & 0xFF));
}
}
}
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Photoshop/PhotoshopDescriptor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ public sealed class PhotoshopDescriptor(PhotoshopDirectory directory)

return bytes is null
? null
: Encoding.UTF8.GetString(bytes, 0, bytes.Length);
: Encoding.UTF8.GetString(bytes);
}

private string? GetBinaryDataString(int tagType)
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/Formats/Png/PngChunkType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ private static void ValidateBytes(byte[] bytes)

private static bool IsValidByte(byte b) => b is >= 65 and <= 90 or >= 97 and <= 122;

public string Identifier => Encoding.UTF8.GetString(_bytes, 0, _bytes.Length);
public string Identifier => Encoding.UTF8.GetString(_bytes);

public override string ToString() => Identifier;

Expand Down
4 changes: 1 addition & 3 deletions MetadataExtractor/Formats/Png/PngDescriptor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ public sealed class PngDescriptor(PngDirectory directory)
{
return Directory.GetObject(PngDirectory.TagTextualData) is not IList<KeyValuePair> pairs
? null
: string.Join(
"\n",
pairs.Select(kv => $"{kv.Key}: {kv.Value}"));
: string.Join("\n", pairs.Select(kv => $"{kv.Key}: {kv.Value}"));
}

public string? GetBackgroundColorDescription()
Expand Down
Loading
Loading