From a3c2e86fe21c2e4cbf1ce7c56ad6928b270d3d87 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sun, 3 May 2020 12:39:59 -0500 Subject: [PATCH 01/10] - Add RandomAccessStream and ReaderInfo - Minor input change to BufferBoundsException - Some initial test cases for Ras and ReaderInfo --- .../IO/RAS/NonSeekableStream.cs | 49 ++ .../IO/RAS/RasByteArrayReaderTest.cs | 27 + .../IO/RAS/RasIndexedCapturingReaderTest.cs | 27 + .../IO/RAS/RasIndexedSeekingReaderTest.cs | 63 ++ .../IO/RAS/RasIndexedTestBase.cs | 328 +++++++++ .../RAS/RasSequentialByteArrayReaderTest.cs | 26 + .../IO/RAS/RasSequentialReaderTestBase.cs | 311 ++++++++ .../IO/RAS/RasSequentialStreamReaderTest.cs | 27 + .../IO/RAS/RasUsageTests.cs | 210 ++++++ MetadataExtractor/IO/BufferBoundsException.cs | 4 +- MetadataExtractor/IO/RandomAccessStream.cs | 561 ++++++++++++++ MetadataExtractor/IO/ReaderInfo.cs | 694 ++++++++++++++++++ 12 files changed, 2325 insertions(+), 2 deletions(-) create mode 100644 MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasByteArrayReaderTest.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasIndexedCapturingReaderTest.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasIndexedSeekingReaderTest.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasIndexedTestBase.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasSequentialByteArrayReaderTest.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasSequentialStreamReaderTest.cs create mode 100644 MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs create mode 100644 MetadataExtractor/IO/RandomAccessStream.cs create mode 100644 MetadataExtractor/IO/ReaderInfo.cs diff --git a/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs b/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs new file mode 100644 index 000000000..6896626c3 --- /dev/null +++ 
b/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs @@ -0,0 +1,49 @@ +using System; +using System.IO; + +namespace MetadataExtractor.Tests.IO +{ + public class NonSeekableStream : Stream + { + Stream m_stream; + public NonSeekableStream(Stream baseStream) + { + m_stream = baseStream; + } + + public override bool CanRead => m_stream.CanRead; + + public override bool CanSeek => false; + + public override bool CanWrite => m_stream.CanWrite; + + public override long Length => throw new NotSupportedException(); + + public override long Position { get => m_stream.Position; set => throw new NotSupportedException(); } + + public override void Flush() + { + m_stream.Flush(); + } + + public override int Read(byte[] buffer, int offset, int count) + { + return m_stream.Read(buffer, offset, count); + } + + public override long Seek(long offset, SeekOrigin origin) + { + throw new NotSupportedException(); // CanSeek is false: the Stream contract is NotSupportedException, matching Length/Position/SetLength here + } + + public override void SetLength(long value) + { + throw new NotSupportedException(); + } + + public override void Write(byte[] buffer, int offset, int count) + { + m_stream.Write(buffer, offset, count); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasByteArrayReaderTest.cs b/MetadataExtractor.Tests/IO/RAS/RasByteArrayReaderTest.cs new file mode 100644 index 000000000..45b007a6e --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasByteArrayReaderTest.cs @@ -0,0 +1,27 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Unit tests for with indexed reading on a byte array. 
+ /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public sealed class RasByteArrayReaderTest : RasIndexedTestBase + { + protected override ReaderInfo CreateReader(params byte[] bytes) + { + return ReaderInfo.CreateFromArray(bytes); + } + + [Fact] + public void ConstructWithNullBufferThrows() + { + // ReSharper disable once AssignNullToNotNullAttribute + Assert.Throws(() => CreateReader(null!)); + } + + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasIndexedCapturingReaderTest.cs b/MetadataExtractor.Tests/IO/RAS/RasIndexedCapturingReaderTest.cs new file mode 100644 index 000000000..9d789c920 --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasIndexedCapturingReaderTest.cs @@ -0,0 +1,27 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.IO; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Unit tests for with indexed reading on a MemoryStream. + /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public sealed class RasIndexedCapturingReaderTest : RasIndexedTestBase + { + [Fact] + public void ConstructWithNullBufferThrows() + { + // ReSharper disable once AssignNullToNotNullAttribute + Assert.Throws(() => ReaderInfo.CreateFromStream(null!)); + } + + protected override ReaderInfo CreateReader(params byte[] bytes) + { + return ReaderInfo.CreateFromStream(new MemoryStream(bytes)); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasIndexedSeekingReaderTest.cs b/MetadataExtractor.Tests/IO/RAS/RasIndexedSeekingReaderTest.cs new file mode 100644 index 000000000..c8337aa16 --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasIndexedSeekingReaderTest.cs @@ -0,0 +1,63 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. 
See LICENSE in the project root for license information. + +using System; +using System.IO; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Unit tests for with indexed reading on a FileStream. + /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public sealed class RasIndexedSeekingReaderTest : RasIndexedTestBase, IDisposable + { + private string? _tempFile; + private Stream? _stream; + + protected override ReaderInfo CreateReader(params byte[] bytes) + { + try + { + // Unit tests can create multiple readers in the same test, as long as they're used one after the other + DeleteTempFile(); + _tempFile = Path.GetTempFileName(); + File.WriteAllBytes(_tempFile, bytes); + _stream = new FileStream(_tempFile, FileMode.Open, FileAccess.Read); + return ReaderInfo.CreateFromStream(_stream); + } + catch (IOException ex) + { + throw new IOException("Unable to create temp file", ex); + } + } + + public void Dispose() + { + DeleteTempFile(); + } + + private void DeleteTempFile() + { + if (_stream != null) + { + _stream.Dispose(); + _stream = null; + } + + if (_tempFile != null) + { + if (File.Exists(_tempFile)) + File.Delete(_tempFile); + _tempFile = null; + } + } + + [Fact] + public void ConstructWithNullBufferThrows() + { + // ReSharper disable once AssignNullToNotNullAttribute + Assert.Throws(() => ReaderInfo.CreateFromStream(null!)); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasIndexedTestBase.cs b/MetadataExtractor.Tests/IO/RAS/RasIndexedTestBase.cs new file mode 100644 index 000000000..8debc13c9 --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasIndexedTestBase.cs @@ -0,0 +1,328 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
+ +using System; +using System.Text; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Base class for testing implementations of using indexed reading. + /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public abstract class RasIndexedTestBase + { + protected abstract ReaderInfo CreateReader(params byte[] bytes); + + [Fact] + public void DefaultEndianness() + { + Assert.True(CreateReader(new byte[1]).IsMotorolaByteOrder); + } + + [Fact] + public void GetSByte() + { + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF); + + Assert.Equal(0, reader.GetSByte(0)); + Assert.Equal(1, reader.GetSByte(1)); + Assert.Equal(127, reader.GetSByte(2)); + Assert.Equal(-1, reader.GetSByte(3)); + } + + [Fact] + public void GetByte() + { + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF); + + Assert.Equal(0, reader.GetByte(0)); + Assert.Equal(1, reader.GetByte(1)); + Assert.Equal(127, reader.GetByte(2)); + Assert.Equal(255, reader.GetByte(3)); + } + + [Fact] + public void GetByte_OutOfBounds() + { + var reader = CreateReader(new byte[2]); + + var ex = Assert.Throws(() => reader.GetByte(2)); + + Assert.Equal( + "Attempt to read from beyond end of underlying data source (requested index: 2, requested count: 1, max index: 1)", + ex.Message); + } + + [Fact] + public void GetInt16() + { + Assert.Equal(-1, CreateReader(0xff, 0xff).GetInt16(0)); + + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF); + + Assert.Equal(0x0001, reader.GetInt16(0)); + Assert.Equal(0x017F, reader.GetInt16(1)); + Assert.Equal(0x7FFF, reader.GetInt16(2)); + + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x0100, reader.GetInt16(0)); + Assert.Equal(0x7F01, reader.GetInt16(1)); + Assert.Equal(unchecked((short)0xFF7F), reader.GetInt16(2)); + } + + [Fact] + public void GetUInt16() + { + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF); + + Assert.Equal(0x0001, reader.GetUInt16(0)); + Assert.Equal(0x017F, reader.GetUInt16(1)); + 
Assert.Equal(0x7FFF, reader.GetUInt16(2)); + + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x0100, reader.GetUInt16(0)); + Assert.Equal(0x7F01, reader.GetUInt16(1)); + Assert.Equal(0xFF7F, reader.GetUInt16(2)); + } + + [Fact] + public void GetUInt16_OutOfBounds() + { + var reader = CreateReader(new byte[2]); + + var ex = Assert.Throws(() => reader.GetUInt16(1)); + + Assert.Equal( + "Attempt to read from beyond end of underlying data source (requested index: 1, requested count: 2, max index: 1)", + ex.Message); + } + + [Fact] + public void GetInt32() + { + Assert.Equal(-1, CreateReader(0xff, 0xff, 0xff, 0xff).GetInt32(0)); + + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF, 0x02, 0x03, 0x04); + + Assert.Equal(0x00017FFF, reader.GetInt32(0)); + Assert.Equal(0x017FFF02, reader.GetInt32(1)); + Assert.Equal(0x7FFF0203, reader.GetInt32(2)); + Assert.Equal(unchecked((int)0xFF020304), reader.GetInt32(3)); + + reader.IsMotorolaByteOrder = false; + + Assert.Equal(unchecked((int)0xFF7F0100), reader.GetInt32(0)); + Assert.Equal(0x02FF7F01, reader.GetInt32(1)); + Assert.Equal(0x0302FF7F, reader.GetInt32(2)); + Assert.Equal(0x040302FF, reader.GetInt32(3)); + } + + [Fact] + public void GetUInt32() + { + Assert.Equal(4294967295u, CreateReader(0xff, 0xff, 0xff, 0xff).GetUInt32(0)); + + var reader = CreateReader(0x00, 0x01, 0x7F, 0xFF, 0x02, 0x03, 0x04); + + Assert.Equal(0x00017FFFu, reader.GetUInt32(0)); + Assert.Equal(0x017FFF02u, reader.GetUInt32(1)); + Assert.Equal(0x7FFF0203u, reader.GetUInt32(2)); + Assert.Equal(0xFF020304u, reader.GetUInt32(3)); + + reader.IsMotorolaByteOrder = false; + + Assert.Equal(4286513408u, reader.GetUInt32(0)); + Assert.Equal(0x02FF7F01u, reader.GetUInt32(1)); + Assert.Equal(0x0302FF7Fu, reader.GetUInt32(2)); + Assert.Equal(0x040302FFu, reader.GetUInt32(3)); + } + + [Fact] + public void GetInt32_OutOfBounds() + { + var reader = CreateReader(new byte[3]); + + var ex = Assert.Throws(() => reader.GetInt32(0)); + + Assert.Equal( + "Attempt to 
read from beyond end of underlying data source (requested index: 0, requested count: 4, max index: 2)", + ex.Message); + } + + [Fact] + public void GetInt64() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xFF); + + Assert.Equal(0x0001020304050607L, (object)reader.GetInt64(0)); + Assert.Equal(0x01020304050607FFL, (object)reader.GetInt64(1)); + + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x0706050403020100L, (object)reader.GetInt64(0)); + Assert.Equal(unchecked((long)0xFF07060504030201L), (object)reader.GetInt64(1)); + } + + [Fact] + public void GetInt64_OutOfBounds() + { + var reader = CreateReader(new byte[7]); + + var ex = Assert.Throws(() => reader.GetInt64(0)); + + Assert.Equal( + "Attempt to read from beyond end of underlying data source (requested index: 0, requested count: 8, max index: 6)", + ex.Message); + + reader = CreateReader(new byte[7]); + + ex = Assert.Throws(() => reader.GetInt64(-1)); + + Assert.Equal( + "Attempt to read from buffer using a negative index (-1)", + ex.Message); + } + + [Fact] + public void GetFloat32() + { + const int nanBits = 0x7fc00000; + Assert.True(float.IsNaN(BitConverter.ToSingle(BitConverter.GetBytes(nanBits), 0))); + + var reader = CreateReader(0x7f, 0xc0, 0x00, 0x00); + + Assert.True(float.IsNaN(reader.GetFloat32(0))); + } + + [Fact] + public void GetFloat64() + { + const long nanBits = unchecked((long)0xfff0000000000001L); + Assert.True(double.IsNaN(BitConverter.Int64BitsToDouble(nanBits))); + + var reader = CreateReader(0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01); + + Assert.True(double.IsNaN(reader.GetDouble64(0))); + } + + [Fact] + public void GetNullTerminatedString() + { + var reader = CreateReader(0x41, 0x42, 0x43, 0x44, 0x00, 0x45, 0x46, 0x47); + + Assert.Equal(string.Empty, reader.GetNullTerminatedString(0, 0)); + Assert.Equal("A", reader.GetNullTerminatedString(0, 1)); + Assert.Equal("AB", reader.GetNullTerminatedString(0, 2)); + Assert.Equal("ABC", 
reader.GetNullTerminatedString(0, 3)); + Assert.Equal("ABCD", reader.GetNullTerminatedString(0, 4)); + Assert.Equal("ABCD", reader.GetNullTerminatedString(0, 5)); + Assert.Equal("ABCD", reader.GetNullTerminatedString(0, 6)); + Assert.Equal("BCD", reader.GetNullTerminatedString(1, 3)); + Assert.Equal("BCD", reader.GetNullTerminatedString(1, 4)); + Assert.Equal("BCD", reader.GetNullTerminatedString(1, 5)); + Assert.Equal(string.Empty, reader.GetNullTerminatedString(4, 3)); + } + + [Fact] + public void GetString() + { + var reader = CreateReader(0x41, 0x42, 0x43, 0x44, 0x00, 0x45, 0x46, 0x47); + + Assert.Equal(string.Empty, reader.GetString(0, 0, Encoding.UTF8)); + Assert.Equal("A", reader.GetString(0, 1, Encoding.UTF8)); + Assert.Equal("AB", reader.GetString(0, 2, Encoding.UTF8)); + Assert.Equal("ABC", reader.GetString(0, 3, Encoding.UTF8)); + Assert.Equal("ABCD", reader.GetString(0, 4, Encoding.UTF8)); + Assert.Equal("ABCD\x0", reader.GetString(0, 5, Encoding.UTF8)); + Assert.Equal("ABCD\x0000E", reader.GetString(0, 6, Encoding.UTF8)); + Assert.Equal("BCD", reader.GetString(1, 3, Encoding.UTF8)); + Assert.Equal("BCD\x0", reader.GetString(1, 4, Encoding.UTF8)); + Assert.Equal("BCD\x0000E", reader.GetString(1, 5, Encoding.UTF8)); + Assert.Equal("\x0000EF", reader.GetString(4, 3, Encoding.UTF8)); + } + + [Fact] + public void IndexPlusCountExceedsIntMaxValue() + { + var reader = CreateReader(new byte[10]); + var ex = Assert.Throws(() => reader.GetBytes(0x6FFFFFFF, 0x6FFFFFFF)); + Assert.Equal( + "Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: 1879048191, requested count: 1879048191)", + ex.Message); + } + + [Fact] + public void OverflowBoundsCalculation() + { + var reader = CreateReader(new byte[10]); + var ex = Assert.Throws(() => reader.GetBytes(5, 10)); + Assert.Equal( + "Attempt to read from beyond end of underlying data source (requested index: 5, requested count: 10, max index: 9)", + 
ex.Message); + } + + [Fact] + public void GetBytesEof() + { + CreateReader(new byte[50]).GetBytes(0, 50); + + var reader = CreateReader(new byte[50]); + reader.GetBytes(25, 25); + + Assert.Throws(() => CreateReader(new byte[50]).GetBytes(0, 51)); + } + + [Fact] + public void GetByteEof() + { + CreateReader(new byte[1]).GetByte(0); + + var reader = CreateReader(new byte[2]); + reader.GetByte(0); + reader.GetByte(1); + + reader = CreateReader(new byte[1]); + reader.GetByte(0); + Assert.Throws(() => reader.GetByte(1)); + } + + [Fact] + public void WithShiftedBaseOffset() + { + var reader = CreateReader(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); //.WithByteOrder(isMotorolaByteOrder: false); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(10, reader.Length); + Assert.Equal(0, reader.GetByte(0)); + Assert.Equal(1, reader.GetByte(1)); + Assert.Equal(new byte[] { 0, 1 }, reader.GetBytes(0, 2)); + //Assert.Equal(4, reader.ToUnshiftedOffset(4)); + Assert.Equal(4, reader.StartPosition + 4); + + //reader = reader.WithShiftedBaseOffset(2); + reader = reader.Clone(2, -1); + + Assert.False(reader.IsMotorolaByteOrder); + Assert.Equal(8, reader.Length); + Assert.Equal(2, reader.GetByte(0)); + Assert.Equal(3, reader.GetByte(1)); + Assert.Equal(new byte[] { 2, 3 }, reader.GetBytes(0, 2)); + //Assert.Equal(6, reader.ToUnshiftedOffset(4)); + Assert.Equal(6, reader.StartPosition + 4); + + //reader = reader.WithShiftedBaseOffset(2); + reader = reader.Clone(2, -1); + + Assert.False(reader.IsMotorolaByteOrder); + Assert.Equal(6, reader.Length); + Assert.Equal(4, reader.GetByte(0)); + Assert.Equal(5, reader.GetByte(1)); + Assert.Equal(new byte[] { 4, 5 }, reader.GetBytes(0, 2)); + //Assert.Equal(8, reader.ToUnshiftedOffset(4)); + Assert.Equal(8, reader.StartPosition + 4); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasSequentialByteArrayReaderTest.cs b/MetadataExtractor.Tests/IO/RAS/RasSequentialByteArrayReaderTest.cs new file mode 100644 index 000000000..15ae7a329 --- /dev/null +++ 
b/MetadataExtractor.Tests/IO/RAS/RasSequentialByteArrayReaderTest.cs @@ -0,0 +1,26 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Unit tests for with sequential reading on a byte array. + /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public sealed class RasSequentialByteArrayReaderTest : RasSequentialReaderTestBase + { + [Fact] + public void ConstructWithNullStreamThrows() + { + // ReSharper disable once AssignNullToNotNullAttribute + Assert.Throws(() => ReaderInfo.CreateFromArray(null!)); + } + + protected override ReaderInfo CreateReader(byte[] bytes) + { + return ReaderInfo.CreateFromArray(bytes); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs b/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs new file mode 100644 index 000000000..abc60eea4 --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs @@ -0,0 +1,311 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.IO; +using System.Linq; +using System.Text; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Base class for testing implementations of using sequential reading. 
+ /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public abstract class RasSequentialReaderTestBase + { + protected abstract ReaderInfo CreateReader(byte[] bytes); + + [Fact] + public void DefaultEndianness() + { + Assert.True(CreateReader(new byte[1]).IsMotorolaByteOrder); + } + + [Fact] + public void GetSByte() + { + var buffer = new byte[] { 0x00, 0x01, 0x7F, 0xFF }; + var reader = CreateReader(buffer); + Assert.Equal(0, reader.GetSByte()); + Assert.Equal(1, reader.GetSByte()); + Assert.Equal(127, reader.GetSByte()); + Assert.Equal(-1, reader.GetSByte()); + } + + [Fact] + public void GetByte() + { + var buffer = new byte[] { 0x00, 0x01, 0x7F, 0xFF }; + var reader = CreateReader(buffer); + Assert.Equal(0, reader.GetByte()); + Assert.Equal(1, reader.GetByte()); + Assert.Equal(127, reader.GetByte()); + Assert.Equal(255, reader.GetByte()); + } + + [Fact] + public void GetByte_OutOfBounds() + { + var reader = CreateReader(new byte[1]); + reader.GetByte(); + var ex = Assert.Throws(() => reader.GetByte()); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetInt16() + { + Assert.Equal(-1, CreateReader(new[] { (byte)0xff, (byte)0xff }).GetInt16()); + + var buffer = new byte[] { 0x00, 0x01, 0x7F, 0xFF }; + + var reader = CreateReader(buffer); + + Assert.Equal(0x0001, reader.GetInt16()); + Assert.Equal(0x7FFF, reader.GetInt16()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x0100, reader.GetInt16()); + Assert.Equal(unchecked((short)0xFF7F), reader.GetInt16()); + } + + [Fact] + public void GetUInt16() + { + var buffer = new byte[] { 0x00, 0x01, 0x7F, 0xFF }; + + var reader = CreateReader(buffer); + + Assert.Equal(0x0001, reader.GetUInt16()); + Assert.Equal(0x7FFF, reader.GetUInt16()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x0100, reader.GetUInt16()); + Assert.Equal(0xFF7F, reader.GetUInt16()); + } + + [Fact] 
+ public void GetUInt16_OutOfBounds() + { + var reader = CreateReader(new byte[1]); + var ex = Assert.Throws(() => reader.GetUInt16()); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetInt32() + { + Assert.Equal(-1, CreateReader(new[] { (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff }).GetInt32()); + + var buffer = new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; + + var reader = CreateReader(buffer); + + Assert.Equal(0x00010203, reader.GetInt32()); + Assert.Equal(0x04050607, reader.GetInt32()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x03020100, reader.GetInt32()); + Assert.Equal(0x07060504, reader.GetInt32()); + } + + [Fact] + public void GetUInt32() + { + Assert.Equal(4294967295u, CreateReader(new byte[] { 0xff, 0xff, 0xff, 0xff }).GetUInt32()); + + var buffer = new byte[] { 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 }; + + var reader = CreateReader(buffer); + + Assert.Equal(0xFF000102u, reader.GetUInt32()); + Assert.Equal(0x03040506u, reader.GetUInt32()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x020100FFu, reader.GetUInt32()); + // 0x0010200FF + Assert.Equal(0x06050403u, reader.GetUInt32()); + } + + [Fact] + public void GetInt32_OutOfBounds() + { + var reader = CreateReader(new byte[3]); + var ex = Assert.Throws(() => reader.GetInt32()); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetInt64() + { + var buffer = new byte[] { 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; + + var reader = CreateReader(buffer); + + Assert.Equal(unchecked((long)0xFF00010203040506UL), (object)reader.GetInt64()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x06050403020100FFL, (object)reader.GetInt64()); + } + + [Fact] + public void GetInt64_OutOfBounds() + { + var reader = CreateReader(new byte[7]); + var ex = Assert.Throws(() => 
reader.GetInt64()); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetUInt64() + { + var buffer = new byte[] { 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; + + var reader = CreateReader(buffer); + + Assert.Equal(0xFF00010203040506UL, (object)reader.GetUInt64()); + + reader = CreateReader(buffer); + reader.IsMotorolaByteOrder = false; + + Assert.Equal(0x06050403020100FFUL, (object)reader.GetUInt64()); + } + + [Fact] + public void GetUInt64_OutOfBounds() + { + var reader = CreateReader(new byte[7]); + var ex = Assert.Throws(() => reader.GetUInt64()); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetFloat32() + { + const int nanBits = 0x7fc00000; + Assert.True(float.IsNaN(BitConverter.ToSingle(BitConverter.GetBytes(nanBits), 0))); + + var reader = CreateReader(new byte[] { 0x7f, 0xc0, 0x00, 0x00 }); + Assert.True(float.IsNaN(reader.GetFloat32())); + } + + [Fact] + public void GetFloat64() + { + const long nanBits = unchecked((long)0xfff0000000000001L); + Assert.True(double.IsNaN(BitConverter.Int64BitsToDouble(nanBits))); + + var reader = CreateReader(new byte[] { 0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }); + Assert.True(double.IsNaN(reader.GetDouble64())); + } + + [Fact] + public void GetNullTerminatedString() + { + var bytes = new byte[] { 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 }; + + // Test max length + for (var i = 0; i < bytes.Length; i++) + Assert.Equal("ABCDEFG".Substring(0, i - 0), CreateReader(bytes).GetNullTerminatedString(i)); + + Assert.Equal(string.Empty, CreateReader(new byte[] { 0 }).GetNullTerminatedString(10)); + Assert.Equal("A", CreateReader(new byte[] { 0x41, 0 }).GetNullTerminatedString(10)); + Assert.Equal("AB", CreateReader(new byte[] { 0x41, 0x42, 0 }).GetNullTerminatedString(10)); + Assert.Equal("AB", CreateReader(new byte[] { 0x41, 0x42, 0, 0x43 }).GetNullTerminatedString(10)); + } + + [Fact] + public void GetString() + { + var bytes = new byte[] { 0x41, 
0x42, 0x43, 0x44, 0x45, 0x46, 0x47 }; + var expected = Encoding.UTF8.GetString(bytes); + + Assert.Equal(bytes.Length, expected.Length); + + for (var i = 0; i < bytes.Length; i++) + Assert.Equal("ABCDEFG".Substring(0, i - 0), CreateReader(bytes).GetString(i, Encoding.UTF8)); + } + + [Fact] + public void GetBytes() + { + var bytes = new byte[] { 0, 1, 2, 3, 4, 5 }; + for (var i = 0; i < bytes.Length; i++) + { + var reader = CreateReader(bytes); + var readBytes = reader.GetBytes(i); + Assert.Equal(bytes.Take(i).ToArray(), readBytes); + } + } + + [Fact] + public void OverflowBoundsCalculation() + { + var reader = CreateReader(new byte[10]); + var ex = Assert.Throws(() => reader.GetBytes(15)); + Assert.Equal("End of data reached.", ex.Message); + } + + [Fact] + public void GetBytesEof() + { + CreateReader(new byte[50]).GetBytes(50); + + var reader = CreateReader(new byte[50]); + reader.GetBytes(25); + reader.GetBytes(25); + + Assert.Throws(() => CreateReader(new byte[50]).GetBytes(51)); + } + + [Fact] + public void GetByteEof() + { + CreateReader(new byte[1]).GetByte(); + + var reader = CreateReader(new byte[2]); + reader.GetByte(); + reader.GetByte(); + + reader = CreateReader(new byte[1]); + reader.GetByte(); + Assert.Throws(() => reader.GetByte()); + } + + [Fact] + public void SkipEof() + { + CreateReader(new byte[1]).Skip(1); + + var reader = CreateReader(new byte[2]); + reader.Skip(1); + reader.Skip(1); + + reader = CreateReader(new byte[1]); + reader.Skip(1); + Assert.Throws(() => reader.Skip(1, true)); + } + + [Fact] + public void TrySkipEof() + { + Assert.True(CreateReader(new byte[1]).TrySkip(1, true)); + + var reader = CreateReader(new byte[2]); + Assert.True(reader.TrySkip(1, true)); + Assert.True(reader.TrySkip(1, true)); + Assert.False(reader.TrySkip(1, true)); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasSequentialStreamReaderTest.cs b/MetadataExtractor.Tests/IO/RAS/RasSequentialStreamReaderTest.cs new file mode 100644 index 
000000000..f56cb811c --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasSequentialStreamReaderTest.cs @@ -0,0 +1,27 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.IO; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + /// Unit tests for with sequential reading on a MemoryStream. + /// Drew Noakes https://drewnoakes.com + /// Kevin Mott https://github.com/kwhopper + public sealed class RasSequentialStreamReaderTest : RasSequentialReaderTestBase + { + [Fact] + public void ConstructWithNullStreamThrows() + { + // ReSharper disable once AssignNullToNotNullAttribute + Assert.Throws(() => ReaderInfo.CreateFromStream(null!)); + } + + protected override ReaderInfo CreateReader(byte[] bytes) + { + return ReaderInfo.CreateFromStream(new MemoryStream(bytes)); + } + } +} diff --git a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs new file mode 100644 index 000000000..f19c6041a --- /dev/null +++ b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs @@ -0,0 +1,210 @@ +using System; +using System.IO; +using MetadataExtractor.IO; +using Xunit; + +namespace MetadataExtractor.Tests.IO +{ + public sealed class RasUsageTests + { + /// + /// Wraps normal usage with ReaderInfo static methods, bypassing some complexity of a RandomAccessStream + /// + /// + /// + private ReaderInfo CreateReader(params byte[] bytes) + { + return ReaderInfo.CreateFromStream(new MemoryStream(bytes)); + } + + [Fact] + public void TestRasByteArray() + { + var bytes = new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; + + var ras = new RandomAccessStream(bytes); + // byte array inputs should always be seekable + Assert.True(ras.CanSeek); + // same byte array reference, so lengths should match + Assert.Equal(bytes.Length, ras.Length); + + // bytes two 
thru five in motorola byte order + Assert.Equal(33752069, ras.GetInt32(2, true, false)); + + // bytes two thru five in non-motorola byte order + Assert.Equal(84148994, ras.GetInt32(2, false, false)); + } + + [Fact] + public void TestRasMemoryStream() + { + var ms = new MemoryStream(new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }); + + var ras = new RandomAccessStream(ms); + // memory stream inputs should always be seekable + Assert.True(ras.CanSeek); + // no length was supplied for this seekable stream, so the RAS length should match the stream's length + Assert.Equal(ms.Length, ras.Length); + + // first four bytes in motorola byte order + Assert.Equal(66051, ras.GetInt32(0, true, false)); + + // first four bytes in non-motorola byte order + Assert.Equal(50462976, ras.GetInt32(0, false, false)); + } + + [Fact] + public void TestNonSeekableRasStreamUnknownLength() + { + var nss = new NonSeekableStream(new MemoryStream(new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 })); + + // don't supply a length + var ras = new RandomAccessStream(nss); + + // nonseekable stream inputs shouldn't be seekable + Assert.False(ras.CanSeek); + // can't know the length of a nonseekable stream if it isn't supplied + Assert.Equal(int.MaxValue, ras.Length); + } + + [Fact] + public void TestNonSeekableRasStreamKnownLength() + { + var ms = new MemoryStream(new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }); + var nss = new NonSeekableStream(ms); + + // supply a length + var ras = new RandomAccessStream(nss, ms.Length); + + // nonseekable stream inputs shouldn't be seekable + Assert.False(ras.CanSeek); + // length of a nonseekable stream was supplied to the RAS constructor + Assert.Equal(ms.Length, ras.Length); + } + + [Fact] + public void LocalPositionUnchangedIndexed() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + + // indexed read should not change the local position + var _ = reader.GetBytes(0, 4); + Assert.Equal(0, reader.LocalPosition); + 
} + + [Fact] + public void LocalPositionChangedSequential() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + + // sequential reads should change the local position by the number of bytes read + var _ = reader.GetBytes(4); + Assert.Equal(4, reader.LocalPosition); + } + + [Fact] + public void SkipSequential() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + + // move local position + reader.Skip(6); + + // sequential reads should change the local position by the number of bytes read + Assert.Equal(0x06, reader.GetByte()); + Assert.Equal(7, reader.LocalPosition); + } + + [Fact] + public void IndexedAndSequentialReadsOnSameReader() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + + // sequential read should change the local position by the number of bytes read + _ = reader.GetBytes(5); + + Assert.Equal(5, reader.LocalPosition); + + // indexed read should not use or change the local position + Assert.Equal(0x02, reader.GetByte(2)); + + Assert.Equal(5, reader.LocalPosition); + } + + [Fact] + public void CloneReader() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + var clone = reader.Clone(); + + // both readers should have the same starting (global) position + Assert.Equal(0, reader.StartPosition); + Assert.Equal(reader.StartPosition, clone.StartPosition); + } + + [Fact] + public void CloneReaderOffset() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + var clone = reader.Clone(4, true); + + // readers should have different starting (global) positions + Assert.Equal(0, reader.StartPosition); + Assert.Equal(4, clone.StartPosition); + + Assert.NotEqual(reader.StartPosition, clone.StartPosition); + } + + [Fact] + public void CloneReaderOffsetCheckLength() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + var clone = reader.Clone(7, true); 
+ + // clone should start at a different index and have a different length + Assert.Equal(0, reader.StartPosition); + Assert.Equal(7, clone.StartPosition); + + Assert.Equal(2, clone.Length); + + // bytes orders should be the same because of the Clone overload used + Assert.Equal(reader.IsMotorolaByteOrder, clone.IsMotorolaByteOrder); + } + + [Fact] + public void CloneReaderEnsureDistinct() + { + var reader = CreateReader(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08); + var clone = reader.Clone(4, true); + + Assert.Equal(0, clone.LocalPosition); + // sequentially move the reader's local position + reader.Skip(2); + + // reader's local position should be changed + Assert.Equal(2, reader.LocalPosition); + + // clone's local position should be unchanged + Assert.Equal(0, clone.LocalPosition); + } + + + + [Fact] + public void GetNullTerminatedStringWithClone() + { + var reader = CreateReader(0x41, 0x42, 0x43, 0x44, 0x00, 0x45, 0x46, 0x47); + // clone it from a different index + var clone = reader.Clone(1, true); + + Assert.Equal("A", reader.GetNullTerminatedString(0, 1)); + Assert.Equal("B", clone.GetNullTerminatedString(0, 1)); + + Assert.Equal("BCD", reader.GetNullTerminatedString(1, 3)); + Assert.Equal("BCD", clone.GetNullTerminatedString(0, 3)); + + Assert.Equal(reader.GetNullTerminatedString(1, 3), clone.GetNullTerminatedString(0, 3)); + } + + } +} diff --git a/MetadataExtractor/IO/BufferBoundsException.cs b/MetadataExtractor/IO/BufferBoundsException.cs index 0e0087e8d..2bb35e8fe 100644 --- a/MetadataExtractor/IO/BufferBoundsException.cs +++ b/MetadataExtractor/IO/BufferBoundsException.cs @@ -17,7 +17,7 @@ namespace MetadataExtractor.IO #endif public class BufferBoundsException : IOException { - public BufferBoundsException(int index, int bytesRequested, long bufferLength) + public BufferBoundsException(long index, long bytesRequested, long bufferLength) : base(GetMessage(index, bytesRequested, bufferLength)) { } @@ -31,7 +31,7 @@ public 
BufferBoundsException() { } - private static string GetMessage(int index, int bytesRequested, long bufferLength) + private static string GetMessage(long index, long bytesRequested, long bufferLength) { if (index < 0) return $"Attempt to read from buffer using a negative index ({index})"; diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs new file mode 100644 index 000000000..410bf4fbd --- /dev/null +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -0,0 +1,561 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using JetBrains.Annotations; + +namespace MetadataExtractor.IO +{ + /// Reads and buffers data in chunks and provides methods for reading data types + /// + /// This class implements buffered reading of data typically for use with + /// objects and provides methods for reading data types from it. Data is captured in configurable + /// chunks for efficiency. Both seekable and non-seekable streams are supported. + /// + /// Kevin Mott https://github.com/kwhopper + /// Drew Noakes https://drewnoakes.com + public class RandomAccessStream + { + private Stream? p_inputStream; + private long p_streamLength = -1; + + //private readonly List rdrList = new List(); + private bool p_isStreamFinished; + + private const int DefaultChunkLength = 4 * 1024; + private readonly int p_chunkLength; + public Dictionary p_chunks = new Dictionary(); + + public RandomAccessStream(Stream stream, long streamLength = -1) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + + if (streamLength == -1) + streamLength = stream.CanSeek ? 
stream.Length : int.MaxValue; + + p_inputStream = stream; + CanSeek = stream.CanSeek; + + p_chunkLength = DefaultChunkLength; + p_streamLength = streamLength; + } + + public RandomAccessStream(byte[] bytes) + { + if (bytes == null) + throw new ArgumentNullException(nameof(bytes)); + + CanSeek = true; + + // Setting these values makes p_inputStream irrelevant + // TODO: break the byte array up into DefaultChunkLength chunks + p_chunks.Add(0, bytes); + p_chunkLength = bytes.Length; + + p_streamLength = bytes.Length; + p_isStreamFinished = true; + } + + public bool CanSeek { get; private set; } = false; + + public long Length + { + get + { + // If finished and only one chunk, can bypass a lot of checks particularly when the input was a byte[] + //return (CanSeek) ? (p_isStreamFinished && p_chunks.Count == 1 ? p_streamLength : p_inputStream.Length) : (long)int.MaxValue; + return p_streamLength; + } + } + + public ReaderInfo CreateReader() => CreateReader(-1, -1, true); + public ReaderInfo CreateReader(bool isMotorolaByteOrder) => CreateReader(-1, -1, isMotorolaByteOrder); + public ReaderInfo CreateReader(long startPosition, long length, bool isMotorolaByteOrder) + { + var pos = startPosition >= 0 ? startPosition : 0; + return new ReaderInfo(this, pos, 0, length, isMotorolaByteOrder); + } + + /// Retrieves bytes, writing them into a caller-provided buffer. + /// position within the data buffer to read byte. + /// array to write bytes to. + /// starting position within to write to. + /// number of bytes to be written. + /// flag indicating if caller is using sequential access + /// The requested bytes, or as many as can be retrieved + /// + /// + public int Read(long index, byte[] buffer, int offset, int count, bool isSequential) + { + return Read(index, buffer, offset, count, isSequential, true); + } + + /// Retrieves bytes, writing them into a caller-provided buffer. + /// position within the data buffer to read byte. + /// array to write bytes to. 
+        /// <param name="offset">starting position within <paramref name="buffer"/> to write to (negative values are treated as 0).</param>
+        /// <param name="count">number of bytes to be written.</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <param name="allowPartial">flag indicating whether fewer than <paramref name="count"/> bytes may be returned when the data ends early</param>
+        /// <returns>The number of bytes copied: <paramref name="count"/>, or as many as can be retrieved if <paramref name="allowPartial"/> is true</returns>
+        /// <exception cref="BufferBoundsException">the request cannot be satisfied and <paramref name="allowPartial"/> is false</exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public int Read(long index, byte[] buffer, int offset, int count, bool isSequential, bool allowPartial)
+        {
+            // ValidateIndex buffers the chunks that are needed and reports how many bytes can be served
+            count = (int)ValidateIndex(index, count, isSequential, allowPartial);
+            if (count == 0)
+                return 0; // nothing to copy; also avoids Array.Copy rejecting a source index past the end
+
+            // Both copy paths must honour the caller's destination offset
+            var toIndex = offset > 0 ? offset : 0;
+
+            // This bypasses a lot of checks particularly when the input was a byte[]
+            // TODO: good spot to try Span
+            if (p_isStreamFinished && p_chunks.Count == 1)
+            {
+                Array.Copy(p_chunks[0], (int)index, buffer, toIndex, count);
+                return count;
+            }
+
+            var remaining = count; // how many bytes are still to be copied
+            var fromOffset = (int)index;
+            while (remaining != 0)
+            {
+                var fromChunkIndex = fromOffset / p_chunkLength; // chunk integer key
+                var fromInnerIndex = fromOffset % p_chunkLength; // index inside the chunk to start reading
+                var length = Math.Min(remaining, p_chunkLength - fromInnerIndex); // never read across a chunk boundary
+                var chunk = p_chunks[fromChunkIndex];
+                Array.Copy(chunk, fromInnerIndex, buffer, toIndex, length);
+                remaining -= length;
+                fromOffset += length;
+                toIndex += length;
+            }
+
+            return count;
+        }
+
+        /// <summary>Returns an unsigned byte at an index in the sequence.</summary>
+        /// <returns>the 8 bit int value, between 0 and 255</returns>
+        /// <param name="index">position within the data buffer to read byte</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">the byte at <paramref name="index"/> is not available</exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public byte GetByte(long index, bool isSequential)
+        {
+            ValidateIndex(index, 1, isSequential);
+
+            // Fast path: a finished, single-chunk source (typically a byte[]) is indexed directly
+            var isSingleChunk = p_isStreamFinished && p_chunks.Count == 1;
+            if (isSingleChunk)
+                return p_chunks[0][index];
+
+            // Locate the chunk holding the byte and the byte's offset inside that chunk
+            var chunkKey = index / p_chunkLength;
+            var offsetInChunk = index % p_chunkLength;
+
+            return p_chunks.TryGetValue(chunkKey, out var chunk)
+                ? chunk[offsetInChunk]
+                : unchecked((byte)-1); // 0xFF when the chunk is not buffered
+        }
+
+        /// <summary>Returns an unsigned 16-bit int calculated from the next two bytes of the sequence.</summary>
+        /// <returns>the 16 bit int value, between 0x0000 and 0xFFFF</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">two bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public ushort GetUInt16(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[2];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Motorola stores the most significant byte first, Intel stores it last
+            int high = IsMotorolaByteOrder ? raw[0] : raw[1];
+            int low = IsMotorolaByteOrder ? raw[1] : raw[0];
+            return (ushort)(high << 8 | low);
+        }
+
+        /// <summary>Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB).</summary>
+        /// <returns>the 16 bit value, interpreted as a signed integer</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">two bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public short GetInt16(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[2];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Motorola stores the most significant byte first, Intel stores it last
+            int high = IsMotorolaByteOrder ? raw[0] : raw[1];
+            int low = IsMotorolaByteOrder ? raw[1] : raw[0];
+            return (short)(high << 8 | low);
+        }
+
+        /// <summary>Get a 24-bit unsigned integer from the buffer, returning it as an int.</summary>
+        /// <returns>the unsigned 24-bit value as an int, between 0x00000000 and 0x00FFFFFF</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">three bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public int GetInt24(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[3];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Normalise to most-significant-byte-first so one expression assembles both orderings
+            if (!IsMotorolaByteOrder)
+                Array.Reverse(raw);
+
+            return raw[0] << 16 | raw[1] << 8 | raw[2];
+        }
+
+        /// <summary>Get a 32-bit unsigned integer from the buffer.</summary>
+        /// <returns>the unsigned 32-bit int value, between 0x00000000 and 0xFFFFFFFF</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">four bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public uint GetUInt32(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[4];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Normalise to most-significant-byte-first (big endian) before assembling
+            if (!IsMotorolaByteOrder)
+                Array.Reverse(raw);
+
+            return (uint)(raw[0] << 24 | raw[1] << 16 | raw[2] << 8 | raw[3]);
+        }
+
+        /// <summary>Returns a signed 32-bit integer from four bytes of data.</summary>
+        /// <returns>the 32 bit value, interpreted as a signed integer</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">four bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public int GetInt32(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[4];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Normalise to most-significant-byte-first (big endian) before assembling
+            if (!IsMotorolaByteOrder)
+                Array.Reverse(raw);
+
+            return raw[0] << 24 | raw[1] << 16 | raw[2] << 8 | raw[3];
+        }
+
+        /// <summary>Get a signed 64-bit integer from the buffer.</summary>
+        /// <returns>the 64 bit value, interpreted as a signed integer</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">eight bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public long GetInt64(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[8];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Fold the bytes in from most to least significant for the requested ordering
+            long value = 0;
+            for (var i = 0; i < raw.Length; i++)
+            {
+                var next = IsMotorolaByteOrder ? raw[i] : raw[raw.Length - 1 - i];
+                value = value << 8 | next;
+            }
+
+            return value;
+        }
+
+        /// <summary>Get an unsigned 64-bit integer from the buffer.</summary>
+        /// <returns>the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">eight bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public ulong GetUInt64(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[8];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Fold the bytes in from most to least significant for the requested ordering
+            ulong value = 0;
+            for (var i = 0; i < raw.Length; i++)
+            {
+                var next = IsMotorolaByteOrder ? raw[i] : raw[raw.Length - 1 - i];
+                value = value << 8 | next;
+            }
+
+            return value;
+        }
+
+        /// <summary>Gets a s15.16 fixed point float from the buffer.</summary>
+        /// <remarks>
+        /// Gets a s15.16 fixed point float from the buffer.
+        /// <para />
+        /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits.
+        /// </remarks>
+        /// <returns>the floating point value</returns>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <param name="IsMotorolaByteOrder">byte order for returning the result</param>
+        /// <param name="isSequential">flag indicating if caller is using sequential access</param>
+        /// <exception cref="BufferBoundsException">four bytes are not available at <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential request runs past the end of the data</exception>
+        public float GetS15Fixed16(long index, bool IsMotorolaByteOrder, bool isSequential)
+        {
+            var raw = new byte[4];
+            Read(index, raw, 0, raw.Length, isSequential, false);
+
+            // Normalise to Motorola ordering; the Intel ordering is untested
+            if (!IsMotorolaByteOrder)
+                Array.Reverse(raw);
+
+            // NOTE(review): the integer part is assembled as 0..65535, so the top bit is not
+            // treated as a sign bit here - confirm whether negative values need support
+            float whole = raw[0] << 8 | raw[1];
+            var fraction = raw[2] << 8 | raw[3];
+            return (float)(whole + fraction / 65536.0);
+        }
+
+        /// <summary>Seeks to an index in the sequence.</summary>
+        /// <remarks>
+        /// Seeks to an index in the sequence. If the sequence can't satisfy the request, exceptions are thrown.
+        /// </remarks>
+        /// <param name="index">position within the data buffer to seek to</param>
+        /// <param name="isSequential">optional for testing to indicate whether sequential access was used.</param>
+        /// <exception cref="BufferBoundsException">the data ends before <paramref name="index"/></exception>
+        /// <exception cref="IOException">a sequential seek runs past the end of the data</exception>
+        public void Seek(long index, bool isSequential = false)
+        {
+            // Reaching position N only requires byte N-1 to exist; position 0 checks byte 0
+            var lastByteNeeded = index == 0 ? 0 : index - 1;
+            ValidateIndex(lastByteNeeded, 1, isSequential);
+        }
+
+        ///
+        /// Ensures that the buffered bytes extend to cover the specified index. If not, an attempt is made
+        /// to read to that point.
+        ///
+        ///
+        /// If the stream ends before the point is reached, an IOException (typically a BufferBoundsException) is raised.
+ /// + /// the index from which the required bytes start + /// the number of bytes which are required + /// flag indicating if caller is using sequential access + /// flag indicating whether count should be enforced when validating the index + /// negative index, less than 0 bytes, or too many bytes are requested + /// if the stream ends before the required number of bytes are acquired + public long ValidateIndex(long index, long bytesRequested, bool isSequential, bool allowPartial = false) + { + long available = BytesAvailable(index, bytesRequested); + if (available != bytesRequested && !allowPartial) + { + if (index < 0) + throw new BufferBoundsException($"Attempt to read from buffer using a negative index ({index})"); + if (bytesRequested < 0) + throw new BufferBoundsException("Number of requested bytes must be zero or greater"); + if (index + bytesRequested - 1 > int.MaxValue) + throw new BufferBoundsException($"Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: {index}, requested count: {bytesRequested})"); + if (index + bytesRequested >= p_streamLength) + { + if (isSequential) + throw new IOException("End of data reached."); + else + throw new BufferBoundsException(index, bytesRequested, p_streamLength); + } + + // TODO test that can continue using an instance of this type after this exception + throw new BufferBoundsException(index, bytesRequested, p_streamLength); + } + + return available; + } + + private long BytesAvailable(long index, long bytesRequested) + { + if (index < 0 || bytesRequested < 0) + return 0; + + // if there's only one chunk, there's no need to calculate anything. 
+ // This bypasses a lot of checks particularly when the input was a byte[] + if (p_isStreamFinished && p_chunks.Count == 1) + { + if ((index + bytesRequested) < p_streamLength) + return bytesRequested; + else if (index > p_streamLength) + return 0; + else + return p_streamLength - index; + } + + + var endIndex = index + bytesRequested - 1; + if (endIndex < 0) endIndex = 0; + + // Maybe don't check this? + if (endIndex > int.MaxValue) + return 0; + + // zero-based + long chunkstart = index / p_chunkLength; + long chunkend = ((index + bytesRequested) / p_chunkLength) + 1; + + + if (!p_chunks.ContainsKey(chunkstart)) + { + if (!CanSeek) + chunkstart = p_chunks.Count == 0 ? 0 : p_chunks.Keys.Max() + 1; + } + + for (var i = chunkstart; i < chunkend; i++) + { + if (!p_chunks.ContainsKey(i)) + { + p_isStreamFinished = false; + + // chunkstart can be anywhere. Try to seek + if (CanSeek) + p_inputStream.Seek(i * p_chunkLength, SeekOrigin.Begin); + + byte[] chunk = new byte[p_chunkLength]; + + var totalBytesRead = 0; + while (!p_isStreamFinished && totalBytesRead != p_chunkLength) + { + var bytesRead = p_inputStream.Read(chunk, totalBytesRead, p_chunkLength - totalBytesRead); + + if (bytesRead == 0) + { + // the stream has ended, which may be ok + p_isStreamFinished = true; + p_streamLength = i * p_chunkLength + totalBytesRead; + + // check we have enough bytes for the requested index + if (endIndex >= p_streamLength) + { + TotalBytesRead += totalBytesRead; + p_chunks.Add(i, chunk); + return (index + bytesRequested) <= p_streamLength ? bytesRequested : p_streamLength - index; + } + } + else + { + totalBytesRead += bytesRead; + } + } + + TotalBytesRead += totalBytesRead; + p_chunks.Add(i, chunk); + } + } + + if (p_isStreamFinished) + return (index + bytesRequested) <= p_streamLength ? 
bytesRequested : 0; + else + return bytesRequested; + } + + /// + /// Records the total bytes buffered + /// + public long TotalBytesRead { get; private set; } = 0; + + public byte[] ToArray(long index, int count) + { + byte[] buffer; + // if this was a byte array and asking for the whole thing... + if (p_isStreamFinished && + p_chunks.Count == 1 && + index == 0 && + count == Length) + { + buffer = p_chunks[0]; + } + else + { + buffer = new byte[count]; + Read(index, buffer, 0, count, false); + } + + return buffer; + } + } +} diff --git a/MetadataExtractor/IO/ReaderInfo.cs b/MetadataExtractor/IO/ReaderInfo.cs new file mode 100644 index 000000000..2c2bf824f --- /dev/null +++ b/MetadataExtractor/IO/ReaderInfo.cs @@ -0,0 +1,694 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.IO; +using System.Text; + +using JetBrains.Annotations; + +namespace MetadataExtractor.IO +{ + /// Kevin Mott https://github.com/kwhopper + /// Drew Noakes https://drewnoakes.com + public class ReaderInfo + { + // this flag is compared to index inputs and indicates sequential access + private const int SequentialFlag = int.MinValue; + + private RandomAccessStream p_ras; // = null; + private long p_length = -1; + + public ReaderInfo(RandomAccessStream parent, long startPosition = 0, long localPosition = 0, long length = -1, bool isMotorolaByteOrder = true) + { + p_ras = parent; + StartPosition = startPosition; + LocalPosition = localPosition; + p_length = length; + + IsMotorolaByteOrder = isMotorolaByteOrder; + } + + private long GlobalPosition => StartPosition + LocalPosition; + public long StartPosition { get; private set; } + public long LocalPosition { get; private set; } + + public long Length + { + get + { + return (p_length != -1) ? p_length : (p_ras.Length - StartPosition); + } + } + + /// Get and set the byte order of this reader. 
true by default. + /// + /// + /// true for Motorola (or big) endianness (also known as network byte order), with MSB before LSB. + /// false for Intel (or little) endianness, with LSB before MSB. + /// + /// + /// true for Motorola/big endian, false for Intel/little endian + public bool IsMotorolaByteOrder { get; set; } + + /// + /// Creates a new with the current properties of this reader + /// + /// + public ReaderInfo Clone() => Clone(0, -1, true); + public ReaderInfo Clone(bool useByteOrder) => Clone(0, useByteOrder); + public ReaderInfo Clone(long length) => Clone(0, length, true); + public ReaderInfo Clone(long offset, long length) => Clone(offset, length, true); + public ReaderInfo Clone(long offset, bool useByteOrder) => Clone(offset, -1, useByteOrder); + public ReaderInfo Clone(long offset, long length, bool useByteOrder) + { + //return p_ras.CreateReader(GlobalPosition + offset, (length > -1 ? length : Length), useByteOrder ? IsMotorolaByteOrder : !IsMotorolaByteOrder); + return p_ras.CreateReader(GlobalPosition + offset, (length > -1 ? length : Length - offset), useByteOrder ? IsMotorolaByteOrder : !IsMotorolaByteOrder); + } + + public static ReaderInfo CreateFromArray(byte[] bytes) + { + //if (bytes == null) + // throw new ArgumentNullException(nameof(bytes)); + + return new RandomAccessStream(bytes).CreateReader(); + } + + public static ReaderInfo CreateFromStream(Stream stream, long streamLength = -1) + { + //if (bytes == null) + // throw new ArgumentNullException(nameof(bytes)); + + return new RandomAccessStream(stream, streamLength).CreateReader(); + } + + /// Seeks forward or backward in the sequence. + /// + /// Skips forward or backward in the sequence. If the sequence ends, an is thrown. + /// + /// the number of bytes to seek, in either direction. + /// optional for testing to indicate whether sequential access was used. + /// the end of the sequence is reached. + /// an error occurred reading from the underlying source. 
+        public void Skip(long offset, bool isSequential = false)
+        {
+            // Never move before the start of this reader
+            if (offset + LocalPosition < 0)
+                offset = -LocalPosition;
+
+            // RandomAccessStream.Seek validates an index into the underlying data, so the reader's
+            // StartPosition has to be included (GlobalPosition = StartPosition + LocalPosition).
+            // Passing only the local position validated the wrong bytes for readers that do not start at 0.
+            p_ras.Seek(GlobalPosition + offset, isSequential);
+
+            LocalPosition += offset;
+        }
+
+        /// <summary>Seeks forward or backward in the sequence, returning a boolean indicating whether the seek succeeded, or whether the sequence ended.</summary>
+        /// <param name="n">the number of bytes to seek, in either direction.</param>
+        /// <param name="isSequential">optional for testing to indicate whether sequential access was used.</param>
+        /// <returns>true if the skip succeeded; false if an <see cref="IOException"/> was raised because the sequence ended or the underlying source could not be read.</returns>
+        public bool TrySkip(long n, bool isSequential = false)
+        {
+            try
+            {
+                Skip(n, isSequential);
+                return true;
+            }
+            catch (IOException)
+            {
+                // Stream ended, or error reading from underlying source
+                return false;
+            }
+        }
+
+        /// <summary>Retrieves bytes sequentially, writing them into a caller-provided buffer.</summary>
+        /// <remarks>Passes SequentialFlag as the index, which indicates this call should read sequentially</remarks>
+        /// <param name="buffer">array to write bytes to.</param>
+        /// <param name="offset">starting position within <paramref name="buffer"/> to write to.</param>
+        /// <param name="count">number of bytes to be written.</param>
+        /// <returns>The number of bytes read</returns>
+        /// <exception cref="IOException">raised by the underlying source when the request is invalid</exception>
+        public int Read(byte[] buffer, int offset, int count) => Read(buffer, offset, SequentialFlag, count);
+
+        /// <summary>Retrieves bytes, writing them into a caller-provided buffer.</summary>
+        /// <remarks>Sequential access to the next bytes is indicated by setting index to SequentialFlag</remarks>
+        /// <param name="buffer">array to write bytes to.</param>
+        /// <param name="offset">starting position within <paramref name="buffer"/> to write to.</param>
+        /// <param name="index">position within the data buffer to read from, relative to this reader's start.</param>
+        /// <param name="count">number of bytes to be written.</param>
+        /// <returns>The number of bytes read, which may be fewer than <paramref name="count"/> because partial reads are allowed</returns>
+        /// <exception cref="IOException">raised by the underlying source when the request is invalid</exception>
+        public int Read(byte[] buffer, int offset, long index, int count)
+        {
+            bool isSeq = (index == SequentialFlag);
+            long readat = isSeq ? GlobalPosition : (StartPosition + index);
+
+            return ReadAtGlobal(readat, buffer, offset, count, isSeq, true);
+        }
+
+        // Reads from a global (underlying data) position and, for sequential reads,
+        // advances LocalPosition by the number of bytes actually read.
+        private int ReadAtGlobal(long readat, byte[] buffer, int offset, int count, bool isSequential, bool allowPartial)
+        {
+            int read = p_ras.Read(readat, buffer, offset, count, isSequential, allowPartial);
+
+            if (isSequential && read > 0)
+                LocalPosition += read; // advance the sequential position
+
+            return read;
+        }
+
+        /// <summary>
+        /// Determine if the bytes at the start of this reader (indices 0 onwards, relative to StartPosition)
+        /// match the input pattern. Internal sequential variables are unaffected
+        /// </summary>
+        /// <param name="pattern">the byte pattern to match</param>
+        /// <returns>true if every pattern byte matches; false on a mismatch or if the reader is shorter than the pattern</returns>
+        public bool StartsWith(byte[] pattern)
+        {
+            if (Length < pattern.Length)
+                return false;
+
+            // ReSharper disable once LoopCanBeConvertedToQuery
+            for (int i = 0; i < pattern.Length; i++)
+            {
+                if (GetByte(i) != pattern[i])
+                    return false;
+            }
+
+            return true;
+        }
+
+        /// <summary>Gets the byte value at the next sequential byte index.</summary>
+        /// <returns>The read byte value</returns>
+        /// <exception cref="BufferBoundsException">if the requested byte is beyond the end of the underlying data source</exception>
+        /// <exception cref="IOException">if the byte is unable to be read</exception>
+        public byte GetByte() => GetByte(SequentialFlag);
+
+        /// <summary>Gets the byte value at the specified byte index.</summary>
+        /// <param name="index">The index from which to read the byte</param>
+        /// <returns>The read byte value</returns>
+        /// <exception cref="BufferBoundsException">if the requested byte is beyond the end of the underlying data source</exception>
+        /// <exception cref="IOException">if the byte is unable to be read</exception>
+        public byte GetByte(long index)
+        {
+            bool isSeq = (index == SequentialFlag);
+            long readat = isSeq ? GlobalPosition : (StartPosition + index);
+
+            var read = p_ras.GetByte(readat, isSeq);
+
+            if (isSeq)
+                LocalPosition++; // advance the sequential position
+
+            return read;
+        }
+
+        /// <summary>Returns the required number of bytes sequentially from the underlying source.</summary>
+        /// <param name="count">The number of bytes to be returned</param>
+        /// <returns>The requested bytes</returns>
+        /// <exception cref="BufferBoundsException">if the requested bytes extend beyond the end of the underlying data source</exception>
+        /// <exception cref="IOException">if the byte is unable to be read</exception>
+        public byte[] GetBytes(int count) => GetBytes(SequentialFlag, count);
+
+        /// <summary>Returns the required number of bytes from the specified index from the underlying source.</summary>
+        /// <param name="index">The index from which the bytes begins in the underlying source</param>
+        /// <param name="count">The number of bytes to be returned</param>
+        /// <returns>The requested bytes</returns>
+        /// <exception cref="BufferBoundsException">if the requested bytes extend beyond the end of the underlying data source</exception>
+        /// <exception cref="IOException">if the byte is unable to be read</exception>
+        public byte[] GetBytes(long index, int count)
+        {
+            // validate the index now to avoid creating a byte array that could cause a heap overflow
+            bool isSeq = (index == SequentialFlag);
+            long readat = isSeq ? GlobalPosition : (StartPosition + index);
+
+            long available = p_ras.ValidateIndex(readat, count, isSeq, false);
+            if (available == 0)
+                return new byte[0];
+
+            var bytes = new byte[count];
+            ReadAtGlobal(readat, bytes, 0, count, isSeq, false);
+
+            return bytes;
+        }
+
+        /// <summary>Reads the next sequential byte and reports whether its lowest bit (bit 0) is set.</summary>
+        /// <remarks>The sequential position advances by one byte.</remarks>
+        /// <returns>true if the bit is set, otherwise false</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public bool GetBit() => GetBit(SequentialFlag);
+
+        /// <summary>Gets whether a bit at a specific index is set or not.</summary>
+        /// <param name="index">the number of bits at which to test, or SequentialFlag to test bit 0 of the next sequential byte</param>
+        /// <returns>true if the bit is set, otherwise false</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request, or index is negative</exception>
+        public bool GetBit(int index)
+        {
+            // The sequential sentinel must be forwarded untouched: dividing it by 8 produced a large
+            // negative byte index, so the parameterless overload could never succeed.
+            if (index == SequentialFlag)
+                return (GetByte() & 1) == 1;
+
+            var byteIndex = index / 8;
+            var bitIndex = index % 8;
+            var b = GetByte(byteIndex);
+            return ((b >> bitIndex) & 1) == 1;
+        }
+
+        /// <summary>Returns a signed 8-bit int calculated from one byte of data sequentially.</summary>
+        /// <returns>the 8 bit signed byte value</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public sbyte GetSByte() => GetSByte(SequentialFlag);
+
+        /// <summary>Returns a signed 8-bit int calculated from one byte of data at the specified index.</summary>
+        /// <param name="index">position within the data buffer to read byte</param>
+        /// <returns>the 8 bit signed byte value</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request, or index is negative</exception>
+        public sbyte GetSByte(long index) => unchecked((sbyte)GetByte(index));
+
+        /// <summary>Returns an unsigned 16-bit int calculated from the next two bytes of the sequence.</summary>
+        /// <returns>the 16 bit int value, between 0x0000 and 0xFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public ushort GetUInt16() => GetUInt16(SequentialFlag);
+
+        /// <summary>Returns an unsigned 16-bit int calculated from two bytes at the specified index.</summary>
+        /// <param name="index">position within the data buffer to read byte</param>
+        /// <returns>the 16 bit int value, between 0x0000 and 0xFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public ushort GetUInt16(long index)
+        {
+            // Indexed read: relative to StartPosition, sequential position untouched
+            if (index != SequentialFlag)
+                return p_ras.GetUInt16(StartPosition + index, IsMotorolaByteOrder, false);
+
+            // Sequential read: consume two bytes at the current position
+            var value = p_ras.GetUInt16(GlobalPosition, IsMotorolaByteOrder, true);
+            LocalPosition += 2;
+            return value;
+        }
+
+        /// <summary>Returns an unsigned 16-bit int assembled from two supplied byte values using this reader's byte order.</summary>
+        /// <param name="b1">the first byte value</param>
+        /// <param name="b2">the second byte value</param>
+        /// <returns>the 16 bit int value, between 0x0000 and 0xFFFF</returns>
+        public ushort GetUInt16(int b1, int b2)
+        {
+            // Motorola - MSB first; Intel ordering - LSB first
+            int high = IsMotorolaByteOrder ? b1 : b2;
+            int low = IsMotorolaByteOrder ? b2 : b1;
+            return unchecked((ushort)(high << 8 | low));
+        }
+
+        /// <summary>Returns a signed 16-bit int calculated from the next two bytes of the sequence.</summary>
+        /// <returns>the 16 bit value, interpreted as a signed integer</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public short GetInt16() => GetInt16(SequentialFlag);
+
+        /// <summary>Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB).</summary>
+        /// <returns>the 16 bit value, interpreted as a signed integer</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public short GetInt16(long index)
+        {
+            // Indexed read: relative to StartPosition, sequential position untouched
+            if (index != SequentialFlag)
+                return p_ras.GetInt16(StartPosition + index, IsMotorolaByteOrder, false);
+
+            // Sequential read: consume two bytes at the current position
+            var value = p_ras.GetInt16(GlobalPosition, IsMotorolaByteOrder, true);
+            LocalPosition += 2;
+            return value;
+        }
+
+        /// <summary>Get a 24-bit unsigned integer from the buffer sequentially, returning it as an int.</summary>
+        /// <returns>the unsigned 24-bit value as an int, between 0x00000000 and 0x00FFFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public int GetInt24() => GetInt24(SequentialFlag);
+
+        /// <summary>Get a 24-bit unsigned integer from the buffer, returning it as an int.</summary>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <returns>the unsigned 24-bit value as an int, between 0x00000000 and 0x00FFFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request, or index is negative</exception>
+        public int GetInt24(long index)
+        {
+            // Indexed read: relative to StartPosition, sequential position untouched
+            if (index != SequentialFlag)
+                return p_ras.GetInt24(StartPosition + index, IsMotorolaByteOrder, false);
+
+            // Sequential read: consume three bytes at the current position
+            var value = p_ras.GetInt24(GlobalPosition, IsMotorolaByteOrder, true);
+            LocalPosition += 3;
+            return value;
+        }
+
+        /// <summary>Get a 32-bit unsigned integer from the buffer sequentially.</summary>
+        /// <returns>the unsigned 32-bit int value, between 0x00000000 and 0xFFFFFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public uint GetUInt32() => GetUInt32(SequentialFlag);
+
+        /// <summary>Get a 32-bit unsigned integer from the buffer.</summary>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <returns>the unsigned 32-bit int value, between 0x00000000 and 0xFFFFFFFF</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request, or index is negative</exception>
+        public uint GetUInt32(long index)
+        {
+            // Indexed read: relative to StartPosition, sequential position untouched
+            if (index != SequentialFlag)
+                return p_ras.GetUInt32(StartPosition + index, IsMotorolaByteOrder, false);
+
+            // Sequential read: consume four bytes at the current position
+            var value = p_ras.GetUInt32(GlobalPosition, IsMotorolaByteOrder, true);
+            LocalPosition += 4;
+            return value;
+        }
+
+        /// <summary>Returns a signed 32-bit integer from four bytes of data sequentially.</summary>
+        /// <returns>the 32 bit value, interpreted as a signed integer</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public int GetInt32() => GetInt32(SequentialFlag);
+
+        /// <summary>Returns a signed 32-bit integer from four bytes of data at the specified index in the buffer.</summary>
+        /// <param name="index">position within the data buffer to read first byte</param>
+        /// <returns>the 32 bit value, interpreted as a signed integer</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request, or index is negative</exception>
+        public int GetInt32(long index)
+        {
+            // Indexed read: relative to StartPosition, sequential position untouched
+            if (index != SequentialFlag)
+                return p_ras.GetInt32(StartPosition + index, IsMotorolaByteOrder, false);
+
+            // Sequential read: consume four bytes at the current position
+            var value = p_ras.GetInt32(GlobalPosition, IsMotorolaByteOrder, true);
+            LocalPosition += 4;
+            return value;
+        }
+
+        /// <summary>Get a signed 64-bit integer from the buffer sequentially.</summary>
+        /// <returns>the 64 bit value, interpreted as a signed integer</returns>
+        /// <exception cref="IOException">the buffer does not contain enough bytes to service the request</exception>
+        public long GetInt64() => GetInt64(SequentialFlag);
+
+        /// <summary>Get a signed 64-bit integer from the buffer.</summary>
+ /// position within the data buffer to read first byte + /// the 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF + /// the buffer does not contain enough bytes to service the request, or index is negative + public long GetInt64(long index) + { + bool isSeq = (index == SequentialFlag); + long readat = isSeq ? GlobalPosition : (StartPosition + index); + + var read = p_ras.GetInt64(readat, IsMotorolaByteOrder, isSeq); + + if (isSeq) + LocalPosition += 8; // advance the sequential position + + return read; + } + + /// Get an usigned 64-bit integer from the buffer sequentially. + /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF + /// the buffer does not contain enough bytes to service the request + public ulong GetUInt64() => GetUInt64(SequentialFlag); + + /// Get an usigned 64-bit integer from the buffer. + /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF + /// the buffer does not contain enough bytes to service the request + public ulong GetUInt64(long index) + { + bool isSeq = (index == SequentialFlag); + long readat = isSeq ? GlobalPosition : (StartPosition + index); + + var read = p_ras.GetUInt64(readat, IsMotorolaByteOrder, isSeq); + + if (isSeq) + LocalPosition += 8; // advance the sequential position + + return read; + } + + /// Gets a s15.16 fixed point float from the buffer sequentially. + /// + /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits. + /// + /// the floating point value + /// the buffer does not contain enough bytes to service the request + public float GetS15Fixed16() => GetS15Fixed16(SequentialFlag); + + /// Gets a s15.16 fixed point float from the buffer. + /// + /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits. 
+ /// + /// the floating point value + /// the buffer does not contain enough bytes to service the request, or index is negative + public float GetS15Fixed16(long index) + { + bool isSeq = (index == SequentialFlag); + long readat = isSeq ? GlobalPosition : (StartPosition + index); + + var read = p_ras.GetS15Fixed16(readat, IsMotorolaByteOrder, isSeq); + + if (isSeq) + LocalPosition += 4; // advance the sequential position + + return read; + } + + + public float GetFloat32() => GetFloat32(SequentialFlag); + + /// + public float GetFloat32(long index) => BitConverter.ToSingle(BitConverter.GetBytes(GetInt32(index)), 0); + + public double GetDouble64() => GetDouble64(SequentialFlag); + + /// + public double GetDouble64(long index) => BitConverter.Int64BitsToDouble(GetInt64(index)); + + + [NotNull] + public string GetString(int bytesRequested, [NotNull] Encoding encoding) => GetString(SequentialFlag, bytesRequested, encoding); + + /// + [NotNull] + public string GetString(long index, int bytesRequested, [NotNull] Encoding encoding) + { + var bytes = GetBytes(index, bytesRequested); + return encoding.GetString(bytes, 0, bytes.Length); + } + + public StringValue GetStringValue(int bytesRequested) => GetStringValue(bytesRequested, null); + public StringValue GetStringValue(int bytesRequested, Encoding? encoding) + { + return new StringValue(GetBytes(bytesRequested), encoding); + } + + /// + /// Creates a string starting at the current sequential index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// The read + /// The buffer does not contain enough bytes to satisfy this request. 
+ [NotNull] + public string GetNullTerminatedString(int maxLengthBytes) => GetNullTerminatedString(SequentialFlag, maxLengthBytes); + + /// + /// Creates a string starting at the current sequential index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// An optional string encoding. If none is provided, is used. + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + [NotNull] + public string GetNullTerminatedString(int maxLengthBytes, Encoding encoding) => GetNullTerminatedString(SequentialFlag, maxLengthBytes, encoding); + + /// + /// Creates a string starting at the specified index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// The index within the buffer at which to start reading the string. + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + [NotNull] + public string GetNullTerminatedString(int index, int maxLengthBytes) => GetNullTerminatedString(index, maxLengthBytes, null); + + /// + /// Creates a string starting at the specified index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// The index within the buffer at which to start reading the string. + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// An optional string encoding. If none is provided, is used. + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + [NotNull] + public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? 
encoding) + { + var bytes = GetNullTerminatedBytes(index, maxLengthBytes); + + return (encoding ?? Encoding.UTF8).GetString(bytes, 0, bytes.Length); + } + + /// + /// Creates a string starting at the specified index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + public StringValue GetNullTerminatedStringValue(int maxLengthBytes) => GetNullTerminatedStringValue(SequentialFlag, maxLengthBytes); + + /// + /// Creates a string starting at the specified index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// The index within the buffer at which to start reading the string. + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes) => GetNullTerminatedStringValue(index, maxLengthBytes, null); + + /// + /// Creates a string starting at the current sequential index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// An optional string encoding to use when interpreting bytes. + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + public StringValue GetNullTerminatedStringValue(int maxLengthBytes, Encoding? 
encoding) => GetNullTerminatedStringValue(SequentialFlag, maxLengthBytes, encoding); + + /// + /// Creates a string starting at the specified index, and ending where either byte=='\0' or + /// length==maxLength. + /// + /// The index within the buffer at which to start reading the string. + /// + /// The maximum number of bytes to read. If a zero-byte is not reached within this limit, + /// reading will stop and the string will be truncated to this length. + /// + /// An optional string encoding to use when interpreting bytes. + /// The read + /// The buffer does not contain enough bytes to satisfy this request. + public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes, Encoding? encoding) + { + var bytes = GetNullTerminatedBytes(index, maxLengthBytes); + + return new StringValue(bytes, encoding); + } + + /// + /// Returns the sequence of bytes punctuated by a \0 value. + /// + /// + /// The maximum number of bytes to read. If a \0 byte is not reached within this limit, + /// the returned array will be long. + /// + /// The read byte array. + /// The buffer does not contain enough bytes to satisfy this request. + [NotNull] + public byte[] GetNullTerminatedBytes(int maxLengthBytes) => GetNullTerminatedBytes(SequentialFlag, maxLengthBytes); + + /// + /// Returns the sequence of bytes punctuated by a \0 value. + /// + /// The index to start reading from. + /// + /// The maximum number of bytes to read. If a \0 byte is not reached within this limit, + /// the returned array will be long. + /// + /// The read byte array. + /// The buffer does not contain enough bytes to satisfy this request. + [NotNull] + public byte[] GetNullTerminatedBytes(int index, int maxLengthBytes) + { + var isSeq = (index == SequentialFlag); + + var buffer = !isSeq ? GetBytes(index, maxLengthBytes) : new byte[maxLengthBytes]; + + // Count the number of non-null bytes + var length = 0; + while (length < buffer.Length && (!isSeq ? 
buffer[length] : buffer[length] = GetByte()) != 0) + length++; + + if (length == maxLengthBytes) + return buffer; + + var bytes = new byte[length]; + if (length > 0) + Array.Copy(buffer, bytes, length); + return bytes; + } + + /// Returns the bytes described by this particular reader + /// + public byte[] ToArray() + { + return p_ras.ToArray(StartPosition, (int)Length); + } + + public string? ReadLine() + { + StringBuilder sb = new StringBuilder(); + while (true) + { + if (LocalPosition == Length) + break; + + int ch = GetByte(); + if (ch == -1) break; + if (ch == '\r' || ch == '\n') + { + byte nextbyte = 0; + if (GlobalPosition + 1 < Length) + nextbyte = GetByte(); + if (!(ch == '\r' && nextbyte == '\n')) + Skip(-1); + + return sb.ToString(); + } + sb.Append((char)ch); + } + if (sb.Length > 0) return sb.ToString(); + return null; + } + + /// + /// Returns true in case the sequence supports length checking and distance to the end of the stream is less then number of bytes in parameter. + /// Otherwise false. 
+ /// + /// + /// True if we are going to have an exception while reading next numberOfBytes bytes from the stream + public bool IsCloserToEnd(long numberOfBytes) + { + return (LocalPosition + numberOfBytes) > Length; + } + + } +} From 734f52f3053baf8a0c3bb490f3098df1db83d18b Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Wed, 6 May 2020 10:06:25 -0500 Subject: [PATCH 02/10] - overload GetByte to optionally ignore ValidateIndex calls if already done by the caller - use GetByte instead of Read for Get* methods (similar to old readers) - add the constant RandomAccessStream.UnknownLengthValue - mark NonSeekableStream class as internal (for testing only) - document the RandomAccessStream.Length property - mark RandomAccessStream.ValidateIndex method as internal - document the return value of the RandomAccessStream.ValidateIndex method --- .../IO/RAS/NonSeekableStream.cs | 8 +- .../IO/RAS/RasUsageTests.cs | 2 +- MetadataExtractor/IO/RandomAccessStream.cs | 201 ++++++++++-------- MetadataExtractor/IO/ReaderInfo.cs | 2 +- 4 files changed, 116 insertions(+), 97 deletions(-) diff --git a/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs b/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs index 6896626c3..a65646383 100644 --- a/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs +++ b/MetadataExtractor.Tests/IO/RAS/NonSeekableStream.cs @@ -1,12 +1,14 @@ -using System; +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
+ +using System; using System.IO; namespace MetadataExtractor.Tests.IO { - public class NonSeekableStream : Stream + internal class NonSeekableStream : Stream { Stream m_stream; - public NonSeekableStream(Stream baseStream) + internal NonSeekableStream(Stream baseStream) { m_stream = baseStream; } diff --git a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs index f19c6041a..aa45d840b 100644 --- a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs +++ b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs @@ -64,7 +64,7 @@ public void TestNonSeekableRasStreamUnknownLength() // nonseekable stream inputs shouldn't be seekable Assert.False(ras.CanSeek); // can't know the length of a nonseekable stream if it isn't supplied - Assert.Equal(int.MaxValue, ras.Length); + Assert.Equal(RandomAccessStream.UnknownLengthValue, ras.Length); } [Fact] diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index 410bf4fbd..04ee0b3cf 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -18,6 +18,8 @@ namespace MetadataExtractor.IO /// Drew Noakes https://drewnoakes.com public class RandomAccessStream { + public const long UnknownLengthValue = long.MaxValue; + private Stream? p_inputStream; private long p_streamLength = -1; @@ -34,7 +36,7 @@ public RandomAccessStream(Stream stream, long streamLength = -1) throw new ArgumentNullException(nameof(stream)); if (streamLength == -1) - streamLength = stream.CanSeek ? stream.Length : int.MaxValue; + streamLength = stream.CanSeek ? stream.Length : UnknownLengthValue; p_inputStream = stream; CanSeek = stream.CanSeek; @@ -61,6 +63,15 @@ public RandomAccessStream(byte[] bytes) public bool CanSeek { get; private set; } = false; + /// + /// Returns the length of the underlying data source + /// + /// + /// Length is always known when the data source is an array. 
For data sources derived from , + /// the CanSeek property is checked. If the value is true, the 's Length property is used. + /// If the value is false, it is assumed the length cannot be determined from the itself and + /// long.MaxValue is used instead. + /// public long Length { get @@ -141,7 +152,20 @@ public int Read(long index, byte[] buffer, int offset, int count, bool isSequent /// public byte GetByte(long index, bool isSequential) { - ValidateIndex(index, 1, isSequential); + return GetByte(index, isSequential, true); + } + + /// Returns an unsigned byte at an index in the sequence. + /// the 8 bit int value, between 0 and 255 + /// position within the data buffer to read byte + /// flag indicating if caller is using sequential access + /// allows for skipping validation if already done by the caller + /// + /// + private byte GetByte(long index, bool isSequential, bool validateIndex) + { + if(validateIndex) + ValidateIndex(index, 1, isSequential); // This bypasses a lot of checks particularly when the input was a byte[] if (p_isStreamFinished && p_chunks.Count == 1) @@ -165,20 +189,19 @@ public byte GetByte(long index, bool isSequential) /// public ushort GetUInt16(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[2]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 2, isSequential); if (IsMotorolaByteOrder) { // Motorola - MSB first return (ushort) - (bytes[0] << 8 | - bytes[1]); + (GetByte(index, isSequential, false) << 8 | + GetByte(index + 1, isSequential, false)); } // Intel ordering - LSB first return (ushort) - (bytes[1] << 8 | - bytes[0]); + (GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false)); } /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). 
@@ -190,20 +213,19 @@ public ushort GetUInt16(long index, bool IsMotorolaByteOrder, bool isSequential) /// public short GetInt16(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[2]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 2, isSequential); if (IsMotorolaByteOrder) { // Motorola - MSB first return (short) - (bytes[0] << 8 | - bytes[1]); + (GetByte(index , isSequential, false) << 8 | + GetByte(index + 1, isSequential, false)); } // Intel ordering - LSB first return (short) - (bytes[1] << 8 | - bytes[0]); + (GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false)); } /// Get a 24-bit unsigned integer from the buffer, returning it as an int. @@ -215,22 +237,21 @@ public short GetInt16(long index, bool IsMotorolaByteOrder, bool isSequential) /// public int GetInt24(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[3]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 3, isSequential); if (IsMotorolaByteOrder) { - // Motorola - MSB first (big endian) + // Motorola - MSB first return - bytes[0] << 16 | - bytes[1] << 8 | - bytes[2]; + GetByte(index , isSequential, false) << 16 | + GetByte(index + 1, isSequential, false) << 8 | + GetByte(index + 2, isSequential, false); } - // Intel ordering - LSB first (little endian) + // Intel ordering - LSB first return - bytes[2] << 16 | - bytes[1] << 8 | - bytes[0]; + GetByte(index + 2, isSequential, false) << 16 | + GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false); } /// Get a 32-bit unsigned integer from the buffer, returning it as a long. 
@@ -242,24 +263,23 @@ public int GetInt24(long index, bool IsMotorolaByteOrder, bool isSequential) /// public uint GetUInt32(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[4]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 4, isSequential); if (IsMotorolaByteOrder) { - // Motorola - MSB first (big endian) + // Motorola - MSB first return (uint) - (bytes[0] << 24 | - bytes[1] << 16 | - bytes[2] << 8 | - bytes[3]); + (GetByte(index , isSequential, false) << 24 | + GetByte(index + 1, isSequential, false) << 16 | + GetByte(index + 2, isSequential, false) << 8 | + GetByte(index + 3, isSequential, false)); } - // Intel ordering - LSB first (little endian) + // Intel ordering - LSB first return (uint) - (bytes[3] << 24 | - bytes[2] << 16 | - bytes[1] << 8 | - bytes[0]); + (GetByte(index + 3, isSequential, false) << 24 | + GetByte(index + 2, isSequential, false) << 16 | + GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false)); } /// Returns a signed 32-bit integer from four bytes of data. 
@@ -271,24 +291,23 @@ public uint GetUInt32(long index, bool IsMotorolaByteOrder, bool isSequential) /// public int GetInt32(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[4]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 4, isSequential); if (IsMotorolaByteOrder) { - // Motorola - MSB first (big endian) + // Motorola - MSB first return - bytes[0] << 24 | - bytes[1] << 16 | - bytes[2] << 8 | - bytes[3]; + GetByte(index , isSequential, false) << 24 | + GetByte(index + 1, isSequential, false) << 16 | + GetByte(index + 2, isSequential, false) << 8 | + GetByte(index + 3, isSequential, false); } - // Intel ordering - LSB first (little endian) + // Intel ordering - LSB first return - bytes[3] << 24 | - bytes[2] << 16 | - bytes[1] << 8 | - bytes[0]; + GetByte(index + 3, isSequential, false) << 24 | + GetByte(index + 2, isSequential, false) << 16 | + GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false); } /// Get a signed 64-bit integer from the buffer. 
@@ -300,32 +319,31 @@ public int GetInt32(long index, bool IsMotorolaByteOrder, bool isSequential) /// public long GetInt64(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[8]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 8, isSequential); if (IsMotorolaByteOrder) { // Motorola - MSB first return - (long)bytes[0] << 56 | - (long)bytes[1] << 48 | - (long)bytes[2] << 40 | - (long)bytes[3] << 32 | - (long)bytes[4] << 24 | - (long)bytes[5] << 16 | - (long)bytes[6] << 8 | - bytes[7]; + (long)GetByte(index , isSequential, false) << 56 | + (long)GetByte(index + 1, isSequential, false) << 48 | + (long)GetByte(index + 2, isSequential, false) << 40 | + (long)GetByte(index + 3, isSequential, false) << 32 | + (long)GetByte(index + 4, isSequential, false) << 24 | + (long)GetByte(index + 5, isSequential, false) << 16 | + (long)GetByte(index + 6, isSequential, false) << 8 | + GetByte(index + 7, isSequential, false); } // Intel ordering - LSB first return - (long)bytes[7] << 56 | - (long)bytes[6] << 48 | - (long)bytes[5] << 40 | - (long)bytes[4] << 32 | - (long)bytes[3] << 24 | - (long)bytes[2] << 16 | - (long)bytes[1] << 8 | - bytes[0]; + (long)GetByte(index + 7, isSequential, false) << 56 | + (long)GetByte(index + 6, isSequential, false) << 48 | + (long)GetByte(index + 5, isSequential, false) << 40 | + (long)GetByte(index + 4, isSequential, false) << 32 | + (long)GetByte(index + 3, isSequential, false) << 24 | + (long)GetByte(index + 2, isSequential, false) << 16 | + (long)GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false); } /// Get an usigned 64-bit integer from the buffer. 
@@ -337,32 +355,31 @@ public long GetInt64(long index, bool IsMotorolaByteOrder, bool isSequential) /// public ulong GetUInt64(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[8]; - Read(index, bytes, 0, bytes.Length, isSequential, false); + ValidateIndex(index, 8, isSequential); if (IsMotorolaByteOrder) { // Motorola - MSB first return - (ulong)bytes[0] << 56 | - (ulong)bytes[1] << 48 | - (ulong)bytes[2] << 40 | - (ulong)bytes[3] << 32 | - (ulong)bytes[4] << 24 | - (ulong)bytes[5] << 16 | - (ulong)bytes[6] << 8 | - bytes[7]; + (ulong)GetByte(index , isSequential, false) << 56 | + (ulong)GetByte(index + 1, isSequential, false) << 48 | + (ulong)GetByte(index + 2, isSequential, false) << 40 | + (ulong)GetByte(index + 3, isSequential, false) << 32 | + (ulong)GetByte(index + 4, isSequential, false) << 24 | + (ulong)GetByte(index + 5, isSequential, false) << 16 | + (ulong)GetByte(index + 6, isSequential, false) << 8 | + GetByte(index + 7, isSequential, false); } // Intel ordering - LSB first return - (ulong)bytes[7] << 56 | - (ulong)bytes[6] << 48 | - (ulong)bytes[5] << 40 | - (ulong)bytes[4] << 32 | - (ulong)bytes[3] << 24 | - (ulong)bytes[2] << 16 | - (ulong)bytes[1] << 8 | - bytes[0]; + (ulong)GetByte(index + 7, isSequential, false) << 56 | + (ulong)GetByte(index + 6, isSequential, false) << 48 | + (ulong)GetByte(index + 5, isSequential, false) << 40 | + (ulong)GetByte(index + 4, isSequential, false) << 32 | + (ulong)GetByte(index + 3, isSequential, false) << 24 | + (ulong)GetByte(index + 2, isSequential, false) << 16 | + (ulong)GetByte(index + 1, isSequential, false) << 8 | + GetByte(index , isSequential, false); } /// Gets a s15.16 fixed point float from the buffer. 
@@ -379,20 +396,18 @@ public ulong GetUInt64(long index, bool IsMotorolaByteOrder, bool isSequential) /// public float GetS15Fixed16(long index, bool IsMotorolaByteOrder, bool isSequential) { - var bytes = new byte[4]; - Read(index, bytes, 0, bytes.Length, isSequential, false); - + ValidateIndex(index, 4, isSequential); if (IsMotorolaByteOrder) { - float res = bytes[0] << 8 | bytes[1]; - var d = bytes[2] << 8 | bytes[3]; + float res = GetByte(index, isSequential, false) << 8 | GetByte(index + 1, isSequential, false); + var d = GetByte(index + 2, isSequential, false) << 8 | GetByte(index + 3, isSequential, false); return (float)(res + d / 65536.0); } else { // this particular branch is untested - var d = bytes[1] << 8 | bytes[0]; - float res = bytes[3] << 8 | bytes[2]; + var d = GetByte(index + 1, isSequential, false) << 8 | GetByte(index, isSequential, false); + float res = GetByte(index + 3, isSequential, false) << 8 | GetByte(index + 2, isSequential, false); return (float)(res + d / 65536.0); } } @@ -414,8 +429,10 @@ public void Seek(long index, bool isSequential = false) /// Ensures that the buffered bytes extend to cover the specified index. If not, an attempt is made /// to read to that point. /// + /// The number of bytes available out of the number of bytes requested /// /// If the stream ends before the point is reached, a is raised. 
+ /// Requesting more bytes than available raises an exception if is false /// /// the index from which the required bytes start /// the number of bytes which are required @@ -423,7 +440,7 @@ public void Seek(long index, bool isSequential = false) /// flag indicating whether count should be enforced when validating the index /// negative index, less than 0 bytes, or too many bytes are requested /// if the stream ends before the required number of bytes are acquired - public long ValidateIndex(long index, long bytesRequested, bool isSequential, bool allowPartial = false) + internal long ValidateIndex(long index, long bytesRequested, bool isSequential, bool allowPartial = false) { long available = BytesAvailable(index, bytesRequested); if (available != bytesRequested && !allowPartial) diff --git a/MetadataExtractor/IO/ReaderInfo.cs b/MetadataExtractor/IO/ReaderInfo.cs index 2c2bf824f..2deede096 100644 --- a/MetadataExtractor/IO/ReaderInfo.cs +++ b/MetadataExtractor/IO/ReaderInfo.cs @@ -211,7 +211,7 @@ public byte GetByte(long index) public byte[] GetBytes(int count) => GetBytes(SequentialFlag, count); /// Returns the required number of bytes from the specified index from the underlying source. 
- /// The index from which the bytes begins in the underlying source + /// The index from which the bytes begin in the underlying source /// The number of bytes to be returned /// The requested bytes /// if the requested bytes extend beyond the end of the underlying data source From a5281d5dfdf32b3d1427a4aebf5918eb13d4e02b Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 9 May 2020 06:43:48 -0500 Subject: [PATCH 03/10] - If Stream.CanSeek is true, make sure it is also able to successfully report a Length - Throw BufferBoundsException instead of IOException in ValidateIndex - Remove "IsSequential" inputs to most RandomAccessStream classes - fix isMotorolaByteOrder casing in RandomAccessStream --- .../IO/RAS/RasSequentialReaderTestBase.cs | 38 +-- .../IO/RAS/RasUsageTests.cs | 8 +- MetadataExtractor/IO/RandomAccessStream.cs | 281 +++++++++--------- MetadataExtractor/IO/ReaderInfo.cs | 117 ++++---- 4 files changed, 213 insertions(+), 231 deletions(-) diff --git a/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs b/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs index abc60eea4..8d69a5b95 100644 --- a/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs +++ b/MetadataExtractor.Tests/IO/RAS/RasSequentialReaderTestBase.cs @@ -49,8 +49,8 @@ public void GetByte_OutOfBounds() { var reader = CreateReader(new byte[1]); reader.GetByte(); - var ex = Assert.Throws(() => reader.GetByte()); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetByte()); + Assert.Equal("Attempt to read from beyond end of underlying data source (requested index: 1, requested count: 1, max index: 0)", ex.Message); } [Fact] @@ -93,8 +93,8 @@ public void GetUInt16() public void GetUInt16_OutOfBounds() { var reader = CreateReader(new byte[1]); - var ex = Assert.Throws(() => reader.GetUInt16()); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetUInt16()); + Assert.Equal("Attempt to read 
from beyond end of underlying data source (requested index: 0, requested count: 2, max index: 0)", ex.Message); } [Fact] @@ -140,8 +140,8 @@ public void GetUInt32() public void GetInt32_OutOfBounds() { var reader = CreateReader(new byte[3]); - var ex = Assert.Throws(() => reader.GetInt32()); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetInt32()); + Assert.Equal("Attempt to read from beyond end of underlying data source (requested index: 0, requested count: 4, max index: 2)", ex.Message); } [Fact] @@ -163,8 +163,8 @@ public void GetInt64() public void GetInt64_OutOfBounds() { var reader = CreateReader(new byte[7]); - var ex = Assert.Throws(() => reader.GetInt64()); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetInt64()); + Assert.Equal("Attempt to read from beyond end of underlying data source (requested index: 0, requested count: 8, max index: 6)", ex.Message); } [Fact] @@ -186,8 +186,8 @@ public void GetUInt64() public void GetUInt64_OutOfBounds() { var reader = CreateReader(new byte[7]); - var ex = Assert.Throws(() => reader.GetUInt64()); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetUInt64()); + Assert.Equal("Attempt to read from beyond end of underlying data source (requested index: 0, requested count: 8, max index: 6)", ex.Message); } [Fact] @@ -253,8 +253,8 @@ public void GetBytes() public void OverflowBoundsCalculation() { var reader = CreateReader(new byte[10]); - var ex = Assert.Throws(() => reader.GetBytes(15)); - Assert.Equal("End of data reached.", ex.Message); + var ex = Assert.Throws(() => reader.GetBytes(15)); + Assert.Equal("Attempt to read from beyond end of underlying data source (requested index: 0, requested count: 15, max index: 9)", ex.Message); } [Fact] @@ -266,7 +266,7 @@ public void GetBytesEof() reader.GetBytes(25); reader.GetBytes(25); - Assert.Throws(() => CreateReader(new byte[50]).GetBytes(51)); + 
Assert.Throws(() => CreateReader(new byte[50]).GetBytes(51)); } [Fact] @@ -280,7 +280,7 @@ public void GetByteEof() reader = CreateReader(new byte[1]); reader.GetByte(); - Assert.Throws(() => reader.GetByte()); + Assert.Throws(() => reader.GetByte()); } [Fact] @@ -294,18 +294,18 @@ public void SkipEof() reader = CreateReader(new byte[1]); reader.Skip(1); - Assert.Throws(() => reader.Skip(1, true)); + Assert.Throws(() => reader.Skip(1)); } [Fact] public void TrySkipEof() { - Assert.True(CreateReader(new byte[1]).TrySkip(1, true)); + Assert.True(CreateReader(new byte[1]).TrySkip(1)); var reader = CreateReader(new byte[2]); - Assert.True(reader.TrySkip(1, true)); - Assert.True(reader.TrySkip(1, true)); - Assert.False(reader.TrySkip(1, true)); + Assert.True(reader.TrySkip(1)); + Assert.True(reader.TrySkip(1)); + Assert.False(reader.TrySkip(1)); } } } diff --git a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs index aa45d840b..6b0868e7f 100644 --- a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs +++ b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs @@ -29,10 +29,10 @@ public void TestRasByteArray() Assert.Equal(bytes.Length, ras.Length); // bytes two thru five in motorola byte order - Assert.Equal(33752069, ras.GetInt32(2, true, false)); + Assert.Equal(33752069, ras.GetInt32(2, true)); // bytes two thru five in non-motorola byte order - Assert.Equal(84148994, ras.GetInt32(2, false, false)); + Assert.Equal(84148994, ras.GetInt32(2, false)); } [Fact] @@ -47,10 +47,10 @@ public void TestRasMemoryStream() Assert.Equal(ms.Length, ras.Length); // first four bytes in motorola byte order - Assert.Equal(66051, ras.GetInt32(0, true, false)); + Assert.Equal(66051, ras.GetInt32(0, true)); // first four bytes in non-motorola byte order - Assert.Equal(50462976, ras.GetInt32(0, false, false)); + Assert.Equal(50462976, ras.GetInt32(0, false)); } [Fact] diff --git a/MetadataExtractor/IO/RandomAccessStream.cs 
b/MetadataExtractor/IO/RandomAccessStream.cs index 04ee0b3cf..be59f475a 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -35,12 +35,25 @@ public RandomAccessStream(Stream stream, long streamLength = -1) if (stream == null) throw new ArgumentNullException(nameof(stream)); - if (streamLength == -1) - streamLength = stream.CanSeek ? stream.Length : UnknownLengthValue; - p_inputStream = stream; CanSeek = stream.CanSeek; + if (streamLength == -1) + { + // Make sure a stream that can seek is also able to report a Length. + // This is an uncommon situation; don't know of any as of this writing + try + { + streamLength = stream.CanSeek ? stream.Length : UnknownLengthValue; + } + catch(NotSupportedException) + { + streamLength = UnknownLengthValue; + CanSeek = false; + } + } + + // TODO: allow a different chunk length either through this constructor or read from a context object p_chunkLength = DefaultChunkLength; p_streamLength = streamLength; } @@ -90,18 +103,17 @@ public ReaderInfo CreateReader(long startPosition, long length, bool isMotorolaB return new ReaderInfo(this, pos, 0, length, isMotorolaByteOrder); } + /// Retrieves bytes, writing them into a caller-provided buffer. /// position within the data buffer to read byte. /// array to write bytes to. /// starting position within to write to. /// number of bytes to be written. - /// flag indicating if caller is using sequential access /// The requested bytes, or as many as can be retrieved /// - /// - public int Read(long index, byte[] buffer, int offset, int count, bool isSequential) + public int Read(long index, byte[] buffer, int offset, int count) { - return Read(index, buffer, offset, count, isSequential, true); + return Read(index, buffer, offset, count, true); } /// Retrieves bytes, writing them into a caller-provided buffer. 
@@ -109,14 +121,12 @@ public int Read(long index, byte[] buffer, int offset, int count, bool isSequent /// array to write bytes to. /// starting position within to write to. /// number of bytes to be written. - /// flag indicating if caller is using sequential access /// flag indicating whether count should be enforced when validating the index /// The requested bytes, or as many as can be retrieved if is true /// - /// - public int Read(long index, byte[] buffer, int offset, int count, bool isSequential, bool allowPartial) + public int Read(long index, byte[] buffer, int offset, int count, bool allowPartial) { - count = (int)ValidateIndex(index, count, isSequential, allowPartial); + count = (int)ValidateIndex(index, count, allowPartial); // This bypasses a lot of checks particularly when the input was a byte[] // TODO: good spot to try Span @@ -147,25 +157,21 @@ public int Read(long index, byte[] buffer, int offset, int count, bool isSequent /// Returns an unsigned byte at an index in the sequence. /// the 8 bit int value, between 0 and 255 /// position within the data buffer to read byte - /// flag indicating if caller is using sequential access /// - /// - public byte GetByte(long index, bool isSequential) + public byte GetByte(long index) { - return GetByte(index, isSequential, true); + return GetByte(index, true); } /// Returns an unsigned byte at an index in the sequence. 
/// the 8 bit int value, between 0 and 255 /// position within the data buffer to read byte - /// flag indicating if caller is using sequential access /// allows for skipping validation if already done by the caller /// - /// - private byte GetByte(long index, bool isSequential, bool validateIndex) + private byte GetByte(long index, bool validateIndex) { - if(validateIndex) - ValidateIndex(index, 1, isSequential); + if (validateIndex) + ValidateIndex(index, 1); // This bypasses a lot of checks particularly when the input was a byte[] if (p_isStreamFinished && p_chunks.Count == 1) @@ -183,203 +189,189 @@ private byte GetByte(long index, bool isSequential, bool validateIndex) /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. /// the 16 bit int value, between 0x0000 and 0xFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public ushort GetUInt16(long index, bool IsMotorolaByteOrder, bool isSequential) + public ushort GetUInt16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 2, isSequential); + ValidateIndex(index, 2); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return (ushort) - (GetByte(index, isSequential, false) << 8 | - GetByte(index + 1, isSequential, false)); + (GetByte(index, false) << 8 | + GetByte(index + 1, false)); } // Intel ordering - LSB first return (ushort) - (GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false)); + (GetByte(index + 1, false) << 8 | + GetByte(index , false)); } /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). 
/// the 16 bit int value, between 0x0000 and 0xFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public short GetInt16(long index, bool IsMotorolaByteOrder, bool isSequential) + public short GetInt16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 2, isSequential); + ValidateIndex(index, 2); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return (short) - (GetByte(index , isSequential, false) << 8 | - GetByte(index + 1, isSequential, false)); + (GetByte(index , false) << 8 | + GetByte(index + 1, false)); } // Intel ordering - LSB first return (short) - (GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false)); + (GetByte(index + 1, false) << 8 | + GetByte(index , false)); } /// Get a 24-bit unsigned integer from the buffer, returning it as an int. /// the unsigned 24-bit int value as a long, between 0x00000000 and 0x00FFFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public int GetInt24(long index, bool IsMotorolaByteOrder, bool isSequential) + public int GetInt24(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 3, isSequential); + ValidateIndex(index, 3); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return - GetByte(index , isSequential, false) << 16 | - GetByte(index + 1, isSequential, false) << 8 | - GetByte(index + 2, isSequential, false); + GetByte(index , false) << 16 | + GetByte(index + 1, false) << 8 | + GetByte(index + 2, false); } // Intel ordering - LSB first return - GetByte(index + 2, isSequential, false) << 16 | - GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false); + 
GetByte(index + 2, false) << 16 | + GetByte(index + 1, false) << 8 | + GetByte(index , false); } /// Get a 32-bit unsigned integer from the buffer, returning it as a long. /// the unsigned 32-bit int value as a long, between 0x00000000 and 0xFFFFFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public uint GetUInt32(long index, bool IsMotorolaByteOrder, bool isSequential) + public uint GetUInt32(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4, isSequential); + ValidateIndex(index, 4); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return (uint) - (GetByte(index , isSequential, false) << 24 | - GetByte(index + 1, isSequential, false) << 16 | - GetByte(index + 2, isSequential, false) << 8 | - GetByte(index + 3, isSequential, false)); + (GetByte(index , false) << 24 | + GetByte(index + 1, false) << 16 | + GetByte(index + 2, false) << 8 | + GetByte(index + 3, false)); } // Intel ordering - LSB first return (uint) - (GetByte(index + 3, isSequential, false) << 24 | - GetByte(index + 2, isSequential, false) << 16 | - GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false)); + (GetByte(index + 3, false) << 24 | + GetByte(index + 2, false) << 16 | + GetByte(index + 1, false) << 8 | + GetByte(index , false)); } /// Returns a signed 32-bit integer from four bytes of data. 
/// the signed 32 bit int value, between 0x00000000 and 0xFFFFFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public int GetInt32(long index, bool IsMotorolaByteOrder, bool isSequential) + public int GetInt32(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4, isSequential); + ValidateIndex(index, 4); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return - GetByte(index , isSequential, false) << 24 | - GetByte(index + 1, isSequential, false) << 16 | - GetByte(index + 2, isSequential, false) << 8 | - GetByte(index + 3, isSequential, false); + GetByte(index , false) << 24 | + GetByte(index + 1, false) << 16 | + GetByte(index + 2, false) << 8 | + GetByte(index + 3, false); } // Intel ordering - LSB first return - GetByte(index + 3, isSequential, false) << 24 | - GetByte(index + 2, isSequential, false) << 16 | - GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false); + GetByte(index + 3, false) << 24 | + GetByte(index + 2, false) << 16 | + GetByte(index + 1, false) << 8 | + GetByte(index , false); } /// Get a signed 64-bit integer from the buffer. 
/// the 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public long GetInt64(long index, bool IsMotorolaByteOrder, bool isSequential) + public long GetInt64(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 8, isSequential); + ValidateIndex(index, 8); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return - (long)GetByte(index , isSequential, false) << 56 | - (long)GetByte(index + 1, isSequential, false) << 48 | - (long)GetByte(index + 2, isSequential, false) << 40 | - (long)GetByte(index + 3, isSequential, false) << 32 | - (long)GetByte(index + 4, isSequential, false) << 24 | - (long)GetByte(index + 5, isSequential, false) << 16 | - (long)GetByte(index + 6, isSequential, false) << 8 | - GetByte(index + 7, isSequential, false); + (long)GetByte(index , false) << 56 | + (long)GetByte(index + 1, false) << 48 | + (long)GetByte(index + 2, false) << 40 | + (long)GetByte(index + 3, false) << 32 | + (long)GetByte(index + 4, false) << 24 | + (long)GetByte(index + 5, false) << 16 | + (long)GetByte(index + 6, false) << 8 | + GetByte(index + 7, false); } // Intel ordering - LSB first return - (long)GetByte(index + 7, isSequential, false) << 56 | - (long)GetByte(index + 6, isSequential, false) << 48 | - (long)GetByte(index + 5, isSequential, false) << 40 | - (long)GetByte(index + 4, isSequential, false) << 32 | - (long)GetByte(index + 3, isSequential, false) << 24 | - (long)GetByte(index + 2, isSequential, false) << 16 | - (long)GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , isSequential, false); + (long)GetByte(index + 7, false) << 56 | + (long)GetByte(index + 6, false) << 48 | + (long)GetByte(index + 5, false) << 40 | + (long)GetByte(index + 4, false) << 32 | + (long)GetByte(index + 3, 
false) << 24 | + (long)GetByte(index + 2, false) << 16 | + (long)GetByte(index + 1, false) << 8 | + GetByte(index , false); } /// Get an usigned 64-bit integer from the buffer. /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public ulong GetUInt64(long index, bool IsMotorolaByteOrder, bool isSequential) + public ulong GetUInt64(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 8, isSequential); + ValidateIndex(index, 8); - if (IsMotorolaByteOrder) + if (isMotorolaByteOrder) { // Motorola - MSB first return - (ulong)GetByte(index , isSequential, false) << 56 | - (ulong)GetByte(index + 1, isSequential, false) << 48 | - (ulong)GetByte(index + 2, isSequential, false) << 40 | - (ulong)GetByte(index + 3, isSequential, false) << 32 | - (ulong)GetByte(index + 4, isSequential, false) << 24 | - (ulong)GetByte(index + 5, isSequential, false) << 16 | - (ulong)GetByte(index + 6, isSequential, false) << 8 | - GetByte(index + 7, isSequential, false); + (ulong)GetByte(index , false) << 56 | + (ulong)GetByte(index + 1, false) << 48 | + (ulong)GetByte(index + 2, false) << 40 | + (ulong)GetByte(index + 3, false) << 32 | + (ulong)GetByte(index + 4, false) << 24 | + (ulong)GetByte(index + 5, false) << 16 | + (ulong)GetByte(index + 6, false) << 8 | + GetByte(index + 7, false); } // Intel ordering - LSB first return - (ulong)GetByte(index + 7, isSequential, false) << 56 | - (ulong)GetByte(index + 6, isSequential, false) << 48 | - (ulong)GetByte(index + 5, isSequential, false) << 40 | - (ulong)GetByte(index + 4, isSequential, false) << 32 | - (ulong)GetByte(index + 3, isSequential, false) << 24 | - (ulong)GetByte(index + 2, isSequential, false) << 16 | - (ulong)GetByte(index + 1, isSequential, false) << 8 | - GetByte(index , 
isSequential, false); + (ulong)GetByte(index + 7, false) << 56 | + (ulong)GetByte(index + 6, false) << 48 | + (ulong)GetByte(index + 5, false) << 40 | + (ulong)GetByte(index + 4, false) << 32 | + (ulong)GetByte(index + 3, false) << 24 | + (ulong)GetByte(index + 2, false) << 16 | + (ulong)GetByte(index + 1, false) << 8 | + GetByte(index , false); } /// Gets a s15.16 fixed point float from the buffer. @@ -390,24 +382,22 @@ public ulong GetUInt64(long index, bool IsMotorolaByteOrder, bool isSequential) /// /// the floating point value /// position within the data buffer to read first byte - /// byte order for returning the result - /// flag indicating if caller is using sequential access + /// byte order for returning the result /// - /// - public float GetS15Fixed16(long index, bool IsMotorolaByteOrder, bool isSequential) + public float GetS15Fixed16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4, isSequential); - if (IsMotorolaByteOrder) + ValidateIndex(index, 4); + if (isMotorolaByteOrder) { - float res = GetByte(index, isSequential, false) << 8 | GetByte(index + 1, isSequential, false); - var d = GetByte(index + 2, isSequential, false) << 8 | GetByte(index + 3, isSequential, false); + float res = GetByte(index, false) << 8 | GetByte(index + 1, false); + var d = GetByte(index + 2, false) << 8 | GetByte(index + 3, false); return (float)(res + d / 65536.0); } else { // this particular branch is untested - var d = GetByte(index + 1, isSequential, false) << 8 | GetByte(index, isSequential, false); - float res = GetByte(index + 3, isSequential, false) << 8 | GetByte(index + 2, isSequential, false); + var d = GetByte(index + 1, false) << 8 | GetByte(index, false); + float res = GetByte(index + 3, false) << 8 | GetByte(index + 2, false); return (float)(res + d / 65536.0); } } @@ -417,12 +407,10 @@ public float GetS15Fixed16(long index, bool IsMotorolaByteOrder, bool isSequenti /// Seeks to an index in the sequence. 
If the sequence can't satisfy the request, exceptions are thrown. /// /// position within the data buffer to seek to - /// optional for testing to indicate whether sequential access was used. /// - /// - public void Seek(long index, bool isSequential = false) + public void Seek(long index) { - ValidateIndex((index == 0) ? 0 : (index - 1), 1, isSequential); + ValidateIndex((index == 0) ? 0 : (index - 1), 1); } /// @@ -436,11 +424,9 @@ public void Seek(long index, bool isSequential = false) /// /// the index from which the required bytes start /// the number of bytes which are required - /// flag indicating if caller is using sequential access /// flag indicating whether count should be enforced when validating the index /// negative index, less than 0 bytes, or too many bytes are requested - /// if the stream ends before the required number of bytes are acquired - internal long ValidateIndex(long index, long bytesRequested, bool isSequential, bool allowPartial = false) + internal long ValidateIndex(long index, long bytesRequested, bool allowPartial = false) { long available = BytesAvailable(index, bytesRequested); if (available != bytesRequested && !allowPartial) @@ -452,12 +438,7 @@ internal long ValidateIndex(long index, long bytesRequested, bool isSequential, if (index + bytesRequested - 1 > int.MaxValue) throw new BufferBoundsException($"Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: {index}, requested count: {bytesRequested})"); if (index + bytesRequested >= p_streamLength) - { - if (isSequential) - throw new IOException("End of data reached."); - else - throw new BufferBoundsException(index, bytesRequested, p_streamLength); - } + throw new BufferBoundsException(index, bytesRequested, p_streamLength); // TODO test that can continue using an instance of this type after this exception throw new BufferBoundsException(index, bytesRequested, p_streamLength); @@ -528,7 +509,9 @@ private long 
BytesAvailable(long index, long bytesRequested) // check we have enough bytes for the requested index if (endIndex >= p_streamLength) { +#if DEBUG TotalBytesRead += totalBytesRead; +#endif p_chunks.Add(i, chunk); return (index + bytesRequested) <= p_streamLength ? bytesRequested : p_streamLength - index; } @@ -539,7 +522,9 @@ private long BytesAvailable(long index, long bytesRequested) } } +#if DEBUG TotalBytesRead += totalBytesRead; +#endif p_chunks.Add(i, chunk); } } @@ -550,10 +535,12 @@ private long BytesAvailable(long index, long bytesRequested) return bytesRequested; } +#if DEBUG /// /// Records the total bytes buffered /// public long TotalBytesRead { get; private set; } = 0; +#endif public byte[] ToArray(long index, int count) { diff --git a/MetadataExtractor/IO/ReaderInfo.cs b/MetadataExtractor/IO/ReaderInfo.cs index 2deede096..d365d43cb 100644 --- a/MetadataExtractor/IO/ReaderInfo.cs +++ b/MetadataExtractor/IO/ReaderInfo.cs @@ -83,32 +83,29 @@ public static ReaderInfo CreateFromStream(Stream stream, long streamLength = -1) /// Seeks forward or backward in the sequence. /// - /// Skips forward or backward in the sequence. If the sequence ends, an is thrown. + /// Skips forward or backward in the sequence. If the sequence ends, a is thrown. /// /// the number of bytes to seek, in either direction. - /// optional for testing to indicate whether sequential access was used. - /// the end of the sequence is reached. - /// an error occurred reading from the underlying source. - public void Skip(long offset, bool isSequential = false) + /// the end of the sequence is reached, or an error occurred reading from the underlying source. 
+ public void Skip(long offset) { if (offset + LocalPosition < 0) offset = -LocalPosition; - p_ras.Seek(LocalPosition + offset, isSequential); + p_ras.Seek(LocalPosition + offset); LocalPosition += offset; } /// Seeks forward or backward in the sequence, returning a boolean indicating whether the seek succeeded, or whether the sequence ended. /// the number of bytes to seek, in either direction. - /// optional for testing to indicate whether sequential access was used. /// a boolean indicating whether the skip succeeded, or whether the sequence ended. - /// an error occurred reading from the underlying source. - public bool TrySkip(long n, bool isSequential = false) + /// an error occurred reading from the underlying source. + public bool TrySkip(long n) { try { - Skip(n, isSequential); + Skip(n); return true; } catch (IOException) @@ -125,7 +122,6 @@ public bool TrySkip(long n, bool isSequential = false) /// number of bytes to be written. /// The requested bytes /// - /// public int Read(byte[] buffer, int offset, int count) => Read(buffer, offset, SequentialFlag, count); /// Retrieves bytes, writing them into a caller-provided buffer. @@ -136,7 +132,6 @@ public bool TrySkip(long n, bool isSequential = false) /// number of bytes to be written. /// The requested bytes /// - /// public int Read(byte[] buffer, int offset, long index, int count) { bool isSeq = (index == SequentialFlag); @@ -147,7 +142,7 @@ public int Read(byte[] buffer, int offset, long index, int count) private int ReadAtGlobal(long readat, byte[] buffer, int offset, int count, bool isSequential, bool allowPartial) { - int read = p_ras.Read(readat, buffer, offset, count, isSequential, allowPartial); + int read = p_ras.Read(readat, buffer, offset, count, allowPartial); if (isSequential && read > 0) LocalPosition += read; // advance the sequential position @@ -182,20 +177,20 @@ public bool StartsWith(byte[] pattern) /// Gets the byte value at the next sequential byte index. 
/// The read byte value /// if the requested byte is beyond the end of the underlying data source - /// if the byte is unable to be read + /// if the byte is unable to be read public byte GetByte() => GetByte(SequentialFlag); /// Gets the byte value at the specified byte index. /// The index from which to read the byte /// The read byte value /// if the requested byte is beyond the end of the underlying data source - /// if the byte is unable to be read + /// if the byte is unable to be read public byte GetByte(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetByte(readat, isSeq); + var read = p_ras.GetByte(readat); if (isSeq) LocalPosition++; // advance the sequential position @@ -207,7 +202,7 @@ public byte GetByte(long index) /// The number of bytes to be returned /// The requested bytes /// if the requested bytes extend beyond the end of the underlying data source - /// if the byte is unable to be read + /// if the byte is unable to be read public byte[] GetBytes(int count) => GetBytes(SequentialFlag, count); /// Returns the required number of bytes from the specified index from the underlying source. @@ -215,14 +210,14 @@ public byte GetByte(long index) /// The number of bytes to be returned /// The requested bytes /// if the requested bytes extend beyond the end of the underlying data source - /// if the byte is unable to be read + /// if the byte is unable to be read public byte[] GetBytes(long index, int count) { // validate the index now to avoid creating a byte array that could cause a heap overflow bool isSeq = (index == SequentialFlag); long readat = isSeq ? 
GlobalPosition : (StartPosition + index); - long available = p_ras.ValidateIndex(readat, count, isSeq, false); + long available = p_ras.ValidateIndex(readat, count, false); if (available == 0) return new byte[0]; @@ -234,13 +229,13 @@ public byte[] GetBytes(long index, int count) /// Gets whether a bit at a specific index is set or not sequentially. /// true if the bit is set, otherwise false - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public bool GetBit() => GetBit(SequentialFlag); /// Gets whether a bit at a specific index is set or not. /// the number of bits at which to test /// true if the bit is set, otherwise false - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public bool GetBit(int index) { var byteIndex = index / 8; @@ -251,13 +246,13 @@ public bool GetBit(int index) /// Returns a signed 8-bit int calculated from one byte of data sequentially. /// the 8 bit signed byte value - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public sbyte GetSByte() => GetSByte(SequentialFlag); /// Returns a signed 8-bit int calculated from one byte of data at the specified index. /// position within the data buffer to read byte /// the 8 bit signed byte value - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public sbyte GetSByte(long index) { return unchecked((sbyte)GetByte(index)); @@ -265,19 +260,19 @@ public sbyte GetSByte(long index) /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. 
/// the 16 bit int value, between 0x0000 and 0xFFFF - /// + /// public ushort GetUInt16() => GetUInt16(SequentialFlag); /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. /// position within the data buffer to read byte /// the 16 bit int value, between 0x0000 and 0xFFFF - /// + /// public ushort GetUInt16(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetUInt16(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetUInt16(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 2; // advance the sequential position @@ -287,7 +282,7 @@ public ushort GetUInt16(long index) /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. /// the 16 bit int value, between 0x0000 and 0xFFFF - /// + /// public ushort GetUInt16(int b1, int b2) { if (IsMotorolaByteOrder) @@ -301,18 +296,18 @@ public ushort GetUInt16(int b1, int b2) /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). /// the 16 bit int value, between 0x0000 and 0xFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public short GetInt16() => GetInt16(SequentialFlag); /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). /// the 16 bit int value, between 0x0000 and 0xFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public short GetInt16(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? 
GlobalPosition : (StartPosition + index); - var read = p_ras.GetInt16(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetInt16(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 2; // advance the sequential position @@ -322,19 +317,19 @@ public short GetInt16(long index) /// Get a 24-bit unsigned integer from the buffer sequentially, returning it as an int. /// the unsigned 24-bit int value as a long, between 0x00000000 and 0x00FFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public int GetInt24() => GetInt24(SequentialFlag); /// Get a 24-bit unsigned integer from the buffer, returning it as an int. /// position within the data buffer to read first byte /// the unsigned 24-bit int value as a long, between 0x00000000 and 0x00FFFFFF - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public int GetInt24(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetInt24(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetInt24(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 3; // advance the sequential position @@ -344,19 +339,19 @@ public int GetInt24(long index) /// Get a 32-bit unsigned integer from the buffer sequentially, returning it as a long. /// the unsigned 32-bit int value as a long, between 0x00000000 and 0xFFFFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public uint GetUInt32() => GetUInt32(SequentialFlag); /// Get a 32-bit unsigned integer from the buffer, returning it as a long. 
/// position within the data buffer to read first byte /// the unsigned 32-bit int value as a long, between 0x00000000 and 0xFFFFFFFF - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public uint GetUInt32(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetUInt32(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetUInt32(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 4; // advance the sequential position @@ -366,19 +361,19 @@ public uint GetUInt32(long index) /// Returns a signed 32-bit integer from four bytes of data sequentially. /// the signed 32 bit int value, between 0x00000000 and 0xFFFFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public int GetInt32() => GetInt32(SequentialFlag); /// Returns a signed 32-bit integer from four bytes of data at the specified index the buffer. /// position within the data buffer to read first byte /// the signed 32 bit int value, between 0x00000000 and 0xFFFFFFFF - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public int GetInt32(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetInt32(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetInt32(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 4; // advance the sequential position @@ -388,19 +383,19 @@ public int GetInt32(long index) /// Get a signed 64-bit integer from the buffer sequentially. 
/// the 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public long GetInt64() => GetInt64(SequentialFlag); /// Get a signed 64-bit integer from the buffer. /// position within the data buffer to read first byte /// the 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public long GetInt64(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? GlobalPosition : (StartPosition + index); - var read = p_ras.GetInt64(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetInt64(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 8; // advance the sequential position @@ -410,18 +405,18 @@ public long GetInt64(long index) /// Get an usigned 64-bit integer from the buffer sequentially. /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public ulong GetUInt64() => GetUInt64(SequentialFlag); /// Get an usigned 64-bit integer from the buffer. /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public ulong GetUInt64(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? 
GlobalPosition : (StartPosition + index); - var read = p_ras.GetUInt64(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetUInt64(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 8; // advance the sequential position @@ -434,7 +429,7 @@ public ulong GetUInt64(long index) /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits. /// /// the floating point value - /// the buffer does not contain enough bytes to service the request + /// the buffer does not contain enough bytes to service the request public float GetS15Fixed16() => GetS15Fixed16(SequentialFlag); /// Gets a s15.16 fixed point float from the buffer. @@ -442,13 +437,13 @@ public ulong GetUInt64(long index) /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits. /// /// the floating point value - /// the buffer does not contain enough bytes to service the request, or index is negative + /// the buffer does not contain enough bytes to service the request, or index is negative public float GetS15Fixed16(long index) { bool isSeq = (index == SequentialFlag); long readat = isSeq ? 
GlobalPosition : (StartPosition + index); - var read = p_ras.GetS15Fixed16(readat, IsMotorolaByteOrder, isSeq); + var read = p_ras.GetS15Fixed16(readat, IsMotorolaByteOrder); if (isSeq) LocalPosition += 4; // advance the sequential position @@ -459,19 +454,19 @@ public float GetS15Fixed16(long index) public float GetFloat32() => GetFloat32(SequentialFlag); - /// + /// public float GetFloat32(long index) => BitConverter.ToSingle(BitConverter.GetBytes(GetInt32(index)), 0); public double GetDouble64() => GetDouble64(SequentialFlag); - /// + /// public double GetDouble64(long index) => BitConverter.Int64BitsToDouble(GetInt64(index)); [NotNull] public string GetString(int bytesRequested, [NotNull] Encoding encoding) => GetString(SequentialFlag, bytesRequested, encoding); - /// + /// [NotNull] public string GetString(long index, int bytesRequested, [NotNull] Encoding encoding) { @@ -494,7 +489,7 @@ public StringValue GetStringValue(int bytesRequested, Encoding? encoding) /// reading will stop and the string will be truncated to this length. /// /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. [NotNull] public string GetNullTerminatedString(int maxLengthBytes) => GetNullTerminatedString(SequentialFlag, maxLengthBytes); @@ -508,7 +503,7 @@ public StringValue GetStringValue(int bytesRequested, Encoding? encoding) /// /// An optional string encoding. If none is provided, is used. /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. [NotNull] public string GetNullTerminatedString(int maxLengthBytes, Encoding encoding) => GetNullTerminatedString(SequentialFlag, maxLengthBytes, encoding); @@ -522,7 +517,7 @@ public StringValue GetStringValue(int bytesRequested, Encoding? encoding) /// reading will stop and the string will be truncated to this length. 
/// /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. [NotNull] public string GetNullTerminatedString(int index, int maxLengthBytes) => GetNullTerminatedString(index, maxLengthBytes, null); @@ -537,7 +532,7 @@ public StringValue GetStringValue(int bytesRequested, Encoding? encoding) /// /// An optional string encoding. If none is provided, is used. /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. [NotNull] public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? encoding) { @@ -555,7 +550,7 @@ public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? e /// reading will stop and the string will be truncated to this length. /// /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. public StringValue GetNullTerminatedStringValue(int maxLengthBytes) => GetNullTerminatedStringValue(SequentialFlag, maxLengthBytes); /// @@ -568,7 +563,7 @@ public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? e /// reading will stop and the string will be truncated to this length. /// /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes) => GetNullTerminatedStringValue(index, maxLengthBytes, null); /// @@ -581,7 +576,7 @@ public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? e /// /// An optional string encoding to use when interpreting bytes. /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. 
public StringValue GetNullTerminatedStringValue(int maxLengthBytes, Encoding? encoding) => GetNullTerminatedStringValue(SequentialFlag, maxLengthBytes, encoding); /// @@ -595,7 +590,7 @@ public string GetNullTerminatedString(int index, int maxLengthBytes, Encoding? e /// /// An optional string encoding to use when interpreting bytes. /// The read - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes, Encoding? encoding) { var bytes = GetNullTerminatedBytes(index, maxLengthBytes); @@ -611,7 +606,7 @@ public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes, E /// the returned array will be long. /// /// The read byte array. - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. [NotNull] public byte[] GetNullTerminatedBytes(int maxLengthBytes) => GetNullTerminatedBytes(SequentialFlag, maxLengthBytes); @@ -624,7 +619,7 @@ public StringValue GetNullTerminatedStringValue(int index, int maxLengthBytes, E /// the returned array will be long. /// /// The read byte array. - /// The buffer does not contain enough bytes to satisfy this request. + /// The buffer does not contain enough bytes to satisfy this request. 
[NotNull] public byte[] GetNullTerminatedBytes(int index, int maxLengthBytes) { From b5472166568251dcffba992dea8287fa02b49c3e Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 9 May 2020 07:01:28 -0500 Subject: [PATCH 04/10] - perform ValidateIndex call directly in GetByte - rename overload of GetByte to GetByteNoValidation and remove validateIndex param --- MetadataExtractor/IO/RandomAccessStream.cs | 146 ++++++++++----------- 1 file changed, 72 insertions(+), 74 deletions(-) diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index be59f475a..86898b9f9 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -122,7 +122,7 @@ public int Read(long index, byte[] buffer, int offset, int count) /// starting position within to write to. /// number of bytes to be written. /// flag indicating whether count should be enforced when validating the index - /// The requested bytes, or as many as can be retrieved if is true + /// The requested bytes read into , and either the count of bytes read or as many as can be retrieved if is true /// public int Read(long index, byte[] buffer, int offset, int count, bool allowPartial) { @@ -160,19 +160,17 @@ public int Read(long index, byte[] buffer, int offset, int count, bool allowPart /// public byte GetByte(long index) { - return GetByte(index, true); + ValidateIndex(index, 1); + + return GetByteNoValidation(index); } - /// Returns an unsigned byte at an index in the sequence. + /// Returns an unsigned byte at an index in the sequence. 
The index is not validated before reading /// the 8 bit int value, between 0 and 255 /// position within the data buffer to read byte - /// allows for skipping validation if already done by the caller /// - private byte GetByte(long index, bool validateIndex) + private byte GetByteNoValidation(long index) { - if (validateIndex) - ValidateIndex(index, 1); - // This bypasses a lot of checks particularly when the input was a byte[] if (p_isStreamFinished && p_chunks.Count == 1) return p_chunks[0][index]; @@ -199,13 +197,13 @@ public ushort GetUInt16(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return (ushort) - (GetByte(index, false) << 8 | - GetByte(index + 1, false)); + (GetByteNoValidation(index ) << 8 | + GetByteNoValidation(index + 1)); } // Intel ordering - LSB first return (ushort) - (GetByte(index + 1, false) << 8 | - GetByte(index , false)); + (GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); } /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). @@ -221,13 +219,13 @@ public short GetInt16(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return (short) - (GetByte(index , false) << 8 | - GetByte(index + 1, false)); + (GetByteNoValidation(index ) << 8 | + GetByteNoValidation(index + 1)); } // Intel ordering - LSB first return (short) - (GetByte(index + 1, false) << 8 | - GetByte(index , false)); + (GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); } /// Get a 24-bit unsigned integer from the buffer, returning it as an int. 
@@ -243,15 +241,15 @@ public int GetInt24(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return - GetByte(index , false) << 16 | - GetByte(index + 1, false) << 8 | - GetByte(index + 2, false); + GetByteNoValidation(index ) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index + 2); } // Intel ordering - LSB first return - GetByte(index + 2, false) << 16 | - GetByte(index + 1, false) << 8 | - GetByte(index , false); + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); } /// Get a 32-bit unsigned integer from the buffer, returning it as a long. @@ -267,17 +265,17 @@ public uint GetUInt32(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return (uint) - (GetByte(index , false) << 24 | - GetByte(index + 1, false) << 16 | - GetByte(index + 2, false) << 8 | - GetByte(index + 3, false)); + (GetByteNoValidation(index ) << 24 | + GetByteNoValidation(index + 1) << 16 | + GetByteNoValidation(index + 2) << 8 | + GetByteNoValidation(index + 3)); } // Intel ordering - LSB first return (uint) - (GetByte(index + 3, false) << 24 | - GetByte(index + 2, false) << 16 | - GetByte(index + 1, false) << 8 | - GetByte(index , false)); + (GetByteNoValidation(index + 3) << 24 | + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); } /// Returns a signed 32-bit integer from four bytes of data. 
@@ -293,17 +291,17 @@ public int GetInt32(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return - GetByte(index , false) << 24 | - GetByte(index + 1, false) << 16 | - GetByte(index + 2, false) << 8 | - GetByte(index + 3, false); + GetByteNoValidation(index ) << 24 | + GetByteNoValidation(index + 1) << 16 | + GetByteNoValidation(index + 2) << 8 | + GetByteNoValidation(index + 3); } // Intel ordering - LSB first return - GetByte(index + 3, false) << 24 | - GetByte(index + 2, false) << 16 | - GetByte(index + 1, false) << 8 | - GetByte(index , false); + GetByteNoValidation(index + 3) << 24 | + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); } /// Get a signed 64-bit integer from the buffer. @@ -319,25 +317,25 @@ public long GetInt64(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return - (long)GetByte(index , false) << 56 | - (long)GetByte(index + 1, false) << 48 | - (long)GetByte(index + 2, false) << 40 | - (long)GetByte(index + 3, false) << 32 | - (long)GetByte(index + 4, false) << 24 | - (long)GetByte(index + 5, false) << 16 | - (long)GetByte(index + 6, false) << 8 | - GetByte(index + 7, false); + (long)GetByteNoValidation(index ) << 56 | + (long)GetByteNoValidation(index + 1) << 48 | + (long)GetByteNoValidation(index + 2) << 40 | + (long)GetByteNoValidation(index + 3) << 32 | + (long)GetByteNoValidation(index + 4) << 24 | + (long)GetByteNoValidation(index + 5) << 16 | + (long)GetByteNoValidation(index + 6) << 8 | + GetByteNoValidation(index + 7); } // Intel ordering - LSB first return - (long)GetByte(index + 7, false) << 56 | - (long)GetByte(index + 6, false) << 48 | - (long)GetByte(index + 5, false) << 40 | - (long)GetByte(index + 4, false) << 32 | - (long)GetByte(index + 3, false) << 24 | - (long)GetByte(index + 2, false) << 16 | - (long)GetByte(index + 1, false) << 8 | - GetByte(index , false); + (long)GetByteNoValidation(index + 7) << 56 | + 
(long)GetByteNoValidation(index + 6) << 48 | + (long)GetByteNoValidation(index + 5) << 40 | + (long)GetByteNoValidation(index + 4) << 32 | + (long)GetByteNoValidation(index + 3) << 24 | + (long)GetByteNoValidation(index + 2) << 16 | + (long)GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); } /// Get an usigned 64-bit integer from the buffer. @@ -353,25 +351,25 @@ public ulong GetUInt64(long index, bool isMotorolaByteOrder) { // Motorola - MSB first return - (ulong)GetByte(index , false) << 56 | - (ulong)GetByte(index + 1, false) << 48 | - (ulong)GetByte(index + 2, false) << 40 | - (ulong)GetByte(index + 3, false) << 32 | - (ulong)GetByte(index + 4, false) << 24 | - (ulong)GetByte(index + 5, false) << 16 | - (ulong)GetByte(index + 6, false) << 8 | - GetByte(index + 7, false); + (ulong)GetByteNoValidation(index ) << 56 | + (ulong)GetByteNoValidation(index + 1) << 48 | + (ulong)GetByteNoValidation(index + 2) << 40 | + (ulong)GetByteNoValidation(index + 3) << 32 | + (ulong)GetByteNoValidation(index + 4) << 24 | + (ulong)GetByteNoValidation(index + 5) << 16 | + (ulong)GetByteNoValidation(index + 6) << 8 | + GetByteNoValidation(index + 7); } // Intel ordering - LSB first return - (ulong)GetByte(index + 7, false) << 56 | - (ulong)GetByte(index + 6, false) << 48 | - (ulong)GetByte(index + 5, false) << 40 | - (ulong)GetByte(index + 4, false) << 32 | - (ulong)GetByte(index + 3, false) << 24 | - (ulong)GetByte(index + 2, false) << 16 | - (ulong)GetByte(index + 1, false) << 8 | - GetByte(index , false); + (ulong)GetByteNoValidation(index + 7) << 56 | + (ulong)GetByteNoValidation(index + 6) << 48 | + (ulong)GetByteNoValidation(index + 5) << 40 | + (ulong)GetByteNoValidation(index + 4) << 32 | + (ulong)GetByteNoValidation(index + 3) << 24 | + (ulong)GetByteNoValidation(index + 2) << 16 | + (ulong)GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); } /// Gets a s15.16 fixed point float from the buffer. 
@@ -389,15 +387,15 @@ public float GetS15Fixed16(long index, bool isMotorolaByteOrder) ValidateIndex(index, 4); if (isMotorolaByteOrder) { - float res = GetByte(index, false) << 8 | GetByte(index + 1, false); - var d = GetByte(index + 2, false) << 8 | GetByte(index + 3, false); + float res = GetByteNoValidation(index) << 8 | GetByteNoValidation(index + 1); + var d = GetByteNoValidation(index + 2) << 8 | GetByteNoValidation(index + 3); return (float)(res + d / 65536.0); } else { // this particular branch is untested - var d = GetByte(index + 1, false) << 8 | GetByte(index, false); - float res = GetByte(index + 3, false) << 8 | GetByte(index + 2, false); + var d = GetByteNoValidation(index + 1) << 8 | GetByteNoValidation(index); + float res = GetByteNoValidation(index + 3) << 8 | GetByteNoValidation(index + 2); return (float)(res + d / 65536.0); } } From 28daa1ac605b9b85db7ca3e1dd6b3b55e2675a97 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 9 May 2020 08:13:38 -0500 Subject: [PATCH 05/10] Rename ValidateIndex to ValidateRange --- MetadataExtractor/IO/RandomAccessStream.cs | 24 +++++++++++----------- MetadataExtractor/IO/ReaderInfo.cs | 5 ++--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index 86898b9f9..3a5cde7b3 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -126,7 +126,7 @@ public int Read(long index, byte[] buffer, int offset, int count) /// public int Read(long index, byte[] buffer, int offset, int count, bool allowPartial) { - count = (int)ValidateIndex(index, count, allowPartial); + count = (int)ValidateRange(index, count, allowPartial); // This bypasses a lot of checks particularly when the input was a byte[] // TODO: good spot to try Span @@ -160,7 +160,7 @@ public int Read(long index, byte[] buffer, int offset, int count, bool allowPart /// public byte GetByte(long index) { - 
ValidateIndex(index, 1); + ValidateRange(index, 1); return GetByteNoValidation(index); } @@ -191,7 +191,7 @@ private byte GetByteNoValidation(long index) /// public ushort GetUInt16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 2); + ValidateRange(index, 2); if (isMotorolaByteOrder) { @@ -213,7 +213,7 @@ public ushort GetUInt16(long index, bool isMotorolaByteOrder) /// public short GetInt16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 2); + ValidateRange(index, 2); if (isMotorolaByteOrder) { @@ -235,7 +235,7 @@ public short GetInt16(long index, bool isMotorolaByteOrder) /// public int GetInt24(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 3); + ValidateRange(index, 3); if (isMotorolaByteOrder) { @@ -259,7 +259,7 @@ public int GetInt24(long index, bool isMotorolaByteOrder) /// public uint GetUInt32(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4); + ValidateRange(index, 4); if (isMotorolaByteOrder) { @@ -285,7 +285,7 @@ public uint GetUInt32(long index, bool isMotorolaByteOrder) /// public int GetInt32(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4); + ValidateRange(index, 4); if (isMotorolaByteOrder) { @@ -311,7 +311,7 @@ public int GetInt32(long index, bool isMotorolaByteOrder) /// public long GetInt64(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 8); + ValidateRange(index, 8); if (isMotorolaByteOrder) { @@ -345,7 +345,7 @@ public long GetInt64(long index, bool isMotorolaByteOrder) /// public ulong GetUInt64(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 8); + ValidateRange(index, 8); if (isMotorolaByteOrder) { @@ -384,7 +384,7 @@ public ulong GetUInt64(long index, bool isMotorolaByteOrder) /// public float GetS15Fixed16(long index, bool isMotorolaByteOrder) { - ValidateIndex(index, 4); + ValidateRange(index, 4); if (isMotorolaByteOrder) { float res = GetByteNoValidation(index) << 8 | GetByteNoValidation(index + 1); @@ -408,7 +408,7 @@ public 
float GetS15Fixed16(long index, bool isMotorolaByteOrder) /// public void Seek(long index) { - ValidateIndex((index == 0) ? 0 : (index - 1), 1); + ValidateRange((index == 0) ? 0 : (index - 1), 1); } /// @@ -424,7 +424,7 @@ public void Seek(long index) /// the number of bytes which are required /// flag indicating whether count should be enforced when validating the index /// negative index, less than 0 bytes, or too many bytes are requested - internal long ValidateIndex(long index, long bytesRequested, bool allowPartial = false) + internal long ValidateRange(long index, long bytesRequested, bool allowPartial = false) { long available = BytesAvailable(index, bytesRequested); if (available != bytesRequested && !allowPartial) diff --git a/MetadataExtractor/IO/ReaderInfo.cs b/MetadataExtractor/IO/ReaderInfo.cs index d365d43cb..43200c013 100644 --- a/MetadataExtractor/IO/ReaderInfo.cs +++ b/MetadataExtractor/IO/ReaderInfo.cs @@ -209,15 +209,14 @@ public byte GetByte(long index) /// The index from which the bytes begin in the underlying source /// The number of bytes to be returned /// The requested bytes - /// if the requested bytes extend beyond the end of the underlying data source - /// if the byte is unable to be read + /// if the requested bytes extend beyond the end of the underlying data source or if the byte is unable to be read public byte[] GetBytes(long index, int count) { // validate the index now to avoid creating a byte array that could cause a heap overflow bool isSeq = (index == SequentialFlag); long readat = isSeq ? 
GlobalPosition : (StartPosition + index); - long available = p_ras.ValidateIndex(readat, count, false); + long available = p_ras.ValidateRange(readat, count); if (available == 0) return new byte[0]; From c16d02e33e32dbe9fc3ba7431f62b9c5f2177968 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 9 May 2020 08:22:09 -0500 Subject: [PATCH 06/10] - skip validation and directly call BytesAvailable in Read if allowPartial is true - remove allowPartial param from ValidateRange --- MetadataExtractor/IO/RandomAccessStream.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index 3a5cde7b3..45265eb8e 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -105,7 +105,7 @@ public ReaderInfo CreateReader(long startPosition, long length, bool isMotorolaB /// Retrieves bytes, writing them into a caller-provided buffer. - /// position within the data buffer to read byte. + /// position within the data buffer to start reading. /// array to write bytes to. /// starting position within to write to. /// number of bytes to be written. @@ -117,7 +117,7 @@ public int Read(long index, byte[] buffer, int offset, int count) } /// Retrieves bytes, writing them into a caller-provided buffer. - /// position within the data buffer to read byte. + /// position within the data buffer to start reading. /// array to write bytes to. /// starting position within to write to. /// number of bytes to be written. 
@@ -126,7 +126,10 @@ public int Read(long index, byte[] buffer, int offset, int count) /// public int Read(long index, byte[] buffer, int offset, int count, bool allowPartial) { - count = (int)ValidateRange(index, count, allowPartial); + if (allowPartial) + count = (int)BytesAvailable(index, count); // skips validation overhead + else + count = (int)ValidateRange(index, count); // This bypasses a lot of checks particularly when the input was a byte[] // TODO: good spot to try Span @@ -422,12 +425,11 @@ public void Seek(long index) /// /// the index from which the required bytes start /// the number of bytes which are required - /// flag indicating whether count should be enforced when validating the index /// negative index, less than 0 bytes, or too many bytes are requested - internal long ValidateRange(long index, long bytesRequested, bool allowPartial = false) + internal long ValidateRange(long index, long bytesRequested) { long available = BytesAvailable(index, bytesRequested); - if (available != bytesRequested && !allowPartial) + if (available != bytesRequested) { if (index < 0) throw new BufferBoundsException($"Attempt to read from buffer using a negative index ({index})"); From 5a16c6a14adc93062a41c35b61eb8826ee6702a6 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 9 May 2020 08:53:42 -0500 Subject: [PATCH 07/10] NEEDS TESTING - throw an IOException when the chunkIndex doesn't exist in the Dictionary instead of returning -1 --- MetadataExtractor/IO/RandomAccessStream.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index 45265eb8e..adf654426 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -184,7 +184,7 @@ private byte GetByteNoValidation(long index) if (p_chunks.ContainsKey(chunkIndex)) return p_chunks[chunkIndex][innerIndex]; else - return unchecked((byte)-1); + throw new 
IOException("End of data reached."); } /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. From 3a00e0712859d37a5aecdb2ee2e0f8b7e1ed6a78 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sat, 23 May 2020 16:37:21 -0500 Subject: [PATCH 08/10] - use a List instead of Dictionary for RAS chunk buffer - add IRandomAccessStream interface - keep dictionary version of RAS class --- .../IO/RAS/RasUsageTests.cs | 25 + MetadataExtractor/IO/IRandomAccessStream.cs | 34 ++ MetadataExtractor/IO/RandomAccessStream.cs | 41 +- .../IO/RandomAccessStreamDictionary.cs | 564 ++++++++++++++++++ MetadataExtractor/IO/ReaderInfo.cs | 6 +- 5 files changed, 652 insertions(+), 18 deletions(-) create mode 100644 MetadataExtractor/IO/IRandomAccessStream.cs create mode 100644 MetadataExtractor/IO/RandomAccessStreamDictionary.cs diff --git a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs index 6b0868e7f..5eb756950 100644 --- a/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs +++ b/MetadataExtractor.Tests/IO/RAS/RasUsageTests.cs @@ -17,6 +17,11 @@ private ReaderInfo CreateReader(params byte[] bytes) return ReaderInfo.CreateFromStream(new MemoryStream(bytes)); } + private ReaderInfo CreateReader(Stream stream) + { + return ReaderInfo.CreateFromStream(stream); + } + [Fact] public void TestRasByteArray() { @@ -206,5 +211,25 @@ public void GetNullTerminatedStringWithClone() Assert.Equal(reader.GetNullTerminatedString(1, 3), clone.GetNullTerminatedString(0, 3)); } + [Fact] + public void TestAllocateListSeekable() + { + var testStream = TestDataUtil.OpenRead("Data/manuallyAddedThumbnail.jpg"); + var reader = CreateReader(testStream); + + Assert.Equal(0x0d3c, reader.GetInt16(15000)); + Assert.Equal(0x7306, reader.GetInt16(8000)); + } + + [Fact] + public void TestAllocateListNonseekable() + { + var testStream = new NonSeekableStream(TestDataUtil.OpenRead("Data/manuallyAddedThumbnail.jpg")); + var reader = 
CreateReader(testStream); + + Assert.Equal(0x0d3c, reader.GetInt16(15000)); + Assert.Equal(0x7306, reader.GetInt16(8000)); + } + } } diff --git a/MetadataExtractor/IO/IRandomAccessStream.cs b/MetadataExtractor/IO/IRandomAccessStream.cs new file mode 100644 index 000000000..3a722cf99 --- /dev/null +++ b/MetadataExtractor/IO/IRandomAccessStream.cs @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace MetadataExtractor.IO +{ + public interface IRandomAccessStream + { + ReaderInfo CreateReader(); + ReaderInfo CreateReader(bool isMotorolaByteOrder); + ReaderInfo CreateReader(long startPosition, long length, bool isMotorolaByteOrder); + + byte GetByte(long index); + ushort GetUInt16(long index, bool isMotorolaByteOrder); + short GetInt16(long index, bool isMotorolaByteOrder); + int GetInt24(long index, bool isMotorolaByteOrder); + uint GetUInt32(long index, bool isMotorolaByteOrder); + int GetInt32(long index, bool isMotorolaByteOrder); + long GetInt64(long index, bool isMotorolaByteOrder); + ulong GetUInt64(long index, bool isMotorolaByteOrder); + float GetS15Fixed16(long index, bool isMotorolaByteOrder); + + + long Length { get; } + + int Read(long index, byte[] buffer, int offset, int count); + int Read(long index, byte[] buffer, int offset, int count, bool allowPartial); + + void Seek(long index); + byte[] ToArray(long index, int count); + + long ValidateRange(long index, long bytesRequested); + } +} diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index adf654426..33bc9b59f 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -16,7 +16,7 @@ namespace MetadataExtractor.IO /// /// Kevin Mott https://github.com/kwhopper /// Drew Noakes https://drewnoakes.com - public class RandomAccessStream + public class RandomAccessStream : IRandomAccessStream { public const long UnknownLengthValue = long.MaxValue; @@ -28,7 +28,10 
@@ public class RandomAccessStream private const int DefaultChunkLength = 4 * 1024; private readonly int p_chunkLength; - public Dictionary p_chunks = new Dictionary(); + + // Set initial chunk capacity to a factor of 2 that most metadata reading will reasonably require. + // Although not always expensive, avoids some internal array copies if the capacity changes. + private List p_chunks = new List(64); public RandomAccessStream(Stream stream, long streamLength = -1) { @@ -67,7 +70,7 @@ public RandomAccessStream(byte[] bytes) // Setting these values makes p_inputStream irrelevant // TODO: break the byte array up into DefaultChunkLength chunks - p_chunks.Add(0, bytes); + p_chunks.Add(bytes); p_chunkLength = bytes.Length; p_streamLength = bytes.Length; @@ -178,10 +181,11 @@ private byte GetByteNoValidation(long index) if (p_isStreamFinished && p_chunks.Count == 1) return p_chunks[0][index]; - var chunkIndex = index / p_chunkLength; + var chunkIndex = (int)(index / p_chunkLength); var innerIndex = index % p_chunkLength; - if (p_chunks.ContainsKey(chunkIndex)) + //if (p_chunks.ContainsKey(chunkIndex)) + if (p_chunks.Count - 1 >= chunkIndex) return p_chunks[chunkIndex][innerIndex]; else throw new IOException("End of data reached."); @@ -421,12 +425,11 @@ public void Seek(long index) /// The number of bytes available out of the number of bytes requested /// /// If the stream ends before the point is reached, a is raised. 
- /// Requesting more bytes than available raises an exception if is false /// /// the index from which the required bytes start /// the number of bytes which are required /// negative index, less than 0 bytes, or too many bytes are requested - internal long ValidateRange(long index, long bytesRequested) + public long ValidateRange(long index, long bytesRequested) { long available = BytesAvailable(index, bytesRequested); if (available != bytesRequested) @@ -473,19 +476,27 @@ private long BytesAvailable(long index, long bytesRequested) return 0; // zero-based - long chunkstart = index / p_chunkLength; - long chunkend = ((index + bytesRequested) / p_chunkLength) + 1; - + int chunkstart = (int)(index / p_chunkLength); + int chunkend = (int)( ((index + bytesRequested) / p_chunkLength) + 1 ); - if (!p_chunks.ContainsKey(chunkstart)) + if (p_chunks.Count - 1 < chunkstart || p_chunks[chunkstart] == null) { if (!CanSeek) - chunkstart = p_chunks.Count == 0 ? 0 : p_chunks.Keys.Max() + 1; + chunkstart = p_chunks.Count; + } + + // fill the chunks List with enough nulls to cover the start/end range + if (p_chunks.Count < chunkend) + { + for (int i = p_chunks.Count; i < chunkend; i++) + { + p_chunks.Add(null); + } } for (var i = chunkstart; i < chunkend; i++) { - if (!p_chunks.ContainsKey(i)) + if (p_chunks[i] == null) { p_isStreamFinished = false; @@ -512,7 +523,7 @@ private long BytesAvailable(long index, long bytesRequested) #if DEBUG TotalBytesRead += totalBytesRead; #endif - p_chunks.Add(i, chunk); + p_chunks[i] = chunk; return (index + bytesRequested) <= p_streamLength ? 
bytesRequested : p_streamLength - index; } } @@ -525,7 +536,7 @@ private long BytesAvailable(long index, long bytesRequested) #if DEBUG TotalBytesRead += totalBytesRead; #endif - p_chunks.Add(i, chunk); + p_chunks[i] = chunk; } } diff --git a/MetadataExtractor/IO/RandomAccessStreamDictionary.cs b/MetadataExtractor/IO/RandomAccessStreamDictionary.cs new file mode 100644 index 000000000..ed81acb7d --- /dev/null +++ b/MetadataExtractor/IO/RandomAccessStreamDictionary.cs @@ -0,0 +1,564 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using JetBrains.Annotations; + +namespace MetadataExtractor.IO +{ + /// Reads and buffers data in chunks and provides methods for reading data types + /// + /// This class implements buffered reading of data typically for use with + /// objects and provides methods for reading data types from it. Data is captured in configurable + /// chunks for efficiency. Both seekable and non-seekable streams are supported. + /// + /// Kevin Mott https://github.com/kwhopper + /// Drew Noakes https://drewnoakes.com + public class RandomAccessStreamDictionary : IRandomAccessStream + { + public const long UnknownLengthValue = long.MaxValue; + + private Stream? p_inputStream; + private long p_streamLength = -1; + + //private readonly List rdrList = new List(); + private bool p_isStreamFinished; + + private const int DefaultChunkLength = 4 * 1024; + private readonly int p_chunkLength; + public Dictionary p_chunks = new Dictionary(); + + public RandomAccessStreamDictionary(Stream stream, long streamLength = -1) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + + p_inputStream = stream; + CanSeek = stream.CanSeek; + + if (streamLength == -1) + { + // Make sure a stream that can seek is also able to report a Length. 
+ // This is an uncommon situation; don't know of any as of this writing + try + { + streamLength = stream.CanSeek ? stream.Length : UnknownLengthValue; + } + catch(NotSupportedException) + { + streamLength = UnknownLengthValue; + CanSeek = false; + } + } + + // TODO: allow a different chunk length either through this constructor or read from a context object + p_chunkLength = DefaultChunkLength; + p_streamLength = streamLength; + } + + public RandomAccessStreamDictionary(byte[] bytes) + { + if (bytes == null) + throw new ArgumentNullException(nameof(bytes)); + + CanSeek = true; + + // Setting these values makes p_inputStream irrelevant + // TODO: break the byte array up into DefaultChunkLength chunks + p_chunks.Add(0, bytes); + p_chunkLength = bytes.Length; + + p_streamLength = bytes.Length; + p_isStreamFinished = true; + } + + public bool CanSeek { get; private set; } = false; + + /// + /// Returns the length of the underlying data source + /// + /// + /// Length is always known when the data source is an array. For data sources derived from , + /// the CanSeek property is checked. If the value is true, the 's Length property is used. + /// If the value is false, it is assumed the length cannot be determined from the itself and + /// long.MaxValue is used instead. + /// + public long Length + { + get + { + // If finished and only one chunk, can bypass a lot of checks particularly when the input was a byte[] + //return (CanSeek) ? (p_isStreamFinished && p_chunks.Count == 1 ? p_streamLength : p_inputStream.Length) : (long)int.MaxValue; + return p_streamLength; + } + } + + public ReaderInfo CreateReader() => CreateReader(-1, -1, true); + public ReaderInfo CreateReader(bool isMotorolaByteOrder) => CreateReader(-1, -1, isMotorolaByteOrder); + public ReaderInfo CreateReader(long startPosition, long length, bool isMotorolaByteOrder) + { + var pos = startPosition >= 0 ? 
startPosition : 0; + return new ReaderInfo(this, pos, 0, length, isMotorolaByteOrder); + } + + + /// Retrieves bytes, writing them into a caller-provided buffer. + /// position within the data buffer to start reading. + /// array to write bytes to. + /// starting position within to write to. + /// number of bytes to be written. + /// The requested bytes, or as many as can be retrieved + /// + public int Read(long index, byte[] buffer, int offset, int count) + { + return Read(index, buffer, offset, count, true); + } + + /// Retrieves bytes, writing them into a caller-provided buffer. + /// position within the data buffer to start reading. + /// array to write bytes to. + /// starting position within to write to. + /// number of bytes to be written. + /// flag indicating whether count should be enforced when validating the index + /// The requested bytes read into , and either the count of bytes read or as many as can be retrieved if is true + /// + public int Read(long index, byte[] buffer, int offset, int count, bool allowPartial) + { + if (allowPartial) + count = (int)BytesAvailable(index, count); // skips validation overhead + else + count = (int)ValidateRange(index, count); + + // This bypasses a lot of checks particularly when the input was a byte[] + // TODO: good spot to try Span + if (p_isStreamFinished && p_chunks.Count == 1) + { + Array.Copy(p_chunks[0], (int)index, buffer, 0, count); + return count; + } + + var remaining = count; // how many bytes are requested + var fromOffset = (int)index; + var toIndex = offset > 0 ? 
offset : 0; + while (remaining != 0) + { + var fromChunkIndex = fromOffset / p_chunkLength; // chunk integer key + var fromInnerIndex = fromOffset % p_chunkLength; // index inside the chunk to start reading + var length = Math.Min(remaining, p_chunkLength - fromInnerIndex); + var chunk = p_chunks[fromChunkIndex]; + Array.Copy(chunk, fromInnerIndex, buffer, toIndex, length); + remaining -= length; + fromOffset += length; + toIndex += length; + } + + return toIndex - offset; + } + + /// Returns an unsigned byte at an index in the sequence. + /// the 8 bit int value, between 0 and 255 + /// position within the data buffer to read byte + /// + public byte GetByte(long index) + { + ValidateRange(index, 1); + + return GetByteNoValidation(index); + } + + /// Returns an unsigned byte at an index in the sequence. The index is not validated before reading + /// the 8 bit int value, between 0 and 255 + /// position within the data buffer to read byte + /// + private byte GetByteNoValidation(long index) + { + // This bypasses a lot of checks particularly when the input was a byte[] + if (p_isStreamFinished && p_chunks.Count == 1) + return p_chunks[0][index]; + + var chunkIndex = index / p_chunkLength; + var innerIndex = index % p_chunkLength; + + if (p_chunks.ContainsKey(chunkIndex)) + return p_chunks[chunkIndex][innerIndex]; + else + throw new IOException("End of data reached."); + } + + /// Returns an unsigned 16-bit int calculated from the next two bytes of the sequence. 
+ /// the 16 bit int value, between 0x0000 and 0xFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public ushort GetUInt16(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 2); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return (ushort) + (GetByteNoValidation(index ) << 8 | + GetByteNoValidation(index + 1)); + } + // Intel ordering - LSB first + return (ushort) + (GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); + } + + /// Returns a signed 16-bit int calculated from two bytes of data (MSB, LSB). + /// the 16 bit int value, between 0x0000 and 0xFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public short GetInt16(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 2); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return (short) + (GetByteNoValidation(index ) << 8 | + GetByteNoValidation(index + 1)); + } + // Intel ordering - LSB first + return (short) + (GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); + } + + /// Get a 24-bit unsigned integer from the buffer, returning it as an int. + /// the unsigned 24-bit int value as a long, between 0x00000000 and 0x00FFFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public int GetInt24(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 3); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return + GetByteNoValidation(index ) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index + 2); + } + // Intel ordering - LSB first + return + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); + } + + /// Get a 32-bit unsigned integer from the buffer, returning it as a long. 
+ /// the unsigned 32-bit int value as a long, between 0x00000000 and 0xFFFFFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public uint GetUInt32(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 4); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return (uint) + (GetByteNoValidation(index ) << 24 | + GetByteNoValidation(index + 1) << 16 | + GetByteNoValidation(index + 2) << 8 | + GetByteNoValidation(index + 3)); + } + // Intel ordering - LSB first + return (uint) + (GetByteNoValidation(index + 3) << 24 | + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index )); + } + + /// Returns a signed 32-bit integer from four bytes of data. + /// the signed 32 bit int value, between 0x00000000 and 0xFFFFFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public int GetInt32(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 4); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return + GetByteNoValidation(index ) << 24 | + GetByteNoValidation(index + 1) << 16 | + GetByteNoValidation(index + 2) << 8 | + GetByteNoValidation(index + 3); + } + // Intel ordering - LSB first + return + GetByteNoValidation(index + 3) << 24 | + GetByteNoValidation(index + 2) << 16 | + GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); + } + + /// Get a signed 64-bit integer from the buffer. 
+ /// the 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public long GetInt64(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 8); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return + (long)GetByteNoValidation(index ) << 56 | + (long)GetByteNoValidation(index + 1) << 48 | + (long)GetByteNoValidation(index + 2) << 40 | + (long)GetByteNoValidation(index + 3) << 32 | + (long)GetByteNoValidation(index + 4) << 24 | + (long)GetByteNoValidation(index + 5) << 16 | + (long)GetByteNoValidation(index + 6) << 8 | + GetByteNoValidation(index + 7); + } + // Intel ordering - LSB first + return + (long)GetByteNoValidation(index + 7) << 56 | + (long)GetByteNoValidation(index + 6) << 48 | + (long)GetByteNoValidation(index + 5) << 40 | + (long)GetByteNoValidation(index + 4) << 32 | + (long)GetByteNoValidation(index + 3) << 24 | + (long)GetByteNoValidation(index + 2) << 16 | + (long)GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); + } + + /// Get an usigned 64-bit integer from the buffer. 
+ /// the unsigned 64 bit int value, between 0x0000000000000000 and 0xFFFFFFFFFFFFFFFF + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public ulong GetUInt64(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 8); + + if (isMotorolaByteOrder) + { + // Motorola - MSB first + return + (ulong)GetByteNoValidation(index ) << 56 | + (ulong)GetByteNoValidation(index + 1) << 48 | + (ulong)GetByteNoValidation(index + 2) << 40 | + (ulong)GetByteNoValidation(index + 3) << 32 | + (ulong)GetByteNoValidation(index + 4) << 24 | + (ulong)GetByteNoValidation(index + 5) << 16 | + (ulong)GetByteNoValidation(index + 6) << 8 | + GetByteNoValidation(index + 7); + } + // Intel ordering - LSB first + return + (ulong)GetByteNoValidation(index + 7) << 56 | + (ulong)GetByteNoValidation(index + 6) << 48 | + (ulong)GetByteNoValidation(index + 5) << 40 | + (ulong)GetByteNoValidation(index + 4) << 32 | + (ulong)GetByteNoValidation(index + 3) << 24 | + (ulong)GetByteNoValidation(index + 2) << 16 | + (ulong)GetByteNoValidation(index + 1) << 8 | + GetByteNoValidation(index ); + } + + /// Gets a s15.16 fixed point float from the buffer. + /// + /// Gets a s15.16 fixed point float from the buffer. + /// + /// This particular fixed point encoding has one sign bit, 15 numerator bits and 16 denominator bits. 
+ /// + /// the floating point value + /// position within the data buffer to read first byte + /// byte order for returning the result + /// + public float GetS15Fixed16(long index, bool isMotorolaByteOrder) + { + ValidateRange(index, 4); + if (isMotorolaByteOrder) + { + float res = GetByteNoValidation(index) << 8 | GetByteNoValidation(index + 1); + var d = GetByteNoValidation(index + 2) << 8 | GetByteNoValidation(index + 3); + return (float)(res + d / 65536.0); + } + else + { + // this particular branch is untested + var d = GetByteNoValidation(index + 1) << 8 | GetByteNoValidation(index); + float res = GetByteNoValidation(index + 3) << 8 | GetByteNoValidation(index + 2); + return (float)(res + d / 65536.0); + } + } + + /// Seeks to an index in the sequence. + /// + /// Seeks to an index in the sequence. If the sequence can't satisfy the request, exceptions are thrown. + /// + /// position within the data buffer to seek to + /// + public void Seek(long index) + { + ValidateRange((index == 0) ? 0 : (index - 1), 1); + } + + /// + /// Ensures that the buffered bytes extend to cover the specified index. If not, an attempt is made + /// to read to that point. + /// + /// The number of bytes available out of the number of bytes requested + /// + /// If the stream ends before the point is reached, a is raised. 
+ /// + /// the index from which the required bytes start + /// the number of bytes which are required + /// negative index, less than 0 bytes, or too many bytes are requested + public long ValidateRange(long index, long bytesRequested) + { + long available = BytesAvailable(index, bytesRequested); + if (available != bytesRequested) + { + if (index < 0) + throw new BufferBoundsException($"Attempt to read from buffer using a negative index ({index})"); + if (bytesRequested < 0) + throw new BufferBoundsException("Number of requested bytes must be zero or greater"); + if (index + bytesRequested - 1 > int.MaxValue) + throw new BufferBoundsException($"Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: {index}, requested count: {bytesRequested})"); + if (index + bytesRequested >= p_streamLength) + throw new BufferBoundsException(index, bytesRequested, p_streamLength); + + // TODO test that can continue using an instance of this type after this exception + throw new BufferBoundsException(index, bytesRequested, p_streamLength); + } + + return available; + } + + private long BytesAvailable(long index, long bytesRequested) + { + if (index < 0 || bytesRequested < 0) + return 0; + + // if there's only one chunk, there's no need to calculate anything. + // This bypasses a lot of checks particularly when the input was a byte[] + if (p_isStreamFinished && p_chunks.Count == 1) + { + if ((index + bytesRequested) < p_streamLength) + return bytesRequested; + else if (index > p_streamLength) + return 0; + else + return p_streamLength - index; + } + + + var endIndex = index + bytesRequested - 1; + if (endIndex < 0) endIndex = 0; + + // Maybe don't check this? 
+ if (endIndex > int.MaxValue) + return 0; + + // zero-based + long chunkstart = index / p_chunkLength; + long chunkend = ((index + bytesRequested) / p_chunkLength) + 1; + + + if (!p_chunks.ContainsKey(chunkstart)) + { + if (!CanSeek) + chunkstart = p_chunks.Count == 0 ? 0 : p_chunks.Keys.Max() + 1; + } + + for (var i = chunkstart; i < chunkend; i++) + { + if (!p_chunks.ContainsKey(i)) + { + p_isStreamFinished = false; + + // chunkstart can be anywhere. Try to seek + if (CanSeek) + p_inputStream.Seek(i * p_chunkLength, SeekOrigin.Begin); + + byte[] chunk = new byte[p_chunkLength]; + + var totalBytesRead = 0; + while (!p_isStreamFinished && totalBytesRead != p_chunkLength) + { + var bytesRead = p_inputStream.Read(chunk, totalBytesRead, p_chunkLength - totalBytesRead); + + if (bytesRead == 0) + { + // the stream has ended, which may be ok + p_isStreamFinished = true; + p_streamLength = i * p_chunkLength + totalBytesRead; + + // check we have enough bytes for the requested index + if (endIndex >= p_streamLength) + { +#if DEBUG + TotalBytesRead += totalBytesRead; +#endif + p_chunks.Add(i, chunk); + return (index + bytesRequested) <= p_streamLength ? bytesRequested : p_streamLength - index; + } + } + else + { + totalBytesRead += bytesRead; + } + } + +#if DEBUG + TotalBytesRead += totalBytesRead; +#endif + p_chunks.Add(i, chunk); + } + } + + if (p_isStreamFinished) + return (index + bytesRequested) <= p_streamLength ? bytesRequested : 0; + else + return bytesRequested; + } + +#if DEBUG + /// + /// Records the total bytes buffered + /// + public long TotalBytesRead { get; private set; } = 0; +#endif + + public byte[] ToArray(long index, int count) + { + byte[] buffer; + // if this was a byte array and asking for the whole thing... 
+ if (p_isStreamFinished && + p_chunks.Count == 1 && + index == 0 && + count == Length) + { + buffer = p_chunks[0]; + } + else + { + buffer = new byte[count]; + Read(index, buffer, 0, count, false); + } + + return buffer; + } + } +} diff --git a/MetadataExtractor/IO/ReaderInfo.cs b/MetadataExtractor/IO/ReaderInfo.cs index 43200c013..f0b01b158 100644 --- a/MetadataExtractor/IO/ReaderInfo.cs +++ b/MetadataExtractor/IO/ReaderInfo.cs @@ -15,10 +15,10 @@ public class ReaderInfo // this flag is compared to index inputs and indicates sequential access private const int SequentialFlag = int.MinValue; - private RandomAccessStream p_ras; // = null; + private IRandomAccessStream p_ras; // = null; private long p_length = -1; - public ReaderInfo(RandomAccessStream parent, long startPosition = 0, long localPosition = 0, long length = -1, bool isMotorolaByteOrder = true) + public ReaderInfo(IRandomAccessStream parent, long startPosition = 0, long localPosition = 0, long length = -1, bool isMotorolaByteOrder = true) { p_ras = parent; StartPosition = startPosition; @@ -125,7 +125,7 @@ public bool TrySkip(long n) public int Read(byte[] buffer, int offset, int count) => Read(buffer, offset, SequentialFlag, count); /// Retrieves bytes, writing them into a caller-provided buffer. - /// Sequential access to the next byte is indicated by setting index to SequntialFlag + /// Sequential access to the next byte is indicated by setting index to SequentialFlag /// array to write bytes to. /// starting position within to write to. /// position within the data buffer to read byte. 
From f95f556142895540dc57fb991e44e63daec821cd Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sun, 24 May 2020 21:52:39 -0500 Subject: [PATCH 09/10] - change ValidateRange to return int instead of long - remove nullable designator from RandomAccessStream.p_inputStream (optimization) - convert long index values to int for benchmarks --- MetadataExtractor/IO/IRandomAccessStream.cs | 2 +- MetadataExtractor/IO/RandomAccessStream.cs | 49 ++++++++++--------- .../IO/RandomAccessStreamDictionary.cs | 30 +++++++----- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/MetadataExtractor/IO/IRandomAccessStream.cs b/MetadataExtractor/IO/IRandomAccessStream.cs index 3a722cf99..d8d0de590 100644 --- a/MetadataExtractor/IO/IRandomAccessStream.cs +++ b/MetadataExtractor/IO/IRandomAccessStream.cs @@ -29,6 +29,6 @@ public interface IRandomAccessStream void Seek(long index); byte[] ToArray(long index, int count); - long ValidateRange(long index, long bytesRequested); + int ValidateRange(long index, int bytesRequested); } } diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index 33bc9b59f..f22ae4f3c 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -3,8 +3,6 @@ using System; using System.Collections.Generic; using System.IO; -using System.Linq; -using JetBrains.Annotations; namespace MetadataExtractor.IO { @@ -20,7 +18,7 @@ public class RandomAccessStream : IRandomAccessStream { public const long UnknownLengthValue = long.MaxValue; - private Stream? p_inputStream; + private readonly Stream p_inputStream; private long p_streamLength = -1; //private readonly List rdrList = new List(); @@ -29,16 +27,11 @@ public class RandomAccessStream : IRandomAccessStream private const int DefaultChunkLength = 4 * 1024; private readonly int p_chunkLength; - // Set initial chunk capacity to a factor of 2 that most metadata reading will reasonably require. 
- // Although not always expensive, avoids some internal array copies if the capacity changes. - private List p_chunks = new List(64); + private List p_chunks = new List(); public RandomAccessStream(Stream stream, long streamLength = -1) { - if (stream == null) - throw new ArgumentNullException(nameof(stream)); - - p_inputStream = stream; + p_inputStream = stream ?? throw new ArgumentNullException(nameof(stream)); CanSeek = stream.CanSeek; if (streamLength == -1) @@ -181,10 +174,15 @@ private byte GetByteNoValidation(long index) if (p_isStreamFinished && p_chunks.Count == 1) return p_chunks[0][index]; - var chunkIndex = (int)(index / p_chunkLength); - var innerIndex = index % p_chunkLength; + // micro-optimization for benchmarks + var intIndex = (int)index; + + var chunkIndex = intIndex / p_chunkLength; + var innerIndex = intIndex % p_chunkLength; + + //var chunkIndex = (int)(index / p_chunkLength); + //var innerIndex = (int)(index % p_chunkLength); - //if (p_chunks.ContainsKey(chunkIndex)) if (p_chunks.Count - 1 >= chunkIndex) return p_chunks[chunkIndex][innerIndex]; else @@ -429,9 +427,9 @@ public void Seek(long index) /// the index from which the required bytes start /// the number of bytes which are required /// negative index, less than 0 bytes, or too many bytes are requested - public long ValidateRange(long index, long bytesRequested) + public int ValidateRange(long index, int bytesRequested) { - long available = BytesAvailable(index, bytesRequested); + var available = BytesAvailable(index, bytesRequested); if (available != bytesRequested) { if (index < 0) @@ -450,9 +448,9 @@ public long ValidateRange(long index, long bytesRequested) return available; } - private long BytesAvailable(long index, long bytesRequested) + private int BytesAvailable(long index, int bytesRequested) { - if (index < 0 || bytesRequested < 0) + if (index < 0L || bytesRequested < 0) return 0; // if there's only one chunk, there's no need to calculate anything. 
@@ -464,20 +462,25 @@ private long BytesAvailable(long index, long bytesRequested) else if (index > p_streamLength) return 0; else - return p_streamLength - index; + return (int)(p_streamLength - index); } - var endIndex = index + bytesRequested - 1; - if (endIndex < 0) endIndex = 0; + var endIndex = index + bytesRequested - 1L; + if (endIndex < 0L) endIndex = 0L; // Maybe don't check this? if (endIndex > int.MaxValue) return 0; + // micro-optimization for benchmarks + var intIndex = (int)index; + // zero-based - int chunkstart = (int)(index / p_chunkLength); - int chunkend = (int)( ((index + bytesRequested) / p_chunkLength) + 1 ); + int chunkstart = intIndex / p_chunkLength; + int chunkend = ((intIndex + bytesRequested) / p_chunkLength) + 1; + //int chunkstart = (int)(index / p_chunkLength); + //int chunkend = chunkstart + (bytesRequested / p_chunkLength) + 1; if (p_chunks.Count - 1 < chunkstart || p_chunks[chunkstart] == null) { @@ -524,7 +527,7 @@ private long BytesAvailable(long index, long bytesRequested) TotalBytesRead += totalBytesRead; #endif p_chunks[i] = chunk; - return (index + bytesRequested) <= p_streamLength ? bytesRequested : p_streamLength - index; + return (index + bytesRequested) <= p_streamLength ? bytesRequested : (int)(p_streamLength - index); } } else diff --git a/MetadataExtractor/IO/RandomAccessStreamDictionary.cs b/MetadataExtractor/IO/RandomAccessStreamDictionary.cs index ed81acb7d..0b09b4ddd 100644 --- a/MetadataExtractor/IO/RandomAccessStreamDictionary.cs +++ b/MetadataExtractor/IO/RandomAccessStreamDictionary.cs @@ -4,7 +4,6 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using JetBrains.Annotations; namespace MetadataExtractor.IO { @@ -20,7 +19,7 @@ public class RandomAccessStreamDictionary : IRandomAccessStream { public const long UnknownLengthValue = long.MaxValue; - private Stream? 
p_inputStream; + private readonly Stream p_inputStream; private long p_streamLength = -1; //private readonly List rdrList = new List(); @@ -28,6 +27,7 @@ public class RandomAccessStreamDictionary : IRandomAccessStream private const int DefaultChunkLength = 4 * 1024; private readonly int p_chunkLength; + public Dictionary p_chunks = new Dictionary(); public RandomAccessStreamDictionary(Stream stream, long streamLength = -1) @@ -178,8 +178,11 @@ private byte GetByteNoValidation(long index) if (p_isStreamFinished && p_chunks.Count == 1) return p_chunks[0][index]; - var chunkIndex = index / p_chunkLength; - var innerIndex = index % p_chunkLength; + // micro-optimization for benchmarks + var intIndex = (int)index; + + var chunkIndex = intIndex / p_chunkLength; + var innerIndex = intIndex % p_chunkLength; if (p_chunks.ContainsKey(chunkIndex)) return p_chunks[chunkIndex][innerIndex]; @@ -425,9 +428,9 @@ public void Seek(long index) /// the index from which the required bytes start /// the number of bytes which are required /// negative index, less than 0 bytes, or too many bytes are requested - public long ValidateRange(long index, long bytesRequested) + public int ValidateRange(long index, int bytesRequested) { - long available = BytesAvailable(index, bytesRequested); + var available = BytesAvailable(index, bytesRequested); if (available != bytesRequested) { if (index < 0) @@ -446,7 +449,7 @@ public long ValidateRange(long index, long bytesRequested) return available; } - private long BytesAvailable(long index, long bytesRequested) + private int BytesAvailable(long index, int bytesRequested) { if (index < 0 || bytesRequested < 0) return 0; @@ -460,7 +463,7 @@ private long BytesAvailable(long index, long bytesRequested) else if (index > p_streamLength) return 0; else - return p_streamLength - index; + return (int)(p_streamLength - index); } @@ -471,15 +474,18 @@ private long BytesAvailable(long index, long bytesRequested) if (endIndex > int.MaxValue) return 0; + // 
micro-optimization for benchmarks + var intIndex = (int)index; + // zero-based - long chunkstart = index / p_chunkLength; - long chunkend = ((index + bytesRequested) / p_chunkLength) + 1; + int chunkstart = intIndex / p_chunkLength; + int chunkend = ((intIndex + bytesRequested) / p_chunkLength) + 1; if (!p_chunks.ContainsKey(chunkstart)) { if (!CanSeek) - chunkstart = p_chunks.Count == 0 ? 0 : p_chunks.Keys.Max() + 1; + chunkstart = p_chunks.Count == 0 ? 0 : (int)p_chunks.Keys.Max() + 1; } for (var i = chunkstart; i < chunkend; i++) @@ -512,7 +518,7 @@ private long BytesAvailable(long index, long bytesRequested) TotalBytesRead += totalBytesRead; #endif p_chunks.Add(i, chunk); - return (index + bytesRequested) <= p_streamLength ? bytesRequested : p_streamLength - index; + return (index + bytesRequested) <= p_streamLength ? bytesRequested : (int)(p_streamLength - index); } } else From 03bcbc7085979f2ae3aebd4e675aeb6eeba1f2b8 Mon Sep 17 00:00:00 2001 From: Kevin Mott Date: Sun, 24 May 2020 23:11:31 -0500 Subject: [PATCH 10/10] - BenchmarkDotNet 0.10.12 -> 0.12.1 - leave support for long Stream indexes in RandomAccessStream (and take the mild read hit) - add RASBenchmark class --- .../MetadataExtractor.Benchmarks.csproj | 3 +- .../NonSeekableStream.cs | 51 +++++ MetadataExtractor.Benchmarks/Program.cs | 3 +- MetadataExtractor.Benchmarks/RASBenchmark.cs | 183 ++++++++++++++++++ MetadataExtractor/IO/RandomAccessStream.cs | 20 +- 5 files changed, 247 insertions(+), 13 deletions(-) create mode 100644 MetadataExtractor.Benchmarks/NonSeekableStream.cs create mode 100644 MetadataExtractor.Benchmarks/RASBenchmark.cs diff --git a/MetadataExtractor.Benchmarks/MetadataExtractor.Benchmarks.csproj b/MetadataExtractor.Benchmarks/MetadataExtractor.Benchmarks.csproj index f5a89b467..2196e2b47 100644 --- a/MetadataExtractor.Benchmarks/MetadataExtractor.Benchmarks.csproj +++ b/MetadataExtractor.Benchmarks/MetadataExtractor.Benchmarks.csproj @@ -10,8 +10,7 @@ - - + diff --git 
a/MetadataExtractor.Benchmarks/NonSeekableStream.cs b/MetadataExtractor.Benchmarks/NonSeekableStream.cs new file mode 100644 index 000000000..197f72818 --- /dev/null +++ b/MetadataExtractor.Benchmarks/NonSeekableStream.cs @@ -0,0 +1,51 @@ +// Copyright (c) Drew Noakes and contributors. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +using System; +using System.IO; + +namespace MetadataExtractor.Benchmarks +{ + internal class NonSeekableStream : Stream + { + Stream m_stream; + internal NonSeekableStream(Stream baseStream) + { + m_stream = baseStream; + } + + public override bool CanRead => m_stream.CanRead; + + public override bool CanSeek => false; + + public override bool CanWrite => m_stream.CanWrite; + + public override long Length => throw new NotSupportedException(); + + public override long Position { get => m_stream.Position; set => throw new NotSupportedException(); } + + public override void Flush() + { + m_stream.Flush(); + } + + public override int Read(byte[] buffer, int offset, int count) + { + return m_stream.Read(buffer, offset, count); + } + + public override long Seek(long offset, SeekOrigin origin) + { + throw new NotImplementedException(); + } + + public override void SetLength(long value) + { + throw new NotSupportedException(); + } + + public override void Write(byte[] buffer, int offset, int count) + { + m_stream.Write(buffer, offset, count); + } + } +} diff --git a/MetadataExtractor.Benchmarks/Program.cs b/MetadataExtractor.Benchmarks/Program.cs index 0624de47d..bd9878fca 100644 --- a/MetadataExtractor.Benchmarks/Program.cs +++ b/MetadataExtractor.Benchmarks/Program.cs @@ -17,7 +17,8 @@ namespace MetadataExtractor.Benchmarks { internal static class Program { - private static void Main() => BenchmarkRunner.Run(); + //private static void Main() => BenchmarkRunner.Run(); + private static void Main() => BenchmarkRunner.Run(); } /* diff --git 
a/MetadataExtractor.Benchmarks/RASBenchmark.cs b/MetadataExtractor.Benchmarks/RASBenchmark.cs new file mode 100644 index 000000000..b65e51c8f --- /dev/null +++ b/MetadataExtractor.Benchmarks/RASBenchmark.cs @@ -0,0 +1,183 @@ +using System; +using System.IO; + +using BenchmarkDotNet.Attributes; + +using MetadataExtractor.IO; + +namespace MetadataExtractor.Benchmarks +{ + [MemoryDiagnoser] + public class RASBenchmark + { + private readonly MemoryStream _stream; + + private readonly ReaderInfo _reader; + public RASBenchmark() + { + _stream = new MemoryStream(); + + // This is the largest JPEG file in this repository + using var fs = File.OpenRead("../../../../MetadataExtractor.Tests/Data/nikonMakernoteType2b.jpg"); + fs.CopyTo(_stream); + + _reader = new RandomAccessStream(_stream).CreateReader(); + } + + [Benchmark(Baseline = true)] + public void RASListBenchmark() + { + _stream.Position = 0; + + //var reader = new RandomAccessStream(_stream).CreateReader(); + RunReader(_reader); + } + + + /* + [Benchmark]] + public void RASDictionaryBenchmark() + { + _stream.Position = 0; + + var reader = new RandomAccessStreamDictionary(_stream).CreateReader(); + RunReader(reader); + } + + [Benchmark] + public void RASDictionaryNonseekableBenchmark() + { + _stream.Position = 0; + + var reader = new RandomAccessStreamDictionary(new NonSeekableStream(_stream)).CreateReader(); + RunReader(reader); + } + + [Benchmark] + public void RASListNonseekableBenchmark() + { + _stream.Position = 0; + + var reader = new RandomAccessStream(new NonSeekableStream(_stream)).CreateReader(); + RunReader(reader); + } + */ + + + [Benchmark] + public void IndexedCapturingReaderBenchmark() + { + _stream.Position = 0; + + var reader = new IndexedCapturingReader(_stream, 4096); + RunIndexedReader(reader); + } + + [Benchmark] + public void IndexedSeekingReaderBenchmark() + { + _stream.Position = 0; + + var reader = new IndexedSeekingReader(_stream); + RunIndexedReader(reader); + } + + private void 
RunReader(ReaderInfo reader) + { + int offset = 4 * 1024 + 10; // skip over at least one buffer, just because + + // Nothing mathematical intended here other than jumping around in the file + for (int i = 0; i < 10; i++) + { + var calcoffset2 = GetLongOffset(i, offset, 2); + var calcoffset3 = GetLongOffset(i, offset, 3); + + + reader.GetInt16(calcoffset2); + reader.GetInt16(calcoffset3); + + reader.GetInt24(calcoffset2); + reader.GetInt24(calcoffset3); + + reader.GetInt32(calcoffset2); + reader.GetInt32(calcoffset3); + + reader.GetBytes(calcoffset2, 128); + reader.GetBytes(calcoffset3, 128); + + reader.GetInt64(calcoffset2); + reader.GetInt64(calcoffset3); + + for (int j = 0; j < 1000; j++) + { + reader.GetByte(calcoffset2 + j); + reader.GetByte(calcoffset3 + j); + } + + reader.GetUInt16(calcoffset2); + reader.GetUInt16(calcoffset3); + + reader.GetUInt32(calcoffset2); + reader.GetUInt32(calcoffset3); + + + //reader.GetUInt64(calcoffset2); + //reader.GetUInt64(calcoffset3); + } + } + + private void RunIndexedReader(IndexedReader reader) + { + int offset = 4 * 1024 + 10; // skip over at least one buffer, just because + + // Nothing mathematical intended here other than jumping around in the file + for (int i = 0; i < 10; i++) + { + var calcoffset2 = GetIntOffset(i, offset, 2); + var calcoffset3 = GetIntOffset(i, offset, 3); + + + reader.GetInt16(calcoffset2); + reader.GetInt16(calcoffset3); + + reader.GetInt24(calcoffset2); + reader.GetInt24(calcoffset3); + + reader.GetInt32(calcoffset2); + reader.GetInt32(calcoffset3); + + reader.GetBytes(calcoffset2, 128); + reader.GetBytes(calcoffset3, 128); + + reader.GetInt64(calcoffset2); + reader.GetInt64(calcoffset3); + + for (int j = 0; j < 1000; j++) + { + reader.GetByte(calcoffset2 + j); + reader.GetByte(calcoffset3 + j); + } + + reader.GetUInt16(calcoffset2); + reader.GetUInt16(calcoffset3); + + reader.GetUInt32(calcoffset2); + reader.GetUInt32(calcoffset3); + + + //reader.GetUInt64(calcoffset2); + 
//reader.GetUInt64(calcoffset3); + } + } + + private static long GetLongOffset(int i, long offset, int power) + { + return (long)(i * offset + Math.Pow(power, i)); + } + + private static int GetIntOffset(int i, long offset, int power) + { + return (int)(i * offset + Math.Pow(power, i)); + } + } +} diff --git a/MetadataExtractor/IO/RandomAccessStream.cs b/MetadataExtractor/IO/RandomAccessStream.cs index f22ae4f3c..8dabe474d 100644 --- a/MetadataExtractor/IO/RandomAccessStream.cs +++ b/MetadataExtractor/IO/RandomAccessStream.cs @@ -175,13 +175,13 @@ private byte GetByteNoValidation(long index) return p_chunks[0][index]; // micro-optimization for benchmarks - var intIndex = (int)index; + //var intIndex = (int)index; - var chunkIndex = intIndex / p_chunkLength; - var innerIndex = intIndex % p_chunkLength; + //var chunkIndex = intIndex / p_chunkLength; + //var innerIndex = intIndex % p_chunkLength; - //var chunkIndex = (int)(index / p_chunkLength); - //var innerIndex = (int)(index % p_chunkLength); + var chunkIndex = (int)(index / p_chunkLength); + var innerIndex = (int)(index % p_chunkLength); if (p_chunks.Count - 1 >= chunkIndex) return p_chunks[chunkIndex][innerIndex]; @@ -474,13 +474,13 @@ private int BytesAvailable(long index, int bytesRequested) return 0; // micro-optimization for benchmarks - var intIndex = (int)index; + //var intIndex = (int)index; // zero-based - int chunkstart = intIndex / p_chunkLength; - int chunkend = ((intIndex + bytesRequested) / p_chunkLength) + 1; - //int chunkstart = (int)(index / p_chunkLength); - //int chunkend = chunkstart + (bytesRequested / p_chunkLength) + 1; + //int chunkstart = intIndex / p_chunkLength; + //int chunkend = ((intIndex + bytesRequested) / p_chunkLength) + 1; + int chunkstart = (int)(index / p_chunkLength); + int chunkend = chunkstart + (bytesRequested / p_chunkLength) + 1; if (p_chunks.Count - 1 < chunkstart || p_chunks[chunkstart] == null) {