diff --git a/file.go b/file.go
index b4b23cc..6a460d5 100644
--- a/file.go
+++ b/file.go
@@ -1,9 +1,13 @@
 package jfif
 
 import (
+	"bytes"
 	"errors"
 	"io"
+	"math"
 	"os"
+
+	xio "neilpa.me/go-x/io"
 )
 
 var (
@@ -13,24 +17,88 @@ var (
 	// ErrOversizePayload means there's not enough space to update
 	// the segment data in-place.
 	ErrOversizePayload = errors.New("Oversize payload")
+
+	// ErrOversizeSegment means segment data was too large to append.
+	ErrOversizeSegment = errors.New("Oversize segment")
 )
 
+// Patch is used to insert new JFIF segments just before the SOS segment.
+type Patch struct { // TODO better name or use segments directly ignoring offset
+	// Marker is the type of segment
+	Marker Marker
+	// Data are the segment bytes that will be appended. Max size is 0xFFFF-2
+	Data []byte
+}
+
+// Append new JFIF segments to the file at path.
+//
+// Notes:
+// - Under the hood this creates a temp copy of the original file so
+// that it can safely insert the new segments in the middle of the
+// file. This avoids the potential for corrupting data if an error is
+// hit in the middle of the update. At the end the original path
+// is replaced with a single os.Rename operation.
+//
+// TODO: Higher-level version of this that could be smarter for XMP data
+// TODO: Return the updated pointer data?
+func Append(path string, patches ...Patch) error {
+	// Prep the buffer for writing
+	var buf bytes.Buffer
+	for _, p := range patches {
+		seg := Segment{}
+		seg.Marker = p.Marker
+		seg.Offset = -1
+
+		l := len(p.Data) + 2
+		if l > math.MaxUint16 {
+			return ErrOversizeSegment
+		}
+		seg.Length = uint16(l) // TODO not right for oversize segments
+		// TODO: what about an embedded Data where the first two bytes are the length
+		seg.Data = p.Data
+
+		// TODO Would be nice to avoid yet-another-copy of data and plumb
+		// through a custom reader that calculated the size
+		if err := EncodeSegment(&buf, seg); err != nil {
+			return err
+		}
+	}
+
+	f, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	ptrs, err := ScanSegments(f)
+	if err != nil {
+		return err
+	}
+	last := ptrs[len(ptrs)-1]
+
+	return xio.SpliceFile(f, buf.Bytes(), last.Offset)
+}
+
 // File is used to perform in-place updates to JFIF segments to a backing
 // file on-disk.
 type File struct {
 	// f is the underlying file on disk.
 	f *os.File
 
-	// refs are the intially scanned segment pointers.
-	refs []SegmentP
+	// refs are the minimally scanned segment pointers.
+	refs []Pointer
 }
 
 // Edit opens and scans segments from a JFIF file. This should be
 // used to replace segments in-place without having to re-write the
 // full file. Note that this will fail on attempts to write segments
-// that would expend beyond the current bounds. Otherise, "short-segments"
-// retain the desired size but there are 0xFF fill bytes used for padding
-// until the next segment.
+// that would expand beyond the current bounds.
+//
+// TODO: Otherwise, "short-segments" retain the desired size but there
+// are 0xFF fill bytes used for padding until the next segment.
+//
+// TODO: This may not be all that valuable versus doing a proper splice
+// in a copied version of the file and replacing over top of it. This
+// can lead to file corruption if not careful...
 func Edit(path string) (*File, error) {
 	f, err := os.OpenFile(path, os.O_RDWR, 0)
 	if err != nil {
@@ -57,8 +125,8 @@ func (f *File) Close() (err error) {
 }
 
 // Query finds existing segments that matches the given marker
-func (f *File) Query(m Marker) ([]SegmentP, error) {
-	refs := make([]SegmentP, 0)
+func (f *File) Query(m Marker) ([]Pointer, error) {
+	refs := make([]Pointer, 0)
 	for _, r := range f.refs {
 		if r.Marker == m {
 			refs = append(refs, r)
@@ -67,10 +135,68 @@ func (f *File) Query(m Marker) ([]SegmentP, error) {
 	return refs, nil
 }
 
+// Add new segments at the end of the JFIF header, before
+// the SOS marker.
+//
+// Notes:
+// * This is an expensive operation. It requires shifting all of
+// the image bytes on disk to make space.
+// * Offset and Length are ignored on the incoming segments. They
+// are simply calculated from the provided Data in order.
+//
+// TODO What about a multi-segment that could have splitting/chunking behavior?
+//
+// TODO: This interface doesn't quite work since we need to close the file
+// descriptor and do the move...
+func (f *File) Add(segs ...Segment) error {
+	// TODO Probably want an xio primitive to insert-in-middle operation
+	if len(f.refs) < 3 {
+		return errors.New("todo: Not enough file segments to start")
+	}
+
+	last := f.refs[len(f.refs)-1]
+
+	// Calculate how much extra space we need.
+	size := int64(0)
+	insert := last.Offset
+	for i, s := range segs {
+		s.Offset = insert + size
+		if len(s.Data) > 0 {
+			l := len(s.Data) + 2
+			if l > 0xffff { // TODO double-check actual max segment size
+				return ErrOversizeSegment
+			}
+			s.Length = uint16(l)
+		} else {
+			s.Length = 0
+		}
+		size += s.DiskSize()
+		segs[i] = s
+	}
+
+	// TODO Apply bookkeeping for the `last` SOS marker offset
+
+	return nil
+}
+
+// Sync writes any updated contents of the segment back to disk.
+//
+// TODO: Note that at some point this may "do the right thing" when
+// further downstream allocations need to happen.
+func (f *File) Sync(s Segment) error {
+	return f.Update(s.Pointer, s.Data)
+}
+
 // Update replaces the payload for the given segment ref. Returns an
 // error if it's too large or doesn't match a known segment in this
 // file.
-func (f *File) Update(r SegmentP, buf []byte) error {
+//
+// Note:
+// - This updates the file in-place so all of the general warnings
+// apply w.r.t. potential file corruption. This should be limited
+// to files that have already been copied and are intended to
+// be edited directly.
+func (f *File) Update(r Pointer, buf []byte) error {
 	var i int
 	for ; i < len(f.refs); i++ {
 		if f.refs[i] == r {
@@ -92,14 +218,15 @@ func (f *File) Update(r SegmentP, buf []byte) error {
 	}
 
 	// Encode the updated segment to disk
-	// TODO Need to make sure to update our SegmentP copy
+	// TODO Need to make sure to update our Pointer copy
 	_, err := f.f.Seek(r.Offset, io.SeekStart)
 	if err != nil {
 		return err
 	}
 	seg := Segment{
 		// TODO Can I avoid all the "+/- 2's" everywhere
-		SegmentP{r.Offset, r.Marker, uint16(len(buf) + 2)}, buf,
+		Pointer{r.Offset, r.Marker, uint16(len(buf) + 2)},
+		buf,
 	}
 	err = EncodeSegment(f.f, seg)
 	if err != nil {
@@ -110,6 +237,6 @@ func (f *File) Update(r SegmentP, buf []byte) error {
 	}
 
 	// Update our in-memory location
-	f.refs[i] = seg.SegmentP
+	f.refs[i] = seg.Pointer
 	return nil
 }
diff --git a/file_test.go b/file_test.go
index c966bb0..e624088 100644
--- a/file_test.go
+++ b/file_test.go
@@ -2,6 +2,7 @@ package jfif
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
@@ -15,14 +16,14 @@ func TestFileQuery(t *testing.T) { // TODO
 func TestFileUpdate(t *testing.T) {
 	var tests = []struct {
 		name string
-		ref  SegmentP
+		ref  Pointer
 		buf  []byte
 
 		golden string
 	}{
 		{
 			"min",
-			SegmentP{Offset: 2, Marker: DQT, Length: 67},
+			Pointer{Offset: 2, Marker: DQT, Length: 67},
 			[]byte{0, // Pq and Tq bytes
 				// Arbitrary DQT table for testing
 				16, 11, 10, 16, 24, 40, 51, 61,
@@ -86,3 +87,44 @@ func TestFileUpdate(t *testing.T) {
 		})
 	}
 }
+
+func TestFileAdd(t *testing.T) {
+
+	var tests = []struct {
+		name string
+		ref  Pointer
+		buf  []byte
+
+		golden string
+	}{
+		{
+			"min",
+			Pointer{Offset: 2, Marker: DQT, Length: 67},
+			[]byte{0, // Pq and Tq bytes
+				// Arbitrary DQT table for testing
+				16, 11, 10, 16, 24, 40, 51, 61,
+				12, 12, 14, 19, 26, 58, 60, 55,
+				14, 13, 16, 24, 40, 57, 69, 56,
+				14, 17, 22, 29, 51, 87, 80, 62,
+				18, 22, 37, 56, 68, 109, 103, 77,
+				24, 35, 55, 64, 81, 104, 113, 92,
+				49, 64, 78, 87, 103, 121, 120, 101,
+				72, 92, 95, 98, 112, 100, 103, 99,
+			},
+			"min.dqt.jpg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			temp, err := ioutil.TempFile(os.TempDir(), "jfif-test-add-"+tt.name)
+			if err != nil {
+				t.Fatal(err)
+			}
+			path := temp.Name()
+			//defer os.Remove(path)
+			defer temp.Close()
+
+			fmt.Println("TODO: TestFileAdd:", path)
+		})
+	}
+}
diff --git a/go.mod b/go.mod
index 588b06b..1fcf8a6 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,4 @@ module neilpa.me/go-jfif
 
 go 1.14
 
-require neilpa.me/go-x v0.2.0
+require neilpa.me/go-x v0.2.1-0.20200507232743-5243b9624d5e
diff --git a/go.sum b/go.sum
index 43a7a33..b4772de 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
 neilpa.me/go-x v0.2.0 h1:GbLRDtAZ9MgVrwrIe3jWnHF2W40LCFA9Ng/aDbd9GVs=
 neilpa.me/go-x v0.2.0/go.mod h1:aIemU+pQYLLV3dygXotHKF7SantXe5HzZR6VIjzY/4g=
+neilpa.me/go-x v0.2.1-0.20200507232743-5243b9624d5e h1:+O171A50t9HKAT8lpmTeTVscpTNYUvwDlPvOAOpgq/E=
+neilpa.me/go-x v0.2.1-0.20200507232743-5243b9624d5e/go.mod h1:aIemU+pQYLLV3dygXotHKF7SantXe5HzZR6VIjzY/4g=
diff --git a/jfif.go b/jfif.go
index b4a407d..9bf7161 100644
--- a/jfif.go
+++ b/jfif.go
@@ -21,6 +21,10 @@ var (
 	// ErrShortSegment means a segment length was < 2 bytes.
 	ErrShortSegment = errors.New("Short segment")
 
+	// ErrPointerLoadMismatch means the pointer marker or length didn't
+	// match the expected segment header.
+	ErrPointerLoadMismatch = errors.New("Pointer load mismatch")
+
 	// ErrWrongMarker means a segment method was called where the marker
 	// didn't match an expected type.
 	ErrWrongMarker = errors.New("Wrong marker")
@@ -31,10 +35,13 @@ var (
 	// ErrUnseekableReader means a Seek was attempted from the start or end
 	// of an io.Reader that only supports streaming.
 	ErrUnseekableReader = errors.New("Unseekable reader")
+
+	// ByteOrder is the endianness of JFIF integers (big-endian).
+	ByteOrder = binary.BigEndian
 )
 
-// SegmentP represents a "pointer" to a distinct region of a JPEG file.
-type SegmentP struct {
+// Pointer to a distinct region/segment of a JPEG file.
+type Pointer struct {
 	// Offset is the address of the 0xff byte that started this segment that
 	// is then followed by the marker.
 	Offset int64
@@ -45,10 +52,52 @@ type SegmentP struct {
 	Length uint16
 }
 
+// DiskSize is the number of bytes required to encode the segment in a file
+func (p Pointer) DiskSize() int64 {
+	// 0xff, marker, length, [data]...
+	return 2 + int64(p.Length)
+}
+
+// LoadSegment creates a segment by reading data at the pointer. It
+// validates that the marker and length match _before_ reading data.
+//
+// TODO Note the semantics of ReaderAt blocking for the total bytes?
+func (p Pointer) LoadSegment(r io.ReaderAt) (Segment, error) {
+	s := Segment{p, nil}
+	if p.Length == 1 {
+		return s, ErrShortSegment
+	}
+
+	buf := make([]byte, 2)
+	_, err := r.ReadAt(buf, p.Offset)
+	if err != nil {
+		return s, err
+	}
+	if buf[0] != 0xFF || buf[1] != byte(p.Marker) {
+		return s, ErrPointerLoadMismatch // TODO embed the marker difference
+	}
+
+	if p.Length > 0 {
+		if _, err = r.ReadAt(buf, p.Offset+2); err != nil {
+			return s, err
+		}
+		if buf[0] != byte(p.Length>>8) || buf[1] != byte(p.Length) {
+			return s, ErrPointerLoadMismatch // TODO embed the length difference
+		}
+
+		s.Data = make([]byte, int(p.Length)-2) // TODO s.DataLength = p.Length-2
+		if _, err = r.ReadAt(s.Data, p.Offset+4); err != nil {
+			return s, err
+		}
+	}
+
+	return s, nil
+}
+
 // Segment represents a distinct region of a JPEG file.
 type Segment struct {
-	// SegmentP embeds the positional information of the segment.
-	SegmentP
+	// Pointer embeds the positional information of the segment.
+	Pointer
 	// Data is the raw bytes of a segment, excluding the initial 4 bytes
 	// (e.g. 0xff, marker, and 2-byte length). For segments lacking a
 	// length it will be nil.
@@ -73,9 +122,11 @@ func (s Segment) AppPayload() (string, []byte, error) {
 
 // ScanSegments finds segment markers until the start of stream (SOS)
 // marker is read, or an error is encountered, including EOF.
-func ScanSegments(r io.Reader) ([]SegmentP, error) {
-	var segs []SegmentP
-	err := readSegments(r, func(r io.ReadSeeker, sp SegmentP) error {
+//
+// TODO Rename to be ScanPointers (or ScanLocs and rename Pointer => Loc)
+func ScanSegments(r io.Reader) ([]Pointer, error) {
+	var segs []Pointer
+	err := readSegments(r, func(r io.ReadSeeker, sp Pointer) error {
 		if sp.Length > 0 {
 			// Simply skip past the length of the segment
 			if _, err := r.Seek(int64(sp.Length)-2, io.SeekCurrent); err != nil {
@@ -94,8 +145,8 @@ func ScanSegments(r io.Reader) ([]SegmentP, error) {
 // data.
 func DecodeSegments(r io.Reader) ([]Segment, error) {
 	var segs []Segment
-	err := readSegments(r, func(r io.ReadSeeker, sp SegmentP) error {
-		s := Segment{SegmentP: sp}
+	err := readSegments(r, func(r io.ReadSeeker, sp Pointer) error {
+		s := Segment{Pointer: sp}
 		if s.Length > 0 {
 			// Length includes the 2 bytes for itself
 			s.Data = make([]byte, int(s.Length)-2)
@@ -114,7 +165,7 @@ func DecodeSegments(r io.Reader) ([]Segment, error) {
 // the payload data. This function must advance the reader to the end of the
 // given segment for the next read.
 // TODO Could forego that requirement given the use of xio.TrackingReader
-func readSegments(r io.Reader, fn func(io.ReadSeeker, SegmentP) error) error {
+func readSegments(r io.Reader, fn func(io.ReadSeeker, Pointer) error) error {
 	tr, ok := r.(*xio.TrackingReader)
 	if !ok {
 		tr = xio.NewTrackingReader(r)
@@ -130,7 +181,7 @@ func readSegments(r io.Reader, fn func(io.ReadSeeker, SegmentP) error) error {
 		return ErrInvalidJPEG
 	}
 
-	err = fn(tr, SegmentP{Marker: Marker(magic[1])})
+	err = fn(tr, Pointer{Marker: Marker(magic[1])})
 	if err != nil {
 		return err
 	}
@@ -167,16 +218,16 @@ func readSegments(r io.Reader, fn func(io.ReadSeeker, SegmentP) error) error {
 		}
 
 		// Set the offset to the 0xff byte preceding the marker
-		s := SegmentP{Marker: Marker(marker), Offset: tr.Offset() - 2}
+		p := Pointer{Marker: Marker(marker), Offset: tr.Offset() - 2}
 
 		// TODO Are there expected zero-length markers that can be skipped
-		if err = binary.Read(r, binary.BigEndian, &s.Length); err != nil {
+		if err = binary.Read(r, binary.BigEndian, &p.Length); err != nil {
 			return err
 		}
-		if s.Length < 2 {
+		if p.Length < 2 {
 			return ErrShortSegment
 		}
-		if err = fn(tr, s); err != nil {
+		if err = fn(tr, p); err != nil {
 			return err
 		}
 		if marker == byte(SOS) {
@@ -187,6 +238,24 @@ func readSegments(r io.Reader, fn func(io.ReadSeeker, SegmentP) error) error {
 	return nil
 }
 
+//// WriteTo TODO
+//func (seg Segment) WriteTo(w io.Writer) int64, error {
+//	// Everything else needs the 0xff, marker and potential payload
+//	n, err := w.Write([]byte{0xff, byte(seg.Marker)})
+//	if err != nil || seg.Data == nil {
+//		return n, err
+//	}
+//
+//	// Payload size includes its own 2-bytes
+//	// TODO Validate the length of Data here?
+//	err = binary.Write(w, binary.BigEndian, uint16(len(seg.Data))+2)
+//	if err != nil {
+//		return err
+//	}
+//	_, err = w.Write(seg.Data)
+//	return err
+//}
+
 // EncodeSegment writes the given segment.
 func EncodeSegment(w io.Writer, seg Segment) error {
 	// Everything else needs the 0xff, marker and potential payload
@@ -195,7 +264,7 @@ func EncodeSegment(w io.Writer, seg Segment) error {
 		return err
 	}
 	// Payload size includes it's own 2-bytes
-	// TODO Validate the lenght of Data here?
+	// TODO Validate the length of Data here?
 	err = binary.Write(w, binary.BigEndian, uint16(len(seg.Data))+2)
 	if err != nil {
 		return err
 	}
@@ -204,7 +273,9 @@
 	return err
 }
 
-func readByte(r io.Reader) (b byte, err error) {
+
+
+func readByte(r io.Reader) (b byte, err error) { // TODO This is probably slow
 	err = binary.Read(r, binary.BigEndian, &b)
 	return
 }
diff --git a/jfif_test.go b/jfif_test.go
index af3467b..c224017 100644
--- a/jfif_test.go
+++ b/jfif_test.go
@@ -8,11 +8,11 @@ import (
 
 var tests = []struct {
 	path string
-	refs []SegmentP
+	refs []Pointer
 }{
 	{
 		path: "min.jpg",
-		refs: []SegmentP{
+		refs: []Pointer{
 			{0, SOI, 0},
 			{2, DQT, 67},
 			{71, SOF9, 11},
@@ -22,7 +22,7 @@ var tests = []struct {
 	},
 	{
 		path: "lego.jpg",
-		refs: []SegmentP{
+		refs: []Pointer{
 			{0, SOI, 0},
 			{2, APP0, 16},
 			{20, APP1, 11310},
@@ -68,9 +68,9 @@ func TestDecodeSegments(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	refs := make([]SegmentP, len(segs))
+	refs := make([]Pointer, len(segs))
 	for i, s := range segs {
-		refs[i] = s.SegmentP
+		refs[i] = s.Pointer
 		if s.Length > 0 && len(s.Data)+2 != int(s.Length) {
 			t.Errorf("data %d: got %d, want %d", i, len(s.Data), s.Length-2)
 		}
@@ -83,7 +83,10 @@ func TestDecodeSegments(t *testing.T) {
 func TestEncodeSegment(t *testing.T) { // TODO
 }
 
-func verifySegments(t *testing.T, got, want []SegmentP) {
+func TestLoadSegment(t *testing.T) { // TODO
+}
+
+func verifySegments(t *testing.T, got, want []Pointer) {
 	if len(got) != len(want) {
 		t.Errorf("len: got %d, want %d", len(got), len(want))
 		return
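For reference, the new Append function added in file.go is the simplest entry point introduced by this change: it encodes each Patch and splices the bytes in just before the SOS segment. A minimal usage sketch follows; it is not part of the diff, and the file name and payload are made-up placeholders for illustration.

```go
package main

import (
	"log"

	jfif "neilpa.me/go-jfif"
)

func main() {
	// Hypothetical example: splice an extra APP1 segment into a JPEG.
	// "photo.jpg" and the payload bytes are illustrative only.
	payload := []byte("example metadata payload")
	err := jfif.Append("photo.jpg", jfif.Patch{
		Marker: jfif.APP1,
		Data:   payload,
	})
	if err != nil {
		log.Fatal(err)
	}
}
```

Per the Append doc comment, the update goes through a temp copy and a final os.Rename, so the file at path is replaced atomically rather than rewritten in place.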