From 4e84d9ea5b6bc0e5c5c1fc830c7ab337e78e13f1 Mon Sep 17 00:00:00 2001
From: Mattias Wadman
Date: Mon, 26 Feb 2024 11:49:32 +0100
Subject: [PATCH] Sync difftest from fq

Replaces difflib with Go's internal diff
---
 README.md                     |   2 +-
 go.mod                        |   4 +-
 go.sum                        |   2 -
 internal/difftest/difftest.go | 305 ++++++++++++++++++++++++++++++----
 4 files changed, 276 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 7c5cff4..38cfc80 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ Color themes are the ones from https://github.com/mbadolato/iTerm2-Color-Schemes
 
 ## Install and build
 
-To build you will need at least go 1.16 or later.
+To build you will need at least go 1.18 or later.
 
 Install latest master and copy it to `/usr/local/bin`:
 ```sh
diff --git a/go.mod b/go.mod
index 77d40f9..ea28895 100644
--- a/go.mod
+++ b/go.mod
@@ -1,5 +1,3 @@
 module github.com/wader/ansisvg
 
-go 1.17
-
-require github.com/pmezard/go-difflib v1.0.0
+go 1.18
diff --git a/go.sum b/go.sum
index 5d60ca7..e69de29 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +0,0 @@
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
diff --git a/internal/difftest/difftest.go b/internal/difftest/difftest.go
index 09d0430..86a325f 100644
--- a/internal/difftest/difftest.go
+++ b/internal/difftest/difftest.go
@@ -1,9 +1,8 @@
-// Package difftest implement test based on serialized string output
+// Package difftest implements tests based on diffing serialized string output
 //
-// User provides a function that gets a input string. It returns a output string
-// based on the input somehow and a output path to file with content to compare it
-// to or to write to if in write mode.
-// If there is a difference test will fail with a diff.
+// The user provides a function that gets an input path and an input string and returns
+// an output path and an output string. The content of the output path is compared to the
+// output string, and if they differ the test fails with a diff.
 //
 // Test inputs are read from files matching Pattern from Path.
 //
@@ -12,48 +11,29 @@
 package difftest
 
 import (
+	"bytes"
+	"fmt"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"testing"
-
-	"github.com/pmezard/go-difflib/difflib"
 )
 
 type tf interface {
 	Helper()
-	Errorf(format string, args ...interface{})
-	Fatalf(format string, args ...interface{})
+	Errorf(format string, args ...any)
+	Fatalf(format string, args ...any)
 }
 
 const green = "\x1b[32m"
 const red = "\x1b[31m"
 const reset = "\x1b[0m"
 
-type Fn func(t *testing.T, path string, input string) (string, string, error)
-
-type Options struct {
-	Path        string
-	Pattern     string
-	ColorDiff   bool
-	WriteOutput bool
-	Fn          Fn
-}
-
-func testDeepEqual(t tf, color bool, printfFn func(format string, args ...interface{}), expected string, actual string) {
+func testDeepEqual(t tf, color bool, printfFn func(format string, args ...any), expected string, actual string) {
 	t.Helper()
 
-	diff := difflib.UnifiedDiff{
-		A:        difflib.SplitLines(expected),
-		B:        difflib.SplitLines(actual),
-		FromFile: "expected",
-		ToFile:   "actual",
-		Context:  3,
-	}
-	uDiff, err := difflib.GetUnifiedDiffString(diff)
-	if err != nil {
-		t.Fatalf("%s", err)
-	}
+	uDiff := string(Diff("a", []byte(expected), "b", []byte(actual)))
 	if uDiff == "" {
 		return
 	}
@@ -110,6 +90,16 @@ func Fatal(t tf, expected string, actual string) {
 	testDeepEqual(t, false, t.Fatalf, expected, actual)
 }
 
+type Fn func(t *testing.T, path string, input string) (string, string, error)
+
+type Options struct {
+	Path        string
+	Pattern     string
+	ColorDiff   bool
+	WriteOutput bool
+	Fn          Fn
+}
+
 func TestWithOptions(t *testing.T, opts Options) {
 	t.Helper()
 
@@ -179,3 +169,256 @@ func Test(t *testing.T, pattern string, fn Fn) {
 		Fn: fn,
 	})
 }
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file
+
+// A pair is a pair of values tracked for both the x and y side of a diff.
+// It is typically a pair of line indexes.
+type pair struct{ x, y int }
+
+// Diff returns an anchored diff of the two texts old and new
+// in the “unified diff” format. If old and new are identical,
+// Diff returns a nil slice (no output).
+//
+// Unix diff implementations typically look for a diff with
+// the smallest number of lines inserted and removed,
+// which can in the worst case take time quadratic in the
+// number of lines in the texts. As a result, many implementations
+// either can be made to run for a long time or cut off the search
+// after a predetermined amount of work.
+//
+// In contrast, this implementation looks for a diff with the
+// smallest number of “unique” lines inserted and removed,
+// where unique means a line that appears just once in both old and new.
+// We call this an “anchored diff” because the unique lines anchor
+// the chosen matching regions. An anchored diff is usually clearer
+// than a standard diff, because the algorithm does not try to
+// reuse unrelated blank lines or closing braces.
+// The algorithm also guarantees to run in O(n log n) time
+// instead of the standard O(n²) time.
+//
+// Some systems call this approach a “patience diff,” named for
+// the “patience sorting” algorithm, itself named for a solitaire card game.
+// We avoid that name for two reasons. First, the name has been used
+// for a few different variants of the algorithm, so it is imprecise.
+// Second, the name is frequently interpreted as meaning that you have
+// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
+// when in fact the algorithm is faster than the standard one.
+func Diff(oldName string, old []byte, newName string, new_ []byte) []byte {
+	if bytes.Equal(old, new_) {
+		return nil
+	}
+	x := lines(old)
+	y := lines(new_)
+
+	// Print diff header.
+	var out bytes.Buffer
+	fmt.Fprintf(&out, "diff %s %s\n", oldName, newName)
+	fmt.Fprintf(&out, "--- %s\n", oldName)
+	fmt.Fprintf(&out, "+++ %s\n", newName)
+
+	// Loop over matches to consider,
+	// expanding each match to include surrounding lines,
+	// and then printing diff chunks.
+	// To avoid setup/teardown cases outside the loop,
+	// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
+	// in the sequence of matches.
+	var (
+		done  pair     // printed up to x[:done.x] and y[:done.y]
+		chunk pair     // start lines of current chunk
+		count pair     // number of lines from each side in current chunk
+		ctext []string // lines for current chunk
+	)
+	for _, m := range tgs(x, y) {
+		if m.x < done.x {
+			// Already handled scanning forward from earlier match.
+			continue
+		}
+
+		// Expand matching lines as far as possible,
+		// establishing that x[start.x:end.x] == y[start.y:end.y].
+		// Note that on the first (or last) iteration we may (or definitely do)
+		// have an empty match: start.x==end.x and start.y==end.y.
+		start := m
+		for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] {
+			start.x--
+			start.y--
+		}
+		end := m
+		for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] {
+			end.x++
+			end.y++
+		}
+
+		// Emit the mismatched lines before start into this chunk.
+		// (No effect on first sentinel iteration, when start = {0,0}.)
+		for _, s := range x[done.x:start.x] {
+			ctext = append(ctext, "-"+s)
+			count.x++
+		}
+		for _, s := range y[done.y:start.y] {
+			ctext = append(ctext, "+"+s)
+			count.y++
+		}
+
+		// If we're not at EOF and have too few common lines,
+		// the chunk includes all the common lines and continues.
+		const C = 3 // number of context lines
+		if (end.x < len(x) || end.y < len(y)) &&
+			(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) {
+			for _, s := range x[start.x:end.x] {
+				ctext = append(ctext, " "+s)
+				count.x++
+				count.y++
+			}
+			done = end
+			continue
+		}
+
+		// End chunk with common lines for context.
+		if len(ctext) > 0 {
+			n := end.x - start.x
+			if n > C {
+				n = C
+			}
+			for _, s := range x[start.x : start.x+n] {
+				ctext = append(ctext, " "+s)
+				count.x++
+				count.y++
+			}
+			done = pair{start.x + n, start.y + n}
+
+			// Format and emit chunk.
+			// Convert line numbers to 1-indexed.
+			// Special case: empty file shows up as 0,0 not 1,0.
+			if count.x > 0 {
+				chunk.x++
+			}
+			if count.y > 0 {
+				chunk.y++
+			}
+			fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
+			for _, s := range ctext {
+				out.WriteString(s)
+			}
+			count.x = 0
+			count.y = 0
+			ctext = ctext[:0]
+		}
+
+		// If we reached EOF, we're done.
+		if end.x >= len(x) && end.y >= len(y) {
+			break
+		}
+
+		// Otherwise start a new chunk.
+		chunk = pair{end.x - C, end.y - C}
+		for _, s := range x[chunk.x:end.x] {
+			ctext = append(ctext, " "+s)
+			count.x++
+			count.y++
+		}
+		done = end
+	}
+
+	return out.Bytes()
+}
+
+// lines returns the lines in the file x, including newlines.
+// If the file does not end in a newline, one is supplied
+// along with a warning about the missing newline.
+func lines(x []byte) []string {
+	l := strings.SplitAfter(string(x), "\n")
+	if l[len(l)-1] == "" {
+		l = l[:len(l)-1]
+	} else {
+		// Treat last line as having a message about the missing newline attached,
+		// using the same text as BSD/GNU diff (including the leading backslash).
+		l[len(l)-1] += "\n\\ No newline at end of file\n"
+	}
+	return l
+}
+
+// tgs returns the pairs of indexes of the longest common subsequence
+// of unique lines in x and y, where a unique line is one that appears
+// once in x and once in y.
+//
+// The longest common subsequence algorithm is as described in
+// Thomas G. Szymanski, “A Special Case of the Maximal Common
+// Subsequence Problem,” Princeton TR #170 (January 1975),
+// available at https://research.swtch.com/tgs170.pdf.
+func tgs(x, y []string) []pair {
+	// Count the number of times each string appears in a and b.
+	// We only care about 0, 1, many, counted as 0, -1, -2
+	// for the x side and 0, -4, -8 for the y side.
+	// Using negative numbers now lets us distinguish positive line numbers later.
+	m := make(map[string]int)
+	for _, s := range x {
+		if c := m[s]; c > -2 {
+			m[s] = c - 1
+		}
+	}
+	for _, s := range y {
+		if c := m[s]; c > -8 {
+			m[s] = c - 4
+		}
+	}
+
+	// Now unique strings can be identified by m[s] = -1+-4.
+	//
+	// Gather the indexes of those strings in x and y, building:
+	//	xi[i] = increasing indexes of unique strings in x.
+	//	yi[i] = increasing indexes of unique strings in y.
+	//	inv[i] = index j such that x[xi[i]] = y[yi[j]].
+	var xi, yi, inv []int
+	for i, s := range y {
+		if m[s] == -1+-4 {
+			m[s] = len(yi)
+			yi = append(yi, i)
+		}
+	}
+	for i, s := range x {
+		if j, ok := m[s]; ok && j >= 0 {
+			xi = append(xi, i)
+			inv = append(inv, j)
+		}
+	}
+
+	// Apply Algorithm A from Szymanski's paper.
+	// In those terms, A = J = inv and B = [0, n).
+	// We add sentinel pairs {0,0}, and {len(x),len(y)}
+	// to the returned sequence, to help the processing loop.
+	J := inv
+	n := len(xi)
+	T := make([]int, n)
+	L := make([]int, n)
+	for i := range T {
+		T[i] = n + 1
+	}
+	for i := 0; i < n; i++ {
+		k := sort.Search(n, func(k int) bool {
+			return T[k] >= J[i]
+		})
+		T[k] = J[i]
+		L[i] = k + 1
+	}
+	k := 0
+	for _, v := range L {
+		if k < v {
+			k = v
+		}
+	}
+	seq := make([]pair, 2+k)
+	seq[1+k] = pair{len(x), len(y)} // sentinel at end
+	lastj := n
+	for i := n - 1; i >= 0; i-- {
+		if L[i] == k && J[i] < lastj {
+			seq[k] = pair{xi[i], yi[J[i]]}
+			k--
+		}
+	}
+	seq[0] = pair{0, 0} // sentinel at start
+	return seq
+}
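
Note on usage (not part of the patch itself): the moved `Fn`/`Options` types and the new `Diff` function keep the existing golden-file test flow, just without the difflib dependency. The sketch below shows how a test inside this module could drive the package; the `testdata` directory, the `*.ansi` pattern, and the `.out` suffix are illustrative assumptions, not values taken from this commit.

```go
package example_test // hypothetical test file inside the ansisvg module

import (
	"strings"
	"testing"

	"github.com/wader/ansisvg/internal/difftest"
)

func TestGolden(t *testing.T) {
	difftest.TestWithOptions(t, difftest.Options{
		Path:        "testdata", // assumed input directory
		Pattern:     "*.ansi",   // assumed input file pattern
		ColorDiff:   false,
		WriteOutput: false, // true writes the actual output to the output path instead of comparing
		Fn: func(t *testing.T, path string, input string) (string, string, error) {
			// Stand-in for the code under test: any transform of the input works here.
			return path + ".out", strings.ToUpper(input), nil
		},
	})
}
```

On a mismatch the failure message now comes from the anchored `Diff` function above, so it is a unified diff whose hunks are anchored on lines that occur exactly once in both the expected and the actual text.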