From cd90f33be86a5cc9a9fbe7d2a7e673a31845bf83 Mon Sep 17 00:00:00 2001 From: Sourik Ghosh <61813998+sourikghosh@users.noreply.github.com> Date: Wed, 13 Apr 2022 23:08:05 +0530 Subject: [PATCH] smart diff to testutil.GatherAndCompare (#998) * added smart diff to testutil.GatherAndCompare Signed-off-by: Sourik Ghosh * v2 testuitls added for better diff Signed-off-by: Sourik Ghosh * replaced CollectAndCompare to CollectAndCompareV2 in testutil_test Signed-off-by: Sourik Ghosh * renamed methods from v2 to withT Signed-off-by: Sourik Ghosh * replaced testify with custom diff func Signed-off-by: Sourik Ghosh * difflib GetUnifiedDiffString added with test Signed-off-by: Sourik Ghosh * license meta data added to file Signed-off-by: Sourik Ghosh * moved difflib to internal Signed-off-by: Sourik Ghosh --- go.mod | 1 + prometheus/internal/difflib.go | 649 +++++++++++++++++++++++++++ prometheus/internal/difflib_test.go | 266 +++++++++++ prometheus/testutil/testutil.go | 75 +++- prometheus/testutil/testutil_test.go | 21 +- 5 files changed, 992 insertions(+), 20 deletions(-) create mode 100644 prometheus/internal/difflib.go create mode 100644 prometheus/internal/difflib_test.go diff --git a/go.mod b/go.mod index 77f6115..5135421 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/prometheus/client_golang require ( github.com/beorn7/perks v1.0.1 github.com/cespare/xxhash/v2 v2.1.2 + github.com/davecgh/go-spew v1.1.1 github.com/golang/protobuf v1.5.2 github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 diff --git a/prometheus/internal/difflib.go b/prometheus/internal/difflib.go new file mode 100644 index 0000000..1789006 --- /dev/null +++ b/prometheus/internal/difflib.go @@ -0,0 +1,649 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// It provides tools to compare sequences of strings and generate textual diffs. +// +// Maintaining `GetUnifiedDiffString` here because original repository +// (https://github.com/pmezard/go-difflib) is no loger maintained. +package internal + +import ( + "bufio" + "bytes" + "fmt" + "io" + "strings" +) + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +func calculateRatio(matches, length int) float64 { + if length > 0 { + return 2.0 * float64(matches) / float64(length) + } + return 1.0 +} + +type Match struct { + A int + B int + Size int +} + +type OpCode struct { + Tag byte + I1 int + I2 int + J1 int + J2 int +} + +// SequenceMatcher compares sequence of strings. The basic +// algorithm predates, and is a little fancier than, an algorithm +// published in the late 1980's by Ratcliff and Obershelp under the +// hyperbolic name "gestalt pattern matching". The basic idea is to find +// the longest contiguous matching subsequence that contains no "junk" +// elements (R-O doesn't address junk). The same idea is then applied +// recursively to the pieces of the sequences to the left and to the right +// of the matching subsequence. This does not yield minimal edit +// sequences, but does tend to yield matches that "look right" to people. +// +// SequenceMatcher tries to compute a "human-friendly diff" between two +// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the +// longest *contiguous* & junk-free matching subsequence. That's what +// catches peoples' eyes. The Windows(tm) windiff has another interesting +// notion, pairing up elements that appear uniquely in each sequence. +// That, and the method here, appear to yield more intuitive difference +// reports than does diff. This method appears to be the least vulnerable +// to synching up on blocks of "junk lines", though (like blank lines in +// ordinary text files, or maybe "

" lines in HTML files). That may be +// because this is the only method of the 3 that has a *concept* of +// "junk" . +// +// Timing: Basic R-O is cubic time worst case and quadratic time expected +// case. SequenceMatcher is quadratic time for the worst case and has +// expected-case behavior dependent in a complicated way on how many +// elements the sequences have in common; best case time is linear. +type SequenceMatcher struct { + a []string + b []string + b2j map[string][]int + IsJunk func(string) bool + autoJunk bool + bJunk map[string]struct{} + matchingBlocks []Match + fullBCount map[string]int + bPopular map[string]struct{} + opCodes []OpCode +} + +func NewMatcher(a, b []string) *SequenceMatcher { + m := SequenceMatcher{autoJunk: true} + m.SetSeqs(a, b) + return &m +} + +func NewMatcherWithJunk(a, b []string, autoJunk bool, + isJunk func(string) bool) *SequenceMatcher { + + m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk} + m.SetSeqs(a, b) + return &m +} + +// Set two sequences to be compared. +func (m *SequenceMatcher) SetSeqs(a, b []string) { + m.SetSeq1(a) + m.SetSeq2(b) +} + +// Set the first sequence to be compared. The second sequence to be compared is +// not changed. +// +// SequenceMatcher computes and caches detailed information about the second +// sequence, so if you want to compare one sequence S against many sequences, +// use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other +// sequences. +// +// See also SetSeqs() and SetSeq2(). +func (m *SequenceMatcher) SetSeq1(a []string) { + if &a == &m.a { + return + } + m.a = a + m.matchingBlocks = nil + m.opCodes = nil +} + +// Set the second sequence to be compared. The first sequence to be compared is +// not changed. +func (m *SequenceMatcher) SetSeq2(b []string) { + if &b == &m.b { + return + } + m.b = b + m.matchingBlocks = nil + m.opCodes = nil + m.fullBCount = nil + m.chainB() +} + +func (m *SequenceMatcher) chainB() { + // Populate line -> index mapping + b2j := map[string][]int{} + for i, s := range m.b { + indices := b2j[s] + indices = append(indices, i) + b2j[s] = indices + } + + // Purge junk elements + m.bJunk = map[string]struct{}{} + if m.IsJunk != nil { + junk := m.bJunk + for s, _ := range b2j { + if m.IsJunk(s) { + junk[s] = struct{}{} + } + } + for s, _ := range junk { + delete(b2j, s) + } + } + + // Purge remaining popular elements + popular := map[string]struct{}{} + n := len(m.b) + if m.autoJunk && n >= 200 { + ntest := n/100 + 1 + for s, indices := range b2j { + if len(indices) > ntest { + popular[s] = struct{}{} + } + } + for s, _ := range popular { + delete(b2j, s) + } + } + m.bPopular = popular + m.b2j = b2j +} + +func (m *SequenceMatcher) isBJunk(s string) bool { + _, ok := m.bJunk[s] + return ok +} + +// Find longest matching block in a[alo:ahi] and b[blo:bhi]. +// +// If IsJunk is not defined: +// +// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where +// alo <= i <= i+k <= ahi +// blo <= j <= j+k <= bhi +// and for all (i',j',k') meeting those conditions, +// k >= k' +// i <= i' +// and if i == i', j <= j' +// +// In other words, of all maximal matching blocks, return one that +// starts earliest in a, and of all those maximal matching blocks that +// start earliest in a, return the one that starts earliest in b. +// +// If IsJunk is defined, first the longest matching block is +// determined as above, but with the additional restriction that no +// junk element appears in the block. Then that block is extended as +// far as possible by matching (only) junk elements on both sides. So +// the resulting block never matches on junk except as identical junk +// happens to be adjacent to an "interesting" match. +// +// If no blocks match, return (alo, blo, 0). +func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match { + // CAUTION: stripping common prefix or suffix would be incorrect. + // E.g., + // ab + // acab + // Longest matching block is "ab", but if common prefix is + // stripped, it's "a" (tied with "b"). UNIX(tm) diff does so + // strip, so ends up claiming that ab is changed to acab by + // inserting "ca" in the middle. That's minimal but unintuitive: + // "it's obvious" that someone inserted "ac" at the front. + // Windiff ends up at the same place as diff, but by pairing up + // the unique 'b's and then matching the first two 'a's. + besti, bestj, bestsize := alo, blo, 0 + + // find longest junk-free match + // during an iteration of the loop, j2len[j] = length of longest + // junk-free match ending with a[i-1] and b[j] + j2len := map[int]int{} + for i := alo; i != ahi; i++ { + // look at all instances of a[i] in b; note that because + // b2j has no junk keys, the loop is skipped if a[i] is junk + newj2len := map[int]int{} + for _, j := range m.b2j[m.a[i]] { + // a[i] matches b[j] + if j < blo { + continue + } + if j >= bhi { + break + } + k := j2len[j-1] + 1 + newj2len[j] = k + if k > bestsize { + besti, bestj, bestsize = i-k+1, j-k+1, k + } + } + j2len = newj2len + } + + // Extend the best by non-junk elements on each end. In particular, + // "popular" non-junk elements aren't in b2j, which greatly speeds + // the inner loop above, but also means "the best" match so far + // doesn't contain any junk *or* popular non-junk elements. + for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + !m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize += 1 + } + + // Now that we have a wholly interesting match (albeit possibly + // empty!), we may as well suck up the matching junk on each + // side of it too. Can't think of a good reason not to, and it + // saves post-processing the (possibly considerable) expense of + // figuring out what to do with it. In the case of an empty + // interesting match, this is clearly the right thing to do, + // because no other kind of match is possible in the regions. + for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize += 1 + } + + return Match{A: besti, B: bestj, Size: bestsize} +} + +// Return list of triples describing matching subsequences. +// +// Each triple is of the form (i, j, n), and means that +// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in +// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are +// adjacent triples in the list, and the second is not the last triple in the +// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe +// adjacent equal blocks. +// +// The last triple is a dummy, (len(a), len(b), 0), and is the only +// triple with n==0. +func (m *SequenceMatcher) GetMatchingBlocks() []Match { + if m.matchingBlocks != nil { + return m.matchingBlocks + } + + var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match + matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match { + match := m.findLongestMatch(alo, ahi, blo, bhi) + i, j, k := match.A, match.B, match.Size + if match.Size > 0 { + if alo < i && blo < j { + matched = matchBlocks(alo, i, blo, j, matched) + } + matched = append(matched, match) + if i+k < ahi && j+k < bhi { + matched = matchBlocks(i+k, ahi, j+k, bhi, matched) + } + } + return matched + } + matched := matchBlocks(0, len(m.a), 0, len(m.b), nil) + + // It's possible that we have adjacent equal blocks in the + // matching_blocks list now. + nonAdjacent := []Match{} + i1, j1, k1 := 0, 0, 0 + for _, b := range matched { + // Is this block adjacent to i1, j1, k1? + i2, j2, k2 := b.A, b.B, b.Size + if i1+k1 == i2 && j1+k1 == j2 { + // Yes, so collapse them -- this just increases the length of + // the first block by the length of the second, and the first + // block so lengthened remains the block to compare against. + k1 += k2 + } else { + // Not adjacent. Remember the first block (k1==0 means it's + // the dummy we started with), and make the second block the + // new block to compare against. + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + i1, j1, k1 = i2, j2, k2 + } + } + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + + nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0}) + m.matchingBlocks = nonAdjacent + return m.matchingBlocks +} + +// Return list of 5-tuples describing how to turn a into b. +// +// Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple +// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the +// tuple preceding it, and likewise for j1 == the previous j2. +// +// The tags are characters, with these meanings: +// +// 'r' (replace): a[i1:i2] should be replaced by b[j1:j2] +// +// 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case. +// +// 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case. +// +// 'e' (equal): a[i1:i2] == b[j1:j2] +func (m *SequenceMatcher) GetOpCodes() []OpCode { + if m.opCodes != nil { + return m.opCodes + } + i, j := 0, 0 + matching := m.GetMatchingBlocks() + opCodes := make([]OpCode, 0, len(matching)) + for _, m := range matching { + // invariant: we've pumped out correct diffs to change + // a[:i] into b[:j], and the next matching block is + // a[ai:ai+size] == b[bj:bj+size]. So we need to pump + // out a diff to change a[i:ai] into b[j:bj], pump out + // the matching block, and move (i,j) beyond the match + ai, bj, size := m.A, m.B, m.Size + tag := byte(0) + if i < ai && j < bj { + tag = 'r' + } else if i < ai { + tag = 'd' + } else if j < bj { + tag = 'i' + } + if tag > 0 { + opCodes = append(opCodes, OpCode{tag, i, ai, j, bj}) + } + i, j = ai+size, bj+size + // the list of matching blocks is terminated by a + // sentinel with size 0 + if size > 0 { + opCodes = append(opCodes, OpCode{'e', ai, i, bj, j}) + } + } + m.opCodes = opCodes + return m.opCodes +} + +// Isolate change clusters by eliminating ranges with no changes. +// +// Return a generator of groups with up to n lines of context. +// Each group is in the same format as returned by GetOpCodes(). +func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode { + if n < 0 { + n = 3 + } + codes := m.GetOpCodes() + if len(codes) == 0 { + codes = []OpCode{{'e', 0, 1, 0, 1}} + } + // Fixup leading and trailing groups if they show no changes. + if codes[0].Tag == 'e' { + c := codes[0] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2} + } + if codes[len(codes)-1].Tag == 'e' { + c := codes[len(codes)-1] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)} + } + nn := n + n + groups := [][]OpCode{} + group := []OpCode{} + for _, c := range codes { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + // End the current group and start a new one whenever + // there is a large range with no changes. + if c.Tag == 'e' && i2-i1 > nn { + group = append(group, OpCode{c.Tag, i1, min(i2, i1+n), + j1, min(j2, j1+n)}) + groups = append(groups, group) + group = []OpCode{} + i1, j1 = max(i1, i2-n), max(j1, j2-n) + } + group = append(group, OpCode{c.Tag, i1, i2, j1, j2}) + } + if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') { + groups = append(groups, group) + } + return groups +} + +// Return a measure of the sequences' similarity (float in [0,1]). +// +// Where T is the total number of elements in both sequences, and +// M is the number of matches, this is 2.0*M / T. +// Note that this is 1 if the sequences are identical, and 0 if +// they have nothing in common. +// +// .Ratio() is expensive to compute if you haven't already computed +// .GetMatchingBlocks() or .GetOpCodes(), in which case you may +// want to try .QuickRatio() or .RealQuickRation() first to get an +// upper bound. +func (m *SequenceMatcher) Ratio() float64 { + matches := 0 + for _, m := range m.GetMatchingBlocks() { + matches += m.Size + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// Return an upper bound on ratio() relatively quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute. +func (m *SequenceMatcher) QuickRatio() float64 { + // viewing a and b as multisets, set matches to the cardinality + // of their intersection; this counts the number of matches + // without regard to order, so is clearly an upper bound + if m.fullBCount == nil { + m.fullBCount = map[string]int{} + for _, s := range m.b { + m.fullBCount[s] = m.fullBCount[s] + 1 + } + } + + // avail[x] is the number of times x appears in 'b' less the + // number of times we've seen it in 'a' so far ... kinda + avail := map[string]int{} + matches := 0 + for _, s := range m.a { + n, ok := avail[s] + if !ok { + n = m.fullBCount[s] + } + avail[s] = n - 1 + if n > 0 { + matches += 1 + } + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// Return an upper bound on ratio() very quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute than either .Ratio() or .QuickRatio(). +func (m *SequenceMatcher) RealQuickRatio() float64 { + la, lb := len(m.a), len(m.b) + return calculateRatio(min(la, lb), la+lb) +} + +// Convert range to the "ed" format +func formatRangeUnified(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 1 { + return fmt.Sprintf("%d", beginning) + } + if length == 0 { + beginning -= 1 // empty ranges begin at line just before the range + } + return fmt.Sprintf("%d,%d", beginning, length) +} + +// Unified diff parameters +type UnifiedDiff struct { + A []string // First sequence lines + FromFile string // First file name + FromDate string // First file time + B []string // Second sequence lines + ToFile string // Second file name + ToDate string // Second file time + Eol string // Headers end of line, defaults to LF + Context int // Number of context lines +} + +// Compare two sequences of lines; generate the delta as a unified diff. +// +// Unified diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by 'n' which +// defaults to three. +// +// By default, the diff control lines (those with ---, +++, or @@) are +// created with a trailing newline. This is helpful so that inputs +// created from file.readlines() result in diffs that are suitable for +// file.writelines() since both the inputs and outputs have trailing +// newlines. +// +// For inputs that do not have trailing newlines, set the lineterm +// argument to "" so that the output will be uniformly newline free. +// +// The unidiff format normally has a header for filenames and modification +// times. Any or all of these may be specified using strings for +// 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. +// The modification times are normally expressed in the ISO 8601 format. +func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + wf := func(format string, args ...interface{}) error { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + return err + } + ws := func(s string) error { + _, err := buf.WriteString(s) + return err + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + if diff.FromFile != "" || diff.ToFile != "" { + err := wf("--- %s%s%s", diff.FromFile, fromDate, diff.Eol) + if err != nil { + return err + } + err = wf("+++ %s%s%s", diff.ToFile, toDate, diff.Eol) + if err != nil { + return err + } + } + } + first, last := g[0], g[len(g)-1] + range1 := formatRangeUnified(first.I1, last.I2) + range2 := formatRangeUnified(first.J1, last.J2) + if err := wf("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil { + return err + } + for _, c := range g { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + if c.Tag == 'e' { + for _, line := range diff.A[i1:i2] { + if err := ws(" " + line); err != nil { + return err + } + } + continue + } + if c.Tag == 'r' || c.Tag == 'd' { + for _, line := range diff.A[i1:i2] { + if err := ws("-" + line); err != nil { + return err + } + } + } + if c.Tag == 'r' || c.Tag == 'i' { + for _, line := range diff.B[j1:j2] { + if err := ws("+" + line); err != nil { + return err + } + } + } + } + } + return nil +} + +// Like WriteUnifiedDiff but returns the diff a string. +func GetUnifiedDiffString(diff UnifiedDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteUnifiedDiff(w, diff) + return string(w.Bytes()), err +} + +// Split a string on "\n" while preserving them. The output can be used +// as input for UnifiedDiff and ContextDiff structures. +func SplitLines(s string) []string { + lines := strings.SplitAfter(s, "\n") + lines[len(lines)-1] += "\n" + return lines +} diff --git a/prometheus/internal/difflib_test.go b/prometheus/internal/difflib_test.go new file mode 100644 index 0000000..1a2bb34 --- /dev/null +++ b/prometheus/internal/difflib_test.go @@ -0,0 +1,266 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package internal + +import ( + "bytes" + "fmt" + "math" + "reflect" + "strings" + "testing" +) + +func assertAlmostEqual(t *testing.T, a, b float64, places int) { + if math.Abs(a-b) > math.Pow10(-places) { + t.Errorf("%.7f != %.7f", a, b) + } +} + +func assertEqual(t *testing.T, a, b interface{}) { + if !reflect.DeepEqual(a, b) { + t.Errorf("%v != %v", a, b) + } +} + +func splitChars(s string) []string { + chars := make([]string, 0, len(s)) + // Assume ASCII inputs + for i := 0; i != len(s); i++ { + chars = append(chars, string(s[i])) + } + return chars +} + +func TestSequenceMatcherRatio(t *testing.T) { + s := NewMatcher(splitChars("abcd"), splitChars("bcde")) + assertEqual(t, s.Ratio(), 0.75) + assertEqual(t, s.QuickRatio(), 0.75) + assertEqual(t, s.RealQuickRatio(), 1.0) +} + +func TestGetOptCodes(t *testing.T) { + a := "qabxcd" + b := "abycdf" + s := NewMatcher(splitChars(a), splitChars(b)) + w := &bytes.Buffer{} + for _, op := range s.GetOpCodes() { + fmt.Fprintf(w, "%s a[%d:%d], (%s) b[%d:%d] (%s)\n", string(op.Tag), + op.I1, op.I2, a[op.I1:op.I2], op.J1, op.J2, b[op.J1:op.J2]) + } + result := string(w.Bytes()) + expected := `d a[0:1], (q) b[0:0] () +e a[1:3], (ab) b[0:2] (ab) +r a[3:4], (x) b[2:3] (y) +e a[4:6], (cd) b[3:5] (cd) +i a[6:6], () b[5:6] (f) +` + if expected != result { + t.Errorf("unexpected op codes: \n%s", result) + } +} + +func TestGroupedOpCodes(t *testing.T) { + a := []string{} + for i := 0; i != 39; i++ { + a = append(a, fmt.Sprintf("%02d", i)) + } + b := []string{} + b = append(b, a[:8]...) + b = append(b, " i") + b = append(b, a[8:19]...) + b = append(b, " x") + b = append(b, a[20:22]...) + b = append(b, a[27:34]...) + b = append(b, " y") + b = append(b, a[35:]...) + s := NewMatcher(a, b) + w := &bytes.Buffer{} + for _, g := range s.GetGroupedOpCodes(-1) { + fmt.Fprintf(w, "group\n") + for _, op := range g { + fmt.Fprintf(w, " %s, %d, %d, %d, %d\n", string(op.Tag), + op.I1, op.I2, op.J1, op.J2) + } + } + result := string(w.Bytes()) + expected := `group + e, 5, 8, 5, 8 + i, 8, 8, 8, 9 + e, 8, 11, 9, 12 +group + e, 16, 19, 17, 20 + r, 19, 20, 20, 21 + e, 20, 22, 21, 23 + d, 22, 27, 23, 23 + e, 27, 30, 23, 26 +group + e, 31, 34, 27, 30 + r, 34, 35, 30, 31 + e, 35, 38, 31, 34 +` + if expected != result { + t.Errorf("unexpected op codes: \n%s", result) + } +} + +func ExampleGetUnifiedDiffCode() { + a := `one +two +three +four +fmt.Printf("%s,%T",a,b)` + b := `zero +one +three +four` + diff := UnifiedDiff{ + A: SplitLines(a), + B: SplitLines(b), + FromFile: "Original", + FromDate: "2005-01-26 23:30:50", + ToFile: "Current", + ToDate: "2010-04-02 10:20:52", + Context: 3, + } + result, _ := GetUnifiedDiffString(diff) + fmt.Println(strings.Replace(result, "\t", " ", -1)) + // Output: + // --- Original 2005-01-26 23:30:50 + // +++ Current 2010-04-02 10:20:52 + // @@ -1,5 +1,4 @@ + // +zero + // one + // -two + // three + // four + // -fmt.Printf("%s,%T",a,b) +} + +func rep(s string, count int) string { + return strings.Repeat(s, count) +} + +func TestWithAsciiOneInsert(t *testing.T) { + sm := NewMatcher(splitChars(rep("b", 100)), + splitChars("a"+rep("b", 100))) + assertAlmostEqual(t, sm.Ratio(), 0.995, 3) + assertEqual(t, sm.GetOpCodes(), + []OpCode{{'i', 0, 0, 0, 1}, {'e', 0, 100, 1, 101}}) + assertEqual(t, len(sm.bPopular), 0) + + sm = NewMatcher(splitChars(rep("b", 100)), + splitChars(rep("b", 50)+"a"+rep("b", 50))) + assertAlmostEqual(t, sm.Ratio(), 0.995, 3) + assertEqual(t, sm.GetOpCodes(), + []OpCode{{'e', 0, 50, 0, 50}, {'i', 50, 50, 50, 51}, {'e', 50, 100, 51, 101}}) + assertEqual(t, len(sm.bPopular), 0) +} + +func TestWithAsciiOnDelete(t *testing.T) { + sm := NewMatcher(splitChars(rep("a", 40)+"c"+rep("b", 40)), + splitChars(rep("a", 40)+rep("b", 40))) + assertAlmostEqual(t, sm.Ratio(), 0.994, 3) + assertEqual(t, sm.GetOpCodes(), + []OpCode{{'e', 0, 40, 0, 40}, {'d', 40, 41, 40, 40}, {'e', 41, 81, 40, 80}}) +} + +func TestWithAsciiBJunk(t *testing.T) { + isJunk := func(s string) bool { + return s == " " + } + sm := NewMatcherWithJunk(splitChars(rep("a", 40)+rep("b", 40)), + splitChars(rep("a", 44)+rep("b", 40)), true, isJunk) + assertEqual(t, sm.bJunk, map[string]struct{}{}) + + sm = NewMatcherWithJunk(splitChars(rep("a", 40)+rep("b", 40)), + splitChars(rep("a", 44)+rep("b", 40)+rep(" ", 20)), false, isJunk) + assertEqual(t, sm.bJunk, map[string]struct{}{" ": struct{}{}}) + + isJunk = func(s string) bool { + return s == " " || s == "b" + } + sm = NewMatcherWithJunk(splitChars(rep("a", 40)+rep("b", 40)), + splitChars(rep("a", 44)+rep("b", 40)+rep(" ", 20)), false, isJunk) + assertEqual(t, sm.bJunk, map[string]struct{}{" ": struct{}{}, "b": struct{}{}}) +} + +func TestSFBugsRatioForNullSeqn(t *testing.T) { + sm := NewMatcher(nil, nil) + assertEqual(t, sm.Ratio(), 1.0) + assertEqual(t, sm.QuickRatio(), 1.0) + assertEqual(t, sm.RealQuickRatio(), 1.0) +} + +func TestSFBugsComparingEmptyLists(t *testing.T) { + groups := NewMatcher(nil, nil).GetGroupedOpCodes(-1) + assertEqual(t, len(groups), 0) + diff := UnifiedDiff{ + FromFile: "Original", + ToFile: "Current", + Context: 3, + } + result, err := GetUnifiedDiffString(diff) + assertEqual(t, err, nil) + assertEqual(t, result, "") +} + +func TestOutputFormatRangeFormatUnified(t *testing.T) { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + // + // Each field shall be of the form: + // %1d", if the range contains exactly one line, + // and: + // "%1d,%1d", , otherwise. + // If a range is empty, its beginning line number shall be the number of + // the line just before the range, or 0 if the empty range starts the file. + fm := formatRangeUnified + assertEqual(t, fm(3, 3), "3,0") + assertEqual(t, fm(3, 4), "4") + assertEqual(t, fm(3, 5), "4,2") + assertEqual(t, fm(3, 6), "4,3") + assertEqual(t, fm(0, 0), "0,0") +} + +func TestSplitLines(t *testing.T) { + allTests := []struct { + input string + want []string + }{ + {"foo", []string{"foo\n"}}, + {"foo\nbar", []string{"foo\n", "bar\n"}}, + {"foo\nbar\n", []string{"foo\n", "bar\n", "\n"}}, + } + for _, test := range allTests { + assertEqual(t, SplitLines(test.input), test.want) + } +} + +func benchmarkSplitLines(b *testing.B, count int) { + str := strings.Repeat("foo\n", count) + + b.ResetTimer() + + n := 0 + for i := 0; i < b.N; i++ { + n += len(SplitLines(str)) + } +} + +func BenchmarkSplitLines100(b *testing.B) { + benchmarkSplitLines(b, 100) +} + +func BenchmarkSplitLines10000(b *testing.B) { + benchmarkSplitLines(b, 10000) +} diff --git a/prometheus/testutil/testutil.go b/prometheus/testutil/testutil.go index bf95bea..115979d 100644 --- a/prometheus/testutil/testutil.go +++ b/prometheus/testutil/testutil.go @@ -41,7 +41,9 @@ import ( "bytes" "fmt" "io" + "reflect" + "github.com/davecgh/go-spew/spew" "github.com/prometheus/common/expfmt" dto "github.com/prometheus/client_model/go" @@ -211,20 +213,73 @@ func compare(got, want []*dto.MetricFamily) error { return fmt.Errorf("encoding expected metrics failed: %s", err) } } - - if wantBuf.String() != gotBuf.String() { - return fmt.Errorf(` -metric output does not match expectation; want: - -%s -got: - -%s`, wantBuf.String(), gotBuf.String()) - + if diffErr := diff(wantBuf, gotBuf); diffErr != "" { + return fmt.Errorf(diffErr) } return nil } +// diff returns a diff of both values as long as both are of the same type and +// are a struct, map, slice, array or string. Otherwise it returns an empty string. +func diff(expected interface{}, actual interface{}) string { + if expected == nil || actual == nil { + return "" + } + + et, ek := typeAndKind(expected) + at, _ := typeAndKind(actual) + if et != at { + return "" + } + + if ek != reflect.Struct && ek != reflect.Map && ek != reflect.Slice && ek != reflect.Array && ek != reflect.String { + return "" + } + + var e, a string + c := spew.ConfigState{ + Indent: " ", + DisablePointerAddresses: true, + DisableCapacities: true, + SortKeys: true, + } + if et != reflect.TypeOf("") { + e = c.Sdump(expected) + a = c.Sdump(actual) + } else { + e = reflect.ValueOf(expected).String() + a = reflect.ValueOf(actual).String() + } + + diff, _ := internal.GetUnifiedDiffString(internal.UnifiedDiff{ + A: internal.SplitLines(e), + B: internal.SplitLines(a), + FromFile: "metric output does not match expectation; want", + FromDate: "", + ToFile: "got:", + ToDate: "", + Context: 1, + }) + + if diff == "" { + return "" + } + + return "\n\nDiff:\n" + diff +} + +// typeAndKind returns the type and kind of the given interface{} +func typeAndKind(v interface{}) (reflect.Type, reflect.Kind) { + t := reflect.TypeOf(v) + k := t.Kind() + + if k == reflect.Ptr { + t = t.Elem() + k = t.Kind() + } + return t, k +} + func filterMetrics(metrics []*dto.MetricFamily, names []string) []*dto.MetricFamily { var filtered []*dto.MetricFamily for _, m := range metrics { diff --git a/prometheus/testutil/testutil_test.go b/prometheus/testutil/testutil_test.go index 56d9933..96a6f38 100644 --- a/prometheus/testutil/testutil_test.go +++ b/prometheus/testutil/testutil_test.go @@ -284,17 +284,18 @@ func TestMetricNotFound(t *testing.T) { ` expectedError := ` -metric output does not match expectation; want: -# HELP some_other_metric A value that represents a counter. -# TYPE some_other_metric counter -some_other_metric{label1="value1"} 1 - -got: - -# HELP some_total A value that represents a counter. -# TYPE some_total counter -some_total{label1="value1"} 1 +Diff: +--- metric output does not match expectation; want ++++ got: +@@ -1,4 +1,4 @@ +-(bytes.Buffer) # HELP some_other_metric A value that represents a counter. +-# TYPE some_other_metric counter +-some_other_metric{label1="value1"} 1 ++(bytes.Buffer) # HELP some_total A value that represents a counter. ++# TYPE some_total counter ++some_total{label1="value1"} 1 + ` err := CollectAndCompare(c, strings.NewReader(metadata+expected))