diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/sort | |
parent | Initial commit. (diff) | |
download | golang-1.20-upstream.tar.xz golang-1.20-upstream.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/sort/example_interface_test.go | 58 | ||||
-rw-r--r-- | src/sort/example_keys_test.go | 96 | ||||
-rw-r--r-- | src/sort/example_multi_test.go | 132 | ||||
-rw-r--r-- | src/sort/example_search_test.go | 74 | ||||
-rw-r--r-- | src/sort/example_test.go | 122 | ||||
-rw-r--r-- | src/sort/example_wrapper_test.go | 77 | ||||
-rw-r--r-- | src/sort/export_test.go | 13 | ||||
-rw-r--r-- | src/sort/gen_sort_variants.go | 663 | ||||
-rw-r--r-- | src/sort/search.go | 150 | ||||
-rw-r--r-- | src/sort/search_test.go | 266 | ||||
-rw-r--r-- | src/sort/slice.go | 52 | ||||
-rw-r--r-- | src/sort/sort.go | 262 | ||||
-rw-r--r-- | src/sort/sort_test.go | 744 | ||||
-rw-r--r-- | src/sort/zsortfunc.go | 479 | ||||
-rw-r--r-- | src/sort/zsortinterface.go | 479 |
15 files changed, 3667 insertions, 0 deletions
diff --git a/src/sort/example_interface_test.go b/src/sort/example_interface_test.go new file mode 100644 index 0000000..72d3017 --- /dev/null +++ b/src/sort/example_interface_test.go @@ -0,0 +1,58 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "sort" +) + +type Person struct { + Name string + Age int +} + +func (p Person) String() string { + return fmt.Sprintf("%s: %d", p.Name, p.Age) +} + +// ByAge implements sort.Interface for []Person based on +// the Age field. +type ByAge []Person + +func (a ByAge) Len() int { return len(a) } +func (a ByAge) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByAge) Less(i, j int) bool { return a[i].Age < a[j].Age } + +func Example() { + people := []Person{ + {"Bob", 31}, + {"John", 42}, + {"Michael", 17}, + {"Jenny", 26}, + } + + fmt.Println(people) + // There are two ways to sort a slice. First, one can define + // a set of methods for the slice type, as with ByAge, and + // call sort.Sort. In this first example we use that technique. + sort.Sort(ByAge(people)) + fmt.Println(people) + + // The other way is to use sort.Slice with a custom Less + // function, which can be provided as a closure. In this + // case no methods are needed. (And if they exist, they + // are ignored.) Here we re-sort in reverse order: compare + // the closure with ByAge.Less. + sort.Slice(people, func(i, j int) bool { + return people[i].Age > people[j].Age + }) + fmt.Println(people) + + // Output: + // [Bob: 31 John: 42 Michael: 17 Jenny: 26] + // [Michael: 17 Jenny: 26 Bob: 31 John: 42] + // [John: 42 Bob: 31 Jenny: 26 Michael: 17] +} diff --git a/src/sort/example_keys_test.go b/src/sort/example_keys_test.go new file mode 100644 index 0000000..648f919 --- /dev/null +++ b/src/sort/example_keys_test.go @@ -0,0 +1,96 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "sort" +) + +// A couple of type definitions to make the units clear. +type earthMass float64 +type au float64 + +// A Planet defines the properties of a solar system object. +type Planet struct { + name string + mass earthMass + distance au +} + +// By is the type of a "less" function that defines the ordering of its Planet arguments. +type By func(p1, p2 *Planet) bool + +// Sort is a method on the function type, By, that sorts the argument slice according to the function. +func (by By) Sort(planets []Planet) { + ps := &planetSorter{ + planets: planets, + by: by, // The Sort method's receiver is the function (closure) that defines the sort order. + } + sort.Sort(ps) +} + +// planetSorter joins a By function and a slice of Planets to be sorted. +type planetSorter struct { + planets []Planet + by func(p1, p2 *Planet) bool // Closure used in the Less method. +} + +// Len is part of sort.Interface. +func (s *planetSorter) Len() int { + return len(s.planets) +} + +// Swap is part of sort.Interface. +func (s *planetSorter) Swap(i, j int) { + s.planets[i], s.planets[j] = s.planets[j], s.planets[i] +} + +// Less is part of sort.Interface. It is implemented by calling the "by" closure in the sorter. +func (s *planetSorter) Less(i, j int) bool { + return s.by(&s.planets[i], &s.planets[j]) +} + +var planets = []Planet{ + {"Mercury", 0.055, 0.4}, + {"Venus", 0.815, 0.7}, + {"Earth", 1.0, 1.0}, + {"Mars", 0.107, 1.5}, +} + +// ExampleSortKeys demonstrates a technique for sorting a struct type using programmable sort criteria. +func Example_sortKeys() { + // Closures that order the Planet structure. + name := func(p1, p2 *Planet) bool { + return p1.name < p2.name + } + mass := func(p1, p2 *Planet) bool { + return p1.mass < p2.mass + } + distance := func(p1, p2 *Planet) bool { + return p1.distance < p2.distance + } + decreasingDistance := func(p1, p2 *Planet) bool { + return distance(p2, p1) + } + + // Sort the planets by the various criteria. + By(name).Sort(planets) + fmt.Println("By name:", planets) + + By(mass).Sort(planets) + fmt.Println("By mass:", planets) + + By(distance).Sort(planets) + fmt.Println("By distance:", planets) + + By(decreasingDistance).Sort(planets) + fmt.Println("By decreasing distance:", planets) + + // Output: By name: [{Earth 1 1} {Mars 0.107 1.5} {Mercury 0.055 0.4} {Venus 0.815 0.7}] + // By mass: [{Mercury 0.055 0.4} {Mars 0.107 1.5} {Venus 0.815 0.7} {Earth 1 1}] + // By distance: [{Mercury 0.055 0.4} {Venus 0.815 0.7} {Earth 1 1} {Mars 0.107 1.5}] + // By decreasing distance: [{Mars 0.107 1.5} {Earth 1 1} {Venus 0.815 0.7} {Mercury 0.055 0.4}] +} diff --git a/src/sort/example_multi_test.go b/src/sort/example_multi_test.go new file mode 100644 index 0000000..93f2d3e --- /dev/null +++ b/src/sort/example_multi_test.go @@ -0,0 +1,132 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "sort" +) + +// A Change is a record of source code changes, recording user, language, and delta size. +type Change struct { + user string + language string + lines int +} + +type lessFunc func(p1, p2 *Change) bool + +// multiSorter implements the Sort interface, sorting the changes within. +type multiSorter struct { + changes []Change + less []lessFunc +} + +// Sort sorts the argument slice according to the less functions passed to OrderedBy. +func (ms *multiSorter) Sort(changes []Change) { + ms.changes = changes + sort.Sort(ms) +} + +// OrderedBy returns a Sorter that sorts using the less functions, in order. +// Call its Sort method to sort the data. +func OrderedBy(less ...lessFunc) *multiSorter { + return &multiSorter{ + less: less, + } +} + +// Len is part of sort.Interface. +func (ms *multiSorter) Len() int { + return len(ms.changes) +} + +// Swap is part of sort.Interface. +func (ms *multiSorter) Swap(i, j int) { + ms.changes[i], ms.changes[j] = ms.changes[j], ms.changes[i] +} + +// Less is part of sort.Interface. It is implemented by looping along the +// less functions until it finds a comparison that discriminates between +// the two items (one is less than the other). Note that it can call the +// less functions twice per call. We could change the functions to return +// -1, 0, 1 and reduce the number of calls for greater efficiency: an +// exercise for the reader. +func (ms *multiSorter) Less(i, j int) bool { + p, q := &ms.changes[i], &ms.changes[j] + // Try all but the last comparison. + var k int + for k = 0; k < len(ms.less)-1; k++ { + less := ms.less[k] + switch { + case less(p, q): + // p < q, so we have a decision. + return true + case less(q, p): + // p > q, so we have a decision. + return false + } + // p == q; try the next comparison. + } + // All comparisons to here said "equal", so just return whatever + // the final comparison reports. + return ms.less[k](p, q) +} + +var changes = []Change{ + {"gri", "Go", 100}, + {"ken", "C", 150}, + {"glenda", "Go", 200}, + {"rsc", "Go", 200}, + {"r", "Go", 100}, + {"ken", "Go", 200}, + {"dmr", "C", 100}, + {"r", "C", 150}, + {"gri", "Smalltalk", 80}, +} + +// ExampleMultiKeys demonstrates a technique for sorting a struct type using different +// sets of multiple fields in the comparison. We chain together "Less" functions, each of +// which compares a single field. +func Example_sortMultiKeys() { + // Closures that order the Change structure. + user := func(c1, c2 *Change) bool { + return c1.user < c2.user + } + language := func(c1, c2 *Change) bool { + return c1.language < c2.language + } + increasingLines := func(c1, c2 *Change) bool { + return c1.lines < c2.lines + } + decreasingLines := func(c1, c2 *Change) bool { + return c1.lines > c2.lines // Note: > orders downwards. + } + + // Simple use: Sort by user. + OrderedBy(user).Sort(changes) + fmt.Println("By user:", changes) + + // More examples. + OrderedBy(user, increasingLines).Sort(changes) + fmt.Println("By user,<lines:", changes) + + OrderedBy(user, decreasingLines).Sort(changes) + fmt.Println("By user,>lines:", changes) + + OrderedBy(language, increasingLines).Sort(changes) + fmt.Println("By language,<lines:", changes) + + OrderedBy(language, increasingLines, user).Sort(changes) + fmt.Println("By language,<lines,user:", changes) + + // Output: + // By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}] + // By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}] + // By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}] + // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}] + // By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}] + +} diff --git a/src/sort/example_search_test.go b/src/sort/example_search_test.go new file mode 100644 index 0000000..856422a --- /dev/null +++ b/src/sort/example_search_test.go @@ -0,0 +1,74 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "sort" +) + +// This example demonstrates searching a list sorted in ascending order. +func ExampleSearch() { + a := []int{1, 3, 6, 10, 15, 21, 28, 36, 45, 55} + x := 6 + + i := sort.Search(len(a), func(i int) bool { return a[i] >= x }) + if i < len(a) && a[i] == x { + fmt.Printf("found %d at index %d in %v\n", x, i, a) + } else { + fmt.Printf("%d not found in %v\n", x, a) + } + // Output: + // found 6 at index 2 in [1 3 6 10 15 21 28 36 45 55] +} + +// This example demonstrates searching a list sorted in descending order. +// The approach is the same as searching a list in ascending order, +// but with the condition inverted. +func ExampleSearch_descendingOrder() { + a := []int{55, 45, 36, 28, 21, 15, 10, 6, 3, 1} + x := 6 + + i := sort.Search(len(a), func(i int) bool { return a[i] <= x }) + if i < len(a) && a[i] == x { + fmt.Printf("found %d at index %d in %v\n", x, i, a) + } else { + fmt.Printf("%d not found in %v\n", x, a) + } + // Output: + // found 6 at index 7 in [55 45 36 28 21 15 10 6 3 1] +} + +// This example demonstrates searching for float64 in a list sorted in ascending order. +func ExampleSearchFloat64s() { + a := []float64{1.0, 2.0, 3.3, 4.6, 6.1, 7.2, 8.0} + + x := 2.0 + i := sort.SearchFloat64s(a, x) + fmt.Printf("found %g at index %d in %v\n", x, i, a) + + x = 0.5 + i = sort.SearchFloat64s(a, x) + fmt.Printf("%g not found, can be inserted at index %d in %v\n", x, i, a) + // Output: + // found 2 at index 1 in [1 2 3.3 4.6 6.1 7.2 8] + // 0.5 not found, can be inserted at index 0 in [1 2 3.3 4.6 6.1 7.2 8] +} + +// This example demonstrates searching for int in a list sorted in ascending order. +func ExampleSearchInts() { + a := []int{1, 2, 3, 4, 6, 7, 8} + + x := 2 + i := sort.SearchInts(a, x) + fmt.Printf("found %d at index %d in %v\n", x, i, a) + + x = 5 + i = sort.SearchInts(a, x) + fmt.Printf("%d not found, can be inserted at index %d in %v\n", x, i, a) + // Output: + // found 2 at index 1 in [1 2 3 4 6 7 8] + // 5 not found, can be inserted at index 4 in [1 2 3 4 6 7 8] +} diff --git a/src/sort/example_test.go b/src/sort/example_test.go new file mode 100644 index 0000000..1f85dbc --- /dev/null +++ b/src/sort/example_test.go @@ -0,0 +1,122 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "math" + "sort" +) + +func ExampleInts() { + s := []int{5, 2, 6, 3, 1, 4} // unsorted + sort.Ints(s) + fmt.Println(s) + // Output: [1 2 3 4 5 6] +} + +func ExampleIntsAreSorted() { + s := []int{1, 2, 3, 4, 5, 6} // sorted ascending + fmt.Println(sort.IntsAreSorted(s)) + + s = []int{6, 5, 4, 3, 2, 1} // sorted descending + fmt.Println(sort.IntsAreSorted(s)) + + s = []int{3, 2, 4, 1, 5} // unsorted + fmt.Println(sort.IntsAreSorted(s)) + + // Output: true + // false + // false +} + +func ExampleFloat64s() { + s := []float64{5.2, -1.3, 0.7, -3.8, 2.6} // unsorted + sort.Float64s(s) + fmt.Println(s) + + s = []float64{math.Inf(1), math.NaN(), math.Inf(-1), 0.0} // unsorted + sort.Float64s(s) + fmt.Println(s) + + // Output: [-3.8 -1.3 0.7 2.6 5.2] + // [NaN -Inf 0 +Inf] +} + +func ExampleFloat64sAreSorted() { + s := []float64{0.7, 1.3, 2.6, 3.8, 5.2} // sorted ascending + fmt.Println(sort.Float64sAreSorted(s)) + + s = []float64{5.2, 3.8, 2.6, 1.3, 0.7} // sorted descending + fmt.Println(sort.Float64sAreSorted(s)) + + s = []float64{5.2, 1.3, 0.7, 3.8, 2.6} // unsorted + fmt.Println(sort.Float64sAreSorted(s)) + + // Output: true + // false + // false +} + +func ExampleReverse() { + s := []int{5, 2, 6, 3, 1, 4} // unsorted + sort.Sort(sort.Reverse(sort.IntSlice(s))) + fmt.Println(s) + // Output: [6 5 4 3 2 1] +} + +func ExampleSlice() { + people := []struct { + Name string + Age int + }{ + {"Gopher", 7}, + {"Alice", 55}, + {"Vera", 24}, + {"Bob", 75}, + } + sort.Slice(people, func(i, j int) bool { return people[i].Name < people[j].Name }) + fmt.Println("By name:", people) + + sort.Slice(people, func(i, j int) bool { return people[i].Age < people[j].Age }) + fmt.Println("By age:", people) + // Output: By name: [{Alice 55} {Bob 75} {Gopher 7} {Vera 24}] + // By age: [{Gopher 7} {Vera 24} {Alice 55} {Bob 75}] +} + +func ExampleSliceStable() { + + people := []struct { + Name string + Age int + }{ + {"Alice", 25}, + {"Elizabeth", 75}, + {"Alice", 75}, + {"Bob", 75}, + {"Alice", 75}, + {"Bob", 25}, + {"Colin", 25}, + {"Elizabeth", 25}, + } + + // Sort by name, preserving original order + sort.SliceStable(people, func(i, j int) bool { return people[i].Name < people[j].Name }) + fmt.Println("By name:", people) + + // Sort by age preserving name order + sort.SliceStable(people, func(i, j int) bool { return people[i].Age < people[j].Age }) + fmt.Println("By age,name:", people) + + // Output: By name: [{Alice 25} {Alice 75} {Alice 75} {Bob 75} {Bob 25} {Colin 25} {Elizabeth 75} {Elizabeth 25}] + // By age,name: [{Alice 25} {Bob 25} {Colin 25} {Elizabeth 25} {Alice 75} {Alice 75} {Bob 75} {Elizabeth 75}] +} + +func ExampleStrings() { + s := []string{"Go", "Bravo", "Gopher", "Alpha", "Grin", "Delta"} + sort.Strings(s) + fmt.Println(s) + // Output: [Alpha Bravo Delta Go Gopher Grin] +} diff --git a/src/sort/example_wrapper_test.go b/src/sort/example_wrapper_test.go new file mode 100644 index 0000000..cf6d74c --- /dev/null +++ b/src/sort/example_wrapper_test.go @@ -0,0 +1,77 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "sort" +) + +type Grams int + +func (g Grams) String() string { return fmt.Sprintf("%dg", int(g)) } + +type Organ struct { + Name string + Weight Grams +} + +type Organs []*Organ + +func (s Organs) Len() int { return len(s) } +func (s Organs) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +// ByName implements sort.Interface by providing Less and using the Len and +// Swap methods of the embedded Organs value. +type ByName struct{ Organs } + +func (s ByName) Less(i, j int) bool { return s.Organs[i].Name < s.Organs[j].Name } + +// ByWeight implements sort.Interface by providing Less and using the Len and +// Swap methods of the embedded Organs value. +type ByWeight struct{ Organs } + +func (s ByWeight) Less(i, j int) bool { return s.Organs[i].Weight < s.Organs[j].Weight } + +func Example_sortWrapper() { + s := []*Organ{ + {"brain", 1340}, + {"heart", 290}, + {"liver", 1494}, + {"pancreas", 131}, + {"prostate", 62}, + {"spleen", 162}, + } + + sort.Sort(ByWeight{s}) + fmt.Println("Organs by weight:") + printOrgans(s) + + sort.Sort(ByName{s}) + fmt.Println("Organs by name:") + printOrgans(s) + + // Output: + // Organs by weight: + // prostate (62g) + // pancreas (131g) + // spleen (162g) + // heart (290g) + // brain (1340g) + // liver (1494g) + // Organs by name: + // brain (1340g) + // heart (290g) + // liver (1494g) + // pancreas (131g) + // prostate (62g) + // spleen (162g) +} + +func printOrgans(s []*Organ) { + for _, o := range s { + fmt.Printf("%-8s (%v)\n", o.Name, o.Weight) + } +} diff --git a/src/sort/export_test.go b/src/sort/export_test.go new file mode 100644 index 0000000..2a3c21f --- /dev/null +++ b/src/sort/export_test.go @@ -0,0 +1,13 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +func Heapsort(data Interface) { + heapSort(data, 0, data.Len()) +} + +func ReverseRange(data Interface, a, b int) { + reverseRange(data, a, b) +} diff --git a/src/sort/gen_sort_variants.go b/src/sort/gen_sort_variants.go new file mode 100644 index 0000000..d738cac --- /dev/null +++ b/src/sort/gen_sort_variants.go @@ -0,0 +1,663 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore +// +build ignore + +// This program is run via "go generate" (via a directive in sort.go) +// to generate implementation variants of the underlying sorting algorithm. +// When passed the -generic flag it generates generic variants of sorting; +// otherwise it generates the non-generic variants used by the sort package. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "log" + "os" + "text/template" +) + +type Variant struct { + // Name is the variant name: should be unique among variants. + Name string + + // Path is the file path into which the generator will emit the code for this + // variant. + Path string + + // Package is the package this code will be emitted into. + Package string + + // Imports is the imports needed for this package. + Imports string + + // FuncSuffix is appended to all function names in this variant's code. All + // suffixes should be unique within a package. + FuncSuffix string + + // DataType is the type of the data parameter of functions in this variant's + // code. + DataType string + + // TypeParam is the optional type parameter for the function. + TypeParam string + + // ExtraParam is an extra parameter to pass to the function. Should begin with + // ", " to separate from other params. + ExtraParam string + + // ExtraArg is an extra argument to pass to calls between functions; typically + // it invokes ExtraParam. Should begin with ", " to separate from other args. + ExtraArg string + + // Funcs is a map of functions used from within the template. The following + // functions are expected to exist: + // + // Less (name, i, j): + // emits a comparison expression that checks if the value `name` at + // index `i` is smaller than at index `j`. + // + // Swap (name, i, j): + // emits a statement that performs a data swap between elements `i` and + // `j` of the value `name`. + Funcs template.FuncMap +} + +func main() { + genGeneric := flag.Bool("generic", false, "generate generic versions") + flag.Parse() + + if *genGeneric { + generate(&Variant{ + Name: "generic_ordered", + Path: "zsortordered.go", + Package: "slices", + Imports: "import \"constraints\"\n", + FuncSuffix: "Ordered", + TypeParam: "[E constraints.Ordered]", + ExtraParam: "", + ExtraArg: "", + DataType: "[]E", + Funcs: template.FuncMap{ + "Less": func(name, i, j string) string { + return fmt.Sprintf("(%s[%s] < %s[%s])", name, i, name, j) + }, + "Swap": func(name, i, j string) string { + return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i) + }, + }, + }) + + generate(&Variant{ + Name: "generic_func", + Path: "zsortanyfunc.go", + Package: "slices", + FuncSuffix: "LessFunc", + TypeParam: "[E any]", + ExtraParam: ", less func(a, b E) bool", + ExtraArg: ", less", + DataType: "[]E", + Funcs: template.FuncMap{ + "Less": func(name, i, j string) string { + return fmt.Sprintf("less(%s[%s], %s[%s])", name, i, name, j) + }, + "Swap": func(name, i, j string) string { + return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i) + }, + }, + }) + } else { + generate(&Variant{ + Name: "interface", + Path: "zsortinterface.go", + Package: "sort", + Imports: "", + FuncSuffix: "", + TypeParam: "", + ExtraParam: "", + ExtraArg: "", + DataType: "Interface", + Funcs: template.FuncMap{ + "Less": func(name, i, j string) string { + return fmt.Sprintf("%s.Less(%s, %s)", name, i, j) + }, + "Swap": func(name, i, j string) string { + return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j) + }, + }, + }) + + generate(&Variant{ + Name: "func", + Path: "zsortfunc.go", + Package: "sort", + Imports: "", + FuncSuffix: "_func", + TypeParam: "", + ExtraParam: "", + ExtraArg: "", + DataType: "lessSwap", + Funcs: template.FuncMap{ + "Less": func(name, i, j string) string { + return fmt.Sprintf("%s.Less(%s, %s)", name, i, j) + }, + "Swap": func(name, i, j string) string { + return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j) + }, + }, + }) + } +} + +// generate generates the code for variant `v` into a file named by `v.Path`. +func generate(v *Variant) { + // Parse templateCode anew for each variant because Parse requires Funcs to be + // registered, and it helps type-check the funcs. + tmpl, err := template.New("gen").Funcs(v.Funcs).Parse(templateCode) + if err != nil { + log.Fatal("template Parse:", err) + } + + var out bytes.Buffer + err = tmpl.Execute(&out, v) + if err != nil { + log.Fatal("template Execute:", err) + } + + formatted, err := format.Source(out.Bytes()) + if err != nil { + log.Fatal("format:", err) + } + + if err := os.WriteFile(v.Path, formatted, 0644); err != nil { + log.Fatal("WriteFile:", err) + } +} + +var templateCode = `// Code generated by gen_sort_variants.go; DO NOT EDIT. + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package {{.Package}} + +{{.Imports}} + +// insertionSort{{.FuncSuffix}} sorts data[a:b] using insertion sort. +func insertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { + for i := a + 1; i < b; i++ { + for j := i; j > a && {{Less "data" "j" "j-1"}}; j-- { + {{Swap "data" "j" "j-1"}} + } + } +} + +// siftDown{{.FuncSuffix}} implements the heap property on data[lo:hi]. +// first is an offset into the array where the root of the heap lies. +func siftDown{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, lo, hi, first int {{.ExtraParam}}) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && {{Less "data" "first+child" "first+child+1"}} { + child++ + } + if !{{Less "data" "first+root" "first+child"}} { + return + } + {{Swap "data" "first+root" "first+child"}} + root = child + } +} + +func heapSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown{{.FuncSuffix}}(data, i, hi, first {{.ExtraArg}}) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + {{Swap "data" "first" "first+i"}} + siftDown{{.FuncSuffix}}(data, lo, i, first {{.ExtraArg}}) + } +} + +// pdqsort{{.FuncSuffix}} sorts data[a:b]. +// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort. +// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf +// C++ implementation: https://github.com/orlp/pdqsort +// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ +// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort. +func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, limit int {{.ExtraParam}}) { + const maxInsertion = 12 + + var ( + wasBalanced = true // whether the last partitioning was reasonably balanced + wasPartitioned = true // whether the slice was already partitioned + ) + + for { + length := b - a + + if length <= maxInsertion { + insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + return + } + + // Fall back to heapsort if too many bad choices were made. + if limit == 0 { + heapSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + return + } + + // If the last partitioning was imbalanced, we need to breaking patterns. + if !wasBalanced { + breakPatterns{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + limit-- + } + + pivot, hint := choosePivot{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + if hint == decreasingHint { + reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + // The chosen pivot was pivot-a elements after the start of the array. + // After reversing it is pivot-a elements before the end of the array. + // The idea came from Rust's implementation. + pivot = (b - 1) - (pivot - a) + hint = increasingHint + } + + // The slice is likely already sorted. + if wasBalanced && wasPartitioned && hint == increasingHint { + if partialInsertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) { + return + } + } + + // Probably the slice contains many duplicate elements, partition the slice into + // elements equal to and elements greater than the pivot. + if a > 0 && !{{Less "data" "a-1" "pivot"}} { + mid := partitionEqual{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}}) + a = mid + continue + } + + mid, alreadyPartitioned := partition{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}}) + wasPartitioned = alreadyPartitioned + + leftLen, rightLen := mid-a, b-mid + balanceThreshold := length / 8 + if leftLen < rightLen { + wasBalanced = leftLen >= balanceThreshold + pdqsort{{.FuncSuffix}}(data, a, mid, limit {{.ExtraArg}}) + a = mid + 1 + } else { + wasBalanced = rightLen >= balanceThreshold + pdqsort{{.FuncSuffix}}(data, mid+1, b, limit {{.ExtraArg}}) + b = mid + } + } +} + +// partition{{.FuncSuffix}} does one quicksort partition. +// Let p = data[pivot] +// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot. +// On return, data[newpivot] = p +func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int, alreadyPartitioned bool) { + {{Swap "data" "a" "pivot"}} + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for i <= j && {{Less "data" "i" "a"}} { + i++ + } + for i <= j && !{{Less "data" "j" "a"}} { + j-- + } + if i > j { + {{Swap "data" "j" "a"}} + return j, true + } + {{Swap "data" "i" "j"}} + i++ + j-- + + for { + for i <= j && {{Less "data" "i" "a"}} { + i++ + } + for i <= j && !{{Less "data" "j" "a"}} { + j-- + } + if i > j { + break + } + {{Swap "data" "i" "j"}} + i++ + j-- + } + {{Swap "data" "j" "a"}} + return j, false +} + +// partitionEqual{{.FuncSuffix}} partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot]. +// It assumed that data[a:b] does not contain elements smaller than the data[pivot]. +func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int) { + {{Swap "data" "a" "pivot"}} + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for { + for i <= j && !{{Less "data" "a" "i"}} { + i++ + } + for i <= j && {{Less "data" "a" "j"}} { + j-- + } + if i > j { + break + } + {{Swap "data" "i" "j"}} + i++ + j-- + } + return i +} + +// partialInsertionSort{{.FuncSuffix}} partially sorts a slice, returns true if the slice is sorted at the end. +func partialInsertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) bool { + const ( + maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted + shortestShifting = 50 // don't shift any elements on short arrays + ) + i := a + 1 + for j := 0; j < maxSteps; j++ { + for i < b && !{{Less "data" "i" "i-1"}} { + i++ + } + + if i == b { + return true + } + + if b-a < shortestShifting { + return false + } + + {{Swap "data" "i" "i-1"}} + + // Shift the smaller one to the left. + if i-a >= 2 { + for j := i - 1; j >= 1; j-- { + if !{{Less "data" "j" "j-1"}} { + break + } + {{Swap "data" "j" "j-1"}} + } + } + // Shift the greater one to the right. + if b-i >= 2 { + for j := i + 1; j < b; j++ { + if !{{Less "data" "j" "j-1"}} { + break + } + {{Swap "data" "j" "j-1"}} + } + } + } + return false +} + +// breakPatterns{{.FuncSuffix}} scatters some elements around in an attempt to break some patterns +// that might cause imbalanced partitions in quicksort. +func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { + length := b - a + if length >= 8 { + random := xorshift(length) + modulus := nextPowerOfTwo(length) + + for idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++ { + other := int(uint(random.Next()) & (modulus - 1)) + if other >= length { + other -= length + } + {{Swap "data" "idx" "a+other"}} + } + } +} + +// choosePivot{{.FuncSuffix}} chooses a pivot in data[a:b]. +// +// [0,8): chooses a static pivot. +// [8,shortestNinther): uses the simple median-of-three method. +// [shortestNinther,∞): uses the Tukey ninther method. +func choosePivot{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) (pivot int, hint sortedHint) { + const ( + shortestNinther = 50 + maxSwaps = 4 * 3 + ) + + l := b - a + + var ( + swaps int + i = a + l/4*1 + j = a + l/4*2 + k = a + l/4*3 + ) + + if l >= 8 { + if l >= shortestNinther { + // Tukey ninther method, the idea came from Rust's implementation. + i = medianAdjacent{{.FuncSuffix}}(data, i, &swaps {{.ExtraArg}}) + j = medianAdjacent{{.FuncSuffix}}(data, j, &swaps {{.ExtraArg}}) + k = medianAdjacent{{.FuncSuffix}}(data, k, &swaps {{.ExtraArg}}) + } + // Find the median among i, j, k and stores it into j. + j = median{{.FuncSuffix}}(data, i, j, k, &swaps {{.ExtraArg}}) + } + + switch swaps { + case 0: + return j, increasingHint + case maxSwaps: + return j, decreasingHint + default: + return j, unknownHint + } +} + +// order2{{.FuncSuffix}} returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. +func order2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) { + if {{Less "data" "b" "a"}} { + *swaps++ + return b, a + } + return a, b +} + +// median{{.FuncSuffix}} returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. +func median{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, c int, swaps *int {{.ExtraParam}}) int { + a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}}) + b, c = order2{{.FuncSuffix}}(data, b, c, swaps {{.ExtraArg}}) + a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}}) + return b +} + +// medianAdjacent{{.FuncSuffix}} finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. +func medianAdjacent{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a int, swaps *int {{.ExtraParam}}) int { + return median{{.FuncSuffix}}(data, a-1, a, a+1, swaps {{.ExtraArg}}) +} + +func reverseRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { + i := a + j := b - 1 + for i < j { + {{Swap "data" "i" "j"}} + i++ + j-- + } +} + +func swapRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, n int {{.ExtraParam}}) { + for i := 0; i < n; i++ { + {{Swap "data" "a+i" "b+i"}} + } +} + +func stable{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, n int {{.ExtraParam}}) { + blockSize := 20 // must be > 0 + a, b := 0, blockSize + for b <= n { + insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) + a = b + b += blockSize + } + insertionSort{{.FuncSuffix}}(data, a, n {{.ExtraArg}}) + + for blockSize < n { + a, b = 0, 2*blockSize + for b <= n { + symMerge{{.FuncSuffix}}(data, a, a+blockSize, b {{.ExtraArg}}) + a = b + b += 2 * blockSize + } + if m := a + blockSize; m < n { + symMerge{{.FuncSuffix}}(data, a, m, n {{.ExtraArg}}) + } + blockSize *= 2 + } +} + +// symMerge{{.FuncSuffix}} merges the two sorted subsequences data[a:m] and data[m:b] using +// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum +// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz +// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in +// Computer Science, pages 714-723. Springer, 2004. +// +// Let M = m-a and N = b-n. Wolog M < N. +// The recursion depth is bound by ceil(log(N+M)). +// The algorithm needs O(M*log(N/M + 1)) calls to data.Less. +// The algorithm needs O((M+N)*log(M)) calls to data.Swap. +// +// The paper gives O((M+N)*log(M)) as the number of assignments assuming a +// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation +// in the paper carries through for Swap operations, especially as the block +// swapping rotate uses only O(M+N) Swaps. +// +// symMerge assumes non-degenerate arguments: a < m && m < b. +// Having the caller check this condition eliminates many leaf recursion calls, +// which improves performance. +func symMerge{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) { + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[a] into data[m:b] + // if data[a:m] only contains one element. + if m-a == 1 { + // Use binary search to find the lowest index i + // such that data[i] >= data[a] for m <= i < b. + // Exit the search loop with i == b in case no such index exists. + i := m + j := b + for i < j { + h := int(uint(i+j) >> 1) + if {{Less "data" "h" "a"}} { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[a] reaches the position before i. + for k := a; k < i-1; k++ { + {{Swap "data" "k" "k+1"}} + } + return + } + + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[m] into data[a:m] + // if data[m:b] only contains one element. + if b-m == 1 { + // Use binary search to find the lowest index i + // such that data[i] > data[m] for a <= i < m. + // Exit the search loop with i == m in case no such index exists. + i := a + j := m + for i < j { + h := int(uint(i+j) >> 1) + if !{{Less "data" "m" "h"}} { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[m] reaches the position i. + for k := m; k > i; k-- { + {{Swap "data" "k" "k-1"}} + } + return + } + + mid := int(uint(a+b) >> 1) + n := mid + m + var start, r int + if m > mid { + start = n - b + r = mid + } else { + start = a + r = m + } + p := n - 1 + + for start < r { + c := int(uint(start+r) >> 1) + if !{{Less "data" "p-c" "c"}} { + start = c + 1 + } else { + r = c + } + } + + end := n - start + if start < m && m < end { + rotate{{.FuncSuffix}}(data, start, m, end {{.ExtraArg}}) + } + if a < start && start < mid { + symMerge{{.FuncSuffix}}(data, a, start, mid {{.ExtraArg}}) + } + if mid < end && end < b { + symMerge{{.FuncSuffix}}(data, mid, end, b {{.ExtraArg}}) + } +} + +// rotate{{.FuncSuffix}} rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data: +// Data of the form 'x u v y' is changed to 'x v u y'. +// rotate performs at most b-a many calls to data.Swap, +// and it assumes non-degenerate arguments: a < m && m < b. +func rotate{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) { + i := m - a + j := b - m + + for i != j { + if i > j { + swapRange{{.FuncSuffix}}(data, m-i, m, j {{.ExtraArg}}) + i -= j + } else { + swapRange{{.FuncSuffix}}(data, m-i, m+j-i, i {{.ExtraArg}}) + j -= i + } + } + // i == j + swapRange{{.FuncSuffix}}(data, m-i, m, i {{.ExtraArg}}) +} +` diff --git a/src/sort/search.go b/src/sort/search.go new file mode 100644 index 0000000..874e408 --- /dev/null +++ b/src/sort/search.go @@ -0,0 +1,150 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements binary search. + +package sort + +// Search uses binary search to find and return the smallest index i +// in [0, n) at which f(i) is true, assuming that on the range [0, n), +// f(i) == true implies f(i+1) == true. That is, Search requires that +// f is false for some (possibly empty) prefix of the input range [0, n) +// and then true for the (possibly empty) remainder; Search returns +// the first true index. If there is no such index, Search returns n. +// (Note that the "not found" return value is not -1 as in, for instance, +// strings.Index.) +// Search calls f(i) only for i in the range [0, n). +// +// A common use of Search is to find the index i for a value x in +// a sorted, indexable data structure such as an array or slice. +// In this case, the argument f, typically a closure, captures the value +// to be searched for, and how the data structure is indexed and +// ordered. +// +// For instance, given a slice data sorted in ascending order, +// the call Search(len(data), func(i int) bool { return data[i] >= 23 }) +// returns the smallest index i such that data[i] >= 23. If the caller +// wants to find whether 23 is in the slice, it must test data[i] == 23 +// separately. +// +// Searching data sorted in descending order would use the <= +// operator instead of the >= operator. +// +// To complete the example above, the following code tries to find the value +// x in an integer slice data sorted in ascending order: +// +// x := 23 +// i := sort.Search(len(data), func(i int) bool { return data[i] >= x }) +// if i < len(data) && data[i] == x { +// // x is present at data[i] +// } else { +// // x is not present in data, +// // but i is the index where it would be inserted. +// } +// +// As a more whimsical example, this program guesses your number: +// +// func GuessingGame() { +// var s string +// fmt.Printf("Pick an integer from 0 to 100.\n") +// answer := sort.Search(100, func(i int) bool { +// fmt.Printf("Is your number <= %d? ", i) +// fmt.Scanf("%s", &s) +// return s != "" && s[0] == 'y' +// }) +// fmt.Printf("Your number is %d.\n", answer) +// } +func Search(n int, f func(int) bool) int { + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if !f(h) { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i. + return i +} + +// Find uses binary search to find and return the smallest index i in [0, n) +// at which cmp(i) <= 0. If there is no such index i, Find returns i = n. +// The found result is true if i < n and cmp(i) == 0. +// Find calls cmp(i) only for i in the range [0, n). +// +// To permit binary search, Find requires that cmp(i) > 0 for a leading +// prefix of the range, cmp(i) == 0 in the middle, and cmp(i) < 0 for +// the final suffix of the range. (Each subrange could be empty.) +// The usual way to establish this condition is to interpret cmp(i) +// as a comparison of a desired target value t against entry i in an +// underlying indexed data structure x, returning <0, 0, and >0 +// when t < x[i], t == x[i], and t > x[i], respectively. +// +// For example, to look for a particular string in a sorted, random-access +// list of strings: +// +// i, found := sort.Find(x.Len(), func(i int) int { +// return strings.Compare(target, x.At(i)) +// }) +// if found { +// fmt.Printf("found %s at entry %d\n", target, i) +// } else { +// fmt.Printf("%s not found, would insert at %d", target, i) +// } +func Find(n int, cmp func(int) int) (i int, found bool) { + // The invariants here are similar to the ones in Search. + // Define cmp(-1) > 0 and cmp(n) <= 0 + // Invariant: cmp(i-1) > 0, cmp(j) <= 0 + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if cmp(h) > 0 { + i = h + 1 // preserves cmp(i-1) > 0 + } else { + j = h // preserves cmp(j) <= 0 + } + } + // i == j, cmp(i-1) > 0 and cmp(j) <= 0 + return i, i < n && cmp(i) == 0 +} + +// Convenience wrappers for common cases. + +// SearchInts searches for x in a sorted slice of ints and returns the index +// as specified by Search. The return value is the index to insert x if x is +// not present (it could be len(a)). +// The slice must be sorted in ascending order. +func SearchInts(a []int, x int) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + +// SearchFloat64s searches for x in a sorted slice of float64s and returns the index +// as specified by Search. The return value is the index to insert x if x is not +// present (it could be len(a)). +// The slice must be sorted in ascending order. +func SearchFloat64s(a []float64, x float64) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + +// SearchStrings searches for x in a sorted slice of strings and returns the index +// as specified by Search. The return value is the index to insert x if x is not +// present (it could be len(a)). +// The slice must be sorted in ascending order. +func SearchStrings(a []string, x string) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + +// Search returns the result of applying SearchInts to the receiver and x. +func (p IntSlice) Search(x int) int { return SearchInts(p, x) } + +// Search returns the result of applying SearchFloat64s to the receiver and x. +func (p Float64Slice) Search(x float64) int { return SearchFloat64s(p, x) } + +// Search returns the result of applying SearchStrings to the receiver and x. +func (p StringSlice) Search(x string) int { return SearchStrings(p, x) } diff --git a/src/sort/search_test.go b/src/sort/search_test.go new file mode 100644 index 0000000..49813ea --- /dev/null +++ b/src/sort/search_test.go @@ -0,0 +1,266 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "runtime" + . "sort" + stringspkg "strings" + "testing" +) + +func f(a []int, x int) func(int) bool { + return func(i int) bool { + return a[i] >= x + } +} + +var data = []int{0: -10, 1: -5, 2: 0, 3: 1, 4: 2, 5: 3, 6: 5, 7: 7, 8: 11, 9: 100, 10: 100, 11: 100, 12: 1000, 13: 10000} + +var tests = []struct { + name string + n int + f func(int) bool + i int +}{ + {"empty", 0, nil, 0}, + {"1 1", 1, func(i int) bool { return i >= 1 }, 1}, + {"1 true", 1, func(i int) bool { return true }, 0}, + {"1 false", 1, func(i int) bool { return false }, 1}, + {"1e9 991", 1e9, func(i int) bool { return i >= 991 }, 991}, + {"1e9 true", 1e9, func(i int) bool { return true }, 0}, + {"1e9 false", 1e9, func(i int) bool { return false }, 1e9}, + {"data -20", len(data), f(data, -20), 0}, + {"data -10", len(data), f(data, -10), 0}, + {"data -9", len(data), f(data, -9), 1}, + {"data -6", len(data), f(data, -6), 1}, + {"data -5", len(data), f(data, -5), 1}, + {"data 3", len(data), f(data, 3), 5}, + {"data 11", len(data), f(data, 11), 8}, + {"data 99", len(data), f(data, 99), 9}, + {"data 100", len(data), f(data, 100), 9}, + {"data 101", len(data), f(data, 101), 12}, + {"data 10000", len(data), f(data, 10000), 13}, + {"data 10001", len(data), f(data, 10001), 14}, + {"descending a", 7, func(i int) bool { return []int{99, 99, 59, 42, 7, 0, -1, -1}[i] <= 7 }, 4}, + {"descending 7", 1e9, func(i int) bool { return 1e9-i <= 7 }, 1e9 - 7}, + {"overflow", 2e9, func(i int) bool { return false }, 2e9}, +} + +func TestSearch(t *testing.T) { + for _, e := range tests { + i := Search(e.n, e.f) + if i != e.i { + t.Errorf("%s: expected index %d; got %d", e.name, e.i, i) + } + } +} + +func TestFind(t *testing.T) { + str1 := []string{"foo"} + str2 := []string{"ab", "ca"} + str3 := []string{"mo", "qo", "vo"} + str4 := []string{"ab", "ad", "ca", "xy"} + + // slice with repeating elements + strRepeats := []string{"ba", "ca", "da", "da", "da", "ka", "ma", "ma", "ta"} + + // slice with all element equal + strSame := []string{"xx", "xx", "xx"} + + tests := []struct { + data []string + target string + wantPos int + wantFound bool + }{ + {[]string{}, "foo", 0, false}, + {[]string{}, "", 0, false}, + + {str1, "foo", 0, true}, + {str1, "bar", 0, false}, + {str1, "zx", 1, false}, + + {str2, "aa", 0, false}, + {str2, "ab", 0, true}, + {str2, "ad", 1, false}, + {str2, "ca", 1, true}, + {str2, "ra", 2, false}, + + {str3, "bb", 0, false}, + {str3, "mo", 0, true}, + {str3, "nb", 1, false}, + {str3, "qo", 1, true}, + {str3, "tr", 2, false}, + {str3, "vo", 2, true}, + {str3, "xr", 3, false}, + + {str4, "aa", 0, false}, + {str4, "ab", 0, true}, + {str4, "ac", 1, false}, + {str4, "ad", 1, true}, + {str4, "ax", 2, false}, + {str4, "ca", 2, true}, + {str4, "cc", 3, false}, + {str4, "dd", 3, false}, + {str4, "xy", 3, true}, + {str4, "zz", 4, false}, + + {strRepeats, "da", 2, true}, + {strRepeats, "db", 5, false}, + {strRepeats, "ma", 6, true}, + {strRepeats, "mb", 8, false}, + + {strSame, "xx", 0, true}, + {strSame, "ab", 0, false}, + {strSame, "zz", 3, false}, + } + + for _, tt := range tests { + t.Run(tt.target, func(t *testing.T) { + cmp := func(i int) int { + return stringspkg.Compare(tt.target, tt.data[i]) + } + + pos, found := Find(len(tt.data), cmp) + if pos != tt.wantPos || found != tt.wantFound { + t.Errorf("Find got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound) + } + }) + } +} + +// log2 computes the binary logarithm of x, rounded up to the next integer. +// (log2(0) == 0, log2(1) == 0, log2(2) == 1, log2(3) == 2, etc.) +func log2(x int) int { + n := 0 + for p := 1; p < x; p += p { + // p == 2**n + n++ + } + // p/2 < x <= p == 2**n + return n +} + +func TestSearchEfficiency(t *testing.T) { + n := 100 + step := 1 + for exp := 2; exp < 10; exp++ { + // n == 10**exp + // step == 10**(exp-2) + max := log2(n) + for x := 0; x < n; x += step { + count := 0 + i := Search(n, func(i int) bool { count++; return i >= x }) + if i != x { + t.Errorf("n = %d: expected index %d; got %d", n, x, i) + } + if count > max { + t.Errorf("n = %d, x = %d: expected <= %d calls; got %d", n, x, max, count) + } + } + n *= 10 + step *= 10 + } +} + +// Smoke tests for convenience wrappers - not comprehensive. + +var fdata = []float64{0: -3.14, 1: 0, 2: 1, 3: 2, 4: 1000.7} +var sdata = []string{0: "f", 1: "foo", 2: "foobar", 3: "x"} + +var wrappertests = []struct { + name string + result int + i int +}{ + {"SearchInts", SearchInts(data, 11), 8}, + {"SearchFloat64s", SearchFloat64s(fdata, 2.1), 4}, + {"SearchStrings", SearchStrings(sdata, ""), 0}, + {"IntSlice.Search", IntSlice(data).Search(0), 2}, + {"Float64Slice.Search", Float64Slice(fdata).Search(2.0), 3}, + {"StringSlice.Search", StringSlice(sdata).Search("x"), 3}, +} + +func TestSearchWrappers(t *testing.T) { + for _, e := range wrappertests { + if e.result != e.i { + t.Errorf("%s: expected index %d; got %d", e.name, e.i, e.result) + } + } +} + +func runSearchWrappers() { + SearchInts(data, 11) + SearchFloat64s(fdata, 2.1) + SearchStrings(sdata, "") + IntSlice(data).Search(0) + Float64Slice(fdata).Search(2.0) + StringSlice(sdata).Search("x") +} + +func TestSearchWrappersDontAlloc(t *testing.T) { + if testing.Short() { + t.Skip("skipping malloc count in short mode") + } + if runtime.GOMAXPROCS(0) > 1 { + t.Skip("skipping; GOMAXPROCS>1") + } + allocs := testing.AllocsPerRun(100, runSearchWrappers) + if allocs != 0 { + t.Errorf("expected no allocs for runSearchWrappers, got %v", allocs) + } +} + +func BenchmarkSearchWrappers(b *testing.B) { + for i := 0; i < b.N; i++ { + runSearchWrappers() + } +} + +// Abstract exhaustive test: all sizes up to 100, +// all possible return values. If there are any small +// corner cases, this test exercises them. +func TestSearchExhaustive(t *testing.T) { + for size := 0; size <= 100; size++ { + for targ := 0; targ <= size; targ++ { + i := Search(size, func(i int) bool { return i >= targ }) + if i != targ { + t.Errorf("Search(%d, %d) = %d", size, targ, i) + } + } + } +} + +// Abstract exhaustive test for Find. +func TestFindExhaustive(t *testing.T) { + // Test Find for different sequence sizes and search targets. + // For each size, we have a (unmaterialized) sequence of integers: + // 2,4...size*2 + // And we're looking for every possible integer between 1 and size*2 + 1. + for size := 0; size <= 100; size++ { + for x := 1; x <= size*2+1; x++ { + var wantFound bool + var wantPos int + + cmp := func(i int) int { + // Encodes the unmaterialized sequence with elem[i] == (i+1)*2 + return x - (i+1)*2 + } + pos, found := Find(size, cmp) + + if x%2 == 0 { + wantPos = x/2 - 1 + wantFound = true + } else { + wantPos = x / 2 + wantFound = false + } + if found != wantFound || pos != wantPos { + t.Errorf("Find(%d, %d): got (%v, %v), want (%v, %v)", size, x, pos, found, wantPos, wantFound) + } + } + } +} diff --git a/src/sort/slice.go b/src/sort/slice.go new file mode 100644 index 0000000..d0b2102 --- /dev/null +++ b/src/sort/slice.go @@ -0,0 +1,52 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +import ( + "internal/reflectlite" + "math/bits" +) + +// Slice sorts the slice x given the provided less function. +// It panics if x is not a slice. +// +// The sort is not guaranteed to be stable: equal elements +// may be reversed from their original order. +// For a stable sort, use SliceStable. +// +// The less function must satisfy the same requirements as +// the Interface type's Less method. +func Slice(x any, less func(i, j int) bool) { + rv := reflectlite.ValueOf(x) + swap := reflectlite.Swapper(x) + length := rv.Len() + limit := bits.Len(uint(length)) + pdqsort_func(lessSwap{less, swap}, 0, length, limit) +} + +// SliceStable sorts the slice x using the provided less +// function, keeping equal elements in their original order. +// It panics if x is not a slice. +// +// The less function must satisfy the same requirements as +// the Interface type's Less method. +func SliceStable(x any, less func(i, j int) bool) { + rv := reflectlite.ValueOf(x) + swap := reflectlite.Swapper(x) + stable_func(lessSwap{less, swap}, rv.Len()) +} + +// SliceIsSorted reports whether the slice x is sorted according to the provided less function. +// It panics if x is not a slice. +func SliceIsSorted(x any, less func(i, j int) bool) bool { + rv := reflectlite.ValueOf(x) + n := rv.Len() + for i := n - 1; i > 0; i-- { + if less(i, i-1) { + return false + } + } + return true +} diff --git a/src/sort/sort.go b/src/sort/sort.go new file mode 100644 index 0000000..68e2f0d --- /dev/null +++ b/src/sort/sort.go @@ -0,0 +1,262 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen_sort_variants.go + +// Package sort provides primitives for sorting slices and user-defined collections. +package sort + +import "math/bits" + +// An implementation of Interface can be sorted by the routines in this package. +// The methods refer to elements of the underlying collection by integer index. +type Interface interface { + // Len is the number of elements in the collection. + Len() int + + // Less reports whether the element with index i + // must sort before the element with index j. + // + // If both Less(i, j) and Less(j, i) are false, + // then the elements at index i and j are considered equal. + // Sort may place equal elements in any order in the final result, + // while Stable preserves the original input order of equal elements. + // + // Less must describe a transitive ordering: + // - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well. + // - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well. + // + // Note that floating-point comparison (the < operator on float32 or float64 values) + // is not a transitive ordering when not-a-number (NaN) values are involved. + // See Float64Slice.Less for a correct implementation for floating-point values. + Less(i, j int) bool + + // Swap swaps the elements with indexes i and j. + Swap(i, j int) +} + +// Sort sorts data in ascending order as determined by the Less method. +// It makes one call to data.Len to determine n and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func Sort(data Interface) { + n := data.Len() + if n <= 1 { + return + } + limit := bits.Len(uint(n)) + pdqsort(data, 0, n, limit) +} + +type sortedHint int // hint for pdqsort when choosing the pivot + +const ( + unknownHint sortedHint = iota + increasingHint + decreasingHint +) + +// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf +type xorshift uint64 + +func (r *xorshift) Next() uint64 { + *r ^= *r << 13 + *r ^= *r >> 17 + *r ^= *r << 5 + return uint64(*r) +} + +func nextPowerOfTwo(length int) uint { + shift := uint(bits.Len(uint(length))) + return uint(1 << shift) +} + +// lessSwap is a pair of Less and Swap function for use with the +// auto-generated func-optimized variant of sort.go in +// zfuncversion.go. +type lessSwap struct { + Less func(i, j int) bool + Swap func(i, j int) +} + +type reverse struct { + // This embedded Interface permits Reverse to use the methods of + // another Interface implementation. + Interface +} + +// Less returns the opposite of the embedded implementation's Less method. +func (r reverse) Less(i, j int) bool { + return r.Interface.Less(j, i) +} + +// Reverse returns the reverse order for data. +func Reverse(data Interface) Interface { + return &reverse{data} +} + +// IsSorted reports whether data is sorted. +func IsSorted(data Interface) bool { + n := data.Len() + for i := n - 1; i > 0; i-- { + if data.Less(i, i-1) { + return false + } + } + return true +} + +// Convenience types for common cases + +// IntSlice attaches the methods of Interface to []int, sorting in increasing order. +type IntSlice []int + +func (x IntSlice) Len() int { return len(x) } +func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] } +func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x IntSlice) Sort() { Sort(x) } + +// Float64Slice implements Interface for a []float64, sorting in increasing order, +// with not-a-number (NaN) values ordered before other values. +type Float64Slice []float64 + +func (x Float64Slice) Len() int { return len(x) } + +// Less reports whether x[i] should be ordered before x[j], as required by the sort Interface. +// Note that floating-point comparison by itself is not a transitive relation: it does not +// report a consistent ordering for not-a-number (NaN) values. +// This implementation of Less places NaN values before any others, by using: +// +// x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j])) +func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) } +func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// isNaN is a copy of math.IsNaN to avoid a dependency on the math package. +func isNaN(f float64) bool { + return f != f +} + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x Float64Slice) Sort() { Sort(x) } + +// StringSlice attaches the methods of Interface to []string, sorting in increasing order. +type StringSlice []string + +func (x StringSlice) Len() int { return len(x) } +func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] } +func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x StringSlice) Sort() { Sort(x) } + +// Convenience wrappers for common cases + +// Ints sorts a slice of ints in increasing order. +func Ints(x []int) { Sort(IntSlice(x)) } + +// Float64s sorts a slice of float64s in increasing order. +// Not-a-number (NaN) values are ordered before other values. +func Float64s(x []float64) { Sort(Float64Slice(x)) } + +// Strings sorts a slice of strings in increasing order. +func Strings(x []string) { Sort(StringSlice(x)) } + +// IntsAreSorted reports whether the slice x is sorted in increasing order. +func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) } + +// Float64sAreSorted reports whether the slice x is sorted in increasing order, +// with not-a-number (NaN) values before any other values. +func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) } + +// StringsAreSorted reports whether the slice x is sorted in increasing order. +func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) } + +// Notes on stable sorting: +// The used algorithms are simple and provable correct on all input and use +// only logarithmic additional stack space. They perform well if compared +// experimentally to other stable in-place sorting algorithms. +// +// Remarks on other algorithms evaluated: +// - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: +// Not faster. +// - GCC's __rotate for block rotations: Not faster. +// - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen +// and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: +// The given algorithms are in-place, number of Swap and Assignments +// grow as n log n but the algorithm is not stable. +// - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and +// V. Raman in Algorithmica (1996) 16, 115-160: +// This algorithm either needs additional 2n bits or works only if there +// are enough different elements available to encode some permutations +// which have to be undone later (so not stable on any input). +// - All the optimal in-place sorting/merging algorithms I found are either +// unstable or rely on enough different elements in each step to encode the +// performed block rearrangements. See also "In-Place Merging Algorithms", +// Denham Coates-Evely, Department of Computer Science, Kings College, +// January 2004 and the references in there. +// - Often "optimal" algorithms are optimal in the number of assignments +// but Interface has only Swap as operation. + +// Stable sorts data in ascending order as determined by the Less method, +// while keeping the original order of equal elements. +// +// It makes one call to data.Len to determine n, O(n*log(n)) calls to +// data.Less and O(n*log(n)*log(n)) calls to data.Swap. +func Stable(data Interface) { + stable(data, data.Len()) +} + +/* +Complexity of Stable Sorting + + +Complexity of block swapping rotation + +Each Swap puts one new element into its correct, final position. +Elements which reach their final position are no longer moved. +Thus block swapping rotation needs |u|+|v| calls to Swaps. +This is best possible as each element might need a move. + +Pay attention when comparing to other optimal algorithms which +typically count the number of assignments instead of swaps: +E.g. the optimal algorithm of Dudzinski and Dydek for in-place +rotations uses O(u + v + gcd(u,v)) assignments which is +better than our O(3 * (u+v)) as gcd(u,v) <= u. + + +Stable sorting by SymMerge and BlockSwap rotations + +SymMerg complexity for same size input M = N: +Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) +Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) + +(The following argument does not fuzz over a missing -1 or +other stuff which does not impact the final result). + +Let n = data.Len(). Assume n = 2^k. + +Plain merge sort performs log(n) = k iterations. +On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. + +Thus iteration i of merge sort performs: +Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) +Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) + +In total k = log(n) iterations are performed; so in total: +Calls to Less O(log(n) * n) +Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) + = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) + + +Above results should generalize to arbitrary n = 2^k + p +and should not be influenced by the initial insertion sort phase: +Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of +size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. +Merge sort iterations start at i = log(bs). With t = log(bs) constant: +Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) + = O(n * log(n)) +Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) + +*/ diff --git a/src/sort/sort_test.go b/src/sort/sort_test.go new file mode 100644 index 0000000..862bba2 --- /dev/null +++ b/src/sort/sort_test.go @@ -0,0 +1,744 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort_test + +import ( + "fmt" + "internal/testenv" + "math" + "math/rand" + . "sort" + "strconv" + stringspkg "strings" + "testing" +) + +var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586} +var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8} +var strings = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"} + +func TestSortIntSlice(t *testing.T) { + data := ints + a := IntSlice(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", ints) + t.Errorf(" got %v", data) + } +} + +func TestSortFloat64Slice(t *testing.T) { + data := float64s + a := Float64Slice(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", float64s) + t.Errorf(" got %v", data) + } +} + +func TestSortStringSlice(t *testing.T) { + data := strings + a := StringSlice(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", strings) + t.Errorf(" got %v", data) + } +} + +func TestInts(t *testing.T) { + data := ints + Ints(data[0:]) + if !IntsAreSorted(data[0:]) { + t.Errorf("sorted %v", ints) + t.Errorf(" got %v", data) + } +} + +func TestFloat64s(t *testing.T) { + data := float64s + Float64s(data[0:]) + if !Float64sAreSorted(data[0:]) { + t.Errorf("sorted %v", float64s) + t.Errorf(" got %v", data) + } +} + +func TestStrings(t *testing.T) { + data := strings + Strings(data[0:]) + if !StringsAreSorted(data[0:]) { + t.Errorf("sorted %v", strings) + t.Errorf(" got %v", data) + } +} + +func TestSlice(t *testing.T) { + data := strings + Slice(data[:], func(i, j int) bool { + return data[i] < data[j] + }) + if !SliceIsSorted(data[:], func(i, j int) bool { return data[i] < data[j] }) { + t.Errorf("sorted %v", strings) + t.Errorf(" got %v", data) + } +} + +func TestSortLarge_Random(t *testing.T) { + n := 1000000 + if testing.Short() { + n /= 100 + } + data := make([]int, n) + for i := 0; i < len(data); i++ { + data[i] = rand.Intn(100) + } + if IntsAreSorted(data) { + t.Fatalf("terrible rand.rand") + } + Ints(data) + if !IntsAreSorted(data) { + t.Errorf("sort didn't sort - 1M ints") + } +} + +func TestReverseSortIntSlice(t *testing.T) { + data := ints + data1 := ints + a := IntSlice(data[0:]) + Sort(a) + r := IntSlice(data1[0:]) + Sort(Reverse(r)) + for i := 0; i < len(data); i++ { + if a[i] != r[len(data)-1-i] { + t.Errorf("reverse sort didn't sort") + } + if i > len(data)/2 { + break + } + } +} + +func TestBreakPatterns(t *testing.T) { + // Special slice used to trigger breakPatterns. + data := make([]int, 30) + for i := range data { + data[i] = 10 + } + data[(len(data)/4)*1] = 0 + data[(len(data)/4)*2] = 1 + data[(len(data)/4)*3] = 2 + Sort(IntSlice(data)) +} + +func TestReverseRange(t *testing.T) { + data := []int{1, 2, 3, 4, 5, 6, 7} + ReverseRange(IntSlice(data), 0, len(data)) + for i := len(data) - 1; i > 0; i-- { + if data[i] > data[i-1] { + t.Fatalf("reverseRange didn't work") + } + } + + data1 := []int{1, 2, 3, 4, 5, 6, 7} + data2 := []int{1, 2, 5, 4, 3, 6, 7} + ReverseRange(IntSlice(data1), 2, 5) + for i, v := range data1 { + if v != data2[i] { + t.Fatalf("reverseRange didn't work") + } + } +} + +type nonDeterministicTestingData struct { + r *rand.Rand +} + +func (t *nonDeterministicTestingData) Len() int { + return 500 +} +func (t *nonDeterministicTestingData) Less(i, j int) bool { + if i < 0 || j < 0 || i >= t.Len() || j >= t.Len() { + panic("nondeterministic comparison out of bounds") + } + return t.r.Float32() < 0.5 +} +func (t *nonDeterministicTestingData) Swap(i, j int) { + if i < 0 || j < 0 || i >= t.Len() || j >= t.Len() { + panic("nondeterministic comparison out of bounds") + } +} + +func TestNonDeterministicComparison(t *testing.T) { + // Ensure that sort.Sort does not panic when Less returns inconsistent results. + // See https://golang.org/issue/14377. + defer func() { + if r := recover(); r != nil { + t.Error(r) + } + }() + + td := &nonDeterministicTestingData{ + r: rand.New(rand.NewSource(0)), + } + + for i := 0; i < 10; i++ { + Sort(td) + } +} + +func BenchmarkSortString1K(b *testing.B) { + b.StopTimer() + unsorted := make([]string, 1<<10) + for i := range unsorted { + unsorted[i] = strconv.Itoa(i ^ 0x2cc) + } + data := make([]string, len(unsorted)) + + for i := 0; i < b.N; i++ { + copy(data, unsorted) + b.StartTimer() + Strings(data) + b.StopTimer() + } +} + +func BenchmarkSortString1K_Slice(b *testing.B) { + b.StopTimer() + unsorted := make([]string, 1<<10) + for i := range unsorted { + unsorted[i] = strconv.Itoa(i ^ 0x2cc) + } + data := make([]string, len(unsorted)) + + for i := 0; i < b.N; i++ { + copy(data, unsorted) + b.StartTimer() + Slice(data, func(i, j int) bool { return data[i] < data[j] }) + b.StopTimer() + } +} + +func BenchmarkStableString1K(b *testing.B) { + b.StopTimer() + unsorted := make([]string, 1<<10) + for i := range unsorted { + unsorted[i] = strconv.Itoa(i ^ 0x2cc) + } + data := make([]string, len(unsorted)) + + for i := 0; i < b.N; i++ { + copy(data, unsorted) + b.StartTimer() + Stable(StringSlice(data)) + b.StopTimer() + } +} + +func BenchmarkSortInt1K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0x2cc + } + b.StartTimer() + Ints(data) + b.StopTimer() + } +} + +func BenchmarkSortInt1K_Sorted(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = i + } + b.StartTimer() + Ints(data) + b.StopTimer() + } +} + +func BenchmarkSortInt1K_Reversed(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = len(data) - i + } + b.StartTimer() + Ints(data) + b.StopTimer() + } +} + +func BenchmarkSortInt1K_Mod8(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = i % 8 + } + b.StartTimer() + Ints(data) + b.StopTimer() + } +} + +func BenchmarkStableInt1K(b *testing.B) { + b.StopTimer() + unsorted := make([]int, 1<<10) + for i := range unsorted { + unsorted[i] = i ^ 0x2cc + } + data := make([]int, len(unsorted)) + for i := 0; i < b.N; i++ { + copy(data, unsorted) + b.StartTimer() + Stable(IntSlice(data)) + b.StopTimer() + } +} + +func BenchmarkStableInt1K_Slice(b *testing.B) { + b.StopTimer() + unsorted := make([]int, 1<<10) + for i := range unsorted { + unsorted[i] = i ^ 0x2cc + } + data := make([]int, len(unsorted)) + for i := 0; i < b.N; i++ { + copy(data, unsorted) + b.StartTimer() + SliceStable(data, func(i, j int) bool { return data[i] < data[j] }) + b.StopTimer() + } +} + +func BenchmarkSortInt64K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<16) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0xcccc + } + b.StartTimer() + Ints(data) + b.StopTimer() + } +} + +func BenchmarkSortInt64K_Slice(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<16) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0xcccc + } + b.StartTimer() + Slice(data, func(i, j int) bool { return data[i] < data[j] }) + b.StopTimer() + } +} + +func BenchmarkStableInt64K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<16) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0xcccc + } + b.StartTimer() + Stable(IntSlice(data)) + b.StopTimer() + } +} + +const ( + _Sawtooth = iota + _Rand + _Stagger + _Plateau + _Shuffle + _NDist +) + +const ( + _Copy = iota + _Reverse + _ReverseFirstHalf + _ReverseSecondHalf + _Sorted + _Dither + _NMode +) + +type testingData struct { + desc string + t *testing.T + data []int + maxswap int // number of swaps allowed + ncmp, nswap int +} + +func (d *testingData) Len() int { return len(d.data) } +func (d *testingData) Less(i, j int) bool { + d.ncmp++ + return d.data[i] < d.data[j] +} +func (d *testingData) Swap(i, j int) { + if d.nswap >= d.maxswap { + d.t.Fatalf("%s: used %d swaps sorting slice of %d", d.desc, d.nswap, len(d.data)) + } + d.nswap++ + d.data[i], d.data[j] = d.data[j], d.data[i] +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func lg(n int) int { + i := 0 + for 1<<uint(i) < n { + i++ + } + return i +} + +func testBentleyMcIlroy(t *testing.T, sort func(Interface), maxswap func(int) int) { + sizes := []int{100, 1023, 1024, 1025} + if testing.Short() { + sizes = []int{100, 127, 128, 129} + } + dists := []string{"sawtooth", "rand", "stagger", "plateau", "shuffle"} + modes := []string{"copy", "reverse", "reverse1", "reverse2", "sort", "dither"} + var tmp1, tmp2 [1025]int + for _, n := range sizes { + for m := 1; m < 2*n; m *= 2 { + for dist := 0; dist < _NDist; dist++ { + j := 0 + k := 1 + data := tmp1[0:n] + for i := 0; i < n; i++ { + switch dist { + case _Sawtooth: + data[i] = i % m + case _Rand: + data[i] = rand.Intn(m) + case _Stagger: + data[i] = (i*m + i) % n + case _Plateau: + data[i] = min(i, m) + case _Shuffle: + if rand.Intn(m) != 0 { + j += 2 + data[i] = j + } else { + k += 2 + data[i] = k + } + } + } + + mdata := tmp2[0:n] + for mode := 0; mode < _NMode; mode++ { + switch mode { + case _Copy: + for i := 0; i < n; i++ { + mdata[i] = data[i] + } + case _Reverse: + for i := 0; i < n; i++ { + mdata[i] = data[n-i-1] + } + case _ReverseFirstHalf: + for i := 0; i < n/2; i++ { + mdata[i] = data[n/2-i-1] + } + for i := n / 2; i < n; i++ { + mdata[i] = data[i] + } + case _ReverseSecondHalf: + for i := 0; i < n/2; i++ { + mdata[i] = data[i] + } + for i := n / 2; i < n; i++ { + mdata[i] = data[n-(i-n/2)-1] + } + case _Sorted: + for i := 0; i < n; i++ { + mdata[i] = data[i] + } + // Ints is known to be correct + // because mode Sort runs after mode _Copy. + Ints(mdata) + case _Dither: + for i := 0; i < n; i++ { + mdata[i] = data[i] + i%5 + } + } + + desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode]) + d := &testingData{desc: desc, t: t, data: mdata[0:n], maxswap: maxswap(n)} + sort(d) + // Uncomment if you are trying to improve the number of compares/swaps. + //t.Logf("%s: ncmp=%d, nswp=%d", desc, d.ncmp, d.nswap) + + // If we were testing C qsort, we'd have to make a copy + // of the slice and sort it ourselves and then compare + // x against it, to ensure that qsort was only permuting + // the data, not (for example) overwriting it with zeros. + // + // In go, we don't have to be so paranoid: since the only + // mutating method Sort can call is TestingData.swap, + // it suffices here just to check that the final slice is sorted. + if !IntsAreSorted(mdata) { + t.Fatalf("%s: ints not sorted\n\t%v", desc, mdata) + } + } + } + } + } +} + +func TestSortBM(t *testing.T) { + testBentleyMcIlroy(t, Sort, func(n int) int { return n * lg(n) * 12 / 10 }) +} + +func TestHeapsortBM(t *testing.T) { + testBentleyMcIlroy(t, Heapsort, func(n int) int { return n * lg(n) * 12 / 10 }) +} + +func TestStableBM(t *testing.T) { + testBentleyMcIlroy(t, Stable, func(n int) int { return n * lg(n) * lg(n) / 3 }) +} + +// This is based on the "antiquicksort" implementation by M. Douglas McIlroy. +// See https://www.cs.dartmouth.edu/~doug/mdmspe.pdf for more info. +type adversaryTestingData struct { + t *testing.T + data []int // item values, initialized to special gas value and changed by Less + maxcmp int // number of comparisons allowed + ncmp int // number of comparisons (calls to Less) + nsolid int // number of elements that have been set to non-gas values + candidate int // guess at current pivot + gas int // special value for unset elements, higher than everything else +} + +func (d *adversaryTestingData) Len() int { return len(d.data) } + +func (d *adversaryTestingData) Less(i, j int) bool { + if d.ncmp >= d.maxcmp { + d.t.Fatalf("used %d comparisons sorting adversary data with size %d", d.ncmp, len(d.data)) + } + d.ncmp++ + + if d.data[i] == d.gas && d.data[j] == d.gas { + if i == d.candidate { + // freeze i + d.data[i] = d.nsolid + d.nsolid++ + } else { + // freeze j + d.data[j] = d.nsolid + d.nsolid++ + } + } + + if d.data[i] == d.gas { + d.candidate = i + } else if d.data[j] == d.gas { + d.candidate = j + } + + return d.data[i] < d.data[j] +} + +func (d *adversaryTestingData) Swap(i, j int) { + d.data[i], d.data[j] = d.data[j], d.data[i] +} + +func newAdversaryTestingData(t *testing.T, size int, maxcmp int) *adversaryTestingData { + gas := size - 1 + data := make([]int, size) + for i := 0; i < size; i++ { + data[i] = gas + } + return &adversaryTestingData{t: t, data: data, maxcmp: maxcmp, gas: gas} +} + +func TestAdversary(t *testing.T) { + const size = 10000 // large enough to distinguish between O(n^2) and O(n*log(n)) + maxcmp := size * lg(size) * 4 // the factor 4 was found by trial and error + d := newAdversaryTestingData(t, size, maxcmp) + Sort(d) // This should degenerate to heapsort. + // Check data is fully populated and sorted. + for i, v := range d.data { + if v != i { + t.Fatalf("adversary data not fully sorted") + } + } +} + +func TestStableInts(t *testing.T) { + data := ints + Stable(IntSlice(data[0:])) + if !IntsAreSorted(data[0:]) { + t.Errorf("nsorted %v\n got %v", ints, data) + } +} + +type intPairs []struct { + a, b int +} + +// IntPairs compare on a only. +func (d intPairs) Len() int { return len(d) } +func (d intPairs) Less(i, j int) bool { return d[i].a < d[j].a } +func (d intPairs) Swap(i, j int) { d[i], d[j] = d[j], d[i] } + +// Record initial order in B. +func (d intPairs) initB() { + for i := range d { + d[i].b = i + } +} + +// InOrder checks if a-equal elements were not reordered. +func (d intPairs) inOrder() bool { + lastA, lastB := -1, 0 + for i := 0; i < len(d); i++ { + if lastA != d[i].a { + lastA = d[i].a + lastB = d[i].b + continue + } + if d[i].b <= lastB { + return false + } + lastB = d[i].b + } + return true +} + +func TestStability(t *testing.T) { + n, m := 100000, 1000 + if testing.Short() { + n, m = 1000, 100 + } + data := make(intPairs, n) + + // random distribution + for i := 0; i < len(data); i++ { + data[i].a = rand.Intn(m) + } + if IsSorted(data) { + t.Fatalf("terrible rand.rand") + } + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable didn't sort %d ints", n) + } + if !data.inOrder() { + t.Errorf("Stable wasn't stable on %d ints", n) + } + + // already sorted + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable shuffled sorted %d ints (order)", n) + } + if !data.inOrder() { + t.Errorf("Stable shuffled sorted %d ints (stability)", n) + } + + // sorted reversed + for i := 0; i < len(data); i++ { + data[i].a = len(data) - i + } + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable didn't sort %d ints", n) + } + if !data.inOrder() { + t.Errorf("Stable wasn't stable on %d ints", n) + } +} + +var countOpsSizes = []int{1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5, 3e5, 1e6} + +func countOps(t *testing.T, algo func(Interface), name string) { + sizes := countOpsSizes + if testing.Short() { + sizes = sizes[:5] + } + if !testing.Verbose() { + t.Skip("Counting skipped as non-verbose mode.") + } + for _, n := range sizes { + td := testingData{ + desc: name, + t: t, + data: make([]int, n), + maxswap: 1<<31 - 1, + } + for i := 0; i < n; i++ { + td.data[i] = rand.Intn(n / 5) + } + algo(&td) + t.Logf("%s %8d elements: %11d Swap, %10d Less", name, n, td.nswap, td.ncmp) + } +} + +func TestCountStableOps(t *testing.T) { countOps(t, Stable, "Stable") } +func TestCountSortOps(t *testing.T) { countOps(t, Sort, "Sort ") } + +func bench(b *testing.B, size int, algo func(Interface), name string) { + if stringspkg.HasSuffix(testenv.Builder(), "-race") && size > 1e4 { + b.Skip("skipping slow benchmark on race builder") + } + b.StopTimer() + data := make(intPairs, size) + x := ^uint32(0) + for i := 0; i < b.N; i++ { + for n := size - 3; n <= size+3; n++ { + for i := 0; i < len(data); i++ { + x += x + x ^= 1 + if int32(x) < 0 { + x ^= 0x88888eef + } + data[i].a = int(x % uint32(n/5)) + } + data.initB() + b.StartTimer() + algo(data) + b.StopTimer() + if !IsSorted(data) { + b.Errorf("%s did not sort %d ints", name, n) + } + if name == "Stable" && !data.inOrder() { + b.Errorf("%s unstable on %d ints", name, n) + } + } + } +} + +func BenchmarkSort1e2(b *testing.B) { bench(b, 1e2, Sort, "Sort") } +func BenchmarkStable1e2(b *testing.B) { bench(b, 1e2, Stable, "Stable") } +func BenchmarkSort1e4(b *testing.B) { bench(b, 1e4, Sort, "Sort") } +func BenchmarkStable1e4(b *testing.B) { bench(b, 1e4, Stable, "Stable") } +func BenchmarkSort1e6(b *testing.B) { bench(b, 1e6, Sort, "Sort") } +func BenchmarkStable1e6(b *testing.B) { bench(b, 1e6, Stable, "Stable") } diff --git a/src/sort/zsortfunc.go b/src/sort/zsortfunc.go new file mode 100644 index 0000000..49b6169 --- /dev/null +++ b/src/sort/zsortfunc.go @@ -0,0 +1,479 @@ +// Code generated by gen_sort_variants.go; DO NOT EDIT. + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +// insertionSort_func sorts data[a:b] using insertion sort. +func insertionSort_func(data lessSwap, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data.Less(j, j-1); j-- { + data.Swap(j, j-1) + } + } +} + +// siftDown_func implements the heap property on data[lo:hi]. +// first is an offset into the array where the root of the heap lies. +func siftDown_func(data lessSwap, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data.Less(first+child, first+child+1) { + child++ + } + if !data.Less(first+root, first+child) { + return + } + data.Swap(first+root, first+child) + root = child + } +} + +func heapSort_func(data lessSwap, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown_func(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data.Swap(first, first+i) + siftDown_func(data, lo, i, first) + } +} + +// pdqsort_func sorts data[a:b]. +// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort. +// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf +// C++ implementation: https://github.com/orlp/pdqsort +// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ +// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort. +func pdqsort_func(data lessSwap, a, b, limit int) { + const maxInsertion = 12 + + var ( + wasBalanced = true // whether the last partitioning was reasonably balanced + wasPartitioned = true // whether the slice was already partitioned + ) + + for { + length := b - a + + if length <= maxInsertion { + insertionSort_func(data, a, b) + return + } + + // Fall back to heapsort if too many bad choices were made. + if limit == 0 { + heapSort_func(data, a, b) + return + } + + // If the last partitioning was imbalanced, we need to breaking patterns. + if !wasBalanced { + breakPatterns_func(data, a, b) + limit-- + } + + pivot, hint := choosePivot_func(data, a, b) + if hint == decreasingHint { + reverseRange_func(data, a, b) + // The chosen pivot was pivot-a elements after the start of the array. + // After reversing it is pivot-a elements before the end of the array. + // The idea came from Rust's implementation. + pivot = (b - 1) - (pivot - a) + hint = increasingHint + } + + // The slice is likely already sorted. + if wasBalanced && wasPartitioned && hint == increasingHint { + if partialInsertionSort_func(data, a, b) { + return + } + } + + // Probably the slice contains many duplicate elements, partition the slice into + // elements equal to and elements greater than the pivot. + if a > 0 && !data.Less(a-1, pivot) { + mid := partitionEqual_func(data, a, b, pivot) + a = mid + continue + } + + mid, alreadyPartitioned := partition_func(data, a, b, pivot) + wasPartitioned = alreadyPartitioned + + leftLen, rightLen := mid-a, b-mid + balanceThreshold := length / 8 + if leftLen < rightLen { + wasBalanced = leftLen >= balanceThreshold + pdqsort_func(data, a, mid, limit) + a = mid + 1 + } else { + wasBalanced = rightLen >= balanceThreshold + pdqsort_func(data, mid+1, b, limit) + b = mid + } + } +} + +// partition_func does one quicksort partition. +// Let p = data[pivot] +// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot. +// On return, data[newpivot] = p +func partition_func(data lessSwap, a, b, pivot int) (newpivot int, alreadyPartitioned bool) { + data.Swap(a, pivot) + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for i <= j && data.Less(i, a) { + i++ + } + for i <= j && !data.Less(j, a) { + j-- + } + if i > j { + data.Swap(j, a) + return j, true + } + data.Swap(i, j) + i++ + j-- + + for { + for i <= j && data.Less(i, a) { + i++ + } + for i <= j && !data.Less(j, a) { + j-- + } + if i > j { + break + } + data.Swap(i, j) + i++ + j-- + } + data.Swap(j, a) + return j, false +} + +// partitionEqual_func partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot]. +// It assumed that data[a:b] does not contain elements smaller than the data[pivot]. +func partitionEqual_func(data lessSwap, a, b, pivot int) (newpivot int) { + data.Swap(a, pivot) + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for { + for i <= j && !data.Less(a, i) { + i++ + } + for i <= j && data.Less(a, j) { + j-- + } + if i > j { + break + } + data.Swap(i, j) + i++ + j-- + } + return i +} + +// partialInsertionSort_func partially sorts a slice, returns true if the slice is sorted at the end. +func partialInsertionSort_func(data lessSwap, a, b int) bool { + const ( + maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted + shortestShifting = 50 // don't shift any elements on short arrays + ) + i := a + 1 + for j := 0; j < maxSteps; j++ { + for i < b && !data.Less(i, i-1) { + i++ + } + + if i == b { + return true + } + + if b-a < shortestShifting { + return false + } + + data.Swap(i, i-1) + + // Shift the smaller one to the left. + if i-a >= 2 { + for j := i - 1; j >= 1; j-- { + if !data.Less(j, j-1) { + break + } + data.Swap(j, j-1) + } + } + // Shift the greater one to the right. + if b-i >= 2 { + for j := i + 1; j < b; j++ { + if !data.Less(j, j-1) { + break + } + data.Swap(j, j-1) + } + } + } + return false +} + +// breakPatterns_func scatters some elements around in an attempt to break some patterns +// that might cause imbalanced partitions in quicksort. +func breakPatterns_func(data lessSwap, a, b int) { + length := b - a + if length >= 8 { + random := xorshift(length) + modulus := nextPowerOfTwo(length) + + for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ { + other := int(uint(random.Next()) & (modulus - 1)) + if other >= length { + other -= length + } + data.Swap(idx, a+other) + } + } +} + +// choosePivot_func chooses a pivot in data[a:b]. +// +// [0,8): chooses a static pivot. +// [8,shortestNinther): uses the simple median-of-three method. +// [shortestNinther,∞): uses the Tukey ninther method. +func choosePivot_func(data lessSwap, a, b int) (pivot int, hint sortedHint) { + const ( + shortestNinther = 50 + maxSwaps = 4 * 3 + ) + + l := b - a + + var ( + swaps int + i = a + l/4*1 + j = a + l/4*2 + k = a + l/4*3 + ) + + if l >= 8 { + if l >= shortestNinther { + // Tukey ninther method, the idea came from Rust's implementation. + i = medianAdjacent_func(data, i, &swaps) + j = medianAdjacent_func(data, j, &swaps) + k = medianAdjacent_func(data, k, &swaps) + } + // Find the median among i, j, k and stores it into j. + j = median_func(data, i, j, k, &swaps) + } + + switch swaps { + case 0: + return j, increasingHint + case maxSwaps: + return j, decreasingHint + default: + return j, unknownHint + } +} + +// order2_func returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. +func order2_func(data lessSwap, a, b int, swaps *int) (int, int) { + if data.Less(b, a) { + *swaps++ + return b, a + } + return a, b +} + +// median_func returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. +func median_func(data lessSwap, a, b, c int, swaps *int) int { + a, b = order2_func(data, a, b, swaps) + b, c = order2_func(data, b, c, swaps) + a, b = order2_func(data, a, b, swaps) + return b +} + +// medianAdjacent_func finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. +func medianAdjacent_func(data lessSwap, a int, swaps *int) int { + return median_func(data, a-1, a, a+1, swaps) +} + +func reverseRange_func(data lessSwap, a, b int) { + i := a + j := b - 1 + for i < j { + data.Swap(i, j) + i++ + j-- + } +} + +func swapRange_func(data lessSwap, a, b, n int) { + for i := 0; i < n; i++ { + data.Swap(a+i, b+i) + } +} + +func stable_func(data lessSwap, n int) { + blockSize := 20 // must be > 0 + a, b := 0, blockSize + for b <= n { + insertionSort_func(data, a, b) + a = b + b += blockSize + } + insertionSort_func(data, a, n) + + for blockSize < n { + a, b = 0, 2*blockSize + for b <= n { + symMerge_func(data, a, a+blockSize, b) + a = b + b += 2 * blockSize + } + if m := a + blockSize; m < n { + symMerge_func(data, a, m, n) + } + blockSize *= 2 + } +} + +// symMerge_func merges the two sorted subsequences data[a:m] and data[m:b] using +// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum +// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz +// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in +// Computer Science, pages 714-723. Springer, 2004. +// +// Let M = m-a and N = b-n. Wolog M < N. +// The recursion depth is bound by ceil(log(N+M)). +// The algorithm needs O(M*log(N/M + 1)) calls to data.Less. +// The algorithm needs O((M+N)*log(M)) calls to data.Swap. +// +// The paper gives O((M+N)*log(M)) as the number of assignments assuming a +// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation +// in the paper carries through for Swap operations, especially as the block +// swapping rotate uses only O(M+N) Swaps. +// +// symMerge assumes non-degenerate arguments: a < m && m < b. +// Having the caller check this condition eliminates many leaf recursion calls, +// which improves performance. +func symMerge_func(data lessSwap, a, m, b int) { + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[a] into data[m:b] + // if data[a:m] only contains one element. + if m-a == 1 { + // Use binary search to find the lowest index i + // such that data[i] >= data[a] for m <= i < b. + // Exit the search loop with i == b in case no such index exists. + i := m + j := b + for i < j { + h := int(uint(i+j) >> 1) + if data.Less(h, a) { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[a] reaches the position before i. + for k := a; k < i-1; k++ { + data.Swap(k, k+1) + } + return + } + + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[m] into data[a:m] + // if data[m:b] only contains one element. + if b-m == 1 { + // Use binary search to find the lowest index i + // such that data[i] > data[m] for a <= i < m. + // Exit the search loop with i == m in case no such index exists. + i := a + j := m + for i < j { + h := int(uint(i+j) >> 1) + if !data.Less(m, h) { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[m] reaches the position i. + for k := m; k > i; k-- { + data.Swap(k, k-1) + } + return + } + + mid := int(uint(a+b) >> 1) + n := mid + m + var start, r int + if m > mid { + start = n - b + r = mid + } else { + start = a + r = m + } + p := n - 1 + + for start < r { + c := int(uint(start+r) >> 1) + if !data.Less(p-c, c) { + start = c + 1 + } else { + r = c + } + } + + end := n - start + if start < m && m < end { + rotate_func(data, start, m, end) + } + if a < start && start < mid { + symMerge_func(data, a, start, mid) + } + if mid < end && end < b { + symMerge_func(data, mid, end, b) + } +} + +// rotate_func rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data: +// Data of the form 'x u v y' is changed to 'x v u y'. +// rotate performs at most b-a many calls to data.Swap, +// and it assumes non-degenerate arguments: a < m && m < b. +func rotate_func(data lessSwap, a, m, b int) { + i := m - a + j := b - m + + for i != j { + if i > j { + swapRange_func(data, m-i, m, j) + i -= j + } else { + swapRange_func(data, m-i, m+j-i, i) + j -= i + } + } + // i == j + swapRange_func(data, m-i, m, i) +} diff --git a/src/sort/zsortinterface.go b/src/sort/zsortinterface.go new file mode 100644 index 0000000..51fa503 --- /dev/null +++ b/src/sort/zsortinterface.go @@ -0,0 +1,479 @@ +// Code generated by gen_sort_variants.go; DO NOT EDIT. + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +// insertionSort sorts data[a:b] using insertion sort. +func insertionSort(data Interface, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data.Less(j, j-1); j-- { + data.Swap(j, j-1) + } + } +} + +// siftDown implements the heap property on data[lo:hi]. +// first is an offset into the array where the root of the heap lies. +func siftDown(data Interface, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data.Less(first+child, first+child+1) { + child++ + } + if !data.Less(first+root, first+child) { + return + } + data.Swap(first+root, first+child) + root = child + } +} + +func heapSort(data Interface, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data.Swap(first, first+i) + siftDown(data, lo, i, first) + } +} + +// pdqsort sorts data[a:b]. +// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort. +// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf +// C++ implementation: https://github.com/orlp/pdqsort +// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ +// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort. +func pdqsort(data Interface, a, b, limit int) { + const maxInsertion = 12 + + var ( + wasBalanced = true // whether the last partitioning was reasonably balanced + wasPartitioned = true // whether the slice was already partitioned + ) + + for { + length := b - a + + if length <= maxInsertion { + insertionSort(data, a, b) + return + } + + // Fall back to heapsort if too many bad choices were made. + if limit == 0 { + heapSort(data, a, b) + return + } + + // If the last partitioning was imbalanced, we need to breaking patterns. + if !wasBalanced { + breakPatterns(data, a, b) + limit-- + } + + pivot, hint := choosePivot(data, a, b) + if hint == decreasingHint { + reverseRange(data, a, b) + // The chosen pivot was pivot-a elements after the start of the array. + // After reversing it is pivot-a elements before the end of the array. + // The idea came from Rust's implementation. + pivot = (b - 1) - (pivot - a) + hint = increasingHint + } + + // The slice is likely already sorted. + if wasBalanced && wasPartitioned && hint == increasingHint { + if partialInsertionSort(data, a, b) { + return + } + } + + // Probably the slice contains many duplicate elements, partition the slice into + // elements equal to and elements greater than the pivot. + if a > 0 && !data.Less(a-1, pivot) { + mid := partitionEqual(data, a, b, pivot) + a = mid + continue + } + + mid, alreadyPartitioned := partition(data, a, b, pivot) + wasPartitioned = alreadyPartitioned + + leftLen, rightLen := mid-a, b-mid + balanceThreshold := length / 8 + if leftLen < rightLen { + wasBalanced = leftLen >= balanceThreshold + pdqsort(data, a, mid, limit) + a = mid + 1 + } else { + wasBalanced = rightLen >= balanceThreshold + pdqsort(data, mid+1, b, limit) + b = mid + } + } +} + +// partition does one quicksort partition. +// Let p = data[pivot] +// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot. +// On return, data[newpivot] = p +func partition(data Interface, a, b, pivot int) (newpivot int, alreadyPartitioned bool) { + data.Swap(a, pivot) + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for i <= j && data.Less(i, a) { + i++ + } + for i <= j && !data.Less(j, a) { + j-- + } + if i > j { + data.Swap(j, a) + return j, true + } + data.Swap(i, j) + i++ + j-- + + for { + for i <= j && data.Less(i, a) { + i++ + } + for i <= j && !data.Less(j, a) { + j-- + } + if i > j { + break + } + data.Swap(i, j) + i++ + j-- + } + data.Swap(j, a) + return j, false +} + +// partitionEqual partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot]. +// It assumed that data[a:b] does not contain elements smaller than the data[pivot]. +func partitionEqual(data Interface, a, b, pivot int) (newpivot int) { + data.Swap(a, pivot) + i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned + + for { + for i <= j && !data.Less(a, i) { + i++ + } + for i <= j && data.Less(a, j) { + j-- + } + if i > j { + break + } + data.Swap(i, j) + i++ + j-- + } + return i +} + +// partialInsertionSort partially sorts a slice, returns true if the slice is sorted at the end. +func partialInsertionSort(data Interface, a, b int) bool { + const ( + maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted + shortestShifting = 50 // don't shift any elements on short arrays + ) + i := a + 1 + for j := 0; j < maxSteps; j++ { + for i < b && !data.Less(i, i-1) { + i++ + } + + if i == b { + return true + } + + if b-a < shortestShifting { + return false + } + + data.Swap(i, i-1) + + // Shift the smaller one to the left. + if i-a >= 2 { + for j := i - 1; j >= 1; j-- { + if !data.Less(j, j-1) { + break + } + data.Swap(j, j-1) + } + } + // Shift the greater one to the right. + if b-i >= 2 { + for j := i + 1; j < b; j++ { + if !data.Less(j, j-1) { + break + } + data.Swap(j, j-1) + } + } + } + return false +} + +// breakPatterns scatters some elements around in an attempt to break some patterns +// that might cause imbalanced partitions in quicksort. +func breakPatterns(data Interface, a, b int) { + length := b - a + if length >= 8 { + random := xorshift(length) + modulus := nextPowerOfTwo(length) + + for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ { + other := int(uint(random.Next()) & (modulus - 1)) + if other >= length { + other -= length + } + data.Swap(idx, a+other) + } + } +} + +// choosePivot chooses a pivot in data[a:b]. +// +// [0,8): chooses a static pivot. +// [8,shortestNinther): uses the simple median-of-three method. +// [shortestNinther,∞): uses the Tukey ninther method. +func choosePivot(data Interface, a, b int) (pivot int, hint sortedHint) { + const ( + shortestNinther = 50 + maxSwaps = 4 * 3 + ) + + l := b - a + + var ( + swaps int + i = a + l/4*1 + j = a + l/4*2 + k = a + l/4*3 + ) + + if l >= 8 { + if l >= shortestNinther { + // Tukey ninther method, the idea came from Rust's implementation. + i = medianAdjacent(data, i, &swaps) + j = medianAdjacent(data, j, &swaps) + k = medianAdjacent(data, k, &swaps) + } + // Find the median among i, j, k and stores it into j. + j = median(data, i, j, k, &swaps) + } + + switch swaps { + case 0: + return j, increasingHint + case maxSwaps: + return j, decreasingHint + default: + return j, unknownHint + } +} + +// order2 returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. +func order2(data Interface, a, b int, swaps *int) (int, int) { + if data.Less(b, a) { + *swaps++ + return b, a + } + return a, b +} + +// median returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. +func median(data Interface, a, b, c int, swaps *int) int { + a, b = order2(data, a, b, swaps) + b, c = order2(data, b, c, swaps) + a, b = order2(data, a, b, swaps) + return b +} + +// medianAdjacent finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. +func medianAdjacent(data Interface, a int, swaps *int) int { + return median(data, a-1, a, a+1, swaps) +} + +func reverseRange(data Interface, a, b int) { + i := a + j := b - 1 + for i < j { + data.Swap(i, j) + i++ + j-- + } +} + +func swapRange(data Interface, a, b, n int) { + for i := 0; i < n; i++ { + data.Swap(a+i, b+i) + } +} + +func stable(data Interface, n int) { + blockSize := 20 // must be > 0 + a, b := 0, blockSize + for b <= n { + insertionSort(data, a, b) + a = b + b += blockSize + } + insertionSort(data, a, n) + + for blockSize < n { + a, b = 0, 2*blockSize + for b <= n { + symMerge(data, a, a+blockSize, b) + a = b + b += 2 * blockSize + } + if m := a + blockSize; m < n { + symMerge(data, a, m, n) + } + blockSize *= 2 + } +} + +// symMerge merges the two sorted subsequences data[a:m] and data[m:b] using +// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum +// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz +// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in +// Computer Science, pages 714-723. Springer, 2004. +// +// Let M = m-a and N = b-n. Wolog M < N. +// The recursion depth is bound by ceil(log(N+M)). +// The algorithm needs O(M*log(N/M + 1)) calls to data.Less. +// The algorithm needs O((M+N)*log(M)) calls to data.Swap. +// +// The paper gives O((M+N)*log(M)) as the number of assignments assuming a +// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation +// in the paper carries through for Swap operations, especially as the block +// swapping rotate uses only O(M+N) Swaps. +// +// symMerge assumes non-degenerate arguments: a < m && m < b. +// Having the caller check this condition eliminates many leaf recursion calls, +// which improves performance. +func symMerge(data Interface, a, m, b int) { + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[a] into data[m:b] + // if data[a:m] only contains one element. + if m-a == 1 { + // Use binary search to find the lowest index i + // such that data[i] >= data[a] for m <= i < b. + // Exit the search loop with i == b in case no such index exists. + i := m + j := b + for i < j { + h := int(uint(i+j) >> 1) + if data.Less(h, a) { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[a] reaches the position before i. + for k := a; k < i-1; k++ { + data.Swap(k, k+1) + } + return + } + + // Avoid unnecessary recursions of symMerge + // by direct insertion of data[m] into data[a:m] + // if data[m:b] only contains one element. + if b-m == 1 { + // Use binary search to find the lowest index i + // such that data[i] > data[m] for a <= i < m. + // Exit the search loop with i == m in case no such index exists. + i := a + j := m + for i < j { + h := int(uint(i+j) >> 1) + if !data.Less(m, h) { + i = h + 1 + } else { + j = h + } + } + // Swap values until data[m] reaches the position i. + for k := m; k > i; k-- { + data.Swap(k, k-1) + } + return + } + + mid := int(uint(a+b) >> 1) + n := mid + m + var start, r int + if m > mid { + start = n - b + r = mid + } else { + start = a + r = m + } + p := n - 1 + + for start < r { + c := int(uint(start+r) >> 1) + if !data.Less(p-c, c) { + start = c + 1 + } else { + r = c + } + } + + end := n - start + if start < m && m < end { + rotate(data, start, m, end) + } + if a < start && start < mid { + symMerge(data, a, start, mid) + } + if mid < end && end < b { + symMerge(data, mid, end, b) + } +} + +// rotate rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data: +// Data of the form 'x u v y' is changed to 'x v u y'. +// rotate performs at most b-a many calls to data.Swap, +// and it assumes non-degenerate arguments: a < m && m < b. +func rotate(data Interface, a, m, b int) { + i := m - a + j := b - m + + for i != j { + if i > j { + swapRange(data, m-i, m, j) + i -= j + } else { + swapRange(data, m-i, m+j-i, i) + j -= i + } + } + // i == j + swapRange(data, m-i, m, i) +} |