diff options
Diffstat (limited to 'src/sort/sort.go')
-rw-r--r-- | src/sort/sort.go | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/src/sort/sort.go b/src/sort/sort.go new file mode 100644 index 0000000..68e2f0d --- /dev/null +++ b/src/sort/sort.go @@ -0,0 +1,262 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen_sort_variants.go + +// Package sort provides primitives for sorting slices and user-defined collections. +package sort + +import "math/bits" + +// An implementation of Interface can be sorted by the routines in this package. +// The methods refer to elements of the underlying collection by integer index. +type Interface interface { + // Len is the number of elements in the collection. + Len() int + + // Less reports whether the element with index i + // must sort before the element with index j. + // + // If both Less(i, j) and Less(j, i) are false, + // then the elements at index i and j are considered equal. + // Sort may place equal elements in any order in the final result, + // while Stable preserves the original input order of equal elements. + // + // Less must describe a transitive ordering: + // - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well. + // - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well. + // + // Note that floating-point comparison (the < operator on float32 or float64 values) + // is not a transitive ordering when not-a-number (NaN) values are involved. + // See Float64Slice.Less for a correct implementation for floating-point values. + Less(i, j int) bool + + // Swap swaps the elements with indexes i and j. + Swap(i, j int) +} + +// Sort sorts data in ascending order as determined by the Less method. +// It makes one call to data.Len to determine n and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func Sort(data Interface) { + n := data.Len() + if n <= 1 { + return + } + limit := bits.Len(uint(n)) + pdqsort(data, 0, n, limit) +} + +type sortedHint int // hint for pdqsort when choosing the pivot + +const ( + unknownHint sortedHint = iota + increasingHint + decreasingHint +) + +// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf +type xorshift uint64 + +func (r *xorshift) Next() uint64 { + *r ^= *r << 13 + *r ^= *r >> 17 + *r ^= *r << 5 + return uint64(*r) +} + +func nextPowerOfTwo(length int) uint { + shift := uint(bits.Len(uint(length))) + return uint(1 << shift) +} + +// lessSwap is a pair of Less and Swap function for use with the +// auto-generated func-optimized variant of sort.go in +// zfuncversion.go. +type lessSwap struct { + Less func(i, j int) bool + Swap func(i, j int) +} + +type reverse struct { + // This embedded Interface permits Reverse to use the methods of + // another Interface implementation. + Interface +} + +// Less returns the opposite of the embedded implementation's Less method. +func (r reverse) Less(i, j int) bool { + return r.Interface.Less(j, i) +} + +// Reverse returns the reverse order for data. +func Reverse(data Interface) Interface { + return &reverse{data} +} + +// IsSorted reports whether data is sorted. +func IsSorted(data Interface) bool { + n := data.Len() + for i := n - 1; i > 0; i-- { + if data.Less(i, i-1) { + return false + } + } + return true +} + +// Convenience types for common cases + +// IntSlice attaches the methods of Interface to []int, sorting in increasing order. +type IntSlice []int + +func (x IntSlice) Len() int { return len(x) } +func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] } +func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x IntSlice) Sort() { Sort(x) } + +// Float64Slice implements Interface for a []float64, sorting in increasing order, +// with not-a-number (NaN) values ordered before other values. +type Float64Slice []float64 + +func (x Float64Slice) Len() int { return len(x) } + +// Less reports whether x[i] should be ordered before x[j], as required by the sort Interface. +// Note that floating-point comparison by itself is not a transitive relation: it does not +// report a consistent ordering for not-a-number (NaN) values. +// This implementation of Less places NaN values before any others, by using: +// +// x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j])) +func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) } +func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// isNaN is a copy of math.IsNaN to avoid a dependency on the math package. +func isNaN(f float64) bool { + return f != f +} + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x Float64Slice) Sort() { Sort(x) } + +// StringSlice attaches the methods of Interface to []string, sorting in increasing order. +type StringSlice []string + +func (x StringSlice) Len() int { return len(x) } +func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] } +func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +// Sort is a convenience method: x.Sort() calls Sort(x). +func (x StringSlice) Sort() { Sort(x) } + +// Convenience wrappers for common cases + +// Ints sorts a slice of ints in increasing order. +func Ints(x []int) { Sort(IntSlice(x)) } + +// Float64s sorts a slice of float64s in increasing order. +// Not-a-number (NaN) values are ordered before other values. +func Float64s(x []float64) { Sort(Float64Slice(x)) } + +// Strings sorts a slice of strings in increasing order. +func Strings(x []string) { Sort(StringSlice(x)) } + +// IntsAreSorted reports whether the slice x is sorted in increasing order. +func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) } + +// Float64sAreSorted reports whether the slice x is sorted in increasing order, +// with not-a-number (NaN) values before any other values. +func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) } + +// StringsAreSorted reports whether the slice x is sorted in increasing order. +func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) } + +// Notes on stable sorting: +// The used algorithms are simple and provable correct on all input and use +// only logarithmic additional stack space. They perform well if compared +// experimentally to other stable in-place sorting algorithms. +// +// Remarks on other algorithms evaluated: +// - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: +// Not faster. +// - GCC's __rotate for block rotations: Not faster. +// - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen +// and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: +// The given algorithms are in-place, number of Swap and Assignments +// grow as n log n but the algorithm is not stable. +// - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and +// V. Raman in Algorithmica (1996) 16, 115-160: +// This algorithm either needs additional 2n bits or works only if there +// are enough different elements available to encode some permutations +// which have to be undone later (so not stable on any input). +// - All the optimal in-place sorting/merging algorithms I found are either +// unstable or rely on enough different elements in each step to encode the +// performed block rearrangements. See also "In-Place Merging Algorithms", +// Denham Coates-Evely, Department of Computer Science, Kings College, +// January 2004 and the references in there. +// - Often "optimal" algorithms are optimal in the number of assignments +// but Interface has only Swap as operation. + +// Stable sorts data in ascending order as determined by the Less method, +// while keeping the original order of equal elements. +// +// It makes one call to data.Len to determine n, O(n*log(n)) calls to +// data.Less and O(n*log(n)*log(n)) calls to data.Swap. +func Stable(data Interface) { + stable(data, data.Len()) +} + +/* +Complexity of Stable Sorting + + +Complexity of block swapping rotation + +Each Swap puts one new element into its correct, final position. +Elements which reach their final position are no longer moved. +Thus block swapping rotation needs |u|+|v| calls to Swaps. +This is best possible as each element might need a move. + +Pay attention when comparing to other optimal algorithms which +typically count the number of assignments instead of swaps: +E.g. the optimal algorithm of Dudzinski and Dydek for in-place +rotations uses O(u + v + gcd(u,v)) assignments which is +better than our O(3 * (u+v)) as gcd(u,v) <= u. + + +Stable sorting by SymMerge and BlockSwap rotations + +SymMerg complexity for same size input M = N: +Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) +Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) + +(The following argument does not fuzz over a missing -1 or +other stuff which does not impact the final result). + +Let n = data.Len(). Assume n = 2^k. + +Plain merge sort performs log(n) = k iterations. +On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. + +Thus iteration i of merge sort performs: +Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) +Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) + +In total k = log(n) iterations are performed; so in total: +Calls to Less O(log(n) * n) +Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) + = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) + + +Above results should generalize to arbitrary n = 2^k + p +and should not be influenced by the initial insertion sort phase: +Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of +size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. +Merge sort iterations start at i = log(bs). With t = log(bs) constant: +Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) + = O(n * log(n)) +Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) + +*/ |