Use LSD radix sort for Result sorting in matcher

Replace comparison-based pdqsort with LSD radix sort on the uint64 sort key. Radix sort is O(n) vs O(n log n) and avoids pointer-chasing cache misses in the comparison function. Sort scratch buffer is reused across iterations to reduce GC pressure. Benchmark (single-threaded, Chromium file list): - linux query (180K matches): ~16% faster - src query (high match count): ~31% faster - Rare matches: equivalent (falls back to pdqsort for n < 128)
2026-04-18 21:57:53 +08:00 · 2026-02-28 11:35:34 +09:00
parent 3e751c4e87
commit 5887edc6ba
5 changed files with 145 additions and 6 deletions
--- a/src/matcher.go
+++ b/src/matcher.go
@@ -3,7 +3,6 @@ package fzf
 import (
 	"fmt"
 	"runtime"
-	"sort"
 	"sync"
 	"time"

@@ -43,6 +42,7 @@ type Matcher struct {
 	reqBox         *util.EventBox
 	partitions     int
 	slab           []*util.Slab
+	sortBuf        [][]Result
 	mergerCache    map[string]MatchResult
 	revision       revision
 }
@@ -68,6 +68,7 @@ func NewMatcher(cache *ChunkCache, patternBuilder func([]rune) *Pattern,
 		reqBox:         util.NewEventBox(),
 		partitions:     partitions,
 		slab:           make([]*util.Slab, partitions),
+		sortBuf:        make([][]Result, partitions),
 		mergerCache:    make(map[string]MatchResult),
 		revision:       revision}
 }
@@ -215,11 +216,7 @@ func (m *Matcher) scan(request MatchRequest) MatchResult {
 				sliceMatches = append(sliceMatches, matches...)
 			}
 			if m.sort && request.pattern.sortable {
-				if m.tac {
-					sort.Sort(ByRelevanceTac(sliceMatches))
-				} else {
-					sort.Sort(ByRelevance(sliceMatches))
-				}
+				m.sortBuf[idx] = radixSortResults(sliceMatches, m.tac, m.sortBuf[idx])
 			}
 			resultChan <- partialResult{idx, sliceMatches}
 		}(idx, m.slab[idx], chunks)
--- a/src/result.go
+++ b/src/result.go
@@ -339,3 +339,79 @@ func (a ByRelevanceTac) Swap(i, j int) {
 func (a ByRelevanceTac) Less(i, j int) bool {
 	return compareRanks(a[i], a[j], true)
 }
+
+// radixSortResults sorts Results by their points key using LSD radix sort.
+// O(n) time complexity vs O(n log n) for comparison sort.
+// The sort is stable, so equal-key items maintain original (item-index) order.
+// For tac mode, runs of equal keys are reversed after sorting.
+func radixSortResults(a []Result, tac bool, scratch []Result) []Result {
+	n := len(a)
+	if n < 128 {
+		if tac {
+			sort.Sort(ByRelevanceTac(a))
+		} else {
+			sort.Sort(ByRelevance(a))
+		}
+		return scratch[:0]
+	}
+
+	if cap(scratch) < n {
+		scratch = make([]Result, n)
+	}
+	buf := scratch[:n]
+	src, dst := a, buf
+	scattered := 0
+
+	for pass := range 8 {
+		shift := uint(pass) * 8
+
+		var count [256]int
+		for i := range src {
+			count[byte(sortKey(&src[i])>>shift)]++
+		}
+
+		// Skip if all items have the same byte value at this position
+		if count[byte(sortKey(&src[0])>>shift)] == n {
+			continue
+		}
+
+		var offset [256]int
+		for i := 1; i < 256; i++ {
+			offset[i] = offset[i-1] + count[i-1]
+		}
+
+		for i := range src {
+			b := byte(sortKey(&src[i]) >> shift)
+			dst[offset[b]] = src[i]
+			offset[b]++
+		}
+
+		src, dst = dst, src
+		scattered++
+	}
+
+	// If odd number of scatters, data is in buf, copy back to a
+	if scattered%2 == 1 {
+		copy(a, src)
+	}
+
+	// Handle tac: reverse runs of equal keys so equal-key items
+	// are in reverse item-index order
+	if tac {
+		i := 0
+		for i < n {
+			ki := sortKey(&a[i])
+			j := i + 1
+			for j < n && sortKey(&a[j]) == ki {
+				j++
+			}
+			if j-i > 1 {
+				for l, r := i, j-1; l < r; l, r = l+1, r-1 {
+					a[l], a[r] = a[r], a[l]
+				}
+			}
+			i = j
+		}
+	}
+	return scratch
+}
--- a/src/result_others.go
+++ b/src/result_others.go
@@ -14,3 +14,7 @@ func compareRanks(irank Result, jrank Result, tac bool) bool {
 	}
 	return (irank.item.Index() <= jrank.item.Index()) != tac
 }
+
+func sortKey(r *Result) uint64 {
+	return uint64(r.points[0]) | uint64(r.points[1])<<16 | uint64(r.points[2])<<32 | uint64(r.points[3])<<48
+}
--- a/src/result_test.go
+++ b/src/result_test.go
@@ -2,6 +2,7 @@ package fzf

 import (
 	"math"
+	"math/rand"
 	"sort"
 	"testing"

@@ -182,3 +183,60 @@ func TestColorOffset(t *testing.T) {
 		assert(11, 39, 40, tui.NewColorPair(4, 8, tui.Bold))
 	}
 }
+
+func TestRadixSortResults(t *testing.T) {
+	sortCriteria = []criterion{byScore, byLength}
+
+	rng := rand.New(rand.NewSource(42))
+
+	for _, n := range []int{128, 256, 500, 1000} {
+		for _, tac := range []bool{false, true} {
+			// Build items with random points and indices
+			items := make([]*Item, n)
+			for i := range items {
+				items[i] = &Item{text: util.Chars{Index: int32(i)}}
+			}
+
+			results := make([]Result, n)
+			for i := range results {
+				results[i] = Result{
+					item: items[i],
+					points: [4]uint16{
+						uint16(rng.Intn(256)),
+						uint16(rng.Intn(256)),
+						uint16(rng.Intn(256)),
+						uint16(rng.Intn(256)),
+					},
+				}
+			}
+
+			// Make some duplicates to test stability
+			for i := 0; i < n/4; i++ {
+				j := rng.Intn(n)
+				k := rng.Intn(n)
+				results[j].points = results[k].points
+			}
+
+			// Copy for reference sort
+			expected := make([]Result, n)
+			copy(expected, results)
+			if tac {
+				sort.Sort(ByRelevanceTac(expected))
+			} else {
+				sort.Sort(ByRelevance(expected))
+			}
+
+			// Radix sort
+			var scratch []Result
+			scratch = radixSortResults(results, tac, scratch)
+
+			for i := range results {
+				if results[i] != expected[i] {
+					t.Errorf("n=%d tac=%v: mismatch at index %d: got item %d, want item %d",
+						n, tac, i, results[i].item.Index(), expected[i].item.Index())
+					break
+				}
+			}
+		}
+	}
+}
--- a/src/result_x86.go
+++ b/src/result_x86.go
@@ -14,3 +14,7 @@ func compareRanks(irank Result, jrank Result, tac bool) bool {
 	}
 	return (irank.item.Index() <= jrank.item.Index()) != tac
 }
+
+func sortKey(r *Result) uint64 {
+	return *(*uint64)(unsafe.Pointer(&r.points[0]))
+}