Replace []Result cache with bitmap cache for reduced memory usage

Replace the per-chunk query cache of []Result slices with fixed-size bitmaps (ChunkBitmap: [16]uint64 = 128 bytes per entry). Each bit indicates whether the corresponding item in the chunk matched. chunkSize is changed from 1000 to 1024, which is exactly 16 words of 64 bits.

This reduces cache memory by a factor of about 86 in testing:
- Old []Result cache: ~22KB per chunk per query (for 500 matches)
- New bitmap cache: ~262 bytes per chunk per query (fixed, regardless of the number of matches)

With the reduced per-entry cost, queryCacheMax is raised from chunkSize/5 to chunkSize/2, allowing broader queries (up to a 50% match rate) to be cached while still using far less memory.
2026-06-21 16:40:36 +08:00 · 2026-03-08 11:40:37 +09:00
parent 9249ea1739
commit 2f27a3ede2
5 changed files with 191 additions and 87 deletions
@@ -2,10 +2,13 @@ package fzf

 import "sync"

-// queryCache associates strings to lists of items
-type queryCache map[string][]Result
+// ChunkBitmap is a bitmap with one bit per item in a chunk.
+type ChunkBitmap [chunkBitWords]uint64

-// ChunkCache associates Chunk and query string to lists of items
+// queryCache associates query strings to bitmaps of matching items
+type queryCache map[string]ChunkBitmap
+
+// ChunkCache associates Chunk and query string to bitmaps
 type ChunkCache struct {
 	mutex sync.Mutex
 	cache map[*Chunk]*queryCache
@@ -30,9 +33,9 @@ func (cc *ChunkCache) retire(chunk ...*Chunk) {
 	cc.mutex.Unlock()
 }

-// Add adds the list to the cache
-func (cc *ChunkCache) Add(chunk *Chunk, key string, list []Result) {
-	if len(key) == 0 || !chunk.IsFull() || len(list) > queryCacheMax {
+// Add stores the bitmap for the given chunk and key
+func (cc *ChunkCache) Add(chunk *Chunk, key string, bitmap ChunkBitmap, matchCount int) {
+	if len(key) == 0 || !chunk.IsFull() || matchCount > queryCacheMax {
 		return
 	}

@@ -44,11 +47,11 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []Result) {
 		cc.cache[chunk] = &queryCache{}
 		qc = cc.cache[chunk]
 	}
-	(*qc)[key] = list
+	(*qc)[key] = bitmap
 }

-// Lookup is called to lookup ChunkCache
-func (cc *ChunkCache) Lookup(chunk *Chunk, key string) []Result {
+// Lookup returns the bitmap for the exact key
+func (cc *ChunkCache) Lookup(chunk *Chunk, key string) *ChunkBitmap {
 	if len(key) == 0 || !chunk.IsFull() {
 		return nil
 	}
@@ -58,15 +61,15 @@ func (cc *ChunkCache) Lookup(chunk *Chunk, key string) []Result {

 	qc, ok := cc.cache[chunk]
 	if ok {
-		list, ok := (*qc)[key]
-		if ok {
-			return list
+		if bm, ok := (*qc)[key]; ok {
+			return &bm
 		}
 	}
 	return nil
 }

-func (cc *ChunkCache) Search(chunk *Chunk, key string) []Result {
+// Search finds the bitmap for the longest prefix or suffix of the key
+func (cc *ChunkCache) Search(chunk *Chunk, key string) *ChunkBitmap {
 	if len(key) == 0 || !chunk.IsFull() {
 		return nil
 	}
@@ -86,8 +89,8 @@ func (cc *ChunkCache) Search(chunk *Chunk, key string) []Result {
 		prefix := key[:len(key)-idx]
 		suffix := key[idx:]
 		for _, substr := range [2]string{prefix, suffix} {
-			if cached, found := (*qc)[substr]; found {
-				return cached
+			if bm, found := (*qc)[substr]; found {
+				return &bm
 			}
 		}
 	}
@@ -6,34 +6,34 @@ func TestChunkCache(t *testing.T) {
 	cache := NewChunkCache()
 	chunk1p := &Chunk{}
 	chunk2p := &Chunk{count: chunkSize}
-	items1 := []Result{{}}
-	items2 := []Result{{}, {}}
-	cache.Add(chunk1p, "foo", items1)
-	cache.Add(chunk2p, "foo", items1)
-	cache.Add(chunk2p, "bar", items2)
+	bm1 := ChunkBitmap{1}
+	bm2 := ChunkBitmap{1, 2}
+	cache.Add(chunk1p, "foo", bm1, 1)
+	cache.Add(chunk2p, "foo", bm1, 1)
+	cache.Add(chunk2p, "bar", bm2, 2)

 	{ // chunk1 is not full
 		cached := cache.Lookup(chunk1p, "foo")
 		if cached != nil {
-			t.Error("Cached disabled for non-empty chunks", cached)
+			t.Error("Cached disabled for non-full chunks", cached)
 		}
 	}
 	{
 		cached := cache.Lookup(chunk2p, "foo")
-		if cached == nil || len(cached) != 1 {
-			t.Error("Expected 1 item cached", cached)
+		if cached == nil || cached[0] != 1 {
+			t.Error("Expected bitmap cached", cached)
 		}
 	}
 	{
 		cached := cache.Lookup(chunk2p, "bar")
-		if cached == nil || len(cached) != 2 {
-			t.Error("Expected 2 items cached", cached)
+		if cached == nil || cached[1] != 2 {
+			t.Error("Expected bitmap cached", cached)
 		}
 	}
 	{
 		cached := cache.Lookup(chunk1p, "foobar")
 		if cached != nil {
-			t.Error("Expected 0 item cached", cached)
+			t.Error("Expected nil cached", cached)
 		}
 	}
 }
@@ -37,14 +37,15 @@ const (
 	progressMinDuration = 200 * time.Millisecond

 	// Capacity of each chunk
-	chunkSize int = 1000
+	chunkSize     int = 1024
+	chunkBitWords     = (chunkSize + 63) / 64

 	// Pre-allocated memory slices to minimize GC
 	slab16Size int = 100 * 1024 // 200KB * 32 = 12.8MB
 	slab32Size int = 2048       // 8KB * 32 = 256KB

 	// Do not cache results of low selectivity queries
-	queryCacheMax int = chunkSize / 5
+	queryCacheMax int = chunkSize / 2

 	// Not to cache mergers with large lists
 	mergerCacheMax int = 100000
@@ -300,104 +300,87 @@ func (p *Pattern) CacheKey() string {

 // Match returns the list of matches Items in the given Chunk
 func (p *Pattern) Match(chunk *Chunk, slab *util.Slab) []Result {
-	// ChunkCache: Exact match
 	cacheKey := p.CacheKey()
+
+	// Bitmap cache: exact match or prefix/suffix
+	var cachedBitmap *ChunkBitmap
 	if p.cacheable {
-		if cached := p.cache.Lookup(chunk, cacheKey); cached != nil {
-			return cached
-		}
+		cachedBitmap = p.cache.Lookup(chunk, cacheKey)
+	}
+	if cachedBitmap == nil {
+		cachedBitmap = p.cache.Search(chunk, cacheKey)
 	}

-	// Prefix/suffix cache
-	space := p.cache.Search(chunk, cacheKey)
-
-	matches := p.matchChunk(chunk, space, slab)
+	matches, bitmap := p.matchChunk(chunk, cachedBitmap, slab)

 	if p.cacheable {
-		p.cache.Add(chunk, cacheKey, matches)
+		p.cache.Add(chunk, cacheKey, bitmap, len(matches))
 	}
 	return matches
 }

-func (p *Pattern) matchChunk(chunk *Chunk, space []Result, slab *util.Slab) []Result {
+func (p *Pattern) matchChunk(chunk *Chunk, cachedBitmap *ChunkBitmap, slab *util.Slab) ([]Result, ChunkBitmap) {
 	matches := []Result{}
+	var bitmap ChunkBitmap

 	// Skip header items in chunks that contain them
 	startIdx := 0
 	if p.startIndex > 0 && chunk.count > 0 && chunk.items[0].Index() < p.startIndex {
 		startIdx = int(p.startIndex - chunk.items[0].Index())
 		if startIdx >= chunk.count {
-			return matches
+			return matches, bitmap
 		}
 	}

+	hasCachedBitmap := cachedBitmap != nil
+
 	// Fast path: single fuzzy term, no nth, no denylist.
 	// Calls the algo function directly, bypassing MatchItem/extendedMatch/iter
 	// and avoiding per-match []Offset heap allocation.
 	if p.directAlgo != nil && len(p.denylist) == 0 {
 		t := p.directTerm
-		if space == nil {
-			for idx := startIdx; idx < chunk.count; idx++ {
-				res, _ := p.directAlgo(t.caseSensitive, t.normalize, p.forward,
-					&chunk.items[idx].text, t.text, p.withPos, slab)
-				if res.Start >= 0 {
-					matches = append(matches, buildResultFromBounds(
-						&chunk.items[idx], res.Score,
-						int(res.Start), int(res.End), int(res.End), true))
-				}
+		for idx := startIdx; idx < chunk.count; idx++ {
+			if hasCachedBitmap && cachedBitmap[idx/64]&(uint64(1)<<(idx%64)) == 0 {
+				continue
 			}
-		} else {
-			for _, result := range space {
-				res, _ := p.directAlgo(t.caseSensitive, t.normalize, p.forward,
-					&result.item.text, t.text, p.withPos, slab)
-				if res.Start >= 0 {
-					matches = append(matches, buildResultFromBounds(
-						result.item, res.Score,
-						int(res.Start), int(res.End), int(res.End), true))
-				}
+			res, _ := p.directAlgo(t.caseSensitive, t.normalize, p.forward,
+				&chunk.items[idx].text, t.text, p.withPos, slab)
+			if res.Start >= 0 {
+				bitmap[idx/64] |= uint64(1) << (idx % 64)
+				matches = append(matches, buildResultFromBounds(
+					&chunk.items[idx], res.Score,
+					int(res.Start), int(res.End), int(res.End), true))
 			}
 		}
-		return matches
+		return matches, bitmap
 	}

 	if len(p.denylist) == 0 {
-		// Huge code duplication for minimizing unnecessary map lookups
-		if space == nil {
-			for idx := startIdx; idx < chunk.count; idx++ {
-				if match, _, _ := p.MatchItem(&chunk.items[idx], p.withPos, slab); match.item != nil {
-					matches = append(matches, match)
-				}
-			}
-		} else {
-			for _, result := range space {
-				if match, _, _ := p.MatchItem(result.item, p.withPos, slab); match.item != nil {
-					matches = append(matches, match)
-				}
-			}
-		}
-		return matches
-	}
-
-	if space == nil {
 		for idx := startIdx; idx < chunk.count; idx++ {
-			if _, prs := p.denylist[chunk.items[idx].Index()]; prs {
+			if hasCachedBitmap && cachedBitmap[idx/64]&(uint64(1)<<(idx%64)) == 0 {
 				continue
 			}
 			if match, _, _ := p.MatchItem(&chunk.items[idx], p.withPos, slab); match.item != nil {
+				bitmap[idx/64] |= uint64(1) << (idx % 64)
 				matches = append(matches, match)
 			}
 		}
-	} else {
-		for _, result := range space {
-			if _, prs := p.denylist[result.item.Index()]; prs {
-				continue
-			}
-			if match, _, _ := p.MatchItem(result.item, p.withPos, slab); match.item != nil {
-				matches = append(matches, match)
-			}
+		return matches, bitmap
+	}
+
+	for idx := startIdx; idx < chunk.count; idx++ {
+		if hasCachedBitmap && cachedBitmap[idx/64]&(uint64(1)<<(idx%64)) == 0 {
+			continue
+		}
+		if _, prs := p.denylist[chunk.items[idx].Index()]; prs {
+			continue
+		}
+		if match, _, _ := p.MatchItem(&chunk.items[idx], p.withPos, slab); match.item != nil {
+			bitmap[idx/64] |= uint64(1) << (idx % 64)
+			matches = append(matches, match)
 		}
 	}
-	return matches
+	return matches, bitmap
 }

 // MatchItem returns the match result if the Item is a match.
@@ -2,6 +2,7 @@ package fzf

 import (
 	"reflect"
+	"runtime"
 	"testing"

 	"github.com/junegunn/fzf/src/algo"
@@ -137,7 +138,7 @@ func TestOrigTextAndTransformed(t *testing.T) {
 			origText:    &origBytes,
 			transformed: &transformed{pattern.revision, trans}}
 		pattern.extended = extended
-		matches := pattern.matchChunk(&chunk, nil, slab) // No cache
+		matches, _ := pattern.matchChunk(&chunk, nil, slab) // No cache
 		if !(matches[0].item.text.ToString() == "junegunn" &&
 			string(*matches[0].item.origText) == "junegunn.choi" &&
 			reflect.DeepEqual((*matches[0].item.transformed).tokens, trans)) {
@@ -199,3 +200,119 @@ func TestCacheable(t *testing.T) {
 	test(false, "foo 'bar", "foo", false)
 	test(false, "foo !bar", "foo", false)
 }
+
+func buildChunks(numChunks int) []*Chunk {
+	chunks := make([]*Chunk, numChunks)
+	words := []string{
+		"src/main/java/com/example/service/UserService.java",
+		"src/test/java/com/example/service/UserServiceTest.java",
+		"docs/api/reference/endpoints.md",
+		"lib/internal/utils/string_helper.go",
+		"pkg/server/http/handler/auth.go",
+		"build/output/release/app.exe",
+		"config/production/database.yml",
+		"scripts/deploy/kubernetes/setup.sh",
+		"vendor/github.com/junegunn/fzf/src/core.go",
+		"node_modules/.cache/babel/transform.js",
+	}
+	for ci := range numChunks {
+		chunks[ci] = &Chunk{count: chunkSize}
+		for i := range chunkSize {
+			text := words[(ci*chunkSize+i)%len(words)]
+			chunks[ci].items[i] = Item{text: util.ToChars([]byte(text))}
+			chunks[ci].items[i].text.Index = int32(ci*chunkSize + i)
+		}
+	}
+	return chunks
+}
+
+func buildPatternWith(cache *ChunkCache, runes []rune) *Pattern {
+	return BuildPattern(cache, make(map[string]*Pattern),
+		true, algo.FuzzyMatchV2, true, CaseSmart, false, true,
+		false, true, []Range{}, Delimiter{}, revision{}, runes, nil, 0)
+}
+
+func TestBitmapCacheBenefit(t *testing.T) {
+	numChunks := 100
+	chunks := buildChunks(numChunks)
+	queries := []string{"s", "se", "ser", "serv", "servi"}
+
+	// 1. Run all queries with shared cache (simulates incremental typing)
+	cache := NewChunkCache()
+	for _, q := range queries {
+		pat := buildPatternWith(cache, []rune(q))
+		for _, chunk := range chunks {
+			pat.Match(chunk, slab)
+		}
+	}
+
+	// 2. GC and measure memory with cache populated
+	runtime.GC()
+	runtime.GC()
+	var memWith runtime.MemStats
+	runtime.ReadMemStats(&memWith)
+
+	// 3. Clear cache, GC, measure again
+	cache.Clear()
+	runtime.GC()
+	runtime.GC()
+	var memWithout runtime.MemStats
+	runtime.ReadMemStats(&memWithout)
+
+	cacheMem := int64(memWith.Alloc) - int64(memWithout.Alloc)
+	t.Logf("Chunks: %d, Queries: %d", numChunks, len(queries))
+	t.Logf("Cache memory: %d bytes (%.1f KB)", cacheMem, float64(cacheMem)/1024)
+	t.Logf("Per-chunk-per-query: %.0f bytes", float64(cacheMem)/float64(numChunks*len(queries)))
+
+	// 4. Verify correctness: cached vs uncached produce same results
+	cache2 := NewChunkCache()
+	for _, q := range queries {
+		pat := buildPatternWith(cache2, []rune(q))
+		for _, chunk := range chunks {
+			pat.Match(chunk, slab)
+		}
+	}
+	for _, q := range queries {
+		patCached := buildPatternWith(cache2, []rune(q))
+		patFresh := buildPatternWith(NewChunkCache(), []rune(q))
+		var countCached, countFresh int
+		for _, chunk := range chunks {
+			countCached += len(patCached.Match(chunk, slab))
+			countFresh += len(patFresh.Match(chunk, slab))
+		}
+		if countCached != countFresh {
+			t.Errorf("query=%q: cached=%d, fresh=%d", q, countCached, countFresh)
+		}
+		t.Logf("query=%q: matches=%d", q, countCached)
+	}
+}
+
+func BenchmarkWithCache(b *testing.B) {
+	numChunks := 100
+	chunks := buildChunks(numChunks)
+	queries := []string{"s", "se", "ser", "serv", "servi"}
+
+	b.Run("cached", func(b *testing.B) {
+		for range b.N {
+			cache := NewChunkCache()
+			for _, q := range queries {
+				pat := buildPatternWith(cache, []rune(q))
+				for _, chunk := range chunks {
+					pat.Match(chunk, slab)
+				}
+			}
+		}
+	})
+
+	b.Run("uncached", func(b *testing.B) {
+		for range b.N {
+			for _, q := range queries {
+				cache := NewChunkCache()
+				pat := buildPatternWith(cache, []rune(q))
+				for _, chunk := range chunks {
+					pat.Match(chunk, slab)
+				}
+			}
+		}
+	})
+}