Replace []Result cache with bitmap cache for reduced memory usage

Replace the per-chunk query cache from []Result slices to fixed-size bitmaps (ChunkBitmap: [16]uint64 = 128 bytes per entry). Each bit indicates whether the corresponding item in the chunk matched. This reduces cache memory by 86 times in testing: - Old []Result cache: ~22KB per chunk per query (for 500 matches) - New bitmap cache: ~262 bytes per chunk per query (fixed) With the reduced per-entry cost, queryCacheMax is raised from chunkSize/5 to chunkSize/2, allowing broader queries (up to 50% match rate) to be cached while still using far less memory.
2026-06-19 15:42:32 +08:00 · 2026-03-08 11:40:37 +09:00
parent 9249ea1739
commit 2f27a3ede2
5 changed files with 191 additions and 87 deletions
@@ -2,6 +2,7 @@ package fzf

 import (
 	"reflect"
+	"runtime"
 	"testing"

 	"github.com/junegunn/fzf/src/algo"
@@ -137,7 +138,7 @@ func TestOrigTextAndTransformed(t *testing.T) {
 			origText:    &origBytes,
 			transformed: &transformed{pattern.revision, trans}}
 		pattern.extended = extended
-		matches := pattern.matchChunk(&chunk, nil, slab) // No cache
+		matches, _ := pattern.matchChunk(&chunk, nil, slab) // No cache
 		if !(matches[0].item.text.ToString() == "junegunn" &&
 			string(*matches[0].item.origText) == "junegunn.choi" &&
 			reflect.DeepEqual((*matches[0].item.transformed).tokens, trans)) {
@@ -199,3 +200,119 @@ func TestCacheable(t *testing.T) {
 	test(false, "foo 'bar", "foo", false)
 	test(false, "foo !bar", "foo", false)
 }
+
+func buildChunks(numChunks int) []*Chunk {
+	chunks := make([]*Chunk, numChunks)
+	words := []string{
+		"src/main/java/com/example/service/UserService.java",
+		"src/test/java/com/example/service/UserServiceTest.java",
+		"docs/api/reference/endpoints.md",
+		"lib/internal/utils/string_helper.go",
+		"pkg/server/http/handler/auth.go",
+		"build/output/release/app.exe",
+		"config/production/database.yml",
+		"scripts/deploy/kubernetes/setup.sh",
+		"vendor/github.com/junegunn/fzf/src/core.go",
+		"node_modules/.cache/babel/transform.js",
+	}
+	for ci := range numChunks {
+		chunks[ci] = &Chunk{count: chunkSize}
+		for i := range chunkSize {
+			text := words[(ci*chunkSize+i)%len(words)]
+			chunks[ci].items[i] = Item{text: util.ToChars([]byte(text))}
+			chunks[ci].items[i].text.Index = int32(ci*chunkSize + i)
+		}
+	}
+	return chunks
+}
+
+func buildPatternWith(cache *ChunkCache, runes []rune) *Pattern {
+	return BuildPattern(cache, make(map[string]*Pattern),
+		true, algo.FuzzyMatchV2, true, CaseSmart, false, true,
+		false, true, []Range{}, Delimiter{}, revision{}, runes, nil, 0)
+}
+
+func TestBitmapCacheBenefit(t *testing.T) {
+	numChunks := 100
+	chunks := buildChunks(numChunks)
+	queries := []string{"s", "se", "ser", "serv", "servi"}
+
+	// 1. Run all queries with shared cache (simulates incremental typing)
+	cache := NewChunkCache()
+	for _, q := range queries {
+		pat := buildPatternWith(cache, []rune(q))
+		for _, chunk := range chunks {
+			pat.Match(chunk, slab)
+		}
+	}
+
+	// 2. GC and measure memory with cache populated
+	runtime.GC()
+	runtime.GC()
+	var memWith runtime.MemStats
+	runtime.ReadMemStats(&memWith)
+
+	// 3. Clear cache, GC, measure again
+	cache.Clear()
+	runtime.GC()
+	runtime.GC()
+	var memWithout runtime.MemStats
+	runtime.ReadMemStats(&memWithout)
+
+	cacheMem := int64(memWith.Alloc) - int64(memWithout.Alloc)
+	t.Logf("Chunks: %d, Queries: %d", numChunks, len(queries))
+	t.Logf("Cache memory: %d bytes (%.1f KB)", cacheMem, float64(cacheMem)/1024)
+	t.Logf("Per-chunk-per-query: %.0f bytes", float64(cacheMem)/float64(numChunks*len(queries)))
+
+	// 4. Verify correctness: cached vs uncached produce same results
+	cache2 := NewChunkCache()
+	for _, q := range queries {
+		pat := buildPatternWith(cache2, []rune(q))
+		for _, chunk := range chunks {
+			pat.Match(chunk, slab)
+		}
+	}
+	for _, q := range queries {
+		patCached := buildPatternWith(cache2, []rune(q))
+		patFresh := buildPatternWith(NewChunkCache(), []rune(q))
+		var countCached, countFresh int
+		for _, chunk := range chunks {
+			countCached += len(patCached.Match(chunk, slab))
+			countFresh += len(patFresh.Match(chunk, slab))
+		}
+		if countCached != countFresh {
+			t.Errorf("query=%q: cached=%d, fresh=%d", q, countCached, countFresh)
+		}
+		t.Logf("query=%q: matches=%d", q, countCached)
+	}
+}
+
+func BenchmarkWithCache(b *testing.B) {
+	numChunks := 100
+	chunks := buildChunks(numChunks)
+	queries := []string{"s", "se", "ser", "serv", "servi"}
+
+	b.Run("cached", func(b *testing.B) {
+		for range b.N {
+			cache := NewChunkCache()
+			for _, q := range queries {
+				pat := buildPatternWith(cache, []rune(q))
+				for _, chunk := range chunks {
+					pat.Match(chunk, slab)
+				}
+			}
+		}
+	})
+
+	b.Run("uncached", func(b *testing.B) {
+		for range b.N {
+			for _, q := range queries {
+				cache := NewChunkCache()
+				pat := buildPatternWith(cache, []rune(q))
+				for _, chunk := range chunks {
+					pat.Match(chunk, slab)
+				}
+			}
+		}
+	})
+}