Replace []Result cache with bitmap cache for reduced memory usage

Replace the per-chunk query cache from []Result slices to fixed-size
bitmaps (ChunkBitmap: [16]uint64 = 128 bytes per entry). Each bit
indicates whether the corresponding item in the chunk matched.

This reduces cache memory by 86 times in testing:
- Old []Result cache: ~22KB per chunk per query (for 500 matches)
- New bitmap cache:   ~262 bytes per chunk per query (fixed)

With the reduced per-entry cost, queryCacheMax is raised from
chunkSize/5 to chunkSize/2, allowing broader queries (up to 50% match
rate) to be cached while still using far less memory.
This commit is contained in:
Junegunn Choi
2026-03-08 11:40:37 +09:00
parent 9249ea1739
commit 2f27a3ede2
5 changed files with 191 additions and 87 deletions
+118 -1
View File
@@ -2,6 +2,7 @@ package fzf
import (
"reflect"
"runtime"
"testing"
"github.com/junegunn/fzf/src/algo"
@@ -137,7 +138,7 @@ func TestOrigTextAndTransformed(t *testing.T) {
origText: &origBytes,
transformed: &transformed{pattern.revision, trans}}
pattern.extended = extended
matches := pattern.matchChunk(&chunk, nil, slab) // No cache
matches, _ := pattern.matchChunk(&chunk, nil, slab) // No cache
if !(matches[0].item.text.ToString() == "junegunn" &&
string(*matches[0].item.origText) == "junegunn.choi" &&
reflect.DeepEqual((*matches[0].item.transformed).tokens, trans)) {
@@ -199,3 +200,119 @@ func TestCacheable(t *testing.T) {
test(false, "foo 'bar", "foo", false)
test(false, "foo !bar", "foo", false)
}
func buildChunks(numChunks int) []*Chunk {
chunks := make([]*Chunk, numChunks)
words := []string{
"src/main/java/com/example/service/UserService.java",
"src/test/java/com/example/service/UserServiceTest.java",
"docs/api/reference/endpoints.md",
"lib/internal/utils/string_helper.go",
"pkg/server/http/handler/auth.go",
"build/output/release/app.exe",
"config/production/database.yml",
"scripts/deploy/kubernetes/setup.sh",
"vendor/github.com/junegunn/fzf/src/core.go",
"node_modules/.cache/babel/transform.js",
}
for ci := range numChunks {
chunks[ci] = &Chunk{count: chunkSize}
for i := range chunkSize {
text := words[(ci*chunkSize+i)%len(words)]
chunks[ci].items[i] = Item{text: util.ToChars([]byte(text))}
chunks[ci].items[i].text.Index = int32(ci*chunkSize + i)
}
}
return chunks
}
func buildPatternWith(cache *ChunkCache, runes []rune) *Pattern {
return BuildPattern(cache, make(map[string]*Pattern),
true, algo.FuzzyMatchV2, true, CaseSmart, false, true,
false, true, []Range{}, Delimiter{}, revision{}, runes, nil, 0)
}
func TestBitmapCacheBenefit(t *testing.T) {
numChunks := 100
chunks := buildChunks(numChunks)
queries := []string{"s", "se", "ser", "serv", "servi"}
// 1. Run all queries with shared cache (simulates incremental typing)
cache := NewChunkCache()
for _, q := range queries {
pat := buildPatternWith(cache, []rune(q))
for _, chunk := range chunks {
pat.Match(chunk, slab)
}
}
// 2. GC and measure memory with cache populated
runtime.GC()
runtime.GC()
var memWith runtime.MemStats
runtime.ReadMemStats(&memWith)
// 3. Clear cache, GC, measure again
cache.Clear()
runtime.GC()
runtime.GC()
var memWithout runtime.MemStats
runtime.ReadMemStats(&memWithout)
cacheMem := int64(memWith.Alloc) - int64(memWithout.Alloc)
t.Logf("Chunks: %d, Queries: %d", numChunks, len(queries))
t.Logf("Cache memory: %d bytes (%.1f KB)", cacheMem, float64(cacheMem)/1024)
t.Logf("Per-chunk-per-query: %.0f bytes", float64(cacheMem)/float64(numChunks*len(queries)))
// 4. Verify correctness: cached vs uncached produce same results
cache2 := NewChunkCache()
for _, q := range queries {
pat := buildPatternWith(cache2, []rune(q))
for _, chunk := range chunks {
pat.Match(chunk, slab)
}
}
for _, q := range queries {
patCached := buildPatternWith(cache2, []rune(q))
patFresh := buildPatternWith(NewChunkCache(), []rune(q))
var countCached, countFresh int
for _, chunk := range chunks {
countCached += len(patCached.Match(chunk, slab))
countFresh += len(patFresh.Match(chunk, slab))
}
if countCached != countFresh {
t.Errorf("query=%q: cached=%d, fresh=%d", q, countCached, countFresh)
}
t.Logf("query=%q: matches=%d", q, countCached)
}
}
func BenchmarkWithCache(b *testing.B) {
numChunks := 100
chunks := buildChunks(numChunks)
queries := []string{"s", "se", "ser", "serv", "servi"}
b.Run("cached", func(b *testing.B) {
for range b.N {
cache := NewChunkCache()
for _, q := range queries {
pat := buildPatternWith(cache, []rune(q))
for _, chunk := range chunks {
pat.Match(chunk, slab)
}
}
}
})
b.Run("uncached", func(b *testing.B) {
for range b.N {
for _, q := range queries {
cache := NewChunkCache()
pat := buildPatternWith(cache, []rune(q))
for _, chunk := range chunks {
pat.Match(chunk, slab)
}
}
}
})
}