From f3ca0b136540c2e042462b31ed91e2f89b906658 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Sun, 8 Mar 2026 13:47:56 +0900 Subject: [PATCH] Fix OSC8 hyperlinks mangled when URL contains unicode Fix #4707 --- CHANGELOG.md | 23 ++++++++++++++++------- src/algo/algo.go | 2 +- src/algo/indexbyte2_amd64.go | 2 +- src/algo/indexbyte2_amd64.s | 4 ++-- src/algo/indexbyte2_arm64.go | 2 +- src/algo/indexbyte2_arm64.s | 4 ++-- src/algo/indexbyte2_other.go | 2 +- src/algo/indexbyte2_test.go | 22 +++++++++++----------- src/ansi.go | 33 +++++++++++++++++---------------- 9 files changed, 52 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b80b98d3..3736a6a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,22 @@ CHANGELOG 0.70.1 ------ - Performance improvements - - Replaced `[]Result` cache with bitmap cache (~86x less memory per cache entry) - - Raised `queryCacheMax` from `chunkSize/5` to `chunkSize/2` for broader cache coverage - - Replaced procFun map with fixed-size array for faster algo dispatch - - Replaced static chunk partitioning with a shared work queue in matcher - - Changed chunk size from 1000 to 1024 for clean 64-bit alignment -- Fixed AWK tokenizer not treating a new line character as whitespace -- Fixed `--{accept,with}-nth` removing trailing whitespaces with a non-default `--delimiter` + - The search performance now scales linearly with the number of CPU cores, as we dropped static partitioning to allow better load balancing across threads. 
+ ``` + === query: 'linux' === + [all] baseline: 17.12ms current: 14.28ms (1.20x) matches: 179966 (12.79%) + [1T] baseline: 136.49ms current: 137.25ms (0.99x) matches: 179966 (12.79%) + [2T] baseline: 75.74ms current: 68.75ms (1.10x) matches: 179966 (12.79%) + [4T] baseline: 41.16ms current: 34.97ms (1.18x) matches: 179966 (12.79%) + [8T] baseline: 32.82ms current: 17.79ms (1.84x) matches: 179966 (12.79%) + ``` + - Improved the cache structure, reducing memory footprint per entry by 86x. + - With the reduced per-entry cost, the cache now has broader coverage. +- fish: Improved command history (CTRL-R) (#4703) (@bitraid) +- Bug fixes + - Fixed AWK tokenizer not treating a new line character as whitespace + - Fixed `--{accept,with}-nth` removing trailing whitespaces with a non-default `--delimiter` + - Fixed OSC8 hyperlinks being mangled when the URL contains unicode characters (#4707) 0.70.0 ------ diff --git a/src/algo/algo.go b/src/algo/algo.go index 72d35946..77d346b1 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -323,7 +323,7 @@ func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int { byteArray := input.Bytes()[from:] // For case-insensitive search of a letter, search for both cases in one pass if !caseSensitive && b >= 'a' && b <= 'z' { - idx := indexByteTwo(byteArray, b, b-32) + idx := IndexByteTwo(byteArray, b, b-32) if idx < 0 { return -1 } diff --git a/src/algo/indexbyte2_amd64.go b/src/algo/indexbyte2_amd64.go index eca483c5..cb2ee0ff 100644 --- a/src/algo/indexbyte2_amd64.go +++ b/src/algo/indexbyte2_amd64.go @@ -15,7 +15,7 @@ func cpuHasAVX2() bool // or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise. // //go:noescape -func indexByteTwo(s []byte, b1, b2 byte) int +func IndexByteTwo(s []byte, b1, b2 byte) int // lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s, // or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise. 
diff --git a/src/algo/indexbyte2_amd64.s b/src/algo/indexbyte2_amd64.s index 56f8ea61..ec5dc6bd 100644 --- a/src/algo/indexbyte2_amd64.s +++ b/src/algo/indexbyte2_amd64.s @@ -41,11 +41,11 @@ cpuid_no: MOVB $0, ret+0(FP) RET -// func indexByteTwo(s []byte, b1, b2 byte) int +// func IndexByteTwo(s []byte, b1, b2 byte) int // // Returns the index of the first occurrence of b1 or b2 in s, or -1. // Uses AVX2 (32 bytes/iter) when available, SSE2 (16 bytes/iter) otherwise. -TEXT ·indexByteTwo(SB),NOSPLIT,$0-40 +TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40 MOVQ s_base+0(FP), SI MOVQ s_len+8(FP), BX MOVBLZX b1+24(FP), AX diff --git a/src/algo/indexbyte2_arm64.go b/src/algo/indexbyte2_arm64.go index fa028aff..9e60e9e0 100644 --- a/src/algo/indexbyte2_arm64.go +++ b/src/algo/indexbyte2_arm64.go @@ -7,7 +7,7 @@ package algo // to search for both bytes in a single pass. // //go:noescape -func indexByteTwo(s []byte, b1, b2 byte) int +func IndexByteTwo(s []byte, b1, b2 byte) int // lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s, // or -1 if neither is present. Implemented in assembly using ARM64 NEON, diff --git a/src/algo/indexbyte2_arm64.s b/src/algo/indexbyte2_arm64.s index 7442c4dd..38da3617 100644 --- a/src/algo/indexbyte2_arm64.s +++ b/src/algo/indexbyte2_arm64.s @@ -1,11 +1,11 @@ #include "textflag.h" -// func indexByteTwo(s []byte, b1, b2 byte) int +// func IndexByteTwo(s []byte, b1, b2 byte) int // // Returns the index of the first occurrence of b1 or b2 in s, or -1. // Uses ARM64 NEON to search for both bytes in a single pass over the data. // Adapted from Go's internal/bytealg/indexbyte_arm64.s (single-byte version). 
-TEXT ·indexByteTwo(SB),NOSPLIT,$0-40 +TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40 MOVD s_base+0(FP), R0 MOVD s_len+8(FP), R2 MOVBU b1+24(FP), R1 diff --git a/src/algo/indexbyte2_other.go b/src/algo/indexbyte2_other.go index 44041ff0..d1a01c21 100644 --- a/src/algo/indexbyte2_other.go +++ b/src/algo/indexbyte2_other.go @@ -6,7 +6,7 @@ import "bytes" // indexByteTwo returns the index of the first occurrence of b1 or b2 in s, // or -1 if neither is present. -func indexByteTwo(s []byte, b1, b2 byte) int { +func IndexByteTwo(s []byte, b1, b2 byte) int { i1 := bytes.IndexByte(s, b1) if i1 == 0 { return 0 diff --git a/src/algo/indexbyte2_test.go b/src/algo/indexbyte2_test.go index 9f4dedac..2c99e43e 100644 --- a/src/algo/indexbyte2_test.go +++ b/src/algo/indexbyte2_test.go @@ -28,9 +28,9 @@ func TestIndexByteTwo(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := indexByteTwo([]byte(tt.s), tt.b1, tt.b2) + got := IndexByteTwo([]byte(tt.s), tt.b1, tt.b2) if got != tt.want { - t.Errorf("indexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want) + t.Errorf("IndexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want) } }) } @@ -46,27 +46,27 @@ func TestIndexByteTwo(t *testing.T) { for pos := 0; pos < n; pos++ { for _, b := range []byte{'A', 'B'} { data[pos] = b - got := indexByteTwo(data, 'A', 'B') + got := IndexByteTwo(data, 'A', 'B') want := loopIndexByteTwo(data, 'A', 'B') if got != want { - t.Fatalf("indexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want) + t.Fatalf("IndexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want) } data[pos] = byte('c' + (pos % 20)) } } // Test with no match - got := indexByteTwo(data, 'A', 'B') + got := IndexByteTwo(data, 'A', 'B') if got != -1 { - t.Fatalf("indexByteTwo(len=%d, no match) = %d, want -1", n, got) + t.Fatalf("IndexByteTwo(len=%d, no match) = %d, want -1", n, got) } // Test with both bytes present if 
n >= 2 { data[n/3] = 'A' data[n*2/3] = 'B' - got := indexByteTwo(data, 'A', 'B') + got := IndexByteTwo(data, 'A', 'B') want := loopIndexByteTwo(data, 'A', 'B') if got != want { - t.Fatalf("indexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want) + t.Fatalf("IndexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want) } data[n/3] = byte('c' + ((n / 3) % 20)) data[n*2/3] = byte('c' + ((n * 2 / 3) % 20)) @@ -147,10 +147,10 @@ func FuzzIndexByteTwo(f *testing.F) { f.Add([]byte(""), byte('a'), byte('b')) f.Add([]byte("aaa"), byte('a'), byte('a')) f.Fuzz(func(t *testing.T, data []byte, b1, b2 byte) { - got := indexByteTwo(data, b1, b2) + got := IndexByteTwo(data, b1, b2) want := loopIndexByteTwo(data, b1, b2) if got != want { - t.Errorf("indexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want) + t.Errorf("IndexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want) } }) } @@ -214,7 +214,7 @@ func benchIndexByteTwo(b *testing.B, size int, pos int) { fn func([]byte, byte, byte) int } impls := []impl{ - {"asm", indexByteTwo}, + {"asm", IndexByteTwo}, {"2xIndexByte", refIndexByteTwo}, {"loop", loopIndexByteTwo}, } diff --git a/src/ansi.go b/src/ansi.go index 9f398103..eb51eb61 100644 --- a/src/ansi.go +++ b/src/ansi.go @@ -6,6 +6,7 @@ import ( "strings" "unicode/utf8" + "github.com/junegunn/fzf/src/algo" "github.com/junegunn/fzf/src/tui" ) @@ -123,31 +124,31 @@ func toAnsiString(color tui.Color, offset int) string { return ret + ";" } -func isPrint(c uint8) bool { - return '\x20' <= c && c <= '\x7e' -} - func matchOperatingSystemCommand(s string, start int) int { // `\x1b][0-9][;:][[:print:]]+(?:\x1b\\\\|\x07)` // ^ match starting here after the first printable character // i := start // prefix matched in nextAnsiEscapeSequence() - for ; i < len(s) && isPrint(s[i]); i++ { + + // Find the terminator: BEL (\x07) or ESC (\x1b) for ST (\x1b\\) + idx := algo.IndexByteTwo(stringBytes(s[i:]), '\x07', 
'\x1b') + if idx < 0 { + return -1 } - if i < len(s) { - if s[i] == '\x07' { - return i + 1 - } - // `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b` - // ------ - if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' { - return i + 2 - } + i += idx + + if s[i] == '\x07' { + return i + 1 + } + // `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b` + // ------ + if i < len(s)-1 && s[i+1] == '\\' { + return i + 2 } // `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b` // ------------ - if i < len(s) && s[:i+1] == "\x1b]8;;\x1b" { + if s[:i+1] == "\x1b]8;;\x1b" { return i + 1 } @@ -233,7 +234,7 @@ Loop: // \x1b][0-9]+[;:][[:print:]]+(?:\x1b\\\\|\x07) // --------------- - if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && isPrint(s[i+j+1]) { + if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && s[i+j+1] >= '\x20' { if k := matchOperatingSystemCommand(s[i:], j+2); k != -1 { return i, i + k }