Fix OSC8 hyperlinks mangled when URL contains unicode
Some checks are pending
CodeQL / Analyze (go) (push) Waiting to run
build / build (push) Waiting to run
Test fzf on macOS / build (push) Waiting to run

Fix #4707
This commit is contained in:
Junegunn Choi
2026-03-08 13:47:56 +09:00
parent a8e1ef0989
commit f3ca0b1365
9 changed files with 52 additions and 42 deletions

View File

@@ -4,13 +4,22 @@ CHANGELOG
0.70.1
------
- Performance improvements
- Replaced `[]Result` cache with bitmap cache (~86x less memory per cache entry)
- Raised `queryCacheMax` from `chunkSize/5` to `chunkSize/2` for broader cache coverage
- Replaced procFun map with fixed-size array for faster algo dispatch
- Replaced static chunk partitioning with a shared work queue in matcher
- Changed chunk size from 1000 to 1024 for clean 64-bit alignment
- Fixed AWK tokenizer not treating a new line character as whitespace
- Fixed `--{accept,with}-nth` removing trailing whitespaces with a non-default `--delimiter`
- The search performance now scales linearly with the number of CPU cores, as we dropped static partitioning to allow better load balancing across threads.
```
=== query: 'linux' ===
[all] baseline: 17.12ms current: 14.28ms (1.20x) matches: 179966 (12.79%)
[1T] baseline: 136.49ms current: 137.25ms (0.99x) matches: 179966 (12.79%)
[2T] baseline: 75.74ms current: 68.75ms (1.10x) matches: 179966 (12.79%)
[4T] baseline: 41.16ms current: 34.97ms (1.18x) matches: 179966 (12.79%)
[8T] baseline: 32.82ms current: 17.79ms (1.84x) matches: 179966 (12.79%)
```
- Improved the cache structure, reducing memory footprint per entry by 86x.
- With the reduced per-entry cost, the cache now has broader coverage.
- fish: Improved command history (CTRL-R) (#44703) (@bitraid)
- Bug fixes
- Fixed AWK tokenizer not treating a new line character as whitespace
- Fixed `--{accept,with}-nth` removing trailing whitespaces with a non-default `--delimiter`
- Fixed OSC8 hyperlinks being mangled when the URL contains unicode characters (#4707)
0.70.0
------

View File

@@ -323,7 +323,7 @@ func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
byteArray := input.Bytes()[from:]
// For case-insensitive search of a letter, search for both cases in one pass
if !caseSensitive && b >= 'a' && b <= 'z' {
idx := indexByteTwo(byteArray, b, b-32)
idx := IndexByteTwo(byteArray, b, b-32)
if idx < 0 {
return -1
}

View File

@@ -15,7 +15,7 @@ func cpuHasAVX2() bool
// or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise.
//
//go:noescape
func indexByteTwo(s []byte, b1, b2 byte) int
func IndexByteTwo(s []byte, b1, b2 byte) int
// lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s,
// or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise.

View File

@@ -41,11 +41,11 @@ cpuid_no:
MOVB $0, ret+0(FP)
RET
// func indexByteTwo(s []byte, b1, b2 byte) int
// func IndexByteTwo(s []byte, b1, b2 byte) int
//
// Returns the index of the first occurrence of b1 or b2 in s, or -1.
// Uses AVX2 (32 bytes/iter) when available, SSE2 (16 bytes/iter) otherwise.
TEXT ·indexByteTwo(SB),NOSPLIT,$0-40
TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40
MOVQ s_base+0(FP), SI
MOVQ s_len+8(FP), BX
MOVBLZX b1+24(FP), AX

View File

@@ -7,7 +7,7 @@ package algo
// to search for both bytes in a single pass.
//
//go:noescape
func indexByteTwo(s []byte, b1, b2 byte) int
func IndexByteTwo(s []byte, b1, b2 byte) int
// lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s,
// or -1 if neither is present. Implemented in assembly using ARM64 NEON,

View File

@@ -1,11 +1,11 @@
#include "textflag.h"
// func indexByteTwo(s []byte, b1, b2 byte) int
// func IndexByteTwo(s []byte, b1, b2 byte) int
//
// Returns the index of the first occurrence of b1 or b2 in s, or -1.
// Uses ARM64 NEON to search for both bytes in a single pass over the data.
// Adapted from Go's internal/bytealg/indexbyte_arm64.s (single-byte version).
TEXT ·indexByteTwo(SB),NOSPLIT,$0-40
TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40
MOVD s_base+0(FP), R0
MOVD s_len+8(FP), R2
MOVBU b1+24(FP), R1

View File

@@ -6,7 +6,7 @@ import "bytes"
// indexByteTwo returns the index of the first occurrence of b1 or b2 in s,
// or -1 if neither is present.
func indexByteTwo(s []byte, b1, b2 byte) int {
func IndexByteTwo(s []byte, b1, b2 byte) int {
i1 := bytes.IndexByte(s, b1)
if i1 == 0 {
return 0

View File

@@ -28,9 +28,9 @@ func TestIndexByteTwo(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := indexByteTwo([]byte(tt.s), tt.b1, tt.b2)
got := IndexByteTwo([]byte(tt.s), tt.b1, tt.b2)
if got != tt.want {
t.Errorf("indexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want)
t.Errorf("IndexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want)
}
})
}
@@ -46,27 +46,27 @@ func TestIndexByteTwo(t *testing.T) {
for pos := 0; pos < n; pos++ {
for _, b := range []byte{'A', 'B'} {
data[pos] = b
got := indexByteTwo(data, 'A', 'B')
got := IndexByteTwo(data, 'A', 'B')
want := loopIndexByteTwo(data, 'A', 'B')
if got != want {
t.Fatalf("indexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want)
t.Fatalf("IndexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want)
}
data[pos] = byte('c' + (pos % 20))
}
}
// Test with no match
got := indexByteTwo(data, 'A', 'B')
got := IndexByteTwo(data, 'A', 'B')
if got != -1 {
t.Fatalf("indexByteTwo(len=%d, no match) = %d, want -1", n, got)
t.Fatalf("IndexByteTwo(len=%d, no match) = %d, want -1", n, got)
}
// Test with both bytes present
if n >= 2 {
data[n/3] = 'A'
data[n*2/3] = 'B'
got := indexByteTwo(data, 'A', 'B')
got := IndexByteTwo(data, 'A', 'B')
want := loopIndexByteTwo(data, 'A', 'B')
if got != want {
t.Fatalf("indexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want)
t.Fatalf("IndexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want)
}
data[n/3] = byte('c' + ((n / 3) % 20))
data[n*2/3] = byte('c' + ((n * 2 / 3) % 20))
@@ -147,10 +147,10 @@ func FuzzIndexByteTwo(f *testing.F) {
f.Add([]byte(""), byte('a'), byte('b'))
f.Add([]byte("aaa"), byte('a'), byte('a'))
f.Fuzz(func(t *testing.T, data []byte, b1, b2 byte) {
got := indexByteTwo(data, b1, b2)
got := IndexByteTwo(data, b1, b2)
want := loopIndexByteTwo(data, b1, b2)
if got != want {
t.Errorf("indexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want)
t.Errorf("IndexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want)
}
})
}
@@ -214,7 +214,7 @@ func benchIndexByteTwo(b *testing.B, size int, pos int) {
fn func([]byte, byte, byte) int
}
impls := []impl{
{"asm", indexByteTwo},
{"asm", IndexByteTwo},
{"2xIndexByte", refIndexByteTwo},
{"loop", loopIndexByteTwo},
}

View File

@@ -6,6 +6,7 @@ import (
"strings"
"unicode/utf8"
"github.com/junegunn/fzf/src/algo"
"github.com/junegunn/fzf/src/tui"
)
@@ -123,31 +124,31 @@ func toAnsiString(color tui.Color, offset int) string {
return ret + ";"
}
func isPrint(c uint8) bool {
return '\x20' <= c && c <= '\x7e'
}
func matchOperatingSystemCommand(s string, start int) int {
// `\x1b][0-9][;:][[:print:]]+(?:\x1b\\\\|\x07)`
// ^ match starting here after the first printable character
//
i := start // prefix matched in nextAnsiEscapeSequence()
for ; i < len(s) && isPrint(s[i]); i++ {
// Find the terminator: BEL (\x07) or ESC (\x1b) for ST (\x1b\\)
idx := algo.IndexByteTwo(stringBytes(s[i:]), '\x07', '\x1b')
if idx < 0 {
return -1
}
if i < len(s) {
if s[i] == '\x07' {
return i + 1
}
// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
// ------
if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' {
return i + 2
}
i += idx
if s[i] == '\x07' {
return i + 1
}
// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
// ------
if i < len(s)-1 && s[i+1] == '\\' {
return i + 2
}
// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
// ------------
if i < len(s) && s[:i+1] == "\x1b]8;;\x1b" {
if s[:i+1] == "\x1b]8;;\x1b" {
return i + 1
}
@@ -233,7 +234,7 @@ Loop:
// \x1b][0-9]+[;:][[:print:]]+(?:\x1b\\\\|\x07)
// ---------------
if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && isPrint(s[i+j+1]) {
if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && s[i+j+1] >= '\x20' {
if k := matchOperatingSystemCommand(s[i:], j+2); k != -1 {
return i, i + k
}