Fix OSC8 hyperlinks mangled when URL contains unicode

Fix #4707
2026-04-24 00:22:47 +08:00 · 2026-03-08 13:47:56 +09:00
parent a8e1ef0989
commit f3ca0b1365
9 changed files with 52 additions and 42 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,13 +4,22 @@ CHANGELOG
 0.70.1
 ------
 - Performance improvements
-    - Replaced `[]Result` cache with bitmap cache (~86x less memory per cache entry)
-    - Raised `queryCacheMax` from `chunkSize/5` to `chunkSize/2` for broader cache coverage
-    - Replaced procFun map with fixed-size array for faster algo dispatch
-    - Replaced static chunk partitioning with a shared work queue in matcher
-    - Changed chunk size from 1000 to 1024 for clean 64-bit alignment
- Fixed AWK tokenizer not treating a new line character as whitespace
- Fixed `--{accept,with}-nth` removing trailing whitespaces with a non-default `--delimiter`
+    - The search performance now scales linearly with the number of CPU cores, as we dropped static partitioning to allow better load balancing across threads.
+      ```
+      === query: 'linux' ===
+        [all]   baseline:    17.12ms  current:    14.28ms  (1.20x)  matches: 179966 (12.79%)
+        [1T]    baseline:   136.49ms  current:   137.25ms  (0.99x)  matches: 179966 (12.79%)
+        [2T]    baseline:    75.74ms  current:    68.75ms  (1.10x)  matches: 179966 (12.79%)
+        [4T]    baseline:    41.16ms  current:    34.97ms  (1.18x)  matches: 179966 (12.79%)
+        [8T]    baseline:    32.82ms  current:    17.79ms  (1.84x)  matches: 179966 (12.79%)
+      ```
+    - Improved the cache structure, reducing memory footprint per entry by 86x.
+        - With the reduced per-entry cost, the cache now has broader coverage.
+- fish: Improved command history (CTRL-R) (#4703) (@bitraid)
+- Bug fixes
+    - Fixed AWK tokenizer not treating a newline character as whitespace
+    - Fixed `--{accept,with}-nth` removing trailing whitespace with a non-default `--delimiter`
+    - Fixed OSC8 hyperlinks being mangled when the URL contains unicode characters (#4707)

 0.70.0
 ------
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@@ -323,7 +323,7 @@ func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
 	byteArray := input.Bytes()[from:]
 	// For case-insensitive search of a letter, search for both cases in one pass
 	if !caseSensitive && b >= 'a' && b <= 'z' {
-		idx := indexByteTwo(byteArray, b, b-32)
+		idx := IndexByteTwo(byteArray, b, b-32)
 		if idx < 0 {
 			return -1
 		}
--- a/src/algo/indexbyte2_amd64.go
+++ b/src/algo/indexbyte2_amd64.go
@@ -15,7 +15,7 @@ func cpuHasAVX2() bool
 // or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise.
 //
 //go:noescape
-func indexByteTwo(s []byte, b1, b2 byte) int
+func IndexByteTwo(s []byte, b1, b2 byte) int

 // lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s,
 // or -1 if neither is present. Uses AVX2 when available, SSE2 otherwise.
--- a/src/algo/indexbyte2_amd64.s
+++ b/src/algo/indexbyte2_amd64.s
@@ -41,11 +41,11 @@ cpuid_no:
 	MOVB	$0, ret+0(FP)
 	RET

-// func indexByteTwo(s []byte, b1, b2 byte) int
+// func IndexByteTwo(s []byte, b1, b2 byte) int
 //
 // Returns the index of the first occurrence of b1 or b2 in s, or -1.
 // Uses AVX2 (32 bytes/iter) when available, SSE2 (16 bytes/iter) otherwise.
-TEXT ·indexByteTwo(SB),NOSPLIT,$0-40
+TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40
 	MOVQ	s_base+0(FP), SI
 	MOVQ	s_len+8(FP), BX
 	MOVBLZX	b1+24(FP), AX
--- a/src/algo/indexbyte2_arm64.go
+++ b/src/algo/indexbyte2_arm64.go
@@ -7,7 +7,7 @@ package algo
 // to search for both bytes in a single pass.
 //
 //go:noescape
-func indexByteTwo(s []byte, b1, b2 byte) int
+func IndexByteTwo(s []byte, b1, b2 byte) int

 // lastIndexByteTwo returns the index of the last occurrence of b1 or b2 in s,
 // or -1 if neither is present. Implemented in assembly using ARM64 NEON,
--- a/src/algo/indexbyte2_arm64.s
+++ b/src/algo/indexbyte2_arm64.s
@@ -1,11 +1,11 @@
 #include "textflag.h"

-// func indexByteTwo(s []byte, b1, b2 byte) int
+// func IndexByteTwo(s []byte, b1, b2 byte) int
 //
 // Returns the index of the first occurrence of b1 or b2 in s, or -1.
 // Uses ARM64 NEON to search for both bytes in a single pass over the data.
 // Adapted from Go's internal/bytealg/indexbyte_arm64.s (single-byte version).
-TEXT ·indexByteTwo(SB),NOSPLIT,$0-40
+TEXT ·IndexByteTwo(SB),NOSPLIT,$0-40
 	MOVD	s_base+0(FP), R0
 	MOVD	s_len+8(FP), R2
 	MOVBU	b1+24(FP), R1
--- a/src/algo/indexbyte2_other.go
+++ b/src/algo/indexbyte2_other.go
@@ -6,7 +6,7 @@ import "bytes"

 // indexByteTwo returns the index of the first occurrence of b1 or b2 in s,
 // or -1 if neither is present.
-func indexByteTwo(s []byte, b1, b2 byte) int {
+func IndexByteTwo(s []byte, b1, b2 byte) int {
 	i1 := bytes.IndexByte(s, b1)
 	if i1 == 0 {
 		return 0
--- a/src/algo/indexbyte2_test.go
+++ b/src/algo/indexbyte2_test.go
@@ -28,9 +28,9 @@ func TestIndexByteTwo(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := indexByteTwo([]byte(tt.s), tt.b1, tt.b2)
+			got := IndexByteTwo([]byte(tt.s), tt.b1, tt.b2)
 			if got != tt.want {
-				t.Errorf("indexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want)
+				t.Errorf("IndexByteTwo(%q, %c, %c) = %d, want %d", tt.s[:min(len(tt.s), 40)], tt.b1, tt.b2, got, tt.want)
 			}
 		})
 	}
@@ -46,27 +46,27 @@ func TestIndexByteTwo(t *testing.T) {
 		for pos := 0; pos < n; pos++ {
 			for _, b := range []byte{'A', 'B'} {
 				data[pos] = b
-				got := indexByteTwo(data, 'A', 'B')
+				got := IndexByteTwo(data, 'A', 'B')
 				want := loopIndexByteTwo(data, 'A', 'B')
 				if got != want {
-					t.Fatalf("indexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want)
+					t.Fatalf("IndexByteTwo(len=%d, match=%c@%d) = %d, want %d", n, b, pos, got, want)
 				}
 				data[pos] = byte('c' + (pos % 20))
 			}
 		}
 		// Test with no match
-		got := indexByteTwo(data, 'A', 'B')
+		got := IndexByteTwo(data, 'A', 'B')
 		if got != -1 {
-			t.Fatalf("indexByteTwo(len=%d, no match) = %d, want -1", n, got)
+			t.Fatalf("IndexByteTwo(len=%d, no match) = %d, want -1", n, got)
 		}
 		// Test with both bytes present
 		if n >= 2 {
 			data[n/3] = 'A'
 			data[n*2/3] = 'B'
-			got := indexByteTwo(data, 'A', 'B')
+			got := IndexByteTwo(data, 'A', 'B')
 			want := loopIndexByteTwo(data, 'A', 'B')
 			if got != want {
-				t.Fatalf("indexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want)
+				t.Fatalf("IndexByteTwo(len=%d, both@%d,%d) = %d, want %d", n, n/3, n*2/3, got, want)
 			}
 			data[n/3] = byte('c' + ((n / 3) % 20))
 			data[n*2/3] = byte('c' + ((n * 2 / 3) % 20))
@@ -147,10 +147,10 @@ func FuzzIndexByteTwo(f *testing.F) {
 	f.Add([]byte(""), byte('a'), byte('b'))
 	f.Add([]byte("aaa"), byte('a'), byte('a'))
 	f.Fuzz(func(t *testing.T, data []byte, b1, b2 byte) {
-		got := indexByteTwo(data, b1, b2)
+		got := IndexByteTwo(data, b1, b2)
 		want := loopIndexByteTwo(data, b1, b2)
 		if got != want {
-			t.Errorf("indexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want)
+			t.Errorf("IndexByteTwo(len=%d, b1=%d, b2=%d) = %d, want %d", len(data), b1, b2, got, want)
 		}
 	})
 }
@@ -214,7 +214,7 @@ func benchIndexByteTwo(b *testing.B, size int, pos int) {
 		fn   func([]byte, byte, byte) int
 	}
 	impls := []impl{
-		{"asm", indexByteTwo},
+		{"asm", IndexByteTwo},
 		{"2xIndexByte", refIndexByteTwo},
 		{"loop", loopIndexByteTwo},
 	}
--- a/src/ansi.go
+++ b/src/ansi.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"unicode/utf8"

+	"github.com/junegunn/fzf/src/algo"
 	"github.com/junegunn/fzf/src/tui"
 )

@@ -123,31 +124,31 @@ func toAnsiString(color tui.Color, offset int) string {
 	return ret + ";"
 }

-func isPrint(c uint8) bool {
-	return '\x20' <= c && c <= '\x7e'
-}
-
 func matchOperatingSystemCommand(s string, start int) int {
 	// `\x1b][0-9][;:][[:print:]]+(?:\x1b\\\\|\x07)`
 	//                 ^ match starting here after the first printable character
 	//
 	i := start // prefix matched in nextAnsiEscapeSequence()
-	for ; i < len(s) && isPrint(s[i]); i++ {
+
+	// Find the terminator: BEL (\x07) or ESC (\x1b) for ST (\x1b\\)
+	idx := algo.IndexByteTwo(stringBytes(s[i:]), '\x07', '\x1b')
+	if idx < 0 {
+		return -1
 	}
-	if i < len(s) {
-		if s[i] == '\x07' {
-			return i + 1
-		}
-		// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
-		//                   ------
-		if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' {
-			return i + 2
-		}
+	i += idx
+
+	if s[i] == '\x07' {
+		return i + 1
+	}
+	// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
+	//                   ------
+	if i < len(s)-1 && s[i+1] == '\\' {
+		return i + 2
 	}

 	// `\x1b]8;PARAMS;URI\x1b\\TITLE\x1b]8;;\x1b`
 	//                              ------------
-	if i < len(s) && s[:i+1] == "\x1b]8;;\x1b" {
+	if s[:i+1] == "\x1b]8;;\x1b" {
 		return i + 1
 	}

@@ -233,7 +234,7 @@ Loop:

 				// \x1b][0-9]+[;:][[:print:]]+(?:\x1b\\\\|\x07)
 				//            ---------------
-				if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && isPrint(s[i+j+1]) {
+				if j > 2 && i+j+1 < len(s) && (s[i+j] == ';' || s[i+j] == ':') && s[i+j+1] >= '\x20' {
 					if k := matchOperatingSystemCommand(s[i:], j+2); k != -1 {
 						return i, i + k
 					}