mirror of
https://github.com/junegunn/fzf.git
synced 2025-12-06 21:04:26 +08:00
Based on the patch by Matt Westcott (@mjwestcott). But with a more conservative approach: - Does not use linearly increasing penalties; It is agreed upon that we should prefer matching characters at the beginnings of the words, but it's not always clear that the relevance is inversely proportional to the distance from the beginning. - The approach here is more conservative in that the bonus is never large enough to override the matchlen, so it can be thought of as the first implicit tiebreak criterion. - One may argue the change breaks the contract of --tiebreak, but the judgement depends on the definition of "tie".
288 lines
6.8 KiB
Go
288 lines
6.8 KiB
Go
package algo
|
|
|
|
import (
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/junegunn/fzf/src/util"
|
|
)
|
|
|
|
/*
|
|
* String matching algorithms here do not use strings.ToLower to avoid
|
|
* performance penalty. And they assume pattern runes are given in lowercase
|
|
* letters when caseSensitive is false.
|
|
*
|
|
* In short: They try to do as little work as possible.
|
|
*/
|
|
|
|
// runeAt returns the rune at logical position index. When forward is true the
// position counts from the start of runes; otherwise it counts from the end,
// i.e. it reads runes[max-index-1]. Callers in this file pass the length of
// runes as max.
func runeAt(runes []rune, index int, max int, forward bool) rune {
	if !forward {
		return runes[max-index-1]
	}
	return runes[index]
}
|
|
|
|
// Result contains the results of running a match function.
type Result struct {
	// Start and End are the rune offsets of the match, with End pointing one
	// past the last matched rune. The match functions in this file set both
	// to -1 when there is no match.
	Start, End int32

	// Items are basically sorted by the lengths of matched substrings.
	// But we slightly adjust the score with bonus for better results.
	Bonus int32
}
|
|
|
|
// charClass is the coarse category assigned to a rune when FuzzyMatch looks
// for word boundaries and case/digit transitions while computing the bonus.
type charClass int

const (
	// charNonWord is the fallback for runes that are neither letters nor
	// numbers. It is also the zero value.
	charNonWord charClass = iota
	// charLower marks runes for which unicode.IsLower reports true.
	charLower
	// charUpper marks runes for which unicode.IsUpper reports true.
	charUpper
	// charLetter marks letters that are neither lower nor upper case.
	charLetter
	// charNumber marks runes for which unicode.IsNumber reports true.
	charNumber
)
|
|
|
|
// FuzzyMatch performs fuzzy-match
|
|
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
|
|
if len(pattern) == 0 {
|
|
return Result{0, 0, 0}
|
|
}
|
|
|
|
// 0. (FIXME) How to find the shortest match?
|
|
// a_____b__c__abc
|
|
// ^^^^^^^^^^ ^^^
|
|
// 1. forward scan (abc)
|
|
// *-----*-----*>
|
|
// a_____b___abc__
|
|
// 2. reverse scan (cba)
|
|
// a_____b___abc__
|
|
// <***
|
|
pidx := 0
|
|
sidx := -1
|
|
eidx := -1
|
|
|
|
lenRunes := len(runes)
|
|
lenPattern := len(pattern)
|
|
|
|
for index := range runes {
|
|
char := runeAt(runes, index, lenRunes, forward)
|
|
// This is considerably faster than blindly applying strings.ToLower to the
|
|
// whole string
|
|
if !caseSensitive {
|
|
// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
|
|
// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
|
|
// compiler as of now does not inline non-leaf functions.)
|
|
if char >= 'A' && char <= 'Z' {
|
|
char += 32
|
|
} else if char > unicode.MaxASCII {
|
|
char = unicode.To(unicode.LowerCase, char)
|
|
}
|
|
}
|
|
pchar := runeAt(pattern, pidx, lenPattern, forward)
|
|
if char == pchar {
|
|
if sidx < 0 {
|
|
sidx = index
|
|
}
|
|
if pidx++; pidx == lenPattern {
|
|
eidx = index + 1
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if sidx >= 0 && eidx >= 0 {
|
|
pidx--
|
|
for index := eidx - 1; index >= sidx; index-- {
|
|
char := runeAt(runes, index, lenRunes, forward)
|
|
if !caseSensitive {
|
|
if char >= 'A' && char <= 'Z' {
|
|
char += 32
|
|
} else if char > unicode.MaxASCII {
|
|
char = unicode.To(unicode.LowerCase, char)
|
|
}
|
|
}
|
|
|
|
pchar := runeAt(pattern, pidx, lenPattern, forward)
|
|
if char == pchar {
|
|
if pidx--; pidx < 0 {
|
|
sidx = index
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate the bonus. This can't be done at the same time as the
|
|
// pattern scan above because 'forward' may be false.
|
|
if !forward {
|
|
sidx, eidx = lenRunes-eidx, lenRunes-sidx
|
|
}
|
|
|
|
var bonus int32
|
|
pidx := 0
|
|
consecutive := false
|
|
prevClass := charNonWord
|
|
for index := 0; index < eidx; index++ {
|
|
char := runes[index]
|
|
var class charClass
|
|
if unicode.IsLower(char) {
|
|
class = charLower
|
|
} else if unicode.IsUpper(char) {
|
|
class = charUpper
|
|
} else if unicode.IsLetter(char) {
|
|
class = charLetter
|
|
} else if unicode.IsNumber(char) {
|
|
class = charNumber
|
|
} else {
|
|
class = charNonWord
|
|
}
|
|
|
|
var point int32
|
|
if prevClass == charNonWord && class != charNonWord {
|
|
// Word boundary
|
|
point = 2
|
|
} else if prevClass == charLower && class == charUpper ||
|
|
prevClass != charNumber && class == charNumber {
|
|
// camelCase letter123
|
|
point = 1
|
|
}
|
|
prevClass = class
|
|
|
|
if index >= sidx {
|
|
if !caseSensitive {
|
|
if char >= 'A' && char <= 'Z' {
|
|
char += 32
|
|
} else if char > unicode.MaxASCII {
|
|
char = unicode.To(unicode.LowerCase, char)
|
|
}
|
|
}
|
|
pchar := pattern[pidx]
|
|
if pchar == char {
|
|
// Boost bonus for the first character in the pattern
|
|
if pidx == 0 {
|
|
point *= 2
|
|
}
|
|
// Bonus to consecutive matching chars
|
|
if consecutive {
|
|
point++
|
|
}
|
|
bonus += point
|
|
|
|
if pidx++; pidx == lenPattern {
|
|
break
|
|
}
|
|
consecutive = true
|
|
} else {
|
|
consecutive = false
|
|
}
|
|
}
|
|
}
|
|
|
|
return Result{int32(sidx), int32(eidx), bonus}
|
|
}
|
|
return Result{-1, -1, 0}
|
|
}
|
|
|
|
// ExactMatchNaive is a basic string searching algorithm that handles case
|
|
// sensitivity. Although naive, it still performs better than the combination
|
|
// of strings.ToLower + strings.Index for typical fzf use cases where input
|
|
// strings and patterns are not very long.
|
|
//
|
|
// We might try to implement better algorithms in the future:
|
|
// http://en.wikipedia.org/wiki/String_searching_algorithm
|
|
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
|
|
// Note: ExactMatchNaive always return a zero bonus.
|
|
if len(pattern) == 0 {
|
|
return Result{0, 0, 0}
|
|
}
|
|
|
|
lenRunes := len(runes)
|
|
lenPattern := len(pattern)
|
|
|
|
if lenRunes < lenPattern {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
|
|
pidx := 0
|
|
for index := 0; index < lenRunes; index++ {
|
|
char := runeAt(runes, index, lenRunes, forward)
|
|
if !caseSensitive {
|
|
if char >= 'A' && char <= 'Z' {
|
|
char += 32
|
|
} else if char > unicode.MaxASCII {
|
|
char = unicode.To(unicode.LowerCase, char)
|
|
}
|
|
}
|
|
pchar := runeAt(pattern, pidx, lenPattern, forward)
|
|
if pchar == char {
|
|
pidx++
|
|
if pidx == lenPattern {
|
|
if forward {
|
|
return Result{int32(index - lenPattern + 1), int32(index + 1), 0}
|
|
}
|
|
return Result{int32(lenRunes - (index + 1)), int32(lenRunes - (index - lenPattern + 1)), 0}
|
|
}
|
|
} else {
|
|
index -= pidx
|
|
pidx = 0
|
|
}
|
|
}
|
|
return Result{-1, -1, 0}
|
|
}
|
|
|
|
// PrefixMatch performs prefix-match
|
|
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
|
|
// Note: PrefixMatch always return a zero bonus.
|
|
if len(runes) < len(pattern) {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
|
|
for index, r := range pattern {
|
|
char := runes[index]
|
|
if !caseSensitive {
|
|
char = unicode.ToLower(char)
|
|
}
|
|
if char != r {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
}
|
|
return Result{0, int32(len(pattern)), 0}
|
|
}
|
|
|
|
// SuffixMatch performs suffix-match
|
|
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result {
|
|
// Note: SuffixMatch always return a zero bonus.
|
|
runes := util.TrimRight(input)
|
|
trimmedLen := len(runes)
|
|
diff := trimmedLen - len(pattern)
|
|
if diff < 0 {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
|
|
for index, r := range pattern {
|
|
char := runes[index+diff]
|
|
if !caseSensitive {
|
|
char = unicode.ToLower(char)
|
|
}
|
|
if char != r {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
}
|
|
return Result{int32(trimmedLen - len(pattern)), int32(trimmedLen), 0}
|
|
}
|
|
|
|
// EqualMatch performs equal-match
|
|
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
|
|
// Note: EqualMatch always return a zero bonus.
|
|
if len(runes) != len(pattern) {
|
|
return Result{-1, -1, 0}
|
|
}
|
|
runesStr := string(runes)
|
|
if !caseSensitive {
|
|
runesStr = strings.ToLower(runesStr)
|
|
}
|
|
if runesStr == string(pattern) {
|
|
return Result{0, int32(len(pattern)), 0}
|
|
}
|
|
return Result{-1, -1, 0}
|
|
}
|